[svn:parrot] r46822 - in trunk: . compilers/imcc src/packfile
NotFound at svn.parrot.org
NotFound at svn.parrot.org
Thu May 20 19:45:08 UTC 2010
Author: NotFound
Date: Thu May 20 19:45:07 2010
New Revision: 46822
URL: https://trac.parrot.org/parrot/changeset/46822
Log:
store string encoding in PBC and change the semantics of wide char encodings in PIR string literals
Modified:
trunk/PBC_COMPAT
trunk/compilers/imcc/pbc.c
trunk/src/packfile/pf_items.c
Modified: trunk/PBC_COMPAT
==============================================================================
--- trunk/PBC_COMPAT Thu May 20 18:12:32 2010 (r46821)
+++ trunk/PBC_COMPAT Thu May 20 19:45:07 2010 (r46822)
@@ -27,6 +27,7 @@
# please insert tab separated entries at the top of the list
+6.17 2010.05.20 NotFound store encoding of string constants
6.16 2010.05.18 plobsing move freeze/thaw adjacent to visit
6.15 2010.05.06 bacek add StringBuilder PMC
6.14 2010.05.03 coke remove popaction, pushmark, pushaction ops.
Modified: trunk/compilers/imcc/pbc.c
==============================================================================
--- trunk/compilers/imcc/pbc.c Thu May 20 18:12:32 2010 (r46821)
+++ trunk/compilers/imcc/pbc.c Thu May 20 19:45:07 2010 (r46822)
@@ -888,7 +888,7 @@
IMCC_string_from_reg(PARROT_INTERP, ARGIN(const SymReg *r))
{
ASSERT_ARGS(IMCC_string_from_reg)
- const char *buf = r->name;
+ char *buf = r->name;
if (r->type & VT_ENCODED) {
/*
@@ -896,19 +896,63 @@
* get first part as charset, rest as string
*/
STRING *s;
+ const CHARSET *s_charset;
+ const ENCODING *s_encoding = NULL;
+ const ENCODING *src_encoding;
const char *charset;
- char * const p = strchr(r->name, '"');
+ #define MAX_NAME 31
+ char charset_name[MAX_NAME + 1];
+ char encoding_name[MAX_NAME + 1];
+ char * p = strchr(r->name, '"');
+ char * p2 = strchr(r->name, ':');
PARROT_ASSERT(p && p[-1] == ':');
-
- p[-1] = 0;
- charset = r->name;
+ if (p2 < p -1) {
+ strncpy(encoding_name, buf, p2 - buf);
+ encoding_name[p2-buf] = '\0';
+ strncpy(charset_name, p2 +1, p - p2 - 2);
+ charset_name[p- p2 - 2] = '\0';
+ /*fprintf(stderr, "%s:%s\n", charset_name, encoding_name);*/
+ s_charset = Parrot_find_charset(interp, charset_name);
+ s_encoding = Parrot_find_encoding(interp, encoding_name);
+ }
+ else {
+ strncpy(charset_name, buf, p - buf - 1);
+ charset_name[p - buf - 1] = '\0';
+ /*fprintf(stderr, "%s\n", charset_name);*/
+ s_charset = Parrot_find_charset(interp, charset_name);
+ }
+ if (strcmp(charset_name, "unicode") == 0)
+ src_encoding = Parrot_utf8_encoding_ptr;
+ else
+ src_encoding = Parrot_fixed_8_encoding_ptr;
+ if (s_encoding == NULL)
+ s_encoding = src_encoding;
/* past delim */
buf = p + 1;
- s = Parrot_str_unescape(interp, buf, '"', charset);
-
- /* restore colon, as we may reuse this string */
- p[-1] = ':';
+ if (strcmp(charset_name, "unicode") == 0 && strcmp(encoding_name, "utf8") == 0) {
+ /* Special case needed for backward compatibility with utf8 literals
+ * using \xHH\xHH byte sequences */
+ s = Parrot_str_unescape(interp, buf, '"', "utf8:unicode");
+ }
+ else {
+ p = buf;
+ p2 = strchr(buf, '"');
+ while (p2 != NULL) {
+ p = p2;
+ p2 = strchr(p + 1, '"');
+ }
+ {
+ STRING * aux = Parrot_str_new_init(interp, buf, p - buf,
+ src_encoding, s_charset, 0);
+ s = Parrot_str_unescape_string(interp, aux,
+ s_charset, s_encoding, PObj_constant_FLAG);
+ if (!CHARSET_VALIDATE(interp, s))
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_INVALID_STRING_REPRESENTATION,
+ "Malformed string");
+ }
+ }
return s;
}
else if (*buf == '"') {
Modified: trunk/src/packfile/pf_items.c
==============================================================================
--- trunk/src/packfile/pf_items.c Thu May 20 18:12:32 2010 (r46821)
+++ trunk/src/packfile/pf_items.c Thu May 20 19:45:07 2010 (r46822)
@@ -1216,7 +1216,10 @@
ASSERT_ARGS(PF_fetch_string)
STRING *s;
UINTVAL flags;
+ UINTVAL encoding_nr;
UINTVAL charset_nr;
+ ENCODING *encoding;
+ CHARSET *charset;
size_t size;
const int wordsize = pf ? pf->header->wordsize : sizeof (opcode_t);
opcode_t flag_charset_word = PF_fetch_opcode(pf, cursor);
@@ -1224,20 +1227,31 @@
if (flag_charset_word == -1)
return STRINGNULL;
- /* decode flags and charset */
+ /* decode flags, charset and encoding */
flags = (flag_charset_word & 0x1 ? PObj_constant_FLAG : 0) |
(flag_charset_word & 0x2 ? PObj_private7_FLAG : 0) ;
- charset_nr = flag_charset_word >> 8;
+ encoding_nr = (flag_charset_word >> 16);
+ charset_nr = (flag_charset_word >> 8) & 0xFF;
size = (size_t)PF_fetch_opcode(pf, cursor);
TRACE_PRINTF(("PF_fetch_string(): flags=0x%04x, ", flags));
+ TRACE_PRINTF(("encoding_nr=%ld, ", encoding_nr));
TRACE_PRINTF(("charset_nr=%ld, ", charset_nr));
TRACE_PRINTF(("size=%ld.\n", size));
- s = string_make_from_charset(interp, (const char *)*cursor,
- size, charset_nr, flags);
+ encoding = Parrot_get_encoding(interp, encoding_nr);
+ charset = Parrot_get_charset(interp, charset_nr);
+ if (!encoding)
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
+ "Invalid encoding number '%d' specified", encoding_nr);
+ if (!charset)
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
+ "Invalid charset number '%d' specified", charset_nr);
+
+ s = Parrot_str_new_init(interp, (const char *)*cursor, size,
+ encoding, charset, flags);
/* print only printable characters */
TRACE_PRINTF_VAL(("PF_fetch_string(): string is '%s' at 0x%x\n",
@@ -1298,8 +1312,9 @@
* see also PF_fetch_string
*/
- /* encode charset_nr and flags into the same word for a 33% savings on constant overhead */
- *cursor++ = (Parrot_charset_number_of_str(NULL, s) << 8) |
+ /* encode charset_nr, encoding_nr and flags into the same word */
+ *cursor++ = (Parrot_encoding_number_of_str(NULL, s) << 16) |
+ (Parrot_charset_number_of_str(NULL, s) << 8) |
(PObj_get_FLAGS(s) & PObj_constant_FLAG ? 0x1 : 0x0) |
(PObj_get_FLAGS(s) & PObj_private7_FLAG ? 0x2 : 0x0) ;
*cursor++ = s->bufused;
More information about the parrot-commits
mailing list