[svn:parrot] r48803 - in trunk: src/pmc t/pmc

nwellnhof at svn.parrot.org nwellnhof at svn.parrot.org
Sun Sep 5 19:11:42 UTC 2010


Author: nwellnhof
Date: Sun Sep  5 19:11:41 2010
New Revision: 48803
URL: https://trac.parrot.org/parrot/changeset/48803

Log:
[pmc] StringBuilder should update compatible encodings

Also avoid unnecessary encoding conversions

Modified:
   trunk/src/pmc/stringbuilder.pmc
   trunk/t/pmc/stringbuilder.t

Modified: trunk/src/pmc/stringbuilder.pmc
==============================================================================
--- trunk/src/pmc/stringbuilder.pmc	Sun Sep  5 16:36:05 2010	(r48802)
+++ trunk/src/pmc/stringbuilder.pmc	Sun Sep  5 19:11:41 2010	(r48803)
@@ -175,21 +175,27 @@
         /* If strings are incompatible - convert them */
         /* TODO Ask chromatic why in Parrot_str_join he ignored charset */
         cs = Parrot_str_rep_compatible(interp, buffer, s, &enc);
-        if (!cs) {
-
-            /* Create new temporary string */
-            STRING * const new_buffer = Parrot_unicode_charset_ptr->to_charset(interp, buffer);
-            mem_gc_free(INTERP, buffer->_bufstart);
-            STRUCT_COPY(buffer, new_buffer);
-            buffer->flags     = PObj_is_string_FLAG | PObj_external_FLAG;
-
-            buffer->_bufstart = buffer->strstart = mem_gc_allocate_n_typed(INTERP,
-                                                    new_buffer->_buflen, char);
-            mem_sys_memcopy(buffer->_bufstart, new_buffer->_bufstart, new_buffer->_buflen);
+        if (cs) {
+            buffer->charset = cs;
+            buffer->encoding = enc;
+        }
+        else {
+            if (buffer->encoding != Parrot_utf8_encoding_ptr) {
+                /* Create new temporary string */
+                STRING * const new_buffer = Parrot_utf8_encoding_ptr->to_encoding(interp, buffer);
+                mem_gc_free(INTERP, buffer->_bufstart);
+                STRUCT_COPY(buffer, new_buffer);
+                buffer->flags     = PObj_is_string_FLAG | PObj_external_FLAG;
+
+                buffer->_bufstart = buffer->strstart = mem_gc_allocate_n_typed(INTERP,
+                                                        new_buffer->_buflen, char);
+                mem_sys_memcopy(buffer->_bufstart, new_buffer->_bufstart, new_buffer->_buflen);
 
-            SET_ATTR_buffer(INTERP, SELF, buffer);
+                SET_ATTR_buffer(INTERP, SELF, buffer);
+            }
 
-            s      = Parrot_unicode_charset_ptr->to_charset(interp, s);
+            if (s->encoding != Parrot_utf8_encoding_ptr)
+                s = Parrot_utf8_encoding_ptr->to_encoding(interp, s);
         }
 
         /* Calculate (possibly new) total size */

Modified: trunk/t/pmc/stringbuilder.t
==============================================================================
--- trunk/t/pmc/stringbuilder.t	Sun Sep  5 16:36:05 2010	(r48802)
+++ trunk/t/pmc/stringbuilder.t	Sun Sep  5 19:11:41 2010	(r48803)
@@ -37,6 +37,7 @@
     emit_with_pos_and_named_args()
 
     test_unicode_conversion_tt1665()
+    test_encodings()
 
     done_testing()
 
@@ -333,6 +334,19 @@
     is( $S0, $S1, 'init_pmc() should join all passed strings' )
 .end
 
+.sub 'test_encodings'
+    .local pmc sb
+    sb  = new ["StringBuilder"]
+
+    push sb, "foo"
+    push sb, iso-8859-1:"\x{E4}\x{F6}\x{FC}"
+    push sb, utf8:unicode:"БДЖ"
+    push sb, "bar"
+
+    $S0 = sb
+    is( $S0, utf8:unicode:"fooäöüБДЖbar", 'push strings with different encodings' )
+.end
+
 # Local Variables:
 #   mode: pir
 #   fill-column: 100


More information about the parrot-commits mailing list