[svn:parrot] r39572 - in trunk: src/string t/op

pmichaud at svn.parrot.org pmichaud at svn.parrot.org
Mon Jun 15 17:21:41 UTC 2009


Author: pmichaud
Date: Mon Jun 15 17:21:39 2009
New Revision: 39572
URL: https://trac.parrot.org/parrot/changeset/39572

Log:
[core]: Fix for TT #752 so that concatenation of mixed string types works.

Modified:
   trunk/src/string/api.c
   trunk/t/op/stringu.t

Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c	Mon Jun 15 13:24:40 2009	(r39571)
+++ trunk/src/string/api.c	Mon Jun 15 17:21:39 2009	(r39572)
@@ -426,23 +426,21 @@
 
     /* a table could possibly simplify the logic */
     if (a->encoding == Parrot_utf8_encoding_ptr &&
-            (b->charset == Parrot_ascii_charset_ptr ||
-             b->charset == Parrot_iso_8859_1_charset_ptr)) {
+            b->charset == Parrot_ascii_charset_ptr ) {
         if (a->strlen == a->bufused) {
             *e = Parrot_fixed_8_encoding_ptr;
-            return Parrot_ascii_charset_ptr;
+            return b->charset;
         }
         *e = a->encoding;
         return a->charset;
     }
     if (b->encoding == Parrot_utf8_encoding_ptr &&
-            (a->charset == Parrot_ascii_charset_ptr ||
-             a->charset == Parrot_iso_8859_1_charset_ptr)) {
+            a->charset == Parrot_ascii_charset_ptr) {
         if (b->strlen == b->bufused) {
             *e = Parrot_fixed_8_encoding_ptr;
             return a->charset;
         }
-        *e = Parrot_utf8_encoding_ptr;
+        *e = b->encoding;
         return b->charset;
     }
     if (a->encoding != b->encoding)
@@ -562,14 +560,22 @@
         a->encoding = enc;
     }
     else {
-        /* upgrade to utf16 */
-        Parrot_utf16_encoding_ptr->to_encoding(interp, a, NULL);
-        b = Parrot_utf16_encoding_ptr->to_encoding(interp, b,
+        /* upgrade strings for concatenation */
+        enc = (a->encoding == Parrot_utf16_encoding_ptr ||
+                  b->encoding == Parrot_utf16_encoding_ptr ||
+                  a->encoding == Parrot_ucs2_encoding_ptr ||
+                  b->encoding == Parrot_ucs2_encoding_ptr)
+              ? Parrot_utf16_encoding_ptr
+              : Parrot_utf8_encoding_ptr;
+
+        Parrot_unicode_charset_ptr->to_charset(interp, a, NULL);
+        b = Parrot_unicode_charset_ptr->to_charset(interp, b,
                 Parrot_gc_new_string_header(interp, 0));
 
-        /* result could be mixed ucs2 / utf16 */
-        if (b->encoding == Parrot_utf16_encoding_ptr)
-            a->encoding = Parrot_utf16_encoding_ptr;
+        if (a->encoding != enc)
+            enc->to_encoding(interp, a, NULL);
+        if (b->encoding != enc)
+            enc->to_encoding(interp, b, NULL);
     }
 
     /* calc usable and total bytes */

Modified: trunk/t/op/stringu.t
==============================================================================
--- trunk/t/op/stringu.t	Mon Jun 15 13:24:40 2009	(r39571)
+++ trunk/t/op/stringu.t	Mon Jun 15 17:21:39 2009	(r39572)
@@ -6,7 +6,7 @@
 use warnings;
 use lib qw( . lib ../lib ../../lib );
 use Test::More;
-use Parrot::Test tests => 31;
+use Parrot::Test tests => 32;
 use Parrot::Config;
 
 =head1 NAME
@@ -550,6 +550,31 @@
 OUT
 }
 
+pir_output_is( <<'CODE', <<'OUT', 'concatenation of utf8 and iso-8859-1 (TT#752)' );
+.sub 'main'
+
+    $S1 = chr 0xe5
+    $S2 = chr 0x263b
+
+    $S0 = unicode:"\u00e5\u263b"
+    $S3 = concat $S1, $S2
+    if $S0 == $S3 goto equal_1
+    print "not "
+  equal_1:
+    say "equal"
+
+    $S0 = unicode:"\u263b\u00e5"
+    $S3 = concat $S2, $S1
+    if $S0 == $S3 goto equal_2
+    print "not "
+  equal_2:
+    say "equal"
+.end
+CODE
+equal
+equal
+OUT
+
 
 # Local Variables:
 #   mode: cperl


More information about the parrot-commits mailing list