[svn:parrot] r49368 - branches/string_macros/src/string

nwellnhof at svn.parrot.org nwellnhof at svn.parrot.org
Tue Sep 28 19:36:01 UTC 2010


Author: nwellnhof
Date: Tue Sep 28 19:36:01 2010
New Revision: 49368
URL: https://trac.parrot.org/parrot/changeset/49368

Log:
[str] Optimize string_rep_compatible

Modified:
   branches/string_macros/src/string/api.c

Modified: branches/string_macros/src/string/api.c
==============================================================================
--- branches/string_macros/src/string/api.c	Tue Sep 28 19:35:30 2010	(r49367)
+++ branches/string_macros/src/string/api.c	Tue Sep 28 19:36:01 2010	(r49368)
@@ -245,43 +245,48 @@
 {
     ASSERT_ARGS(string_rep_compatible)
 
-    if (a->encoding == b->encoding) {
+    PARROT_ASSERT(a->encoding && b->encoding);
+
+    if (a->encoding == b->encoding)
         return a->encoding;
-    }
 
     /* a table could possibly simplify the logic */
-    if (a->encoding == Parrot_utf8_encoding_ptr
-    &&  b->encoding == Parrot_ascii_encoding_ptr) {
-        if (a->strlen == a->bufused) {
+
+    if (STRING_max_bytes_per_codepoint(a) == 1
+    &&  STRING_max_bytes_per_codepoint(b) == 1) {
+        /* Return the "largest" encoding where ascii < latin1 < binary */
+
+        if (b->encoding == Parrot_ascii_encoding_ptr)
+            return a->encoding;
+        if (a->encoding == Parrot_ascii_encoding_ptr)
+            return b->encoding;
+        if (a->encoding == Parrot_binary_encoding_ptr)
+            return a->encoding;
+        if (b->encoding == Parrot_binary_encoding_ptr)
             return b->encoding;
-        }
-        return a->encoding;
     }
-
-    if (b->encoding == Parrot_utf8_encoding_ptr
-    &&  a->encoding == Parrot_ascii_encoding_ptr) {
-        if (b->strlen == b->bufused) {
+    else {
+        /* UTF-8 strings are ASCII compatible if their byte length equals
+           their codepoint length. This is a nice trick but it can cause many
+           surprises when UTF-8 strings are suddenly "downgraded" to ASCII
+           strings. */
+
+        if (a->encoding == Parrot_utf8_encoding_ptr
+        &&  b->encoding == Parrot_ascii_encoding_ptr) {
+            if (a->strlen == a->bufused) {
+                return b->encoding;
+            }
             return a->encoding;
         }
-        return b->encoding;
-    }
-
-    /* Sanity check before dereferencing the encoding pointers */
-    if (a->encoding == NULL || b->encoding == NULL)
-        return NULL;
 
-    if (STRING_max_bytes_per_codepoint(a) != 1 ||
-        STRING_max_bytes_per_codepoint(b) != 1)
-        return NULL;
-
-    if (b->encoding == Parrot_ascii_encoding_ptr)
-        return a->encoding;
-    if (a->encoding == Parrot_ascii_encoding_ptr)
-        return b->encoding;
-    if (a->encoding == Parrot_binary_encoding_ptr)
-        return a->encoding;
-    if (b->encoding == Parrot_binary_encoding_ptr)
-        return b->encoding;
+        if (b->encoding == Parrot_utf8_encoding_ptr
+        &&  a->encoding == Parrot_ascii_encoding_ptr) {
+            if (b->strlen == b->bufused) {
+                return a->encoding;
+            }
+            return b->encoding;
+        }
+    }
 
     return NULL;
 }


More information about the parrot-commits mailing list