[svn:parrot] r49368 - branches/string_macros/src/string
nwellnhof at svn.parrot.org
nwellnhof at svn.parrot.org
Tue Sep 28 19:36:01 UTC 2010
Author: nwellnhof
Date: Tue Sep 28 19:36:01 2010
New Revision: 49368
URL: https://trac.parrot.org/parrot/changeset/49368
Log:
[str] Optimize string_rep_compatible
Modified:
branches/string_macros/src/string/api.c
Modified: branches/string_macros/src/string/api.c
==============================================================================
--- branches/string_macros/src/string/api.c Tue Sep 28 19:35:30 2010 (r49367)
+++ branches/string_macros/src/string/api.c Tue Sep 28 19:36:01 2010 (r49368)
@@ -245,43 +245,48 @@
{
ASSERT_ARGS(string_rep_compatible)
- if (a->encoding == b->encoding) {
+ PARROT_ASSERT(a->encoding && b->encoding);
+
+ if (a->encoding == b->encoding)
return a->encoding;
- }
/* a table could possibly simplify the logic */
- if (a->encoding == Parrot_utf8_encoding_ptr
- && b->encoding == Parrot_ascii_encoding_ptr) {
- if (a->strlen == a->bufused) {
+
+ if (STRING_max_bytes_per_codepoint(a) == 1
+ && STRING_max_bytes_per_codepoint(b) == 1) {
+ /* Return the "largest" encoding where ascii < latin1 < binary */
+
+ if (b->encoding == Parrot_ascii_encoding_ptr)
+ return a->encoding;
+ if (a->encoding == Parrot_ascii_encoding_ptr)
+ return b->encoding;
+ if (a->encoding == Parrot_binary_encoding_ptr)
+ return a->encoding;
+ if (b->encoding == Parrot_binary_encoding_ptr)
return b->encoding;
- }
- return a->encoding;
}
-
- if (b->encoding == Parrot_utf8_encoding_ptr
- && a->encoding == Parrot_ascii_encoding_ptr) {
- if (b->strlen == b->bufused) {
+ else {
+ /* UTF-8 strings are ASCII compatible if their byte length equals
+ their codepoint length. This is a nice trick but it can cause many
+ surprises when UTF-8 strings are suddenly "downgraded" to ASCII
+ strings. */
+
+ if (a->encoding == Parrot_utf8_encoding_ptr
+ && b->encoding == Parrot_ascii_encoding_ptr) {
+ if (a->strlen == a->bufused) {
+ return b->encoding;
+ }
return a->encoding;
}
- return b->encoding;
- }
-
- /* Sanity check before dereferencing the encoding pointers */
- if (a->encoding == NULL || b->encoding == NULL)
- return NULL;
- if (STRING_max_bytes_per_codepoint(a) != 1 ||
- STRING_max_bytes_per_codepoint(b) != 1)
- return NULL;
-
- if (b->encoding == Parrot_ascii_encoding_ptr)
- return a->encoding;
- if (a->encoding == Parrot_ascii_encoding_ptr)
- return b->encoding;
- if (a->encoding == Parrot_binary_encoding_ptr)
- return a->encoding;
- if (b->encoding == Parrot_binary_encoding_ptr)
- return b->encoding;
+ if (b->encoding == Parrot_utf8_encoding_ptr
+ && a->encoding == Parrot_ascii_encoding_ptr) {
+ if (b->strlen == b->bufused) {
+ return a->encoding;
+ }
+ return b->encoding;
+ }
+ }
return NULL;
}
More information about the parrot-commits mailing list