[svn:parrot] r49227 - branches/string_macros/src/string/encoding

nwellnhof at svn.parrot.org nwellnhof at svn.parrot.org
Wed Sep 22 01:21:38 UTC 2010


Author: nwellnhof
Date: Wed Sep 22 01:21:38 2010
New Revision: 49227
URL: https://trac.parrot.org/parrot/changeset/49227

Log:
[str] Make some string code work without ICU

Modified:
   branches/string_macros/src/string/encoding/ucs2.c
   branches/string_macros/src/string/encoding/ucs4.c
   branches/string_macros/src/string/encoding/utf16.c

Modified: branches/string_macros/src/string/encoding/ucs2.c
==============================================================================
--- branches/string_macros/src/string/encoding/ucs2.c	Wed Sep 22 01:21:16 2010	(r49226)
+++ branches/string_macros/src/string/encoding/ucs2.c	Wed Sep 22 01:21:38 2010	(r49227)
@@ -8,7 +8,7 @@
 
 =head1 DESCRIPTION
 
-UCS-2 encoding with the help of the ICU library.
+UCS-2 encoding
 
 =head2 Functions
 
@@ -21,16 +21,6 @@
 #include "parrot/parrot.h"
 #include "shared.h"
 
-#if !PARROT_HAS_ICU
-PARROT_DOES_NOT_RETURN
-static void no_ICU_lib(PARROT_INTERP) /* HEADERIZER SKIP */
-{
-    Parrot_ex_throw_from_c_args(interp, NULL,
-        EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-}
-#endif
-
 /* HEADERIZER HFILE: none */
 
 /* HEADERIZER BEGIN: static */
@@ -41,47 +31,40 @@
     size_t hashval)
         __attribute__nonnull__(2);
 
-static UINTVAL ucs2_iter_get(PARROT_INTERP,
+static UINTVAL ucs2_iter_get(SHIM_INTERP,
     ARGIN(const STRING *str),
     ARGIN(const String_iter *i),
     INTVAL offset)
-        __attribute__nonnull__(1)
         __attribute__nonnull__(2)
         __attribute__nonnull__(3);
 
-static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP,
+static UINTVAL ucs2_iter_get_and_advance(SHIM_INTERP,
     ARGIN(const STRING *str),
     ARGMOD(String_iter *i))
-        __attribute__nonnull__(1)
         __attribute__nonnull__(2)
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
-static void ucs2_iter_set_and_advance(PARROT_INTERP,
+static void ucs2_iter_set_and_advance(SHIM_INTERP,
     ARGMOD(STRING *str),
     ARGMOD(String_iter *i),
     UINTVAL c)
-        __attribute__nonnull__(1)
         __attribute__nonnull__(2)
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*str)
         FUNC_MODIFIES(*i);
 
-static void ucs2_iter_set_position(PARROT_INTERP,
-    ARGIN(const STRING *str),
+static void ucs2_iter_set_position(SHIM_INTERP,
+    SHIM(const STRING *str),
     ARGMOD(String_iter *i),
     UINTVAL n)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
-static void ucs2_iter_skip(PARROT_INTERP,
-    ARGIN(const STRING *str),
+static void ucs2_iter_skip(SHIM_INTERP,
+    SHIM(const STRING *str),
     ARGMOD(String_iter *i),
     INTVAL skip)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
@@ -103,25 +86,18 @@
 #define ASSERT_ARGS_ucs2_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_ucs2_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
+       PARROT_ASSERT_ARG(str) \
     , PARROT_ASSERT_ARG(i))
 #define ASSERT_ARGS_ucs2_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
+       PARROT_ASSERT_ARG(str) \
     , PARROT_ASSERT_ARG(i))
 #define ASSERT_ARGS_ucs2_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
+       PARROT_ASSERT_ARG(str) \
     , PARROT_ASSERT_ARG(i))
 #define ASSERT_ARGS_ucs2_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(i))
+       PARROT_ASSERT_ARG(i))
 #define ASSERT_ARGS_ucs2_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(i))
+       PARROT_ASSERT_ARG(i))
 #define ASSERT_ARGS_ucs2_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
@@ -134,10 +110,6 @@
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: static */
 
-#if PARROT_HAS_ICU
-#  include <unicode/ustring.h>
-#endif
-
 #define UNIMPL Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED, \
     "unimpl ucs2")
 
@@ -185,13 +157,7 @@
 ucs2_scan(PARROT_INTERP, ARGIN(const STRING *src))
 {
     ASSERT_ARGS(ucs2_scan)
-#if PARROT_HAS_ICU
-    UNUSED(interp);
-    return src->bufused / sizeof (UChar);
-#else
-    UNUSED(src);
-    no_ICU_lib(interp);
-#endif
+    return src->bufused >> 1;
 }
 
 /*
@@ -208,9 +174,8 @@
 ucs2_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
 {
     ASSERT_ARGS(ucs2_ord)
-#if PARROT_HAS_ICU
     const UINTVAL len = STRING_length(src);
-    const UChar  *s;
+    const Parrot_UInt2 *s;
 
     if (idx < 0)
         idx += len;
@@ -218,14 +183,9 @@
     if ((UINTVAL)idx >= len)
         encoding_ord_error(interp, src, idx);
 
-    s = (const UChar *)src->strstart;
+    s = (const Parrot_UInt2 *)src->strstart;
 
     return s[idx];
-#else
-    UNUSED(idx);
-    UNUSED(src);
-    no_ICU_lib(interp);
-#endif
 }
 
 
@@ -241,11 +201,13 @@
 */
 
 static UINTVAL
-ucs2_iter_get(PARROT_INTERP,
+ucs2_iter_get(SHIM_INTERP,
     ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
 {
     ASSERT_ARGS(ucs2_iter_get)
-    return ucs2_ord(interp, str, i->charpos + offset);
+    const Parrot_UInt2 * const s = (const Parrot_UInt2 *)str->strstart;
+
+    return s[i->charpos + offset];
 }
 
 /*
@@ -260,20 +222,13 @@
 */
 
 static void
-ucs2_iter_skip(PARROT_INTERP,
-    ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
+ucs2_iter_skip(SHIM_INTERP,
+    SHIM(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
 {
     ASSERT_ARGS(ucs2_iter_skip)
-    UNUSED(str);
 
-#if PARROT_HAS_ICU
     i->charpos += skip;
-    i->bytepos += skip * sizeof (UChar);
-#else
-    UNUSED(i);
-    UNUSED(skip);
-    no_ICU_lib(interp);
-#endif
+    i->bytepos += skip * 2;
 }
 
 /*
@@ -288,28 +243,17 @@
 */
 
 static UINTVAL
-ucs2_iter_get_and_advance(PARROT_INTERP,
+ucs2_iter_get_and_advance(SHIM_INTERP,
     ARGIN(const STRING *str), ARGMOD(String_iter *i))
 {
     ASSERT_ARGS(ucs2_iter_get_and_advance)
+    const Parrot_UInt2 * const s = (Parrot_UInt2 *)str->strstart;
+    const UINTVAL c = s[i->charpos];
 
-#if PARROT_HAS_ICU
-    UChar * const s = (UChar*) str->strstart;
-    size_t pos = i->bytepos / sizeof (UChar);
-
-    /* TODO either make sure that we don't go past end or use SAFE
-     *      iter versions
-     */
-    const UChar c = s[pos++];
     i->charpos++;
-    i->bytepos = pos * sizeof (UChar);
+    i->bytepos += 2;
+
     return c;
-#else
-    UNUSED(str);
-    UNUSED(i);
-    no_ICU_lib(interp);
-    return (UINTVAL)0; /* Stop the static analyzers from panicing */
-#endif
 }
 
 /*
@@ -325,23 +269,16 @@
 */
 
 static void
-ucs2_iter_set_and_advance(PARROT_INTERP,
+ucs2_iter_set_and_advance(SHIM_INTERP,
     ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c)
 {
     ASSERT_ARGS(ucs2_iter_set_and_advance)
+    Parrot_UInt2 * const s = (Parrot_UInt2 *) str->strstart;
+
+    s[i->charpos] = c;
 
-#if PARROT_HAS_ICU
-    UChar * const s = (UChar*) str->strstart;
-    UINTVAL pos = i->bytepos / sizeof (UChar);
-    s[pos++] = (UChar)c;
     i->charpos++;
-    i->bytepos = pos * sizeof (UChar);
-#else
-    UNUSED(str);
-    UNUSED(i);
-    UNUSED(c);
-    no_ICU_lib(interp);
-#endif
+    i->bytepos += 2;
 }
 
 /*
@@ -356,20 +293,13 @@
 */
 
 static void
-ucs2_iter_set_position(PARROT_INTERP,
-    ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
+ucs2_iter_set_position(SHIM_INTERP,
+    SHIM(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
 {
     ASSERT_ARGS(ucs2_iter_set_position)
-    UNUSED(str);
 
-#if PARROT_HAS_ICU
     i->charpos = n;
-    i->bytepos = n * sizeof (UChar);
-#else
-    UNUSED(i);
-    UNUSED(n);
-    no_ICU_lib(interp);
-#endif
+    i->bytepos = n * 2;
 }
 
 /*

Modified: branches/string_macros/src/string/encoding/ucs4.c
==============================================================================
--- branches/string_macros/src/string/encoding/ucs4.c	Wed Sep 22 01:21:16 2010	(r49226)
+++ branches/string_macros/src/string/encoding/ucs4.c	Wed Sep 22 01:21:38 2010	(r49227)
@@ -8,7 +8,7 @@
 
 =head1 DESCRIPTION
 
-UCS-4 encoding with the help of the ICU library.
+UCS-4 encoding
 
 =head2 Functions
 
@@ -21,16 +21,6 @@
 #include "parrot/parrot.h"
 #include "shared.h"
 
-#if !PARROT_HAS_ICU
-PARROT_DOES_NOT_RETURN
-static void no_ICU_lib(PARROT_INTERP) /* HEADERIZER SKIP */
-{
-    Parrot_ex_throw_from_c_args(interp, NULL,
-        EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-}
-#endif
-
 /* HEADERIZER HFILE: none */
 
 /* HEADERIZER BEGIN: static */
@@ -41,47 +31,40 @@
     size_t hashval)
         __attribute__nonnull__(2);
 
-static UINTVAL ucs4_iter_get(PARROT_INTERP,
+static UINTVAL ucs4_iter_get(SHIM_INTERP,
     ARGIN(const STRING *str),
     ARGIN(const String_iter *i),
     INTVAL offset)
-        __attribute__nonnull__(1)
         __attribute__nonnull__(2)
         __attribute__nonnull__(3);
 
-static UINTVAL ucs4_iter_get_and_advance(PARROT_INTERP,
+static UINTVAL ucs4_iter_get_and_advance(SHIM_INTERP,
     ARGIN(const STRING *str),
     ARGMOD(String_iter *i))
-        __attribute__nonnull__(1)
         __attribute__nonnull__(2)
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
-static void ucs4_iter_set_and_advance(PARROT_INTERP,
+static void ucs4_iter_set_and_advance(SHIM_INTERP,
     ARGMOD(STRING *str),
     ARGMOD(String_iter *i),
     UINTVAL c)
-        __attribute__nonnull__(1)
         __attribute__nonnull__(2)
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*str)
         FUNC_MODIFIES(*i);
 
-static void ucs4_iter_set_position(PARROT_INTERP,
-    ARGIN(const STRING *str),
+static void ucs4_iter_set_position(SHIM_INTERP,
+    SHIM(const STRING *str),
     ARGMOD(String_iter *i),
     UINTVAL n)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
-static void ucs4_iter_skip(PARROT_INTERP,
-    ARGIN(const STRING *str),
+static void ucs4_iter_skip(SHIM_INTERP,
+    SHIM(const STRING *str),
     ARGMOD(String_iter *i),
     INTVAL skip)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
@@ -90,8 +73,7 @@
         __attribute__nonnull__(2);
 
 PARROT_WARN_UNUSED_RESULT
-static UINTVAL ucs4_scan(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
+static UINTVAL ucs4_scan(SHIM_INTERP, ARGIN(const STRING *src))
         __attribute__nonnull__(2);
 
 PARROT_WARN_UNUSED_RESULT
@@ -103,41 +85,29 @@
 #define ASSERT_ARGS_ucs4_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_ucs4_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
+       PARROT_ASSERT_ARG(str) \
     , PARROT_ASSERT_ARG(i))
 #define ASSERT_ARGS_ucs4_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
+       PARROT_ASSERT_ARG(str) \
     , PARROT_ASSERT_ARG(i))
 #define ASSERT_ARGS_ucs4_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
+       PARROT_ASSERT_ARG(str) \
     , PARROT_ASSERT_ARG(i))
 #define ASSERT_ARGS_ucs4_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(i))
+       PARROT_ASSERT_ARG(i))
 #define ASSERT_ARGS_ucs4_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(i))
+       PARROT_ASSERT_ARG(i))
 #define ASSERT_ARGS_ucs4_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_ucs4_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
+       PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_ucs4_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: static */
 
-#if PARROT_HAS_ICU
-#  include <unicode/ustring.h>
-#endif
-
 
 /*
 
@@ -155,30 +125,40 @@
 ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
 {
     ASSERT_ARGS(ucs4_to_encoding)
-#if PARROT_HAS_ICU
-    if (src->encoding == Parrot_ucs4_encoding_ptr) {
+    UINTVAL       len;
+    STRING       *res;
+    Parrot_UInt4 *buf;
+
+    if (src->encoding == Parrot_ucs4_encoding_ptr)
         return Parrot_str_clone(interp, src);
+
+    len = STRING_length(src);
+    res = Parrot_str_new_init(interp, NULL, len * 4,
+            Parrot_ucs4_encoding_ptr, 0);
+    buf = (Parrot_UInt4 *) res->strstart;
+
+    if (STRING_max_bytes_per_codepoint(src) == 1) {
+        const unsigned char *s = (const unsigned char *)src->strstart;
+        UINTVAL i;
+
+        for (i = 0; i < len; i++) {
+            buf[i] = s[i];
+        }
     }
     else {
-        UINTVAL len = Parrot_str_length(interp, src);
-        STRING *res = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
-                           Parrot_ucs4_encoding_ptr, 0);
-        UChar32 *buf = (UChar32 *) res->strstart;
-        UINTVAL offs;
-        /* TODO: use an iterator */
-        for (offs = 0; offs < len; offs++){
-            buf[offs] = STRING_ord(interp, src, offs);
-        };
-        res->strlen  = len;
-        res->bufused = len * sizeof (UChar32);
+        String_iter iter;
 
-        return res;
+        STRING_ITER_INIT(interp, &iter);
+
+        while (iter.charpos < len) {
+            buf[iter.charpos] = STRING_iter_get_and_advance(interp, src, &iter);
+        }
     }
-#else
-    UNUSED(src);
-    no_ICU_lib(interp);
-#endif
 
+    res->strlen  = len;
+    res->bufused = len * 4;
+
+    return res;
 }
 
 
@@ -194,16 +174,11 @@
 
 PARROT_WARN_UNUSED_RESULT
 static UINTVAL
-ucs4_scan(PARROT_INTERP, ARGIN(const STRING *src))
+ucs4_scan(SHIM_INTERP, ARGIN(const STRING *src))
 {
     ASSERT_ARGS(ucs4_scan)
-#if PARROT_HAS_ICU
-    UNUSED(interp);
-    return src->bufused / sizeof (UChar32);
-#else
-    UNUSED(src);
-    no_ICU_lib(interp);
-#endif
+
+    return src->bufused >> 2;
 }
 
 
@@ -221,9 +196,8 @@
 ucs4_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
 {
     ASSERT_ARGS(ucs4_ord)
-#if PARROT_HAS_ICU
     const UINTVAL  len = STRING_length(src);
-    const UChar32 *s;
+    const Parrot_UInt4 *s;
 
     if (idx < 0)
         idx += len;
@@ -231,14 +205,9 @@
     if ((UINTVAL)idx >= len)
         encoding_ord_error(interp, src, idx);
 
-    s = (const UChar32 *)src->strstart;
+    s = (const Parrot_UInt4 *)src->strstart;
 
     return s[idx];
-#else
-    UNUSED(idx);
-    UNUSED(src);
-    no_ICU_lib(interp);
-#endif
 }
 
 
@@ -254,11 +223,13 @@
 */
 
 static UINTVAL
-ucs4_iter_get(PARROT_INTERP,
+ucs4_iter_get(SHIM_INTERP,
     ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
 {
     ASSERT_ARGS(ucs4_iter_get)
-    return ucs4_ord(interp, str, i->charpos + offset);
+    const Parrot_UInt4 * const s = (const Parrot_UInt4 *)str->strstart;
+
+    return s[i->charpos + offset];
 }
 
 
@@ -274,20 +245,13 @@
 */
 
 static void
-ucs4_iter_skip(PARROT_INTERP,
-    ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
+ucs4_iter_skip(SHIM_INTERP,
+    SHIM(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
 {
     ASSERT_ARGS(ucs4_iter_skip)
-    UNUSED(str);
 
-#if PARROT_HAS_ICU
     i->charpos += skip;
-    i->bytepos += skip * sizeof (UChar32);
-#else
-    UNUSED(i);
-    UNUSED(skip);
-    no_ICU_lib(interp);
-#endif
+    i->bytepos += skip * 4;
 }
 
 
@@ -303,22 +267,17 @@
 */
 
 static UINTVAL
-ucs4_iter_get_and_advance(PARROT_INTERP,
+ucs4_iter_get_and_advance(SHIM_INTERP,
     ARGIN(const STRING *str), ARGMOD(String_iter *i))
 {
     ASSERT_ARGS(ucs4_iter_get_and_advance)
+    const Parrot_UInt4 * const s = (const Parrot_UInt4 *)str->strstart;
+    const UINTVAL c = s[i->charpos];
+
+    i->charpos++;
+    i->bytepos += 4;
 
-#if PARROT_HAS_ICU
-    const UChar32 * const s = (const UChar32*) str->strstart;
-    const UChar32 c = s[i->charpos++];
-    i->bytepos += sizeof (UChar32);
     return c;
-#else
-    UNUSED(str);
-    UNUSED(i);
-    no_ICU_lib(interp);
-    return (UINTVAL)0; /* Stop the static analyzers from panicing */
-#endif
 }
 
 
@@ -335,21 +294,16 @@
 */
 
 static void
-ucs4_iter_set_and_advance(PARROT_INTERP,
+ucs4_iter_set_and_advance(SHIM_INTERP,
     ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c)
 {
     ASSERT_ARGS(ucs4_iter_set_and_advance)
+    Parrot_UInt4 * const s = (Parrot_UInt4 *)str->strstart;
+
+    s[i->charpos] = c;
 
-#if PARROT_HAS_ICU
-    UChar32 * const s = (UChar32*) str->strstart;
-    s[i->charpos++] = (UChar32)c;
-    i->bytepos += sizeof (UChar32);
-#else
-    UNUSED(str);
-    UNUSED(i);
-    UNUSED(c);
-    no_ICU_lib(interp);
-#endif
+    i->charpos++;
+    i->bytepos += 4;
 }
 
 
@@ -365,20 +319,13 @@
 */
 
 static void
-ucs4_iter_set_position(PARROT_INTERP,
-    ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
+ucs4_iter_set_position(SHIM_INTERP,
+    SHIM(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
 {
     ASSERT_ARGS(ucs4_iter_set_position)
-    UNUSED(str);
 
-#if PARROT_HAS_ICU
     i->charpos = n;
-    i->bytepos = n * sizeof (UChar32);
-#else
-    UNUSED(i);
-    UNUSED(n);
-    no_ICU_lib(interp);
-#endif
+    i->bytepos = n * 4;
 }
 
 

Modified: branches/string_macros/src/string/encoding/utf16.c
==============================================================================
--- branches/string_macros/src/string/encoding/utf16.c	Wed Sep 22 01:21:16 2010	(r49226)
+++ branches/string_macros/src/string/encoding/utf16.c	Wed Sep 22 01:21:38 2010	(r49227)
@@ -160,13 +160,9 @@
 utf16_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
 {
     ASSERT_ARGS(utf16_to_encoding)
-#if PARROT_HAS_ICU
-    UErrorCode err;
-    int dest_len;
-    UChar *p;
-#endif
-    int src_len;
-    STRING *result;
+    int           src_len, dest_len;
+    Parrot_UInt2 *p;
+    STRING       *result;
 
     if (src->encoding == Parrot_utf16_encoding_ptr
     ||  src->encoding == Parrot_ucs2_encoding_ptr)
@@ -174,56 +170,58 @@
          * and downcase functions assume to get an unshared buffer */
         return Parrot_str_clone(interp, src);
 
-    result = Parrot_gc_new_string_header(interp, 0);
+    result  = Parrot_gc_new_string_header(interp, 0);
+    src_len = STRING_length(src);
 
-    /*
-     * TODO adapt string creation functions
-     */
-    src_len = src->strlen;
     if (!src_len) {
         result->encoding = Parrot_ucs2_encoding_ptr;
-        result->strlen = result->bufused = 0;
         return result;
     }
-#if PARROT_HAS_ICU
-    Parrot_gc_allocate_string_storage(interp, result, sizeof (UChar) * src_len);
-    p = (UChar *)result->strstart;
 
-    if (src->encoding == Parrot_latin1_encoding_ptr ||
-            src->encoding == Parrot_ascii_encoding_ptr) {
-        for (dest_len = 0; dest_len < (int)src->strlen; ++dest_len) {
-            p[dest_len] = (UChar)((unsigned char*)src->strstart)[dest_len];
+    Parrot_gc_allocate_string_storage(interp, result, 2 * src_len);
+    p = (Parrot_UInt2 *)result->strstart;
+
+    if (STRING_max_bytes_per_codepoint(src) == 1) {
+        for (dest_len = 0; dest_len < src_len; ++dest_len) {
+            p[dest_len] = (unsigned char)src->strstart[dest_len];
         }
     }
-    else {
-        err = U_ZERO_ERROR;
-        u_strFromUTF8(p, src_len,
-                &dest_len, src->strstart, src->bufused, &err);
+    else if (src->encoding == Parrot_utf8_encoding_ptr) {
+#if PARROT_HAS_ICU
+        UErrorCode err = U_ZERO_ERROR;
+
+        u_strFromUTF8(p, src_len, &dest_len, src->strstart, src->bufused, &err);
+
         if (!U_SUCCESS(err)) {
             /*
              * have to resize - required len in UChars is in dest_len
              */
-            result->bufused = dest_len * sizeof (UChar);
-            Parrot_gc_reallocate_string_storage(interp, result,
-                                     sizeof (UChar) * dest_len);
-            p = (UChar *)result->strstart;
-            u_strFromUTF8(p, dest_len,
-                    &dest_len, src->strstart, src->bufused, &err);
+            result->bufused = 2 * dest_len;
+            Parrot_gc_reallocate_string_storage(interp, result, 2 * dest_len);
+
+            p = (Parrot_UInt2 *)result->strstart;
+            u_strFromUTF8(p, dest_len, &dest_len, src->strstart, src->bufused, &err);
             PARROT_ASSERT(U_SUCCESS(err));
         }
+#else
+        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+            "no ICU lib loaded");
+#endif
     }
-    result->bufused = dest_len * sizeof (UChar);
-    result->encoding = Parrot_utf16_encoding_ptr;
-    result->strlen = src_len;
+    else {
+        UNIMPL;
+    }
+
+    result->bufused  = 2 * dest_len;
+    result->strlen   = src_len;
 
     /* downgrade if possible */
-    if (dest_len == (int)src->strlen)
+    if (dest_len == src_len)
         result->encoding = Parrot_ucs2_encoding_ptr;
+    else
+        result->encoding = Parrot_utf16_encoding_ptr;
+
     return result;
-#else
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
 }
 
 /*


More information about the parrot-commits mailing list