[svn:parrot] r49227 - branches/string_macros/src/string/encoding
nwellnhof at svn.parrot.org
nwellnhof at svn.parrot.org
Wed Sep 22 01:21:38 UTC 2010
Author: nwellnhof
Date: Wed Sep 22 01:21:38 2010
New Revision: 49227
URL: https://trac.parrot.org/parrot/changeset/49227
Log:
[str] Make some string code work without ICU
Modified:
branches/string_macros/src/string/encoding/ucs2.c
branches/string_macros/src/string/encoding/ucs4.c
branches/string_macros/src/string/encoding/utf16.c
Modified: branches/string_macros/src/string/encoding/ucs2.c
==============================================================================
--- branches/string_macros/src/string/encoding/ucs2.c Wed Sep 22 01:21:16 2010 (r49226)
+++ branches/string_macros/src/string/encoding/ucs2.c Wed Sep 22 01:21:38 2010 (r49227)
@@ -8,7 +8,7 @@
=head1 DESCRIPTION
-UCS-2 encoding with the help of the ICU library.
+UCS-2 encoding
=head2 Functions
@@ -21,16 +21,6 @@
#include "parrot/parrot.h"
#include "shared.h"
-#if !PARROT_HAS_ICU
-PARROT_DOES_NOT_RETURN
-static void no_ICU_lib(PARROT_INTERP) /* HEADERIZER SKIP */
-{
- Parrot_ex_throw_from_c_args(interp, NULL,
- EXCEPTION_LIBRARY_ERROR,
- "no ICU lib loaded");
-}
-#endif
-
/* HEADERIZER HFILE: none */
/* HEADERIZER BEGIN: static */
@@ -41,47 +31,40 @@
size_t hashval)
__attribute__nonnull__(2);
-static UINTVAL ucs2_iter_get(PARROT_INTERP,
+static UINTVAL ucs2_iter_get(SHIM_INTERP,
ARGIN(const STRING *str),
ARGIN(const String_iter *i),
INTVAL offset)
- __attribute__nonnull__(1)
__attribute__nonnull__(2)
__attribute__nonnull__(3);
-static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP,
+static UINTVAL ucs2_iter_get_and_advance(SHIM_INTERP,
ARGIN(const STRING *str),
ARGMOD(String_iter *i))
- __attribute__nonnull__(1)
__attribute__nonnull__(2)
__attribute__nonnull__(3)
FUNC_MODIFIES(*i);
-static void ucs2_iter_set_and_advance(PARROT_INTERP,
+static void ucs2_iter_set_and_advance(SHIM_INTERP,
ARGMOD(STRING *str),
ARGMOD(String_iter *i),
UINTVAL c)
- __attribute__nonnull__(1)
__attribute__nonnull__(2)
__attribute__nonnull__(3)
FUNC_MODIFIES(*str)
FUNC_MODIFIES(*i);
-static void ucs2_iter_set_position(PARROT_INTERP,
- ARGIN(const STRING *str),
+static void ucs2_iter_set_position(SHIM_INTERP,
+ SHIM(const STRING *str),
ARGMOD(String_iter *i),
UINTVAL n)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
__attribute__nonnull__(3)
FUNC_MODIFIES(*i);
-static void ucs2_iter_skip(PARROT_INTERP,
- ARGIN(const STRING *str),
+static void ucs2_iter_skip(SHIM_INTERP,
+ SHIM(const STRING *str),
ARGMOD(String_iter *i),
INTVAL skip)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
__attribute__nonnull__(3)
FUNC_MODIFIES(*i);
@@ -103,25 +86,18 @@
#define ASSERT_ARGS_ucs2_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_ucs2_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
+ PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs2_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
+ PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs2_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
+ PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs2_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
- , PARROT_ASSERT_ARG(i))
+ PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs2_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
- , PARROT_ASSERT_ARG(i))
+ PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs2_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
@@ -134,10 +110,6 @@
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
/* HEADERIZER END: static */
-#if PARROT_HAS_ICU
-# include <unicode/ustring.h>
-#endif
-
#define UNIMPL Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED, \
"unimpl ucs2")
@@ -185,13 +157,7 @@
ucs2_scan(PARROT_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(ucs2_scan)
-#if PARROT_HAS_ICU
- UNUSED(interp);
- return src->bufused / sizeof (UChar);
-#else
- UNUSED(src);
- no_ICU_lib(interp);
-#endif
+ return src->bufused >> 1;
}
/*
@@ -208,9 +174,8 @@
ucs2_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
{
ASSERT_ARGS(ucs2_ord)
-#if PARROT_HAS_ICU
const UINTVAL len = STRING_length(src);
- const UChar *s;
+ const Parrot_UInt2 *s;
if (idx < 0)
idx += len;
@@ -218,14 +183,9 @@
if ((UINTVAL)idx >= len)
encoding_ord_error(interp, src, idx);
- s = (const UChar *)src->strstart;
+ s = (const Parrot_UInt2 *)src->strstart;
return s[idx];
-#else
- UNUSED(idx);
- UNUSED(src);
- no_ICU_lib(interp);
-#endif
}
@@ -241,11 +201,13 @@
*/
static UINTVAL
-ucs2_iter_get(PARROT_INTERP,
+ucs2_iter_get(SHIM_INTERP,
ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
{
ASSERT_ARGS(ucs2_iter_get)
- return ucs2_ord(interp, str, i->charpos + offset);
+ const Parrot_UInt2 * const s = (const Parrot_UInt2 *)str->strstart;
+
+ return s[i->charpos + offset];
}
/*
@@ -260,20 +222,13 @@
*/
static void
-ucs2_iter_skip(PARROT_INTERP,
- ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
+ucs2_iter_skip(SHIM_INTERP,
+ SHIM(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
{
ASSERT_ARGS(ucs2_iter_skip)
- UNUSED(str);
-#if PARROT_HAS_ICU
i->charpos += skip;
- i->bytepos += skip * sizeof (UChar);
-#else
- UNUSED(i);
- UNUSED(skip);
- no_ICU_lib(interp);
-#endif
+ i->bytepos += skip * 2;
}
/*
@@ -288,28 +243,17 @@
*/
static UINTVAL
-ucs2_iter_get_and_advance(PARROT_INTERP,
+ucs2_iter_get_and_advance(SHIM_INTERP,
ARGIN(const STRING *str), ARGMOD(String_iter *i))
{
ASSERT_ARGS(ucs2_iter_get_and_advance)
+ const Parrot_UInt2 * const s = (Parrot_UInt2 *)str->strstart;
+ const UINTVAL c = s[i->charpos];
-#if PARROT_HAS_ICU
- UChar * const s = (UChar*) str->strstart;
- size_t pos = i->bytepos / sizeof (UChar);
-
- /* TODO either make sure that we don't go past end or use SAFE
- * iter versions
- */
- const UChar c = s[pos++];
i->charpos++;
- i->bytepos = pos * sizeof (UChar);
+ i->bytepos += 2;
+
return c;
-#else
- UNUSED(str);
- UNUSED(i);
- no_ICU_lib(interp);
- return (UINTVAL)0; /* Stop the static analyzers from panicing */
-#endif
}
/*
@@ -325,23 +269,16 @@
*/
static void
-ucs2_iter_set_and_advance(PARROT_INTERP,
+ucs2_iter_set_and_advance(SHIM_INTERP,
ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c)
{
ASSERT_ARGS(ucs2_iter_set_and_advance)
+ Parrot_UInt2 * const s = (Parrot_UInt2 *) str->strstart;
+
+ s[i->charpos] = c;
-#if PARROT_HAS_ICU
- UChar * const s = (UChar*) str->strstart;
- UINTVAL pos = i->bytepos / sizeof (UChar);
- s[pos++] = (UChar)c;
i->charpos++;
- i->bytepos = pos * sizeof (UChar);
-#else
- UNUSED(str);
- UNUSED(i);
- UNUSED(c);
- no_ICU_lib(interp);
-#endif
+ i->bytepos += 2;
}
/*
@@ -356,20 +293,13 @@
*/
static void
-ucs2_iter_set_position(PARROT_INTERP,
- ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
+ucs2_iter_set_position(SHIM_INTERP,
+ SHIM(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
{
ASSERT_ARGS(ucs2_iter_set_position)
- UNUSED(str);
-#if PARROT_HAS_ICU
i->charpos = n;
- i->bytepos = n * sizeof (UChar);
-#else
- UNUSED(i);
- UNUSED(n);
- no_ICU_lib(interp);
-#endif
+ i->bytepos = n * 2;
}
/*
Modified: branches/string_macros/src/string/encoding/ucs4.c
==============================================================================
--- branches/string_macros/src/string/encoding/ucs4.c Wed Sep 22 01:21:16 2010 (r49226)
+++ branches/string_macros/src/string/encoding/ucs4.c Wed Sep 22 01:21:38 2010 (r49227)
@@ -8,7 +8,7 @@
=head1 DESCRIPTION
-UCS-4 encoding with the help of the ICU library.
+UCS-4 encoding
=head2 Functions
@@ -21,16 +21,6 @@
#include "parrot/parrot.h"
#include "shared.h"
-#if !PARROT_HAS_ICU
-PARROT_DOES_NOT_RETURN
-static void no_ICU_lib(PARROT_INTERP) /* HEADERIZER SKIP */
-{
- Parrot_ex_throw_from_c_args(interp, NULL,
- EXCEPTION_LIBRARY_ERROR,
- "no ICU lib loaded");
-}
-#endif
-
/* HEADERIZER HFILE: none */
/* HEADERIZER BEGIN: static */
@@ -41,47 +31,40 @@
size_t hashval)
__attribute__nonnull__(2);
-static UINTVAL ucs4_iter_get(PARROT_INTERP,
+static UINTVAL ucs4_iter_get(SHIM_INTERP,
ARGIN(const STRING *str),
ARGIN(const String_iter *i),
INTVAL offset)
- __attribute__nonnull__(1)
__attribute__nonnull__(2)
__attribute__nonnull__(3);
-static UINTVAL ucs4_iter_get_and_advance(PARROT_INTERP,
+static UINTVAL ucs4_iter_get_and_advance(SHIM_INTERP,
ARGIN(const STRING *str),
ARGMOD(String_iter *i))
- __attribute__nonnull__(1)
__attribute__nonnull__(2)
__attribute__nonnull__(3)
FUNC_MODIFIES(*i);
-static void ucs4_iter_set_and_advance(PARROT_INTERP,
+static void ucs4_iter_set_and_advance(SHIM_INTERP,
ARGMOD(STRING *str),
ARGMOD(String_iter *i),
UINTVAL c)
- __attribute__nonnull__(1)
__attribute__nonnull__(2)
__attribute__nonnull__(3)
FUNC_MODIFIES(*str)
FUNC_MODIFIES(*i);
-static void ucs4_iter_set_position(PARROT_INTERP,
- ARGIN(const STRING *str),
+static void ucs4_iter_set_position(SHIM_INTERP,
+ SHIM(const STRING *str),
ARGMOD(String_iter *i),
UINTVAL n)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
__attribute__nonnull__(3)
FUNC_MODIFIES(*i);
-static void ucs4_iter_skip(PARROT_INTERP,
- ARGIN(const STRING *str),
+static void ucs4_iter_skip(SHIM_INTERP,
+ SHIM(const STRING *str),
ARGMOD(String_iter *i),
INTVAL skip)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
__attribute__nonnull__(3)
FUNC_MODIFIES(*i);
@@ -90,8 +73,7 @@
__attribute__nonnull__(2);
PARROT_WARN_UNUSED_RESULT
-static UINTVAL ucs4_scan(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
+static UINTVAL ucs4_scan(SHIM_INTERP, ARGIN(const STRING *src))
__attribute__nonnull__(2);
PARROT_WARN_UNUSED_RESULT
@@ -103,41 +85,29 @@
#define ASSERT_ARGS_ucs4_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_ucs4_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
+ PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs4_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
+ PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs4_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
+ PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs4_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
- , PARROT_ASSERT_ARG(i))
+ PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs4_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
- , PARROT_ASSERT_ARG(i))
+ PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs4_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_ucs4_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
+ PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_ucs4_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
/* HEADERIZER END: static */
-#if PARROT_HAS_ICU
-# include <unicode/ustring.h>
-#endif
-
/*
@@ -155,30 +125,40 @@
ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(ucs4_to_encoding)
-#if PARROT_HAS_ICU
- if (src->encoding == Parrot_ucs4_encoding_ptr) {
+ UINTVAL len;
+ STRING *res;
+ Parrot_UInt4 *buf;
+
+ if (src->encoding == Parrot_ucs4_encoding_ptr)
return Parrot_str_clone(interp, src);
+
+ len = STRING_length(src);
+ res = Parrot_str_new_init(interp, NULL, len * 4,
+ Parrot_ucs4_encoding_ptr, 0);
+ buf = (Parrot_UInt4 *) res->strstart;
+
+ if (STRING_max_bytes_per_codepoint(src) == 1) {
+ const unsigned char *s = (const unsigned char *)src->strstart;
+ UINTVAL i;
+
+ for (i = 0; i < len; i++) {
+ buf[i] = s[i];
+ }
}
else {
- UINTVAL len = Parrot_str_length(interp, src);
- STRING *res = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
- Parrot_ucs4_encoding_ptr, 0);
- UChar32 *buf = (UChar32 *) res->strstart;
- UINTVAL offs;
- /* TODO: use an iterator */
- for (offs = 0; offs < len; offs++){
- buf[offs] = STRING_ord(interp, src, offs);
- };
- res->strlen = len;
- res->bufused = len * sizeof (UChar32);
+ String_iter iter;
- return res;
+ STRING_ITER_INIT(interp, &iter);
+
+ while (iter.charpos < len) {
+ buf[iter.charpos] = STRING_iter_get_and_advance(interp, src, &iter);
+ }
}
-#else
- UNUSED(src);
- no_ICU_lib(interp);
-#endif
+ res->strlen = len;
+ res->bufused = len * 4;
+
+ return res;
}
@@ -194,16 +174,11 @@
PARROT_WARN_UNUSED_RESULT
static UINTVAL
-ucs4_scan(PARROT_INTERP, ARGIN(const STRING *src))
+ucs4_scan(SHIM_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(ucs4_scan)
-#if PARROT_HAS_ICU
- UNUSED(interp);
- return src->bufused / sizeof (UChar32);
-#else
- UNUSED(src);
- no_ICU_lib(interp);
-#endif
+
+ return src->bufused >> 2;
}
@@ -221,9 +196,8 @@
ucs4_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
{
ASSERT_ARGS(ucs4_ord)
-#if PARROT_HAS_ICU
const UINTVAL len = STRING_length(src);
- const UChar32 *s;
+ const Parrot_UInt4 *s;
if (idx < 0)
idx += len;
@@ -231,14 +205,9 @@
if ((UINTVAL)idx >= len)
encoding_ord_error(interp, src, idx);
- s = (const UChar32 *)src->strstart;
+ s = (const Parrot_UInt4 *)src->strstart;
return s[idx];
-#else
- UNUSED(idx);
- UNUSED(src);
- no_ICU_lib(interp);
-#endif
}
@@ -254,11 +223,13 @@
*/
static UINTVAL
-ucs4_iter_get(PARROT_INTERP,
+ucs4_iter_get(SHIM_INTERP,
ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
{
ASSERT_ARGS(ucs4_iter_get)
- return ucs4_ord(interp, str, i->charpos + offset);
+ const Parrot_UInt4 * const s = (const Parrot_UInt4 *)str->strstart;
+
+ return s[i->charpos + offset];
}
@@ -274,20 +245,13 @@
*/
static void
-ucs4_iter_skip(PARROT_INTERP,
- ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
+ucs4_iter_skip(SHIM_INTERP,
+ SHIM(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
{
ASSERT_ARGS(ucs4_iter_skip)
- UNUSED(str);
-#if PARROT_HAS_ICU
i->charpos += skip;
- i->bytepos += skip * sizeof (UChar32);
-#else
- UNUSED(i);
- UNUSED(skip);
- no_ICU_lib(interp);
-#endif
+ i->bytepos += skip * 4;
}
@@ -303,22 +267,17 @@
*/
static UINTVAL
-ucs4_iter_get_and_advance(PARROT_INTERP,
+ucs4_iter_get_and_advance(SHIM_INTERP,
ARGIN(const STRING *str), ARGMOD(String_iter *i))
{
ASSERT_ARGS(ucs4_iter_get_and_advance)
+ const Parrot_UInt4 * const s = (const Parrot_UInt4 *)str->strstart;
+ const UINTVAL c = s[i->charpos];
+
+ i->charpos++;
+ i->bytepos += 4;
-#if PARROT_HAS_ICU
- const UChar32 * const s = (const UChar32*) str->strstart;
- const UChar32 c = s[i->charpos++];
- i->bytepos += sizeof (UChar32);
return c;
-#else
- UNUSED(str);
- UNUSED(i);
- no_ICU_lib(interp);
- return (UINTVAL)0; /* Stop the static analyzers from panicing */
-#endif
}
@@ -335,21 +294,16 @@
*/
static void
-ucs4_iter_set_and_advance(PARROT_INTERP,
+ucs4_iter_set_and_advance(SHIM_INTERP,
ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c)
{
ASSERT_ARGS(ucs4_iter_set_and_advance)
+ Parrot_UInt4 * const s = (Parrot_UInt4 *)str->strstart;
+
+ s[i->charpos] = c;
-#if PARROT_HAS_ICU
- UChar32 * const s = (UChar32*) str->strstart;
- s[i->charpos++] = (UChar32)c;
- i->bytepos += sizeof (UChar32);
-#else
- UNUSED(str);
- UNUSED(i);
- UNUSED(c);
- no_ICU_lib(interp);
-#endif
+ i->charpos++;
+ i->bytepos += 4;
}
@@ -365,20 +319,13 @@
*/
static void
-ucs4_iter_set_position(PARROT_INTERP,
- ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
+ucs4_iter_set_position(SHIM_INTERP,
+ SHIM(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
{
ASSERT_ARGS(ucs4_iter_set_position)
- UNUSED(str);
-#if PARROT_HAS_ICU
i->charpos = n;
- i->bytepos = n * sizeof (UChar32);
-#else
- UNUSED(i);
- UNUSED(n);
- no_ICU_lib(interp);
-#endif
+ i->bytepos = n * 4;
}
Modified: branches/string_macros/src/string/encoding/utf16.c
==============================================================================
--- branches/string_macros/src/string/encoding/utf16.c Wed Sep 22 01:21:16 2010 (r49226)
+++ branches/string_macros/src/string/encoding/utf16.c Wed Sep 22 01:21:38 2010 (r49227)
@@ -160,13 +160,9 @@
utf16_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(utf16_to_encoding)
-#if PARROT_HAS_ICU
- UErrorCode err;
- int dest_len;
- UChar *p;
-#endif
- int src_len;
- STRING *result;
+ int src_len, dest_len;
+ Parrot_UInt2 *p;
+ STRING *result;
if (src->encoding == Parrot_utf16_encoding_ptr
|| src->encoding == Parrot_ucs2_encoding_ptr)
@@ -174,56 +170,58 @@
* and downcase functions assume to get an unshared buffer */
return Parrot_str_clone(interp, src);
- result = Parrot_gc_new_string_header(interp, 0);
+ result = Parrot_gc_new_string_header(interp, 0);
+ src_len = STRING_length(src);
- /*
- * TODO adapt string creation functions
- */
- src_len = src->strlen;
if (!src_len) {
result->encoding = Parrot_ucs2_encoding_ptr;
- result->strlen = result->bufused = 0;
return result;
}
-#if PARROT_HAS_ICU
- Parrot_gc_allocate_string_storage(interp, result, sizeof (UChar) * src_len);
- p = (UChar *)result->strstart;
- if (src->encoding == Parrot_latin1_encoding_ptr ||
- src->encoding == Parrot_ascii_encoding_ptr) {
- for (dest_len = 0; dest_len < (int)src->strlen; ++dest_len) {
- p[dest_len] = (UChar)((unsigned char*)src->strstart)[dest_len];
+ Parrot_gc_allocate_string_storage(interp, result, 2 * src_len);
+ p = (Parrot_UInt2 *)result->strstart;
+
+ if (STRING_max_bytes_per_codepoint(src) == 1) {
+ for (dest_len = 0; dest_len < src_len; ++dest_len) {
+ p[dest_len] = (unsigned char)src->strstart[dest_len];
}
}
- else {
- err = U_ZERO_ERROR;
- u_strFromUTF8(p, src_len,
- &dest_len, src->strstart, src->bufused, &err);
+ else if (src->encoding == Parrot_utf8_encoding_ptr) {
+#if PARROT_HAS_ICU
+ UErrorCode err = U_ZERO_ERROR;
+
+ u_strFromUTF8(p, src_len, &dest_len, src->strstart, src->bufused, &err);
+
if (!U_SUCCESS(err)) {
/*
* have to resize - required len in UChars is in dest_len
*/
- result->bufused = dest_len * sizeof (UChar);
- Parrot_gc_reallocate_string_storage(interp, result,
- sizeof (UChar) * dest_len);
- p = (UChar *)result->strstart;
- u_strFromUTF8(p, dest_len,
- &dest_len, src->strstart, src->bufused, &err);
+ result->bufused = 2 * dest_len;
+ Parrot_gc_reallocate_string_storage(interp, result, 2 * dest_len);
+
+ p = (Parrot_UInt2 *)result->strstart;
+ u_strFromUTF8(p, dest_len, &dest_len, src->strstart, src->bufused, &err);
PARROT_ASSERT(U_SUCCESS(err));
}
+#else
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+ "no ICU lib loaded");
+#endif
}
- result->bufused = dest_len * sizeof (UChar);
- result->encoding = Parrot_utf16_encoding_ptr;
- result->strlen = src_len;
+ else {
+ UNIMPL;
+ }
+
+ result->bufused = 2 * dest_len;
+ result->strlen = src_len;
/* downgrade if possible */
- if (dest_len == (int)src->strlen)
+ if (dest_len == src_len)
result->encoding = Parrot_ucs2_encoding_ptr;
+ else
+ result->encoding = Parrot_utf16_encoding_ptr;
+
return result;
-#else
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
- "no ICU lib loaded");
-#endif
}
/*
More information about the parrot-commits mailing list