[svn:parrot] r48565 - in trunk: include/parrot src/io src/pmc src/string src/string/charset src/string/encoding
chromatic at svn.parrot.org
chromatic at svn.parrot.org
Thu Aug 19 05:53:14 UTC 2010
Author: chromatic
Date: Thu Aug 19 05:53:12 2010
New Revision: 48565
URL: https://trac.parrot.org/parrot/changeset/48565
Log:
[str] Revised STRING iterator interface (TT #1456)
These two patches from Nick Wellnhofer improve the algorithmic performance of
STRING iteration especially for variable-width encodings.
Modified:
trunk/include/parrot/encoding.h
trunk/include/parrot/string.h
trunk/include/parrot/string_funcs.h
trunk/src/io/utf8.c
trunk/src/pmc/stringiterator.pmc
trunk/src/string/api.c
trunk/src/string/charset/ascii.c
trunk/src/string/charset/iso-8859-1.c
trunk/src/string/charset/unicode.c
trunk/src/string/encoding/fixed_8.c
trunk/src/string/encoding/ucs2.c
trunk/src/string/encoding/ucs4.c
trunk/src/string/encoding/utf16.c
trunk/src/string/encoding/utf8.c
Modified: trunk/include/parrot/encoding.h
==============================================================================
--- trunk/include/parrot/encoding.h Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/include/parrot/encoding.h Thu Aug 19 05:53:12 2010 (r48565)
@@ -30,8 +30,16 @@
struct string_iterator_t; /* s. parrot/string.h */
-typedef void (*encoding_iter_init_t)(PARROT_INTERP, const STRING *src,
- struct string_iterator_t *);
+typedef UINTVAL (*encoding_iter_get_t)(
+ PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset);
+typedef void (*encoding_iter_skip_t)(
+ PARROT_INTERP, const STRING *str, String_iter *i, INTVAL skip);
+typedef UINTVAL (*encoding_iter_get_and_advance_t)(
+ PARROT_INTERP, const STRING *str, String_iter *i);
+typedef void (*encoding_iter_set_and_advance_t)(
+ PARROT_INTERP, STRING *str, String_iter *i, UINTVAL c);
+typedef void (*encoding_iter_set_position_t)(
+ PARROT_INTERP, const STRING *str, String_iter *i, UINTVAL pos);
struct _encoding {
ARGIN(const char *name);
@@ -44,9 +52,13 @@
encoding_get_bytes_t get_bytes;
encoding_codepoints_t codepoints;
encoding_bytes_t bytes;
- encoding_iter_init_t iter_init;
encoding_find_cclass_t find_cclass;
encoding_hash_t hash;
+ encoding_iter_get_t iter_get;
+ encoding_iter_skip_t iter_skip;
+ encoding_iter_get_and_advance_t iter_get_and_advance;
+ encoding_iter_set_and_advance_t iter_set_and_advance;
+ encoding_iter_set_position_t iter_set_position;
};
typedef struct _encoding ENCODING;
@@ -209,8 +221,6 @@
((src)->encoding)->codepoints((i), (src))
#define ENCODING_BYTES(i, src) \
((src)->encoding)->bytes((i), (src))
-#define ENCODING_ITER_INIT(i, src, iter) \
- ((src)->encoding)->iter_init((i), (src), (iter))
#define ENCODING_FIND_CCLASS(i, src, typetable, flags, pos, end) \
((src)->encoding)->find_cclass((i), (src), (typetable), (flags), (pos), (end))
#define ENCODING_HASH(i, src, seed) \
Modified: trunk/include/parrot/string.h
==============================================================================
--- trunk/include/parrot/string.h Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/include/parrot/string.h Thu Aug 19 05:53:12 2010 (r48565)
@@ -30,14 +30,23 @@
/* String iterator */
typedef struct string_iterator_t {
- const STRING *str;
UINTVAL bytepos;
UINTVAL charpos;
- UINTVAL (*get_and_advance)(PARROT_INTERP, struct string_iterator_t *i);
- void (*set_and_advance)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL c);
- void (*set_position)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL pos);
} String_iter;
+#define STRING_ITER_INIT(i, iter) \
+ (iter)->charpos = (iter)->bytepos = 0
+#define STRING_ITER_GET(i, str, iter, offset) \
+ ((str)->encoding)->iter_get((i), (str), (iter), (offset))
+#define STRING_ITER_SKIP(i, str, iter, skip) \
+ ((str)->encoding)->iter_skip((i), (str), (iter), (skip))
+#define STRING_ITER_GET_AND_ADVANCE(i, str, iter) \
+ ((str)->encoding)->iter_get_and_advance((i), (str), (iter))
+#define STRING_ITER_SET_AND_ADVANCE(i, str, iter, c) \
+ ((str)->encoding)->iter_set_and_advance((i), (str), (iter), (c))
+#define STRING_ITER_SET_POSITION(i, str, iter, pos) \
+ ((str)->encoding)->iter_set_position((i), (str), (iter), (pos))
+
#define STREQ(x, y) (strcmp((x), (y))==0)
#define STRNEQ(x, y) (strcmp((x), (y))!=0)
Modified: trunk/include/parrot/string_funcs.h
==============================================================================
--- trunk/include/parrot/string_funcs.h Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/include/parrot/string_funcs.h Thu Aug 19 05:53:12 2010 (r48565)
@@ -226,6 +226,31 @@
INTVAL Parrot_str_is_null(SHIM_INTERP, ARGIN_NULLOK(const STRING *s));
PARROT_EXPORT
+INTVAL Parrot_str_iter_index(PARROT_INTERP,
+ ARGIN(const STRING *src),
+ ARGMOD(String_iter *start),
+ ARGOUT(String_iter *end),
+ ARGIN(const STRING *search))
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ __attribute__nonnull__(4)
+ __attribute__nonnull__(5)
+ FUNC_MODIFIES(*start)
+ FUNC_MODIFIES(*end);
+
+PARROT_EXPORT
+PARROT_CANNOT_RETURN_NULL
+PARROT_WARN_UNUSED_RESULT
+STRING * Parrot_str_iter_substr(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGIN(const String_iter *l),
+ ARGIN_NULLOK(const String_iter *r))
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
+PARROT_EXPORT
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
STRING* Parrot_str_join(PARROT_INTERP,
@@ -559,6 +584,16 @@
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(s))
#define ASSERT_ARGS_Parrot_str_is_null __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
+#define ASSERT_ARGS_Parrot_str_iter_index __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(src) \
+ , PARROT_ASSERT_ARG(start) \
+ , PARROT_ASSERT_ARG(end) \
+ , PARROT_ASSERT_ARG(search))
+#define ASSERT_ARGS_Parrot_str_iter_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(l))
#define ASSERT_ARGS_Parrot_str_join __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(ar))
Modified: trunk/src/io/utf8.c
==============================================================================
--- trunk/src/io/utf8.c Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/io/utf8.c Thu Aug 19 05:53:12 2010 (r48565)
@@ -57,7 +57,7 @@
s->encoding = Parrot_utf8_encoding_ptr;
/* count chars, verify utf8 */
- Parrot_utf8_encoding_ptr->iter_init(interp, s, &iter);
+ STRING_ITER_INIT(interp, &iter);
while (iter.bytepos < s->bufused) {
if (iter.bytepos + 4 > s->bufused) {
@@ -84,8 +84,6 @@
s->strlen = iter.charpos;
s = Parrot_str_concat(interp, s, s2);
- /* String is updated. Poke into iterator to replace old string */
- iter.str = s;
*buf = s;
len += len2 + 1;
@@ -93,7 +91,7 @@
}
}
ok:
- iter.get_and_advance(interp, &iter);
+ Parrot_utf8_encoding_ptr->iter_get_and_advance(interp, *buf, &iter);
}
s->strlen = iter.charpos;
return len;
Modified: trunk/src/pmc/stringiterator.pmc
==============================================================================
--- trunk/src/pmc/stringiterator.pmc Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/pmc/stringiterator.pmc Thu Aug 19 05:53:12 2010 (r48565)
@@ -27,11 +27,9 @@
/* HEADERIZER END: static */
pmclass StringIterator auto_attrs extends Iterator {
- ATTR PMC *string; /* String to iterate over */
- ATTR INTVAL pos; /* Current position of iterator for forward iterator */
- /* Previous position of iterator for reverse iterator */
- ATTR INTVAL length; /* Length of C<string> */
- ATTR INTVAL reverse; /* Direction of iteration. 1 - for reverse iteration */
+ ATTR STRING *str_val; /* String to iterate over */
+ ATTR String_iter iter; /* String iterator */
+ ATTR INTVAL reverse; /* Direction of iteration. 1 - for reverse iteration */
/*
@@ -43,10 +41,13 @@
*/
VTABLE void init_pmc(PMC *string) {
- SET_ATTR_string(INTERP, SELF, string);
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
+ STRING * const str_val = VTABLE_get_string(INTERP, string);
+
+ SET_ATTR_str_val(INTERP, SELF, str_val);
+ STRING_ITER_INIT(INTERP, iter);
+ SET_ATTR_reverse(INTERP, SELF, ITERATE_FROM_START);
- /* by default, iterate from start */
- SELF.set_integer_native(ITERATE_FROM_START);
PObj_custom_mark_SET(SELF);
}
@@ -61,9 +62,10 @@
*/
VTABLE void mark() {
- PMC *string;
- GET_ATTR_string(INTERP, SELF, string);
- Parrot_gc_mark_PMC_alive(INTERP, string);
+ STRING *str_val;
+
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ Parrot_gc_mark_STRING_alive(INTERP, str_val);
}
/*
@@ -74,15 +76,21 @@
*/
VTABLE PMC* clone() {
- Parrot_StringIterator_attributes * const attrs =
- PARROT_STRINGITERATOR(SELF);
- PMC * const clone =
- Parrot_pmc_new_init(INTERP, enum_class_StringIterator, attrs->string);
- Parrot_StringIterator_attributes * const clone_attrs =
- PARROT_STRINGITERATOR(clone);
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
+ PMC *clone, *str_pmc;
+ String_iter *clone_iter;
+ STRING *str_val;
+ INTVAL reverse;
+
+ str_pmc = Parrot_pmc_new(INTERP, enum_class_String);
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ VTABLE_set_string_native(INTERP, str_pmc, str_val);
+ clone = Parrot_pmc_new_init(INTERP, enum_class_StringIterator, str_pmc);
+ clone_iter = &PARROT_STRINGITERATOR(clone)->iter;
+ *clone_iter = *iter;
+ GET_ATTR_reverse(INTERP, SELF, reverse);
+ SET_ATTR_reverse(INTERP, clone, reverse);
- clone_attrs->pos = attrs->pos;
- clone_attrs->reverse = attrs->reverse;
return clone;
}
@@ -111,12 +119,17 @@
*/
VTABLE INTVAL elements() {
- Parrot_StringIterator_attributes * const attrs =
- PARROT_STRINGITERATOR(SELF);
- if (attrs->reverse)
- return attrs->pos;
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
+ STRING *str_val;
+ INTVAL reverse;
+
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ GET_ATTR_reverse(INTERP, SELF, reverse);
+
+ if (reverse)
+ return iter->charpos;
else
- return attrs->length - attrs->pos;
+ return str_val->strlen - iter->charpos;
}
VTABLE INTVAL get_integer() {
@@ -137,20 +150,19 @@
*/
VTABLE void set_integer_native(INTVAL value) {
- Parrot_StringIterator_attributes * const attrs =
- PARROT_STRINGITERATOR(SELF);
- switch (value) {
- case ITERATE_FROM_START:
- attrs->reverse = 0;
- attrs->pos = 0;
- attrs->length = VTABLE_elements(INTERP, attrs->string);
- break;
- case ITERATE_FROM_END:
- attrs->reverse = 1;
- attrs->pos = attrs->length
- = VTABLE_elements(INTERP, attrs->string);
- break;
- default:
+ STRING *str_val;
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
+
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ if (value == ITERATE_FROM_START) {
+ SET_ATTR_reverse(INTERP, SELF, 0);
+ STRING_ITER_SET_POSITION(INTERP, str_val, iter, 0);
+ }
+ else if (value == ITERATE_FROM_END) {
+ SET_ATTR_reverse(INTERP, SELF, 1);
+ STRING_ITER_SET_POSITION(INTERP, str_val, iter, str_val->strlen);
+ }
+ else {
Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_INVALID_OPERATION,
"Wrong direction for StringIterator");
}
@@ -167,9 +179,13 @@
*/
VTABLE PMC *get_pmc() {
- PMC *string;
- GET_ATTR_string(INTERP, SELF, string);
- return string ? string : PMCNULL;
+ PMC * const string = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(
+ interp, enum_class_String));
+ STRING *str_val;
+
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ VTABLE_set_string_native(interp, string, str_val);
+ return string;
}
/*
@@ -182,17 +198,20 @@
*/
VTABLE PMC *shift_pmc() {
- Parrot_StringIterator_attributes * const attrs =
- PARROT_STRINGITERATOR(SELF);
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
PMC *ret;
+ STRING *str_val, *substr;
+ const String_iter old_iter = *iter;
- if (attrs->pos >= attrs->length)
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ if (iter->charpos >= str_val->strlen)
Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
"StopIteration");
ret = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String));
- VTABLE_set_string_native(INTERP, ret,
- VTABLE_get_string_keyed_int(INTERP, attrs->string, attrs->pos++));
+ STRING_ITER_SKIP(INTERP, str_val, iter, 1);
+ substr = Parrot_str_iter_substr(INTERP, str_val, &old_iter, iter);
+ VTABLE_set_string_native(INTERP, ret, substr);
return ret;
}
@@ -206,14 +225,17 @@
*/
VTABLE STRING *shift_string() {
- Parrot_StringIterator_attributes * const attrs =
- PARROT_STRINGITERATOR(SELF);
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
+ STRING *str_val;
+ const String_iter old_iter = *iter;
- if (attrs->pos >= attrs->length)
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ if (iter->charpos >= str_val->strlen)
Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
"StopIteration");
- return VTABLE_get_string_keyed_int(INTERP, attrs->string, attrs->pos++);
+ STRING_ITER_SKIP(INTERP, str_val, iter, 1);
+ return Parrot_str_iter_substr(INTERP, str_val, &old_iter, iter);
}
/*
@@ -226,14 +248,15 @@
*/
VTABLE INTVAL shift_integer() {
- Parrot_StringIterator_attributes * const attrs =
- PARROT_STRINGITERATOR(SELF);
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
+ STRING *str_val;
- if (attrs->pos >= attrs->length)
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ if (iter->charpos >= str_val->strlen)
Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
"StopIteration");
- return VTABLE_get_integer_keyed_int(INTERP, attrs->string, attrs->pos++);
+ return STRING_ITER_GET_AND_ADVANCE(INTERP, str_val, iter);
}
/*
@@ -246,17 +269,21 @@
*/
VTABLE PMC *pop_pmc() {
- Parrot_StringIterator_attributes * const attrs =
- PARROT_STRINGITERATOR(SELF);
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
+ STRING *str_val, *substr;
PMC *ret;
+ const String_iter old_iter = *iter;
- if (!STATICSELF.get_bool())
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ /* Shouldn't this test be (iter->charpos <= 0) ? */
+ if (SELF.elements() <= 0)
Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
"StopIteration");
ret = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String));
- VTABLE_set_string_native(INTERP, ret,
- VTABLE_get_string_keyed_int(INTERP, attrs->string, --attrs->pos));
+ STRING_ITER_SKIP(INTERP, str_val, iter, -1);
+ substr = Parrot_str_iter_substr(INTERP, str_val, iter, &old_iter);
+ VTABLE_set_string_native(INTERP, ret, substr);
return ret;
}
@@ -270,14 +297,18 @@
*/
VTABLE STRING *pop_string() {
- Parrot_StringIterator_attributes * const attrs =
- PARROT_STRINGITERATOR(SELF);
-
- if (!STATICSELF.get_bool())
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
+ STRING *str_val;
+ const String_iter old_iter = *iter;
+
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ /* Shouldn't this test be (iter->charpos <= 0) ? */
+ if (SELF.elements() <= 0)
Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
"StopIteration");
- return VTABLE_get_string_keyed_int(INTERP, attrs->string, --attrs->pos);
+ STRING_ITER_SKIP(INTERP, str_val, iter, -1);
+ return Parrot_str_iter_substr(INTERP, str_val, iter, &old_iter);
}
/*
@@ -290,14 +321,17 @@
*/
VTABLE INTVAL pop_integer() {
- Parrot_StringIterator_attributes * const attrs =
- PARROT_STRINGITERATOR(SELF);
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
+ STRING *str_val;
- if (!STATICSELF.get_bool())
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ /* Shouldn't this test be (iter->charpos <= 0) ? */
+ if (SELF.elements() <= 0)
Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
"StopIteration");
- return VTABLE_get_integer_keyed_int(INTERP, attrs->string, --attrs->pos);
+ STRING_ITER_SKIP(INTERP, str_val, iter, -1);
+ return STRING_ITER_GET(INTERP, str_val, iter, 0);
}
/*
@@ -311,8 +345,16 @@
*/
VTABLE INTVAL get_integer_keyed_int(INTVAL idx) {
- return VTABLE_get_integer_keyed_int(INTERP, STATICSELF.get_pmc(),
- PARROT_STRINGITERATOR(SELF)->pos + idx);
+ String_iter * const iter = &PARROT_STRINGITERATOR(SELF)->iter;
+ STRING *str_val;
+ const UINTVAL offset = iter->charpos + idx;
+
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ if (offset >= str_val->strlen)
+ Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
+ "StopIteration");
+
+ return STRING_ITER_GET(INTERP, str_val, iter, idx);
}
/*
@@ -326,8 +368,22 @@
*/
VTABLE STRING *get_string_keyed_int(INTVAL idx) {
- return VTABLE_get_string_keyed_int(INTERP, STATICSELF.get_pmc(),
- PARROT_STRINGITERATOR(SELF)->pos + idx);
+ String_iter iter = PARROT_STRINGITERATOR(SELF)->iter;
+ String_iter next_iter;
+ STRING *str_val;
+ const UINTVAL offset = iter.charpos + idx;
+
+ GET_ATTR_str_val(INTERP, SELF, str_val);
+ if (offset >= str_val->strlen)
+ Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
+ "StopIteration");
+
+ if (idx != 0)
+ STRING_ITER_SKIP(INTERP, str_val, &iter, idx);
+ next_iter = iter;
+ STRING_ITER_SKIP(INTERP, str_val, &next_iter, 1);
+
+ return Parrot_str_iter_substr(INTERP, str_val, &iter, &next_iter);
}
}
Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/string/api.c Thu Aug 19 05:53:12 2010 (r48565)
@@ -1104,6 +1104,104 @@
return CHARSET_GET_CODEPOINTS(interp, src, true_offset, true_length);
}
+/*
+
+=item C<STRING * Parrot_str_iter_substr(PARROT_INTERP, const STRING *str, const
+String_iter *l, const String_iter *r)>
+
+Returns the substring between iterators C<l> and C<r>.
+
+=cut
+
+*/
+
+PARROT_EXPORT
+PARROT_CANNOT_RETURN_NULL
+PARROT_WARN_UNUSED_RESULT
+STRING *
+Parrot_str_iter_substr(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGIN(const String_iter *l), ARGIN_NULLOK(const String_iter *r))
+{
+ ASSERT_ARGS(Parrot_str_iter_substr)
+ STRING *dest = Parrot_str_copy(interp, str);
+
+ dest->strstart = (char *)dest->strstart + l->bytepos;
+
+ if (r == NULL) {
+ dest->bufused = str->bufused - l->bytepos;
+ dest->strlen = str->strlen - l->charpos;
+ }
+ else {
+ dest->bufused = r->bytepos - l->bytepos;
+ dest->strlen = r->charpos - l->charpos;
+ }
+
+ dest->hashval = 0;
+
+ return dest;
+}
+
+/*
+
+=item C<INTVAL Parrot_str_iter_index(PARROT_INTERP, const STRING *src,
+String_iter *start, String_iter *end, const STRING *search)>
+
+Find the next occurrence of STRING C<search> in STRING C<src> starting at
+String_iter C<start>. If C<search> is found C<start> is modified to mark the
+beginning of C<search> and String_iter C<end> is set to the character after
+C<search> in C<src>. Returns the character position where C<search> was found
+or -1 if it wasn't found.
+
+=cut
+
+*/
+
+PARROT_EXPORT
+INTVAL
+Parrot_str_iter_index(PARROT_INTERP,
+ ARGIN(const STRING *src),
+ ARGMOD(String_iter *start), ARGOUT(String_iter *end),
+ ARGIN(const STRING *search))
+{
+ ASSERT_ARGS(Parrot_str_iter_index)
+ String_iter search_iter, search_start, next_start;
+ const UINTVAL len = search->strlen;
+ UINTVAL c0;
+
+ if (len == 0) {
+ *end = *start;
+ return start->charpos;
+ }
+
+ STRING_ITER_INIT(interp, &search_iter);
+ c0 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter);
+ search_start = search_iter;
+ next_start = *start;
+
+ while (start->charpos + len <= src->strlen) {
+ UINTVAL c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, &next_start);
+
+ if (c1 == c0) {
+ UINTVAL c2;
+ *end = next_start;
+
+ do {
+ if (search_iter.charpos >= len)
+ return start->charpos;
+ c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, end);
+ c2 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter);
+ } while (c1 == c2);
+
+ search_iter = search_start;
+ }
+
+ *start = next_start;
+ }
+
+ return -1;
+}
+
/*
@@ -1145,7 +1243,7 @@
UINTVAL true_offset = (UINTVAL)offset;
UINTVAL true_length = (UINTVAL)length;
- UINTVAL start_byte, end_byte;
+ UINTVAL start_byte, end_byte, start_char, end_char;
INTVAL buf_size;
if (STRING_IS_NULL(src)) {
@@ -1181,13 +1279,15 @@
}
/* get byte position of the part that will be replaced */
- ENCODING_ITER_INIT(interp, src, &iter);
+ STRING_ITER_INIT(interp, &iter);
- iter.set_position(interp, &iter, true_offset);
+ STRING_ITER_SET_POSITION(interp, src, &iter, true_offset);
start_byte = iter.bytepos;
+ start_char = iter.charpos;
- iter.set_position(interp, &iter, true_offset + true_length);
+ STRING_ITER_SKIP(interp, src, &iter, true_length);
end_byte = iter.bytepos;
+ end_char = iter.charpos;
/* not possible.... */
if (end_byte < start_byte)
@@ -1226,7 +1326,7 @@
(char *)src->strstart + end_byte,
src->bufused - end_byte);
- dest->strlen = CHARSET_CODEPOINTS(interp, dest);
+ dest->strlen = src->strlen - (end_char - start_char) + rep->strlen;
dest->hashval = 0;
return dest;
@@ -1252,7 +1352,7 @@
ASSERT_ARGS(Parrot_str_chopn)
STRING * const chopped = Parrot_str_copy(interp, s);
- UINTVAL new_length, uchar_size;
+ UINTVAL new_length;
if (n < 0) {
new_length = -n;
@@ -1273,23 +1373,23 @@
return chopped;
}
- uchar_size = chopped->bufused / chopped->strlen;
- chopped->strlen = new_length;
-
if (chopped->encoding == Parrot_fixed_8_encoding_ptr) {
chopped->bufused = new_length;
}
else if (chopped->encoding == Parrot_ucs2_encoding_ptr) {
+ const UINTVAL uchar_size = chopped->bufused / chopped->strlen;
chopped->bufused = new_length * uchar_size;
}
else {
String_iter iter;
- ENCODING_ITER_INIT(interp, s, &iter);
- iter.set_position(interp, &iter, new_length);
+ STRING_ITER_INIT(interp, &iter);
+ STRING_ITER_SET_POSITION(interp, s, &iter, new_length);
chopped->bufused = iter.bytepos;
}
+ chopped->strlen = new_length;
+
return chopped;
}
@@ -1860,13 +1960,12 @@
int sign = 1;
UINTVAL i = 0;
String_iter iter;
- UINTVAL offs;
number_parse_state state = parse_start;
- ENCODING_ITER_INIT(interp, s, &iter);
+ STRING_ITER_INIT(interp, &iter);
- for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {
- const UINTVAL c = iter.get_and_advance(interp, &iter);
+ while (state != parse_end && iter.charpos < s->strlen) {
+ const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter);
/* Check for overflow */
if (c > 255)
break;
@@ -1956,17 +2055,16 @@
int d_length = 0;
int check_nan = 0; /* Check for NaN and Inf after main loop */
String_iter iter;
- UINTVAL offs;
number_parse_state state = parse_start;
if (STRING_IS_NULL(s))
return 0.0;
- ENCODING_ITER_INIT(interp, s, &iter);
+ STRING_ITER_INIT(interp, &iter);
- /* Handcrafter FSM to read float value */
- for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {
- const UINTVAL c = iter.get_and_advance(interp, &iter);
+ /* Handcrafted FSM to read float value */
+ while (state != parse_end && iter.charpos < s->strlen) {
+ const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter);
/* Check for overflow */
if (c > 255)
break;
@@ -2415,11 +2513,11 @@
Parrot_fixed_8_encoding_ptr, Parrot_ascii_charset_ptr, 0);
/* more work TODO */
- ENCODING_ITER_INIT(interp, src, &iter);
+ STRING_ITER_INIT(interp, &iter);
dp = (unsigned char *)result->strstart;
for (i = 0; len > 0; --len) {
- UINTVAL c = iter.get_and_advance(interp, &iter);
+ UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
if (c < 0x7f) {
/* process ASCII chars */
if (i >= charlen - 2) {
@@ -2559,17 +2657,17 @@
Parrot_gc_allocate_string_storage(interp, result, reserved);
result->bufused = reserved;
- src->encoding->iter_init(interp, src, &itersrc);
- encoding->iter_init(interp, result, &iterdest);
+ STRING_ITER_INIT(interp, &itersrc);
+ STRING_ITER_INIT(interp, &iterdest);
while (itersrc.bytepos < srclen) {
- INTVAL c = itersrc.get_and_advance(interp, &itersrc);
+ INTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
INTVAL next;
do {
pending = 0;
next = c;
if (c == '\\') {
- c = itersrc.get_and_advance(interp, &itersrc);
+ c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
switch (c) {
/* Common one char sequences */
case 'a': next = '\a'; break;
@@ -2582,7 +2680,7 @@
case 'e': next = '\e'; break;
/* Escape character */
case 'c':
- c = itersrc.get_and_advance(interp, &itersrc);
+ c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
/* This assumes ascii-alike encoding */
if (c < 'A' || c > 'Z')
throw_illegal_escape(interp);
@@ -2590,11 +2688,11 @@
break;
case 'x':
digcount = 0;
- c = itersrc.get_and_advance(interp, &itersrc);
+ c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
if (c == '{') {
/* \x{h..h} 1..8 hex digits */
while (itersrc.bytepos < srclen) {
- c = itersrc.get_and_advance(interp, &itersrc);
+ c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
if (c == '}')
break;
if (!isxdigit(c))
@@ -2618,7 +2716,7 @@
pending = 0;
break;
}
- c = itersrc.get_and_advance(interp, &itersrc);
+ c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
}
}
if (digcount == 0)
@@ -2629,7 +2727,7 @@
case 'u':
/* \uhhhh 4 hex digits */
for (digcount = 0; digcount < 4; ++digcount) {
- c = itersrc.get_and_advance(interp, &itersrc);
+ c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
if (!isxdigit(c))
throw_illegal_escape(interp);
digbuf[digcount] = c;
@@ -2640,7 +2738,7 @@
case 'U':
/* \Uhhhhhhhh 8 hex digits */
for (digcount = 0; digcount < 8; ++digcount) {
- c = itersrc.get_and_advance(interp, &itersrc);
+ c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
if (!isxdigit(c))
throw_illegal_escape(interp);
digbuf[digcount] = c;
@@ -2653,7 +2751,7 @@
/* \ooo 1..3 oct digits */
digbuf[0] = c;
for (digcount = 1; digcount < 3; ++digcount) {
- c = itersrc.get_and_advance(interp, &itersrc);
+ c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
if (c < '0' || c > '7')
break;
digbuf[digcount] = c;
@@ -2667,7 +2765,7 @@
next = c;
}
}
- iterdest.set_and_advance(interp, &iterdest, next);
+ STRING_ITER_SET_AND_ADVANCE(interp, result, &iterdest, next);
} while (pending);
}
result->bufused = iterdest.bytepos;
@@ -2748,7 +2846,7 @@
encoding = result->encoding;
}
- encoding->iter_init(interp, result, &iter);
+ STRING_ITER_INIT(interp, &iter);
for (offs = d = 0; offs < clength; ++offs) {
r = (Parrot_UInt4)((unsigned char *)result->strstart)[offs];
@@ -2771,7 +2869,7 @@
}
PARROT_ASSERT(d < offs);
- iter.set_and_advance(interp, &iter, r);
+ encoding->iter_set_and_advance(interp, result, &iter, r);
++d;
}
@@ -3269,8 +3367,10 @@
ARGIN_NULLOK(const STRING *delim), ARGIN_NULLOK(STRING *str))
{
ASSERT_ARGS(Parrot_str_split)
- PMC *res;
- INTVAL slen, dlen, ps, pe;
+ PMC *res;
+ STRING *tstr;
+ UINTVAL slen, dlen;
+ String_iter iter;
if (STRING_IS_NULL(delim) || STRING_IS_NULL(str))
return PMCNULL;
@@ -3282,44 +3382,38 @@
if (!slen)
return res;
+ STRING_ITER_INIT(interp, &iter);
dlen = Parrot_str_length(interp, delim);
if (dlen == 0) {
- int i;
VTABLE_set_integer_native(interp, res, slen);
- for (i = 0; i < slen; ++i) {
- STRING * const p = Parrot_str_substr(interp, str, i, 1);
- VTABLE_set_string_keyed_int(interp, res, i, p);
- }
-
- return res;
- }
+ do {
+ const String_iter old_iter = iter;
- pe = Parrot_str_find_index(interp, str, delim, 0);
+ STRING_ITER_SKIP(interp, str, &iter, 1);
+ tstr = Parrot_str_iter_substr(interp, str, &old_iter, &iter);
+ VTABLE_set_string_keyed_int(interp, res, old_iter.charpos, tstr);
+ } while (iter.charpos < slen);
- if (pe < 0) {
- VTABLE_push_string(interp, res, str);
return res;
}
- ps = 0;
-
- while (ps <= slen) {
- const int pl = pe - ps;
- STRING * const tstr = Parrot_str_substr(interp, str, ps, pl);
-
- VTABLE_push_string(interp, res, tstr);
- ps = pe + Parrot_str_length(interp, delim);
+ do {
+ String_iter start, end;
+ INTVAL pos;
- if (ps > slen)
+ start = iter;
+ if (Parrot_str_iter_index(interp, str, &start, &end, delim) < 0)
break;
- pe = Parrot_str_find_index(interp, str, delim, ps);
+ tstr = Parrot_str_iter_substr(interp, str, &iter, &start);
+ VTABLE_push_string(interp, res, tstr);
+ iter = end;
+ } while (iter.charpos < slen);
- if (pe < 0)
- pe = slen;
- }
+ tstr = Parrot_str_iter_substr(interp, str, &iter, NULL);
+ VTABLE_push_string(interp, res, tstr);
return res;
}
Modified: trunk/src/string/charset/ascii.c
==============================================================================
--- trunk/src/string/charset/ascii.c Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/string/charset/ascii.c Thu Aug 19 05:53:12 2010 (r48565)
@@ -201,7 +201,6 @@
{
ASSERT_ARGS(to_ascii)
String_iter iter;
- UINTVAL offs;
unsigned char *p;
const UINTVAL len = src->strlen;
@@ -209,9 +208,9 @@
STRING * const dest = Parrot_str_clone(interp, src);
p = (unsigned char *)dest->strstart;
- ENCODING_ITER_INIT(interp, src, &iter);
- for (offs = 0; offs < len; ++offs) {
- const UINTVAL c = iter.get_and_advance(interp, &iter);
+ STRING_ITER_INIT(interp, &iter);
+ while (iter.charpos < len) {
+ const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
if (c >= 128)
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
"can't convert unicode string to ascii");
@@ -493,11 +492,10 @@
return ret_val < 0 ? -1 : 1;
}
else {
- UINTVAL offs;
- ENCODING_ITER_INIT(interp, rhs, &iter);
- for (offs = 0; offs < min_len; ++offs) {
- const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, offs);
- const UINTVAL cr = iter.get_and_advance(interp, &iter);
+ STRING_ITER_INIT(interp, &iter);
+ while (iter.charpos < min_len) {
+ const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, iter.charpos);
+ const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &iter);
if (cl != cr)
return cl < cr ? -1 : 1;
}
@@ -531,35 +529,12 @@
UINTVAL offs)
{
ASSERT_ARGS(mixed_cs_index)
+ String_iter start, end;
- if (search->strlen <= src->strlen) {
- String_iter src_iter, search_iter;
- const UINTVAL maxpos = src->strlen - search->strlen + 1;
- const UINTVAL cfirst = Parrot_str_indexed(interp, search, 0);
-
- ENCODING_ITER_INIT(interp, src, &src_iter);
- src_iter.set_position(interp, &src_iter, offs);
- ENCODING_ITER_INIT(interp, search, &search_iter);
-
- while (src_iter.charpos < maxpos) {
- if (cfirst == src_iter.get_and_advance(interp, &src_iter)) {
- const INTVAL next_pos = src_iter.charpos;
- const INTVAL next_byte = src_iter.bytepos;
- UINTVAL len;
- search_iter.set_position(interp, &search_iter, 1);
- for (len = search->strlen - 1; len; --len) {
- if ((src_iter.get_and_advance(interp, &src_iter)) !=
- (search_iter.get_and_advance(interp, &search_iter)))
- break;
- }
- if (len == 0)
- return next_pos - 1;
- src_iter.charpos = next_pos;
- src_iter.bytepos = next_byte;
- }
- }
- }
- return -1;
+ STRING_ITER_INIT(interp, &start);
+ STRING_ITER_SET_POSITION(interp, src, &start, offs);
+
+ return Parrot_str_iter_index(interp, src, &start, &end, search);
}
/*
@@ -638,13 +613,12 @@
validate(PARROT_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(validate)
- INTVAL offset;
String_iter iter;
const INTVAL length = Parrot_str_length(interp, src);
- ENCODING_ITER_INIT(interp, src, &iter);
- for (offset = 0; offset < length; ++offset) {
- const UINTVAL codepoint = iter.get_and_advance(interp, &iter);
+ STRING_ITER_INIT(interp, &iter);
+ while (iter.charpos < length) {
+ const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
if (codepoint >= 0x80)
return 0;
}
Modified: trunk/src/string/charset/iso-8859-1.c
==============================================================================
--- trunk/src/string/charset/iso-8859-1.c Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/string/charset/iso-8859-1.c Thu Aug 19 05:53:12 2010 (r48565)
@@ -178,24 +178,24 @@
to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(to_iso_8859_1)
- UINTVAL offs, src_len;
+ UINTVAL src_len;
String_iter iter;
/* iso-8859-1 is never bigger then source */
STRING * dest = Parrot_str_clone(interp, src);
- ENCODING_ITER_INIT(interp, src, &iter);
+ STRING_ITER_INIT(interp, &iter);
src_len = src->strlen;
dest->bufused = src_len;
- dest->charset = Parrot_iso_8859_1_charset_ptr;
- dest->encoding = Parrot_fixed_8_encoding_ptr;
- for (offs = 0; offs < src_len; ++offs) {
- const UINTVAL c = iter.get_and_advance(interp, &iter);
+ while (iter.charpos < src_len) {
+ const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
if (c >= 0x100)
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
"lossy conversion to iso-8559-1");
- ENCODING_SET_BYTE(interp, dest, offs, c);
+ Parrot_fixed_8_encoding_ptr->set_byte(interp, dest, iter.charpos - 1, c);
}
+ dest->charset = Parrot_iso_8859_1_charset_ptr;
+ dest->encoding = Parrot_fixed_8_encoding_ptr;
return dest;
}
@@ -221,18 +221,18 @@
dest->charset = Parrot_unicode_charset_ptr;
dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest);
Parrot_gc_reallocate_string_storage(interp, dest, src->strlen);
- ENCODING_ITER_INIT(interp, dest, &iter);
- for (offs = 0; offs < src->strlen; ++offs) {
- const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);
+ STRING_ITER_INIT(interp, &iter);
+ while (iter.charpos < src->strlen) {
+ const UINTVAL c = ENCODING_GET_BYTE(interp, src, iter.charpos);
if (iter.bytepos >= Buffer_buflen(dest) - 4) {
- UINTVAL need = (UINTVAL)((src->strlen - offs) * 1.5);
+ UINTVAL need = (UINTVAL)((src->strlen - iter.charpos) * 1.5);
if (need < 16)
need = 16;
Parrot_gc_reallocate_string_storage(interp, dest,
Buffer_buflen(dest) + need);
}
- iter.set_and_advance(interp, &iter, c);
+ STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, c);
}
dest->bufused = iter.bytepos;
dest->strlen = iter.charpos;
Modified: trunk/src/string/charset/unicode.c
==============================================================================
--- trunk/src/string/charset/unicode.c Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/string/charset/unicode.c Thu Aug 19 05:53:12 2010 (r48565)
@@ -651,20 +651,20 @@
{
ASSERT_ARGS(compare)
String_iter l_iter, r_iter;
- UINTVAL offs, cl, cr, min_len, l_len, r_len;
+ UINTVAL min_len, l_len, r_len;
/* TODO make optimized equal - strings are equal length then already */
- ENCODING_ITER_INIT(interp, lhs, &l_iter);
- ENCODING_ITER_INIT(interp, rhs, &r_iter);
+ STRING_ITER_INIT(interp, &l_iter);
+ STRING_ITER_INIT(interp, &r_iter);
l_len = lhs->strlen;
r_len = rhs->strlen;
min_len = l_len > r_len ? r_len : l_len;
- for (offs = 0; offs < min_len; ++offs) {
- cl = l_iter.get_and_advance(interp, &l_iter);
- cr = r_iter.get_and_advance(interp, &r_iter);
+ while (l_iter.charpos < min_len) {
+ const UINTVAL cl = STRING_ITER_GET_AND_ADVANCE(interp, lhs, &l_iter);
+ const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &r_iter);
if (cl != cr)
return cl < cr ? -1 : 1;
@@ -716,13 +716,12 @@
validate(PARROT_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(validate)
- INTVAL offset;
String_iter iter;
const INTVAL length = Parrot_str_length(interp, src);
- ENCODING_ITER_INIT(interp, src, &iter);
- for (offset = 0; offset < length; ++offset) {
- const UINTVAL codepoint = iter.get_and_advance(interp, &iter);
+ STRING_ITER_INIT(interp, &iter);
+ while (iter.charpos < length) {
+ const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
/* Check for Unicode non-characters */
if (codepoint >= 0xfdd0
&& (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe)
@@ -877,24 +876,22 @@
ASSERT_ARGS(find_cclass)
String_iter iter;
UINTVAL codepoint;
- UINTVAL pos = offset;
UINTVAL end = offset + count;
- ENCODING_ITER_INIT(interp, src, &iter);
-
- iter.set_position(interp, &iter, pos);
+ STRING_ITER_INIT(interp, &iter);
+ STRING_ITER_SET_POSITION(interp, src, &iter, offset);
end = src->strlen < end ? src->strlen : end;
- for (; pos < end; ++pos) {
- codepoint = iter.get_and_advance(interp, &iter);
+ while (iter.charpos < end) {
+ codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
if (codepoint >= 256) {
if (u_iscclass(interp, codepoint, flags))
- return pos;
+ return iter.charpos - 1;
}
else {
if (Parrot_iso_8859_1_typetable[codepoint] & flags)
- return pos;
+ return iter.charpos - 1;
}
}
@@ -920,37 +917,36 @@
ASSERT_ARGS(find_not_cclass)
String_iter iter;
UINTVAL codepoint;
- UINTVAL pos = offset;
UINTVAL end = offset + count;
int bit;
- if (pos > src->strlen) {
+ if (offset > src->strlen) {
/* XXX: Throw in this case? */
return offset + count;
}
- ENCODING_ITER_INIT(interp, src, &iter);
+ STRING_ITER_INIT(interp, &iter);
- if (pos)
- iter.set_position(interp, &iter, pos);
+ if (offset)
+ STRING_ITER_SET_POSITION(interp, src, &iter, offset);
end = src->strlen < end ? src->strlen : end;
if (flags == enum_cclass_any)
return end;
- for (; pos < end; ++pos) {
- codepoint = iter.get_and_advance(interp, &iter);
+ while (iter.charpos < end) {
+ codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
if (codepoint >= 256) {
for (bit = enum_cclass_uppercase;
bit <= enum_cclass_word ; bit <<= 1) {
if ((bit & flags) && !u_iscclass(interp, codepoint, bit))
- return pos;
+ return iter.charpos - 1;
}
}
else {
if (!(Parrot_iso_8859_1_typetable[codepoint] & flags))
- return pos;
+ return iter.charpos - 1;
}
}
@@ -978,8 +974,8 @@
dest->strlen = 1;
- ENCODING_ITER_INIT(interp, dest, &iter);
- iter.set_and_advance(interp, &iter, codepoint);
+ STRING_ITER_INIT(interp, &iter);
+ STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, codepoint);
dest->bufused = iter.bytepos;
return dest;
@@ -1002,13 +998,12 @@
{
ASSERT_ARGS(compute_hash)
String_iter iter;
- UINTVAL offs;
size_t hashval = seed;
- ENCODING_ITER_INIT(interp, src, &iter);
+ STRING_ITER_INIT(interp, &iter);
- for (offs = 0; offs < src->strlen; ++offs) {
- const UINTVAL c = iter.get_and_advance(interp, &iter);
+ while (iter.charpos < src->strlen) {
+ const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
hashval += hashval << 5;
hashval += c;
}
Modified: trunk/src/string/encoding/fixed_8.c
==============================================================================
--- trunk/src/string/encoding/fixed_8.c Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/string/encoding/fixed_8.c Thu Aug 19 05:53:12 2010 (r48565)
@@ -41,22 +41,46 @@
__attribute__nonnull__(2)
__attribute__nonnull__(3);
-static UINTVAL fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter))
+static UINTVAL fixed8_iter_get(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGIN(const String_iter *iter),
+ INTVAL offset)
__attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
+static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGMOD(String_iter *iter))
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3)
FUNC_MODIFIES(*iter);
-static void fixed8_set_next(PARROT_INTERP,
+static void fixed8_iter_set_and_advance(PARROT_INTERP,
+ ARGMOD(STRING *str),
ARGMOD(String_iter *iter),
UINTVAL c)
__attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*str)
FUNC_MODIFIES(*iter);
-static void fixed8_set_position(SHIM_INTERP,
+static void fixed8_iter_set_position(SHIM_INTERP,
+ ARGIN(const STRING *str),
ARGMOD(String_iter *iter),
UINTVAL pos)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*iter);
+
+static void fixed8_iter_skip(SHIM_INTERP,
+ ARGIN(const STRING *str),
+ ARGMOD(String_iter *iter),
+ INTVAL skip)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3)
FUNC_MODIFIES(*iter);
static size_t fixed_8_hash(SHIM_INTERP,
@@ -95,13 +119,6 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-static void iter_init(SHIM_INTERP,
- ARGIN(const STRING *src),
- ARGOUT(String_iter *iter))
- __attribute__nonnull__(2)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*iter);
-
static void set_byte(PARROT_INTERP,
ARGIN(const STRING *src),
UINTVAL offset,
@@ -122,14 +139,24 @@
#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(s) \
, PARROT_ASSERT_ARG(typetable))
-#define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+#define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(iter))
+#define ASSERT_ARGS_fixed8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_set_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+#define ASSERT_ARGS_fixed8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(iter))
+#define ASSERT_ARGS_fixed8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(iter))
+#define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(iter))
#define ASSERT_ARGS_fixed_8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(s))
#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
@@ -143,9 +170,6 @@
#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(src) \
- , PARROT_ASSERT_ARG(iter))
#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
@@ -375,85 +399,105 @@
/*
-=item C<static UINTVAL fixed8_get_next(PARROT_INTERP, String_iter *iter)>
+=item C<static UINTVAL fixed8_iter_get(PARROT_INTERP, const STRING *str, const
+String_iter *iter, INTVAL offset)>
-Moves the string iterator C<i> to the next codepoint.
+Get the character at C<iter> plus C<offset>.
=cut
*/
static UINTVAL
-fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter))
+fixed8_iter_get(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset)
{
- ASSERT_ARGS(fixed8_get_next)
- const UINTVAL c = get_byte(interp, iter->str, iter->charpos++);
- ++iter->bytepos;
- return c;
+ ASSERT_ARGS(fixed8_iter_get)
+ return get_byte(interp, str, iter->charpos + offset);
}
/*
-=item C<static void fixed8_set_next(PARROT_INTERP, String_iter *iter, UINTVAL
-c)>
+=item C<static void fixed8_iter_skip(PARROT_INTERP, const STRING *str,
+String_iter *iter, INTVAL skip)>
-With the string iterator C<i>, appends the codepoint C<c> and advances to the
-next position in the string.
+Moves the string iterator C<iter> by C<skip> characters.
=cut
*/
static void
-fixed8_set_next(PARROT_INTERP, ARGMOD(String_iter *iter), UINTVAL c)
+fixed8_iter_skip(SHIM_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip)
{
- ASSERT_ARGS(fixed8_set_next)
- set_byte(interp, iter->str, iter->charpos++, c);
- ++iter->bytepos;
+ ASSERT_ARGS(fixed8_iter_skip)
+ iter->bytepos += skip;
+ iter->charpos += skip;
+ PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str));
}
/*
-=item C<static void fixed8_set_position(PARROT_INTERP, String_iter *iter,
-UINTVAL pos)>
+=item C<static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, const STRING
+*str, String_iter *iter)>
-Moves the string iterator C<i> to the position C<n> in the string.
+Returns the codepoint at the iterator C<iter> and advances C<iter> past it.
=cut
*/
-static void
-fixed8_set_position(SHIM_INTERP, ARGMOD(String_iter *iter), UINTVAL pos)
+static UINTVAL
+fixed8_iter_get_and_advance(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *iter))
{
- ASSERT_ARGS(fixed8_set_position)
- iter->bytepos = iter->charpos = pos;
- PARROT_ASSERT(pos <= Buffer_buflen(iter->str));
+ ASSERT_ARGS(fixed8_iter_get_and_advance)
+ const UINTVAL c = get_byte(interp, str, iter->charpos++);
+ iter->bytepos++;
+ return c;
}
-
/*
-=item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter
-*iter)>
+=item C<static void fixed8_iter_set_and_advance(PARROT_INTERP, STRING *str,
+String_iter *iter, UINTVAL c)>
-Initializes for string C<src> the string iterator C<iter>.
+With the string iterator C<iter>, appends the codepoint C<c> and advances to
+the next position in the string.
=cut
*/
static void
-iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))
+fixed8_iter_set_and_advance(PARROT_INTERP,
+ ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c)
{
- ASSERT_ARGS(iter_init)
- iter->str = src;
- iter->bytepos = iter->charpos = 0;
- iter->get_and_advance = fixed8_get_next;
- iter->set_and_advance = fixed8_set_next;
- iter->set_position = fixed8_set_position;
+ ASSERT_ARGS(fixed8_iter_set_and_advance)
+ set_byte(interp, str, iter->charpos++, c);
+ iter->bytepos++;
}
+/*
+
+=item C<static void fixed8_iter_set_position(PARROT_INTERP, const STRING *str,
+String_iter *iter, UINTVAL pos)>
+
+Moves the string iterator C<iter> to the position C<pos> in the string.
+
+=cut
+
+*/
+
+static void
+fixed8_iter_set_position(SHIM_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos)
+{
+ ASSERT_ARGS(fixed8_iter_set_position)
+ iter->bytepos = iter->charpos = pos;
+ PARROT_ASSERT(pos <= Buffer_buflen(str));
+}
/*
@@ -509,9 +553,13 @@
get_bytes,
codepoints,
bytes,
- iter_init,
find_cclass,
- fixed_8_hash
+ fixed_8_hash,
+ fixed8_iter_get,
+ fixed8_iter_skip,
+ fixed8_iter_get_and_advance,
+ fixed8_iter_set_and_advance,
+ fixed8_iter_set_position
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Modified: trunk/src/string/encoding/ucs2.c
==============================================================================
--- trunk/src/string/encoding/ucs2.c Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/string/encoding/ucs2.c Thu Aug 19 05:53:12 2010 (r48565)
@@ -84,14 +84,6 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-static void iter_init(PARROT_INTERP,
- ARGIN(const STRING *src),
- ARGOUT(String_iter *iter))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*iter);
-
static void set_byte(PARROT_INTERP,
SHIM(const STRING *src),
SHIM(UINTVAL offset),
@@ -104,26 +96,54 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-static UINTVAL ucs2_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))
+static size_t ucs2_hash(PARROT_INTERP,
+ ARGIN(const STRING *s),
+ size_t hashval)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2);
+
+static UINTVAL ucs2_iter_get(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGIN(const String_iter *i),
+ INTVAL offset)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
+static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGMOD(String_iter *i))
+ __attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
FUNC_MODIFIES(*i);
-static void ucs2_encode_and_advance(SHIM_INTERP,
+static void ucs2_iter_set_and_advance(PARROT_INTERP,
+ ARGMOD(STRING *str),
ARGMOD(String_iter *i),
UINTVAL c)
+ __attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*str)
FUNC_MODIFIES(*i);
-static size_t ucs2_hash(PARROT_INTERP,
- ARGIN(const STRING *s),
- size_t hashval)
+static void ucs2_iter_set_position(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGMOD(String_iter *i),
+ UINTVAL n)
__attribute__nonnull__(1)
- __attribute__nonnull__(2);
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*i);
-static void ucs2_set_position(SHIM_INTERP,
+static void ucs2_iter_skip(PARROT_INTERP,
+ ARGIN(const STRING *str),
ARGMOD(String_iter *i),
- UINTVAL n)
+ INTVAL skip)
+ __attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
FUNC_MODIFIES(*i);
#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
@@ -145,24 +165,34 @@
#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src) \
- , PARROT_ASSERT_ARG(iter))
#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp))
#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ucs2_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(i))
-#define ASSERT_ARGS_ucs2_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_ucs2_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(s))
-#define ASSERT_ARGS_ucs2_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_ucs2_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_ucs2_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_ucs2_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_ucs2_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_ucs2_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
/* HEADERIZER END: static */
@@ -323,11 +353,11 @@
String_iter iter;
UINTVAL start;
- iter_init(interp, src, &iter);
- iter.set_position(interp, &iter, offset);
+ STRING_ITER_INIT(interp, &iter);
+ ucs2_iter_set_position(interp, src, &iter, offset);
start = iter.bytepos;
return_string->strstart = (char *)return_string->strstart + start;
- iter.set_position(interp, &iter, offset + count);
+ ucs2_iter_set_position(interp, src, &iter, offset + count);
return_string->bufused = iter.bytepos - start;
}
#endif
@@ -402,7 +432,55 @@
/*
-=item C<static UINTVAL ucs2_decode_and_advance(PARROT_INTERP, String_iter *i)>
+=item C<static UINTVAL ucs2_iter_get(PARROT_INTERP, const STRING *str, const
+String_iter *i, INTVAL offset)>
+
+Get the character at C<i> + C<offset>.
+
+=cut
+
+*/
+
+static UINTVAL
+ucs2_iter_get(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
+{
+ ASSERT_ARGS(ucs2_iter_get)
+ return get_codepoint(interp, str, i->charpos + offset);
+}
+
+/*
+
+=item C<static void ucs2_iter_skip(PARROT_INTERP, const STRING *str, String_iter
+*i, INTVAL skip)>
+
+Moves the string iterator C<i> by C<skip> characters.
+
+=cut
+
+*/
+
+static void
+ucs2_iter_skip(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
+{
+ ASSERT_ARGS(ucs2_iter_skip)
+ UNUSED(str);
+
+#if PARROT_HAS_ICU
+ i->charpos += skip;
+ i->bytepos += skip * sizeof (UChar);
+#else
+ UNUSED(i);
+ UNUSED(skip);
+ no_ICU_lib(interp);
+#endif
+}
+
+/*
+
+=item C<static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP, const STRING
+*str, String_iter *i)>
Moves the string iterator C<i> to the next UCS-2 codepoint.
@@ -411,35 +489,34 @@
*/
static UINTVAL
-ucs2_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))
+ucs2_iter_get_and_advance(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i))
{
- ASSERT_ARGS(ucs2_decode_and_advance)
+ ASSERT_ARGS(ucs2_iter_get_and_advance)
#if PARROT_HAS_ICU
- const UChar * const s = (const UChar*) i->str->strstart;
+ UChar * const s = (UChar*) str->strstart;
size_t pos = i->bytepos / sizeof (UChar);
/* TODO either make sure that we don't go past end or use SAFE
* iter versions
*/
const UChar c = s[pos++];
- ++i->charpos;
+ i->charpos++;
i->bytepos = pos * sizeof (UChar);
return c;
#else
- /* This function must never be called if compiled without ICU.
- * See TT #557
- */
- PARROT_ASSERT(0);
+ UNUSED(str);
UNUSED(i);
+ no_ICU_lib(interp);
return (UINTVAL)0; /* Stop the static analyzers from panicking */
#endif
}
/*
-=item C<static void ucs2_encode_and_advance(PARROT_INTERP, String_iter *i,
-UINTVAL c)>
+=item C<static void ucs2_iter_set_and_advance(PARROT_INTERP, STRING *str,
+String_iter *i, UINTVAL c)>
With the string iterator C<i>, appends the codepoint C<c> and advances to the
next position in the string.
@@ -449,64 +526,29 @@
*/
static void
-ucs2_encode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL c)
+ucs2_iter_set_and_advance(PARROT_INTERP,
+ ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c)
{
- ASSERT_ARGS(ucs2_encode_and_advance)
+ ASSERT_ARGS(ucs2_iter_set_and_advance)
#if PARROT_HAS_ICU
- UChar *s = (UChar*) i->str->strstart;
+ UChar * const s = (UChar*) str->strstart;
UINTVAL pos = i->bytepos / sizeof (UChar);
- s[pos++] = (UChar)c;
- ++i->charpos;
+ s[pos++] = (UChar)c;
+ i->charpos++;
i->bytepos = pos * sizeof (UChar);
#else
- /* This function must never be called if compiled without ICU.
- * See TT #557
- */
+ UNUSED(str);
UNUSED(i);
UNUSED(c);
- PARROT_ASSERT(0);
-#endif
-}
-
-/*
-
-=item C<static size_t ucs2_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
-
-Returns the hashed value of the string, given a seed in hashval.
-
-=cut
-
-*/
-
-static size_t
-ucs2_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
-{
- ASSERT_ARGS(ucs2_hash)
-#if PARROT_HAS_ICU
- const UChar *pos = (const UChar*) s->strstart;
- UINTVAL len = s->strlen;
- UNUSED(interp);
-
- while (len--) {
- hashval += hashval << 5;
- hashval += *(pos++);
- }
-
- return hashval;
-
-#else
- UNUSED(s);
- UNUSED(hashval);
-
no_ICU_lib(interp);
#endif
}
-
/*
-=item C<static void ucs2_set_position(PARROT_INTERP, String_iter *i, UINTVAL n)>
+=item C<static void ucs2_iter_set_position(PARROT_INTERP, const STRING *str,
+String_iter *i, UINTVAL n)>
Moves the string iterator C<i> to the position C<n> in the string.
@@ -515,50 +557,52 @@
*/
static void
-ucs2_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL n)
+ucs2_iter_set_position(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
{
- ASSERT_ARGS(ucs2_set_position)
+ ASSERT_ARGS(ucs2_iter_set_position)
+ UNUSED(str);
#if PARROT_HAS_ICU
i->charpos = n;
i->bytepos = n * sizeof (UChar);
#else
- /* This function must never be called if compiled without ICU.
- * See TT #557
- */
UNUSED(i);
UNUSED(n);
- PARROT_ASSERT(0);
+ no_ICU_lib(interp);
#endif
}
-
/*
-=item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter
-*iter)>
+=item C<static size_t ucs2_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
-Initializes for string C<src> the string iterator C<iter>.
+Returns the hashed value of the string, given a seed in hashval.
=cut
*/
-static void
-iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))
+static size_t
+ucs2_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
{
- ASSERT_ARGS(iter_init)
+ ASSERT_ARGS(ucs2_hash)
#if PARROT_HAS_ICU
+ const UChar *pos = (const UChar*) s->strstart;
+ UINTVAL len = s->strlen;
UNUSED(interp);
- iter->str = src;
- iter->bytepos = 0;
- iter->charpos = 0;
- iter->get_and_advance = ucs2_decode_and_advance;
- iter->set_and_advance = ucs2_encode_and_advance;
- iter->set_position = ucs2_set_position;
+
+ while (len--) {
+ hashval += hashval << 5;
+ hashval += *(pos++);
+ }
+
+ return hashval;
+
#else
- UNUSED(src);
- UNUSED(iter);
+ UNUSED(s);
+ UNUSED(hashval);
+
no_ICU_lib(interp);
#endif
}
@@ -590,9 +634,13 @@
get_bytes,
codepoints,
bytes,
- iter_init,
find_cclass,
- ucs2_hash
+ ucs2_hash,
+ ucs2_iter_get,
+ ucs2_iter_skip,
+ ucs2_iter_get_and_advance,
+ ucs2_iter_set_and_advance,
+ ucs2_iter_set_position
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Parrot_register_encoding(interp, "ucs2", return_encoding);
Modified: trunk/src/string/encoding/ucs4.c
==============================================================================
--- trunk/src/string/encoding/ucs4.c Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/string/encoding/ucs4.c Thu Aug 19 05:53:12 2010 (r48565)
@@ -84,14 +84,6 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-static void iter_init(PARROT_INTERP,
- ARGIN(const STRING *src),
- ARGOUT(String_iter *iter))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*iter);
-
static void set_byte(PARROT_INTERP,
SHIM(const STRING *src),
SHIM(UINTVAL offset),
@@ -104,30 +96,54 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-static UINTVAL ucs4_decode_and_advance(PARROT_INTERP,
+static size_t ucs4_hash(PARROT_INTERP,
+ ARGIN(const STRING *s),
+ size_t hashval)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2);
+
+static UINTVAL ucs4_iter_get(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGIN(const String_iter *i),
+ INTVAL offset)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
+static UINTVAL ucs4_iter_get_and_advance(PARROT_INTERP,
+ ARGIN(const STRING *str),
ARGMOD(String_iter *i))
__attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
FUNC_MODIFIES(*i);
-static void ucs4_encode_and_advance(PARROT_INTERP,
+static void ucs4_iter_set_and_advance(PARROT_INTERP,
+ ARGMOD(STRING *str),
ARGMOD(String_iter *i),
UINTVAL c)
__attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*str)
FUNC_MODIFIES(*i);
-static size_t ucs4_hash(PARROT_INTERP,
- ARGIN(const STRING *s),
- size_t hashval)
+static void ucs4_iter_set_position(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGMOD(String_iter *i),
+ UINTVAL n)
__attribute__nonnull__(1)
- __attribute__nonnull__(2);
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*i);
-static void ucs4_set_position(PARROT_INTERP,
+static void ucs4_iter_skip(PARROT_INTERP,
+ ARGIN(const STRING *str),
ARGMOD(String_iter *i),
- UINTVAL n)
+ INTVAL skip)
__attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
FUNC_MODIFIES(*i);
#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
@@ -149,26 +165,33 @@
#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src) \
- , PARROT_ASSERT_ARG(iter))
#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp))
#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ucs4_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+#define ASSERT_ARGS_ucs4_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(s))
+#define ASSERT_ARGS_ucs4_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(i))
-#define ASSERT_ARGS_ucs4_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+#define ASSERT_ARGS_ucs4_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(i))
-#define ASSERT_ARGS_ucs4_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+#define ASSERT_ARGS_ucs4_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(s))
-#define ASSERT_ARGS_ucs4_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_ucs4_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_ucs4_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(i))
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
/* HEADERIZER END: static */
@@ -414,90 +437,115 @@
/*
-=item C<static UINTVAL ucs4_decode_and_advance(PARROT_INTERP, String_iter *i)>
+=item C<static UINTVAL ucs4_iter_get(PARROT_INTERP, const STRING *str, const
+String_iter *i, INTVAL offset)>
-Moves the string iterator C<i> to the next UCS-4 codepoint.
+Get the character at C<i> + C<offset>.
=cut
*/
static UINTVAL
-ucs4_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))
+ucs4_iter_get(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
+{
+ ASSERT_ARGS(ucs4_iter_get)
+ return get_codepoint(interp, str, i->charpos + offset);
+}
+
+/*
+
+=item C<static void ucs4_iter_skip(PARROT_INTERP, const STRING *str, String_iter
+*i, INTVAL skip)>
+
+Moves the string iterator C<i> by C<skip> characters.
+
+=cut
+
+*/
+
+static void
+ucs4_iter_skip(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
{
- ASSERT_ARGS(ucs4_decode_and_advance)
+ ASSERT_ARGS(ucs4_iter_skip)
+ UNUSED(str);
+
#if PARROT_HAS_ICU
- const UChar32 * const s = (const UChar32 *) i->str->strstart;
- size_t pos = i->bytepos / sizeof (UChar32);
- const UChar32 c = s[pos++];
- ++i->charpos;
- i->bytepos = pos * sizeof (UChar32);
- return c;
+ i->charpos += skip;
+ i->bytepos += skip * sizeof (UChar32);
#else
UNUSED(i);
+ UNUSED(skip);
no_ICU_lib(interp);
#endif
}
/*
-=item C<static void ucs4_encode_and_advance(PARROT_INTERP, String_iter *i,
-UINTVAL c)>
+=item C<static UINTVAL ucs4_iter_get_and_advance(PARROT_INTERP, const STRING
+*str, String_iter *i)>
-With the string iterator C<i>, appends the codepoint C<c> and advances to the
-next position in the string.
+Moves the string iterator C<i> to the next codepoint.
=cut
*/
-static void
-ucs4_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)
+static UINTVAL
+ucs4_iter_get_and_advance(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i))
{
- ASSERT_ARGS(ucs4_encode_and_advance)
+ ASSERT_ARGS(ucs4_iter_get_and_advance)
+
#if PARROT_HAS_ICU
- UChar32 *s = (UChar32 *) i->str->strstart;
- size_t pos = i->bytepos / sizeof (UChar32);
- s[pos++] = (UChar32) c;
- ++i->charpos;
- i->bytepos = pos * sizeof (UChar32);
+ const UChar32 * const s = (const UChar32*) str->strstart;
+ const UChar32 c = s[i->charpos++];
+ i->bytepos += sizeof (UChar32);
+ return c;
#else
+ UNUSED(str);
UNUSED(i);
no_ICU_lib(interp);
+ return (UINTVAL)0; /* Stop the static analyzers from panicking */
#endif
}
-#if PARROT_HAS_ICU
/*
-=item C<static size_t ucs4_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
+=item C<static void ucs4_iter_set_and_advance(PARROT_INTERP, STRING *str,
+String_iter *i, UINTVAL c)>
-Returns the hashed value of the string, given a seed in hashval.
+With the string iterator C<i>, appends the codepoint C<c> and advances to the
+next position in the string.
=cut
*/
-static size_t
-ucs4_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
+static void
+ucs4_iter_set_and_advance(PARROT_INTERP,
+ ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c)
{
- ASSERT_ARGS(ucs4_hash)
- const UChar32 *pos = (const UChar32*) s->strstart;
- UINTVAL len = s->strlen;
- UNUSED(interp);
-
- while (len--) {
- hashval += hashval << 5;
- hashval += *(pos++);
- }
+ ASSERT_ARGS(ucs4_iter_set_and_advance)
- return hashval;
-}
+#if PARROT_HAS_ICU
+ UChar32 * const s = (UChar32*) str->strstart;
+ s[i->charpos++] = (UChar32)c;
+ i->bytepos += sizeof (UChar32);
+#else
+ UNUSED(str);
+ UNUSED(i);
+ UNUSED(c);
+ no_ICU_lib(interp);
#endif
+}
/*
-=item C<static void ucs4_set_position(PARROT_INTERP, String_iter *i, UINTVAL n)>
+=item C<static void ucs4_iter_set_position(PARROT_INTERP, const STRING *str,
+String_iter *i, UINTVAL n)>
Moves the string iterator C<i> to the position C<n> in the string.
@@ -506,9 +554,12 @@
*/
static void
-ucs4_set_position(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL n)
+ucs4_iter_set_position(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
{
- ASSERT_ARGS(ucs4_set_position)
+ ASSERT_ARGS(ucs4_iter_set_position)
+ UNUSED(str);
+
#if PARROT_HAS_ICU
i->charpos = n;
i->bytepos = n * sizeof (UChar32);
@@ -519,36 +570,33 @@
#endif
}
-
+#if PARROT_HAS_ICU
/*
-=item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter
-*iter)>
+=item C<static size_t ucs4_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
-Initializes for string C<src> the string iterator C<iter>.
+Returns the hashed value of the string, given a seed in hashval.
=cut
*/
-static void
-iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))
+static size_t
+ucs4_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
{
- ASSERT_ARGS(iter_init)
-#if PARROT_HAS_ICU
+ ASSERT_ARGS(ucs4_hash)
+ const UChar32 *pos = (const UChar32*) s->strstart;
+ UINTVAL len = s->strlen;
UNUSED(interp);
- iter->str = src;
- iter->bytepos = 0;
- iter->charpos = 0;
- iter->get_and_advance = ucs4_decode_and_advance;
- iter->set_and_advance = ucs4_encode_and_advance;
- iter->set_position = ucs4_set_position;
-#else
- UNUSED(src);
- UNUSED(iter);
- no_ICU_lib(interp);
-#endif
+
+ while (len--) {
+ hashval += hashval << 5;
+ hashval += *(pos++);
+ }
+
+ return hashval;
}
+#endif
/*
@@ -577,13 +625,17 @@
get_bytes,
codepoints,
bytes,
- iter_init,
find_cclass,
#if PARROT_HAS_ICU
- ucs4_hash
+ ucs4_hash,
#else
- NULL
+ NULL,
#endif
+ ucs4_iter_get,
+ ucs4_iter_skip,
+ ucs4_iter_get_and_advance,
+ ucs4_iter_set_and_advance,
+ ucs4_iter_set_position
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Parrot_register_encoding(interp, "ucs4", return_encoding);
Modified: trunk/src/string/encoding/utf16.c
==============================================================================
--- trunk/src/string/encoding/utf16.c Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/string/encoding/utf16.c Thu Aug 19 05:53:12 2010 (r48565)
@@ -75,14 +75,6 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-static void iter_init(PARROT_INTERP,
- ARGIN(const STRING *src),
- ARGOUT(String_iter *iter))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*iter);
-
static void set_byte(PARROT_INTERP,
ARGIN(const STRING *src),
UINTVAL offset,
@@ -96,21 +88,49 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
+static UINTVAL utf16_iter_get(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGIN(const String_iter *i),
+ INTVAL offset)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
PARROT_WARN_UNUSED_RESULT
-static UINTVAL utf16_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))
+static UINTVAL utf16_iter_get_and_advance(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGMOD(String_iter *i))
+ __attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
FUNC_MODIFIES(*i);
-static void utf16_encode_and_advance(SHIM_INTERP,
+static void utf16_iter_set_and_advance(PARROT_INTERP,
+ ARGMOD(STRING *str),
ARGMOD(String_iter *i),
UINTVAL c)
+ __attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*str)
FUNC_MODIFIES(*i);
-static void utf16_set_position(SHIM_INTERP,
+static void utf16_iter_set_position(PARROT_INTERP,
+ ARGIN(const STRING *str),
ARGMOD(String_iter *i),
UINTVAL n)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*i);
+
+static void utf16_iter_skip(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGMOD(String_iter *i),
+ INTVAL skip)
+ __attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
FUNC_MODIFIES(*i);
#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
@@ -133,22 +153,32 @@
#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src) \
- , PARROT_ASSERT_ARG(iter))
#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_utf16_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(i))
-#define ASSERT_ARGS_utf16_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(i))
-#define ASSERT_ARGS_utf16_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf16_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf16_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf16_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf16_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
/* HEADERIZER END: static */
@@ -377,19 +407,27 @@
get_codepoints(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
{
ASSERT_ARGS(get_codepoints)
- String_iter iter;
- UINTVAL start;
+#if PARROT_HAS_ICU
+ UINTVAL pos = 0, start;
+ const UChar * const s = (UChar*) src->strstart;
STRING * const return_string = Parrot_str_copy(interp, src);
- iter_init(interp, src, &iter);
- iter.set_position(interp, &iter, offset);
- start = iter.bytepos;
- return_string->strstart = (char *)return_string->strstart + start ;
- iter.set_position(interp, &iter, offset + count);
- return_string->bufused = iter.bytepos - start;
+ U16_FWD_N_UNSAFE(s, pos, offset);
+ start = pos * sizeof (UChar);
+ return_string->strstart = (char *)return_string->strstart + start;
+ U16_FWD_N_UNSAFE(s, pos, count);
+ return_string->bufused = pos * sizeof (UChar) - start;
return_string->strlen = count;
return_string->hashval = 0;
return return_string;
+#else
+ UNUSED(src);
+ UNUSED(offset);
+ UNUSED(count);
+
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+ "no ICU lib loaded");
+#endif
}
@@ -432,15 +470,24 @@
codepoints(PARROT_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(codepoints)
- String_iter iter;
+#if PARROT_HAS_ICU
+ const UChar * const s = (UChar*) src->strstart;
+ UINTVAL pos = 0, charpos = 0;
/*
* this is used to initially calculate src->strlen,
* therefore we must scan the whole string
*/
- iter_init(interp, src, &iter);
- while (iter.bytepos < src->bufused)
- iter.get_and_advance(interp, &iter);
- return iter.charpos;
+ while (pos * sizeof (UChar) < src->bufused) {
+ U16_FWD_1_UNSAFE(s, pos);
+ ++charpos;
+ }
+ return charpos;
+#else
+ UNUSED(src);
+
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+ "no ICU lib loaded");
+#endif
}
/*
@@ -461,106 +508,184 @@
return src->bufused;
}
-#if PARROT_HAS_ICU
/*
-=item C<static UINTVAL utf16_decode_and_advance(PARROT_INTERP, String_iter *i)>
+=item C<static UINTVAL utf16_iter_get(PARROT_INTERP, const STRING *str, const
+String_iter *i, INTVAL offset)>
-Moves the string iterator C<i> to the next UTF-16 codepoint.
+Get the character at C<i> plus C<offset>.
=cut
*/
-PARROT_WARN_UNUSED_RESULT
static UINTVAL
-utf16_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))
+utf16_iter_get(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
{
- ASSERT_ARGS(utf16_decode_and_advance)
- const UChar * const s = (const UChar*) i->str->strstart;
- UINTVAL pos = i->bytepos / sizeof (UChar);
- UINTVAL c;
+ ASSERT_ARGS(utf16_iter_get)
+#if PARROT_HAS_ICU
+ const UChar * const s = (UChar*) str->strstart;
+ UINTVAL c, pos;
+
+ pos = i->bytepos / sizeof (UChar);
+ if (offset > 0) {
+ U16_FWD_N_UNSAFE(s, pos, offset);
+ }
+ else if (offset < 0) {
+ U16_BACK_N_UNSAFE(s, pos, -offset);
+ }
+ U16_GET_UNSAFE(s, pos, c);
- /* TODO either make sure that we don't go past end or use SAFE
- * iter versions
- */
- U16_NEXT_UNSAFE(s, pos, c);
- ++i->charpos;
- i->bytepos = pos * sizeof (UChar);
return c;
+#else
+ UNUSED(str);
+ UNUSED(i);
+ UNUSED(offset);
+
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+ "no ICU lib loaded");
+#endif
}
/*
-=item C<static void utf16_encode_and_advance(PARROT_INTERP, String_iter *i,
-UINTVAL c)>
+=item C<static void utf16_iter_skip(PARROT_INTERP, const STRING *str,
+String_iter *i, INTVAL skip)>
-With the string iterator C<i>, appends the codepoint C<c> and advances to the
-next position in the string.
+Moves the string iterator C<i> by C<skip> characters.
=cut
*/
static void
-utf16_encode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL c)
+utf16_iter_skip(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
{
- ASSERT_ARGS(utf16_encode_and_advance)
- UChar * const s = (UChar*) i->str->strstart;
+ ASSERT_ARGS(utf16_iter_skip)
+#if PARROT_HAS_ICU
+ const UChar * const s = (UChar*) str->strstart;
UINTVAL pos = i->bytepos / sizeof (UChar);
- U16_APPEND_UNSAFE(s, pos, c);
- ++i->charpos;
+
+ if (skip > 0) {
+ U16_FWD_N_UNSAFE(s, pos, skip);
+ }
+ else if (skip < 0) {
+ U16_BACK_N_UNSAFE(s, pos, -skip);
+ }
+
+ i->charpos += skip;
i->bytepos = pos * sizeof (UChar);
+#else
+ UNUSED(str);
+ UNUSED(i);
+ UNUSED(skip);
+
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+ "no ICU lib loaded");
+#endif
}
/*
-=item C<static void utf16_set_position(PARROT_INTERP, String_iter *i, UINTVAL
-n)>
+=item C<static UINTVAL utf16_iter_get_and_advance(PARROT_INTERP, const STRING
+*str, String_iter *i)>
-Moves the string iterator C<i> to the position C<n> in the string.
+Moves the string iterator C<i> to the next UTF-16 codepoint.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL
+utf16_iter_get_and_advance(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i))
+{
+ ASSERT_ARGS(utf16_iter_get_and_advance)
+#if PARROT_HAS_ICU
+ const UChar * const s = (UChar*) str->strstart;
+ UINTVAL c, pos;
+ pos = i->bytepos / sizeof (UChar);
+ /* TODO either make sure that we don't go past end or use SAFE
+ * iter versions
+ */
+ U16_NEXT_UNSAFE(s, pos, c);
+ i->charpos++;
+ i->bytepos = pos * sizeof (UChar);
+ return c;
+#else
+ UNUSED(str);
+ UNUSED(i);
+
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+ "no ICU lib loaded");
+#endif
+}
+
+/*
+
+=item C<static void utf16_iter_set_and_advance(PARROT_INTERP, STRING *str,
+String_iter *i, UINTVAL c)>
+
+With the string iterator C<i>, appends the codepoint C<c> and advances to the
+next position in the string.
=cut
*/
static void
-utf16_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL n)
+utf16_iter_set_and_advance(PARROT_INTERP,
+ ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c)
{
- ASSERT_ARGS(utf16_set_position)
- UChar * const s = (UChar*) i->str->strstart;
+ ASSERT_ARGS(utf16_iter_set_and_advance)
+#if PARROT_HAS_ICU
+ UChar * const s = (UChar*) str->strstart;
UINTVAL pos;
- pos = 0;
- U16_FWD_N_UNSAFE(s, pos, n);
- i->charpos = n;
+ pos = i->bytepos / sizeof (UChar);
+ U16_APPEND_UNSAFE(s, pos, c);
+ i->charpos++;
i->bytepos = pos * sizeof (UChar);
-}
+#else
+ UNUSED(str);
+ UNUSED(i);
+ UNUSED(c);
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+ "no ICU lib loaded");
#endif
+}
/*
-=item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter
-*iter)>
+=item C<static void utf16_iter_set_position(PARROT_INTERP, const STRING *str,
+String_iter *i, UINTVAL n)>
-Initializes for string C<src> the string iterator C<iter>.
+Moves the string iterator C<i> to the position C<n> in the string.
=cut
*/
static void
-iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))
+utf16_iter_set_position(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
{
- ASSERT_ARGS(iter_init)
- iter->str = src;
- iter->bytepos = iter->charpos = 0;
+ ASSERT_ARGS(utf16_iter_set_position)
#if PARROT_HAS_ICU
- UNUSED(interp);
- iter->get_and_advance = utf16_decode_and_advance;
- iter->set_and_advance = utf16_encode_and_advance;
- iter->set_position = utf16_set_position;
+ UChar * const s = (UChar*) str->strstart;
+ UINTVAL pos;
+ pos = 0;
+ U16_FWD_N_UNSAFE(s, pos, n);
+ i->charpos = n;
+ i->bytepos = pos * sizeof (UChar);
#else
+ UNUSED(str);
+ UNUSED(i);
+ UNUSED(n);
+
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
"no ICU lib loaded");
#endif
@@ -593,9 +718,13 @@
get_bytes,
codepoints,
bytes,
- iter_init,
find_cclass,
- NULL
+ NULL,
+ utf16_iter_get,
+ utf16_iter_skip,
+ utf16_iter_get_and_advance,
+ utf16_iter_set_and_advance,
+ utf16_iter_set_position
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Parrot_register_encoding(interp, "utf16", return_encoding);
Modified: trunk/src/string/encoding/utf8.c
==============================================================================
--- trunk/src/string/encoding/utf8.c Thu Aug 19 02:03:11 2010 (r48564)
+++ trunk/src/string/encoding/utf8.c Thu Aug 19 05:53:12 2010 (r48565)
@@ -71,13 +71,6 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-static void iter_init(SHIM_INTERP,
- ARGIN(const STRING *src),
- ARGOUT(String_iter *iter))
- __attribute__nonnull__(2)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*iter);
-
static void set_byte(PARROT_INTERP,
ARGIN(const STRING *src),
UINTVAL offset,
@@ -100,28 +93,51 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-static UINTVAL utf8_decode_and_advance(PARROT_INTERP,
- ARGMOD(String_iter *i))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- FUNC_MODIFIES(*i);
-
PARROT_CANNOT_RETURN_NULL
static void * utf8_encode(PARROT_INTERP, ARGIN(void *ptr), UINTVAL c)
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-static void utf8_encode_and_advance(PARROT_INTERP,
+static UINTVAL utf8_iter_get(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGIN(const String_iter *i),
+ INTVAL offset)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
+static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP,
+ ARGIN(const STRING *str),
+ ARGMOD(String_iter *i))
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*i);
+
+static void utf8_iter_set_and_advance(PARROT_INTERP,
+ ARGMOD(STRING *str),
ARGMOD(String_iter *i),
UINTVAL c)
__attribute__nonnull__(1)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*str)
FUNC_MODIFIES(*i);
-static void utf8_set_position(SHIM_INTERP,
+static void utf8_iter_set_position(SHIM_INTERP,
+ ARGIN(const STRING *str),
ARGMOD(String_iter *i),
UINTVAL pos)
__attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ FUNC_MODIFIES(*i);
+
+static void utf8_iter_skip(SHIM_INTERP,
+ ARGIN(const STRING *str),
+ ARGMOD(String_iter *i),
+ INTVAL skip)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3)
FUNC_MODIFIES(*i);
PARROT_WARN_UNUSED_RESULT
@@ -151,9 +167,6 @@
#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(src) \
- , PARROT_ASSERT_ARG(iter))
#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
@@ -166,17 +179,27 @@
#define ASSERT_ARGS_utf8_decode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(ptr))
-#define ASSERT_ARGS_utf8_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_utf8_encode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(ptr))
-#define ASSERT_ARGS_utf8_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+#define ASSERT_ARGS_utf8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(str) \
+ , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(str) \
, PARROT_ASSERT_ARG(i))
-#define ASSERT_ARGS_utf8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(i))
#define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(ptr))
#define ASSERT_ARGS_utf8_skip_forward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
@@ -387,7 +410,65 @@
/*
-=item C<static UINTVAL utf8_decode_and_advance(PARROT_INTERP, String_iter *i)>
+=item C<static UINTVAL utf8_iter_get(PARROT_INTERP, const STRING *str, const
+String_iter *i, INTVAL offset)>
+
+Get the character at C<i> plus C<offset>.
+
+=cut
+
+*/
+
+static UINTVAL
+utf8_iter_get(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
+{
+ ASSERT_ARGS(utf8_iter_get)
+ const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos);
+
+ if (offset > 0) {
+ u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, offset);
+ }
+ else if (offset < 0) {
+ u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -offset);
+ }
+
+ return utf8_decode(interp, u8ptr);
+}
+
+/*
+
+=item C<static void utf8_iter_skip(PARROT_INTERP, const STRING *str, String_iter
+*i, INTVAL skip)>
+
+Moves the string iterator C<i> by C<skip> characters.
+
+=cut
+
+*/
+
+static void
+utf8_iter_skip(SHIM_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
+{
+ ASSERT_ARGS(utf8_iter_skip)
+ const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos);
+
+ if (skip > 0) {
+ u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, skip);
+ }
+ else if (skip < 0) {
+ u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -skip);
+ }
+
+ i->charpos += skip;
+ i->bytepos = (const char *)u8ptr - (const char *)str->strstart;
+}
+
+/*
+
+=item C<static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP, const STRING
+*str, String_iter *i)>
The UTF-8 implementation of the string iterator's C<get_and_advance>
function.
@@ -397,10 +478,11 @@
*/
static UINTVAL
-utf8_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))
+utf8_iter_get_and_advance(PARROT_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i))
{
- ASSERT_ARGS(utf8_decode_and_advance)
- const utf8_t *u8ptr = (utf8_t *)((char *)i->str->strstart + i->bytepos);
+ ASSERT_ARGS(utf8_iter_get_and_advance)
+ const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos);
UINTVAL c = *u8ptr;
if (UTF8_IS_START(c)) {
@@ -408,13 +490,12 @@
c &= UTF8_START_MASK(len);
i->bytepos += len;
- for (--len; len; --len) {
- ++u8ptr;
+ for (len--; len; len--) {
+ u8ptr++;
if (!UTF8_IS_CONTINUATION(*u8ptr))
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,
"Malformed UTF-8 string\n");
-
c = UTF8_ACCUMULATE(c, *u8ptr);
}
@@ -427,17 +508,17 @@
"Malformed UTF-8 string\n");
}
else {
- ++i->bytepos;
+ i->bytepos++;
}
- ++i->charpos;
+ i->charpos++;
return c;
}
/*
-=item C<static void utf8_encode_and_advance(PARROT_INTERP, String_iter *i,
-UINTVAL c)>
+=item C<static void utf8_iter_set_and_advance(PARROT_INTERP, STRING *str,
+String_iter *i, UINTVAL c)>
The UTF-8 implementation of the string iterator's C<set_and_advance>
function.
@@ -447,23 +528,23 @@
*/
static void
-utf8_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)
+utf8_iter_set_and_advance(PARROT_INTERP,
+ ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c)
{
- ASSERT_ARGS(utf8_encode_and_advance)
- const STRING * const s = i->str;
- unsigned char * const pos = (unsigned char *)s->strstart + i->bytepos;
+ ASSERT_ARGS(utf8_iter_set_and_advance)
+ unsigned char * const pos = (unsigned char *)str->strstart + i->bytepos;
unsigned char * const new_pos = (unsigned char *)utf8_encode(interp, pos, c);
i->bytepos += (new_pos - pos);
/* XXX possible buffer overrun exception? */
- PARROT_ASSERT(i->bytepos <= Buffer_buflen(s));
- ++i->charpos;
+ PARROT_ASSERT(i->bytepos <= Buffer_buflen(str));
+ i->charpos++;
}
/*
-=item C<static void utf8_set_position(PARROT_INTERP, String_iter *i, UINTVAL
-pos)>
+=item C<static void utf8_iter_set_position(PARROT_INTERP, const STRING *str,
+String_iter *i, UINTVAL pos)>
The UTF-8 implementation of the string iterator's C<set_position>
function.
@@ -473,25 +554,46 @@
*/
static void
-utf8_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL pos)
+utf8_iter_set_position(SHIM_INTERP,
+ ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL pos)
{
- ASSERT_ARGS(utf8_set_position)
- const utf8_t *u8ptr = (const utf8_t *)i->str->strstart;
+ ASSERT_ARGS(utf8_iter_set_position)
+ const utf8_t *u8ptr = (const utf8_t *)str->strstart;
- /* start from last known charpos, if we can */
- if (i->charpos <= pos) {
- const UINTVAL old_pos = pos;
- pos -= i->charpos;
- u8ptr += i->bytepos;
- i->charpos = old_pos;
+ if (pos == 0) {
+ i->charpos = 0;
+ i->bytepos = 0;
+ return;
}
- else
- i->charpos = pos;
- while (pos-- > 0)
- u8ptr += UTF8SKIP(u8ptr);
+ /*
+ * we know the byte offsets of three positions: start, current and end
+ * now find the shortest way to reach pos
+ */
+ if (pos < i->charpos) {
+ if (pos <= (i->charpos >> 1)) {
+ /* go forward from start */
+ u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, pos);
+ }
+ else {
+ /* go backward from current */
+ u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + i->bytepos, i->charpos - pos);
+ }
+ }
+ else {
+ const UINTVAL len = str->strlen;
+ if (pos <= i->charpos + ((len - i->charpos) >> 1)) {
+ /* go forward from current */
+ u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr + i->bytepos, pos - i->charpos);
+ }
+ else {
+ /* go backward from end */
+ u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + str->bufused, len - pos);
+ }
+ }
- i->bytepos = (const char *)u8ptr - (const char *)i->str->strstart;
+ i->charpos = pos;
+ i->bytepos = (const char *)u8ptr - (const char *)str->strstart;
}
@@ -513,8 +615,8 @@
{
ASSERT_ARGS(to_encoding)
STRING *result;
- String_iter src_iter;
- UINTVAL offs, dest_len, dest_pos, src_len;
+ const ENCODING *src_encoding;
+ UINTVAL dest_len, dest_pos, src_len;
unsigned char *p;
if (src->encoding == Parrot_utf8_encoding_ptr)
@@ -523,8 +625,8 @@
result = Parrot_gc_new_string_header(interp, 0);
src_len = src->strlen;
- /* init iter before possilby changing encoding */
- ENCODING_ITER_INIT(interp, src, &src_iter);
+ /* save source encoding before possibly changing it */
+ src_encoding = src->encoding;
result->charset = Parrot_unicode_charset_ptr;
result->encoding = Parrot_utf8_encoding_ptr;
result->strlen = src_len;
@@ -542,15 +644,17 @@
result->bufused = dest_len;
}
else {
+ String_iter src_iter;
+ STRING_ITER_INIT(interp, &src_iter);
dest_len = src_len;
dest_pos = 0;
- for (offs = 0; offs < src_len; ++offs) {
- const UINTVAL c = src_iter.get_and_advance(interp, &src_iter);
+ while (src_iter.charpos < src_len) {
+ const UINTVAL c = src_encoding->iter_get_and_advance(interp, src, &src_iter);
unsigned char *new_pos;
unsigned char *pos;
if (dest_len - dest_pos < 6) {
- UINTVAL need = (UINTVAL)((src->strlen - offs) * 1.5);
+ UINTVAL need = (UINTVAL)((src->strlen - src_iter.charpos + 1) * 1.5);
if (need < 16)
need = 16;
dest_len += need;
@@ -683,16 +787,16 @@
String_iter iter;
UINTVAL start;
- iter_init(interp, src, &iter);
+ STRING_ITER_INIT(interp, &iter);
if (offset)
- iter.set_position(interp, &iter, offset);
+ utf8_iter_set_position(interp, src, &iter, offset);
start = iter.bytepos;
return_string->strstart = (char *)return_string->strstart + start;
if (count)
- iter.set_position(interp, &iter, offset + count);
+ utf8_iter_set_position(interp, src, &iter, offset + count);
return_string->bufused = iter.bytepos - start;
return_string->strlen = count;
@@ -749,9 +853,9 @@
* this is used to initially calculate src->strlen,
* therefore we must scan the whole string
*/
- iter_init(interp, src, &iter);
+ STRING_ITER_INIT(interp, &iter);
while (iter.bytepos < src->bufused)
- iter.get_and_advance(interp, &iter);
+ utf8_iter_get_and_advance(interp, src, &iter);
return iter.charpos;
}
@@ -775,29 +879,6 @@
/*
-=item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter
-*iter)>
-
-Initializes for string C<src> the string iterator C<iter>.
-
-=cut
-
-*/
-
-static void
-iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))
-{
- ASSERT_ARGS(iter_init)
- iter->str = src;
- iter->bytepos = 0;
- iter->charpos = 0;
- iter->get_and_advance = utf8_decode_and_advance;
- iter->set_and_advance = utf8_encode_and_advance;
- iter->set_position = utf8_set_position;
-}
-
-/*
-
=item C<void Parrot_encoding_utf8_init(PARROT_INTERP)>
Initializes the UTF-8 encoding.
@@ -823,9 +904,13 @@
get_bytes,
codepoints,
bytes,
- iter_init,
find_cclass,
- NULL
+ NULL,
+ utf8_iter_get,
+ utf8_iter_skip,
+ utf8_iter_get_and_advance,
+ utf8_iter_set_and_advance,
+ utf8_iter_set_position
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Parrot_register_encoding(interp, "utf8", return_encoding);
More information about the parrot-commits
mailing list