[svn:parrot] r48089 - branches/gsoc_nfg/src/string/encoding
darbelo at svn.parrot.org
darbelo at svn.parrot.org
Wed Jul 14 22:54:11 UTC 2010
Author: darbelo
Date: Wed Jul 14 22:54:11 2010
New Revision: 48089
URL: https://trac.parrot.org/parrot/changeset/48089
Log:
Make iteration of NFG string a codepoint-level operation.
Modified:
branches/gsoc_nfg/src/string/encoding/nfg.c
Modified: branches/gsoc_nfg/src/string/encoding/nfg.c
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/nfg.c Wed Jul 14 22:22:16 2010 (r48088)
+++ branches/gsoc_nfg/src/string/encoding/nfg.c Wed Jul 14 22:54:11 2010 (r48089)
@@ -482,7 +482,7 @@
=item C<static UINTVAL nfg_decode_and_advance(PARROT_INTERP, String_iter *i)>
-Moves the string iterator C<i> to the next nfg codepoint.
+Moves the string iterator C<i> to the next codepoint.
=cut
@@ -493,12 +493,28 @@
{
ASSERT_ARGS(nfg_decode_and_advance)
#if PARROT_HAS_ICU
- const UChar32 * const s = (const UChar32 *) i->str->strstart;
- size_t pos = i->bytepos / sizeof (UChar32);
- const UChar32 c = s[pos++];
- ++i->charpos;
- i->bytepos = pos * sizeof (UChar32);
- return c;
+ const int32_t * const s = (const int32_t *) i->str->strstart;
+ const int32_t c = s[i->bytepos / sizeof (UChar32)];
+ const UINTVAL cpos = i->charpos++;
+
+ if (c >= 0) {
+ /* regular codepoint, return it. */
+ i->bytepos += sizeof (int32_t);
+ return (UINTVAL) c;
+ }
+ else {
+ /* dynamic codepoint, we have to peek inside. */
+ grapheme_table *table = (grapheme_table *) i->str->extra;
+ const int offs = cpos - i->bytepos / sizeof (UChar32);
+ const int idx = -1 - c;
+ if (offs < table->graphemes[idx].len) {
+ if (offs == table->graphemes[idx].len - 1)
+ i->bytepos += sizeof (int32_t);
+ /* Make sure we move off this grapheme if we've
+ * traversed all of its codepoints */
+ return (UINTVAL) table->graphemes[-1 - c].codepoints[offs];
+ }
+ }
#else
UNUSED(i);
no_ICU_lib(interp);
More information about the parrot-commits
mailing list