[svn:parrot] r47667 - branches/gsoc_nfg/src/string/encoding
darbelo at svn.parrot.org
darbelo at svn.parrot.org
Thu Jun 17 00:07:49 UTC 2010
Author: darbelo
Date: Thu Jun 17 00:07:49 2010
New Revision: 47667
URL: https://trac.parrot.org/parrot/changeset/47667
Log:
Commit WIP iterator additions. The first step towards NFG literals has been taken.
Modified:
branches/gsoc_nfg/src/string/encoding/nfg.c
Modified: branches/gsoc_nfg/src/string/encoding/nfg.c
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/nfg.c Wed Jun 16 23:13:32 2010 (r47666)
+++ branches/gsoc_nfg/src/string/encoding/nfg.c Thu Jun 17 00:07:49 2010 (r47667)
@@ -504,10 +504,41 @@
#if PARROT_HAS_ICU
UChar32 *s = (UChar32 *) i->str->strstart;
size_t pos = i->bytepos / sizeof (UChar32);
- s[pos++] = (UChar32) c;
- ++i->charpos;
- i->bytepos = pos * sizeof (UChar32);
- /* TODO: properly compose stuff here. */
+ if (!ISCOMBINING(c)) {
+ s[pos++] = (UChar32) c;
+ ++i->charpos;
+ i->bytepos = pos * sizeof (UChar32);
+ }
+ // TODO: This can create dynamic graphemes for valid Unicode compositions.
+ else {
+ int32_t prev = s[pos - 1];
+ grapheme g;
+ if (prev < 0) {
+ grapheme_table *table = (grapheme_table *) i->str->extra;
+ g.len = table->graphemes[-1 - prev].len + 1;
+ g.hash = table->graphemes[-1 - prev].hash;
+ g.hash += g.hash << 5;
+ g.hash += c;
+ g.codepoints = mem_gc_allocate_n_typed(interp, g.len, UChar32);
+ memcpy(g.codepoints, table->graphemes[-1 - prev].codepoints,
+ g.len * sizeof (UChar))
+ }
+ else {
+ g.len = 2;
+ g.hash = 0xffff;
+ g.codepoints = mem_gc_allocate_n_typed(interp, g.len, UChar32);
+
+ g.codepoints[0] = prev;
+ g.hash += g.hash << 5;
+ g.hash += prev;
+
+ g.codepoints[1] = c;
+ g.hash += g.hash << 5;
+ g.hash += c;
+ }
+ s[pos - 1] = add_grapheme(interp, (grapheme_table *) i->str->extra, &g);
+ mem_gc_free(interp, g.codepoints);
+ }
#else
UNUSED(i);
no_ICU_lib(interp);
More information about the parrot-commits mailing list