[svn:parrot] r47667 - branches/gsoc_nfg/src/string/encoding

darbelo at svn.parrot.org darbelo at svn.parrot.org
Thu Jun 17 00:07:49 UTC 2010


Author: darbelo
Date: Thu Jun 17 00:07:49 2010
New Revision: 47667
URL: https://trac.parrot.org/parrot/changeset/47667

Log:
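Commit WIP iterator additions: when the character being written is a combining character, the iterator now tries to fold it into the preceding character as a dynamic grapheme instead of storing it as a separate codepoint.  This is the first step towards NFG literals.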

Modified:
   branches/gsoc_nfg/src/string/encoding/nfg.c

Modified: branches/gsoc_nfg/src/string/encoding/nfg.c
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/nfg.c	Wed Jun 16 23:13:32 2010	(r47666)
+++ branches/gsoc_nfg/src/string/encoding/nfg.c	Thu Jun 17 00:07:49 2010	(r47667)
@@ -504,10 +504,41 @@
 #if PARROT_HAS_ICU
     UChar32 *s   = (UChar32 *) i->str->strstart;
     size_t   pos = i->bytepos / sizeof (UChar32);
-    s[pos++] = (UChar32) c;
-    ++i->charpos;
-    i->bytepos = pos * sizeof (UChar32);
-    /* TODO: properly compose stuff here. */
+	if (!ISCOMBINING(c)) {
+        s[pos++] = (UChar32) c;
+        ++i->charpos;
+        i->bytepos = pos * sizeof (UChar32);
+    }
+    // TODO: This can create dynamic graphemes for valid Unicode compositions.
+	else {
+	    int32_t  prev = s[pos - 1];
+        grapheme g;
+		if (prev < 0) {
+		    grapheme_table *table = (grapheme_table *) i->str->extra;
+            g.len = table->graphemes[-1 - prev].len + 1;
+            g.hash = table->graphemes[-1 - prev].hash;
+            g.hash += g.hash << 5;
+            g.hash += c;
+            g.codepoints = mem_gc_allocate_n_typed(interp, g.len, UChar32);
+			memcpy(g.codepoints, table->graphemes[-1 - prev].codepoints,
+                   g.len * sizeof (UChar))
+        }
+        else {
+            g.len  = 2;
+            g.hash = 0xffff;
+            g.codepoints = mem_gc_allocate_n_typed(interp, g.len, UChar32);
+
+            g.codepoints[0] = prev;
+            g.hash += g.hash << 5;
+            g.hash += prev;
+
+            g.codepoints[1] = c;
+            g.hash += g.hash << 5;
+            g.hash += c;
+        }
+	    s[pos - 1] = add_grapheme(interp, (grapheme_table *) i->str->extra, &g);
+		mem_gc_free(interp, g.codepoints);
+    }
 #else
     UNUSED(i);
     no_ICU_lib(interp);


More information about the parrot-commits mailing list