[svn:parrot] r47705 - in branches/gsoc_nfg: include/parrot src/string src/string/encoding

darbelo at svn.parrot.org darbelo at svn.parrot.org
Sat Jun 19 11:23:51 UTC 2010


Author: darbelo
Date: Sat Jun 19 11:23:51 2010
New Revision: 47705
URL: https://trac.parrot.org/parrot/changeset/47705

Log:
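Improve iterator handling a bit more (create the string's grapheme table on demand and grow it when it is full), add a support function (grapheme_table_capacity) and provisionally de-const a pointer (the str member of string_iterator_t).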

Modified:
   branches/gsoc_nfg/include/parrot/string.h
   branches/gsoc_nfg/src/string/encoding/nfg.c
   branches/gsoc_nfg/src/string/grapheme.c
   branches/gsoc_nfg/src/string/grapheme.h

Modified: branches/gsoc_nfg/include/parrot/string.h
==============================================================================
--- branches/gsoc_nfg/include/parrot/string.h	Sat Jun 19 10:44:57 2010	(r47704)
+++ branches/gsoc_nfg/include/parrot/string.h	Sat Jun 19 11:23:51 2010	(r47705)
@@ -30,7 +30,7 @@
 
 /* String iterator */
 typedef struct string_iterator_t {
-    const STRING *str;
+    STRING *str;
     UINTVAL bytepos;
     UINTVAL charpos;
     UINTVAL (*get_and_advance)(PARROT_INTERP, struct string_iterator_t *i);

Modified: branches/gsoc_nfg/src/string/encoding/nfg.c
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/nfg.c	Sat Jun 19 10:44:57 2010	(r47704)
+++ branches/gsoc_nfg/src/string/encoding/nfg.c	Sat Jun 19 11:23:51 2010	(r47705)
@@ -249,14 +249,13 @@
     }
     else {
         /* Make sure we have NFC Unicode string. */
-        STRING  *from = Parrot_unicode_charset_ptr->compose(interp,
-                            Parrot_unicode_charset_ptr->to_charset(interp, src));
+        STRING  *from = Parrot_unicode_charset_ptr->to_charset(interp, src);
         UINTVAL  len  = Parrot_str_length(interp, from);
         STRING  *to   = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
                            Parrot_nfg_encoding_ptr, Parrot_unicode_charset_ptr, 0);
         UChar32 *buf  = (UChar32 *) to->strstart;
 
-        nfg_encode(interp, to, 0, src, 0, len, 0);
+        nfg_encode(interp, to, 0, Parrot_unicode_charset_ptr->compose(interp, from), 0, len, 0);
 
         return to;
     }
@@ -504,24 +503,31 @@
 #if PARROT_HAS_ICU
     UChar32 *s   = (UChar32 *) i->str->strstart;
     size_t   pos = i->bytepos / sizeof (UChar32);
-	if (!ISCOMBINING(c)) {
+
+    if (!ISCOMBINING(c)) {
         s[pos++] = (UChar32) c;
         ++i->charpos;
         i->bytepos = pos * sizeof (UChar32);
     }
     // TODO: This can create dynamic graphemes for valid Unicode compositions.
-	else {
-	    int32_t  prev = s[pos - 1];
+    else {
+        grapheme_table *table = (grapheme_table *) i->str->extra;
+        int32_t  prev = s[pos - 1];
         grapheme g;
-		if (prev < 0) {
-		    grapheme_table *table = (grapheme_table *) i->str->extra;
+
+        if (table == NULL) {
+            table = create_grapheme_table(interp, 1);
+            i->str->extra = table;
+        }
+
+        if (prev < 0) {
             g.len = table->graphemes[-1 - prev].len + 1;
             g.hash = table->graphemes[-1 - prev].hash;
             g.hash += g.hash << 5;
             g.hash += c;
             g.codepoints = mem_gc_allocate_n_typed(interp, g.len, UChar32);
-			memcpy(g.codepoints, table->graphemes[-1 - prev].codepoints,
-                   g.len * sizeof (UChar))
+            memcpy(g.codepoints, table->graphemes[-1 - prev].codepoints,
+                   g.len * sizeof (UChar));
         }
         else {
             g.len  = 2;
@@ -536,8 +542,10 @@
             g.hash += g.hash << 5;
             g.hash += c;
         }
-	    s[pos - 1] = add_grapheme(interp, (grapheme_table *) i->str->extra, &g);
-		mem_gc_free(interp, g.codepoints);
+        if (grapheme_table_capacity(interp, (grapheme_table *)i->str->extra) < 1)
+            i->str->extra = grow_grapheme_table(interp, (grapheme_table *) i->str->extra, 2);
+        s[pos - 1] = add_grapheme(interp, (grapheme_table *) i->str->extra, &g);
+        mem_gc_free(interp, g.codepoints);
     }
 #else
     UNUSED(i);

Modified: branches/gsoc_nfg/src/string/grapheme.c
==============================================================================
--- branches/gsoc_nfg/src/string/grapheme.c	Sat Jun 19 10:44:57 2010	(r47704)
+++ branches/gsoc_nfg/src/string/grapheme.c	Sat Jun 19 11:23:51 2010	(r47705)
@@ -14,6 +14,12 @@
 
 #if PARROT_HAS_ICU
 
+INTVAL
+grapheme_table_capacity(PARROT_INTERP, grapheme_table *table)
+{
+    return table->size - table->used;
+}
+
 grapheme_table *
 create_grapheme_table(PARROT_INTERP, UINTVAL n)
 {

Modified: branches/gsoc_nfg/src/string/grapheme.h
==============================================================================
--- branches/gsoc_nfg/src/string/grapheme.h	Sat Jun 19 10:44:57 2010	(r47704)
+++ branches/gsoc_nfg/src/string/grapheme.h	Sat Jun 19 11:23:51 2010	(r47705)
@@ -64,6 +64,9 @@
     int32_t codepoint)
         __attribute__nonnull__(1);
 
+INTVAL grapheme_table_capacity(PARROT_INTERP, grapheme_table *table)
+        __attribute__nonnull__(1);
+
 grapheme_table * grow_grapheme_table(SHIM_INTERP,
     grapheme_table *src,
     UINTVAL n);
@@ -87,6 +90,8 @@
        PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_get_grapheme_base __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_grapheme_table_capacity __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_grow_grapheme_table __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
 #define ASSERT_ARGS_merge_tables_and_fixup_substring \
      __attribute__unused__ int _ASSERT_ARGS_CHECK = (\


More information about the parrot-commits mailing list