[svn:parrot] r47606 - in branches/gsoc_nfg/src/string: . charset

darbelo at svn.parrot.org darbelo at svn.parrot.org
Sun Jun 13 22:06:39 UTC 2010


Author: darbelo
Date: Sun Jun 13 22:06:38 2010
New Revision: 47606
URL: https://trac.parrot.org/parrot/changeset/47606

Log:
Hack some NFG-awareness into the unicode cclass functions.
For now just return the data for the first codepoint in the grapheme.
It should work out most of the time.

Modified:
   branches/gsoc_nfg/src/string/charset/unicode.c
   branches/gsoc_nfg/src/string/grapheme.c
   branches/gsoc_nfg/src/string/grapheme.h

Modified: branches/gsoc_nfg/src/string/charset/unicode.c
==============================================================================
--- branches/gsoc_nfg/src/string/charset/unicode.c	Sun Jun 13 16:54:36 2010	(r47605)
+++ branches/gsoc_nfg/src/string/charset/unicode.c	Sun Jun 13 22:06:38 2010	(r47606)
@@ -192,6 +192,7 @@
 #  include <unicode/uchar.h>
 #  include <unicode/ustring.h>
 #  include <unicode/unorm.h>
+#  include <../grapheme.h>
 #endif
 #define EXCEPTION(err, str) \
     Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
@@ -849,6 +850,12 @@
 
     codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
 
+#if PARROT_HAS_ICU
+    if (src->encoding == Parrot_nfg_encoding_ptr)
+        codepoint = get_grapheme_base(interp, (grapheme_table *)src->extra,
+                                      (int32_t) codepoint);
+#endif /* PARROT_HAS_ICU */
+
     if (codepoint >= 256)
         return u_iscclass(interp, codepoint, flags) != 0;
 
@@ -882,6 +889,12 @@
 
     for (; pos < end; ++pos) {
         codepoint = iter.get_and_advance(interp, &iter);
+#if PARROT_HAS_ICU
+        if (src->encoding == Parrot_nfg_encoding_ptr)
+            codepoint = get_grapheme_base(interp, (grapheme_table *)src->extra,
+                                          (int32_t) codepoint);
+#endif /* PARROT_HAS_ICU */
+
         if (codepoint >= 256) {
             if (u_iscclass(interp, codepoint, flags))
                     return pos;
@@ -933,6 +946,11 @@
 
     for (; pos < end; ++pos) {
         codepoint = iter.get_and_advance(interp, &iter);
+#if PARROT_HAS_ICU
+        if (src->encoding == Parrot_nfg_encoding_ptr)
+            codepoint = get_grapheme_base(interp, (grapheme_table *)src->extra,
+                                          (int32_t) codepoint);
+#endif /* PARROT_HAS_ICU */
         if (codepoint >= 256) {
             for (bit = enum_cclass_uppercase;
                     bit <= enum_cclass_word ; bit <<= 1) {

Modified: branches/gsoc_nfg/src/string/grapheme.c
==============================================================================
--- branches/gsoc_nfg/src/string/grapheme.c	Sun Jun 13 16:54:36 2010	(r47605)
+++ branches/gsoc_nfg/src/string/grapheme.c	Sun Jun 13 22:06:38 2010	(r47606)
@@ -33,7 +33,7 @@
 {
     ASSERT_ARGS(clone_grapheme_table)
     if (src != NULL) {
-        UINTVAL i;
+        INTVAL i;
         grapheme_table * dst = create_grapheme_table(interp, src->used);
 
         dst->used = src->used;
@@ -67,7 +67,7 @@
 destroy_grapheme_table(PARROT_INTERP, grapheme_table *table)
 {
     ASSERT_ARGS(destroy_grapheme_table)
-    UINTVAL i = 0;
+    INTVAL i = 0;
     while (i < table->used) {
         mem_gc_free(interp, table->graphemes[i++].codepoints);
     }
@@ -158,6 +158,21 @@
     return (UChar32) (-1 - i);
 }
 
+UINTVAL
+get_grapheme_base(PARROT_INTERP, grapheme_table *table, int32_t codepoint)
+{
+    ASSERT_ARGS(get_grapheme_base)
+    const int32_t index = -1 - codepoint; /* table slot i is stored as -1 - i */
+    /* Non-negative values, U+0000 included, are ordinary codepoints. */
+    if (codepoint >= 0)
+        return (UINTVAL) codepoint;
+    /* Otherwise hand back the first codepoint of the referenced grapheme. */
+    if (index >= table->used)
+        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_OUT_OF_BOUNDS,
+            "Grapheme table index out of bounds!");
+    return table->graphemes[index].codepoints[0];
+}
+
 #endif /* PARROT_HAS_ICU */
 
 /*

Modified: branches/gsoc_nfg/src/string/grapheme.h
==============================================================================
--- branches/gsoc_nfg/src/string/grapheme.h	Sun Jun 13 16:54:36 2010	(r47605)
+++ branches/gsoc_nfg/src/string/grapheme.h	Sun Jun 13 22:06:38 2010	(r47606)
@@ -59,6 +59,11 @@
 void destroy_grapheme_table(PARROT_INTERP, grapheme_table *table)
         __attribute__nonnull__(1);
 
+UINTVAL get_grapheme_base(PARROT_INTERP,
+    grapheme_table *table,
+    int32_t codepoint)
+        __attribute__nonnull__(1);
+
 grapheme_table * grow_grapheme_table(SHIM_INTERP,
     grapheme_table *src,
     UINTVAL n);
@@ -80,6 +85,8 @@
        PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_destroy_grapheme_table __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_get_grapheme_base __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_grow_grapheme_table __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
 #define ASSERT_ARGS_merge_tables_and_fixup_substring \
      __attribute__unused__ int _ASSERT_ARGS_CHECK = (\


More information about the parrot-commits mailing list