[svn:parrot] r47260 - branches/gsoc_nfg/src/string/encoding

darbelo at svn.parrot.org darbelo at svn.parrot.org
Tue Jun 1 15:53:55 UTC 2010


Author: darbelo
Date: Tue Jun  1 15:53:55 2010
New Revision: 47260
URL: https://trac.parrot.org/parrot/changeset/47260

Log:
First stab at NFG encoding. Transcode-only for now.

Modified:
   branches/gsoc_nfg/src/string/encoding/nfg.c

Modified: branches/gsoc_nfg/src/string/encoding/nfg.c
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/nfg.c	Tue Jun  1 15:53:39 2010	(r47259)
+++ branches/gsoc_nfg/src/string/encoding/nfg.c	Tue Jun  1 15:53:55 2010	(r47260)
@@ -32,6 +32,42 @@
 }
 #endif
 
+#if PARROT_HAS_ICU
+static void
+nfg_encode(PARROT_INTERP, STRING *dest, UINTVAL index, STRING *src,
+           UINTVAL offs, UINTVAL len, UINTVAL graphemes)
+{
+    UChar32 *buf  = (UChar32 *) (dest->strstart);
+    UINTVAL  hash = 0xffff; // TODO: put a real seed here.
+    UINTVAL  aux;
+
+    while (offs < len) {
+        buf[index] = src->encoding->get_codepoint(interp, src, offs);
+        aux = offs;
+
+        while (ISCOMBINING(buf[index]) && offs < len) {
+            hash += hash << 5;
+            hash += buf[index];
+            buf[index] = src->encoding->get_codepoint(interp, src, ++offs);
+        }
+
+        if (hash != 0xffff) {
+            nfg_encode(interp, dest, index, src, offs, len, graphemes + 1);
+            buf[index] = add_grapheme_from_substr(dest->extra, src, aux, offs-aux, hash);
+            return;
+        }
+        offs++;
+        index++;
+    }
+
+    dest->extra   = create_grapheme_table(interp, graphemes);
+    dest->strlen  = index;
+    dest->bufused = index * sizeof (UChar32);
+
+}
+
+#endif
+
 /* HEADERIZER HFILE: src/string/encoding/nfg.h */
 
 /* HEADERIZER BEGIN: static */
@@ -98,6 +134,15 @@
         __attribute__nonnull__(2)
         FUNC_MODIFIES(*i);
 
+static void nfg_encode(PARROT_INTERP,
+    STRING *dest,
+    UINTVAL index,
+    STRING *src,
+    UINTVAL offs,
+    UINTVAL len,
+    UINTVAL graphemes)
+        __attribute__nonnull__(1);
+
 static void nfg_encode_and_advance(PARROT_INTERP,
     ARGMOD(String_iter *i),
     UINTVAL c)
@@ -156,6 +201,8 @@
 #define ASSERT_ARGS_nfg_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_nfg_encode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_nfg_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(i))
@@ -179,6 +226,7 @@
 #  include <unicode/ustring.h>
 #endif
 
+
 /*
 
 =item C<static STRING * to_encoding(PARROT_INTERP, const STRING *src)>
@@ -208,15 +256,7 @@
                            Parrot_nfg_encoding_ptr, Parrot_unicode_charset_ptr, 0);
         UChar32 *buf  = (UChar32 *) to->strstart;
 
-        UINTVAL  offs = 0;
-        while (offs < len){
-            buf[offs] = src->encoding->get_codepoint(interp, src, offs);
-            offs++;
-            //TODO
-        };
-
-        to->strlen  = offs;
-        to->bufused = offs * sizeof (UChar32);
+        nfg_encode(interp, to, 0, src, 0, len - 1, 0);
 
         return to;
     }


More information about the parrot-commits mailing list