[svn:parrot] r47116 - branches/gsoc_nfg/src/string/encoding

darbelo at svn.parrot.org darbelo at svn.parrot.org
Sat May 29 19:46:30 UTC 2010


Author: darbelo
Date: Sat May 29 19:46:29 2010
New Revision: 47116
URL: https://trac.parrot.org/parrot/changeset/47116

Log:
Try to pre-normalize incoming strings as much as possible.

Modified:
   branches/gsoc_nfg/src/string/encoding/nfg.c

Modified: branches/gsoc_nfg/src/string/encoding/nfg.c
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/nfg.c	Sat May 29 19:27:03 2010	(r47115)
+++ branches/gsoc_nfg/src/string/encoding/nfg.c	Sat May 29 19:46:29 2010	(r47116)
@@ -200,24 +200,30 @@
         return Parrot_str_clone(interp, src);
     }
     else {
-        UINTVAL len = Parrot_str_length(interp, src);
-        STRING *res = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
+        /* Make sure we have NFC Unicode string. */
+        STRING  *from = Parrot_unicode_charset_ptr->compose(interp, 
+                            Parrot_unicode_charset_ptr->to_charset(interp, src));
+        UINTVAL  len  = Parrot_str_length(interp, from);
+        STRING  *to   = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
                            Parrot_nfg_encoding_ptr, Parrot_unicode_charset_ptr, 0);
-        UChar32 *buf = (UChar32 *) res->strstart;
-        UINTVAL offs;
-        for (offs = 0; offs < len; offs++){
+        UChar32 *buf  = (UChar32 *) to->strstart;
+
+        UINTVAL  offs = 0;
+        while (offs < len){
             buf[offs] = src->encoding->get_codepoint(interp, src, offs);
+            offs++;
+            //TODO
         };
-        res->strlen  = len;
-        res->bufused = len * sizeof (UChar32);
 
-        return res;
+        to->strlen  = offs;
+        to->bufused = offs * sizeof (UChar32);
+
+        return to;
     }
 #else
     UNUSED(src);
     no_ICU_lib(interp);
 #endif
-
 }
 
 /*


More information about the parrot-commits mailing list