[svn:parrot] r47116 - branches/gsoc_nfg/src/string/encoding
darbelo at svn.parrot.org
darbelo at svn.parrot.org
Sat May 29 19:46:30 UTC 2010
Author: darbelo
Date: Sat May 29 19:46:29 2010
New Revision: 47116
URL: https://trac.parrot.org/parrot/changeset/47116
Log:
Try to pre-normalize incoming strings as much as possible.
Modified:
branches/gsoc_nfg/src/string/encoding/nfg.c
Modified: branches/gsoc_nfg/src/string/encoding/nfg.c
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/nfg.c Sat May 29 19:27:03 2010 (r47115)
+++ branches/gsoc_nfg/src/string/encoding/nfg.c Sat May 29 19:46:29 2010 (r47116)
@@ -200,24 +200,30 @@
return Parrot_str_clone(interp, src);
}
else {
- UINTVAL len = Parrot_str_length(interp, src);
- STRING *res = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
+ /* Make sure we have an NFC Unicode string. */
+ STRING *from = Parrot_unicode_charset_ptr->compose(interp,
+ Parrot_unicode_charset_ptr->to_charset(interp, src));
+ UINTVAL len = Parrot_str_length(interp, from);
+ STRING *to = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
Parrot_nfg_encoding_ptr, Parrot_unicode_charset_ptr, 0);
- UChar32 *buf = (UChar32 *) res->strstart;
- UINTVAL offs;
- for (offs = 0; offs < len; offs++){
+ UChar32 *buf = (UChar32 *) to->strstart;
+
+ UINTVAL offs = 0;
+ while (offs < len){
buf[offs] = src->encoding->get_codepoint(interp, src, offs);
+ offs++;
+ //TODO
};
- res->strlen = len;
- res->bufused = len * sizeof (UChar32);
- return res;
+ to->strlen = offs;
+ to->bufused = offs * sizeof (UChar32);
+
+ return to;
}
#else
UNUSED(src);
no_ICU_lib(interp);
#endif
-
}
/*
More information about the parrot-commits
mailing list