[svn:parrot] r47439 - in branches/gsoc_nfg/src/string: . encoding
darbelo at svn.parrot.org
darbelo at svn.parrot.org
Mon Jun 7 21:04:53 UTC 2010
Author: darbelo
Date: Mon Jun 7 21:04:53 2010
New Revision: 47439
URL: https://trac.parrot.org/parrot/changeset/47439
Log:
Make various parts of codetest happy again.
Modified:
branches/gsoc_nfg/src/string/encoding/nfg.c
branches/gsoc_nfg/src/string/grapheme.c
branches/gsoc_nfg/src/string/grapheme.h
Modified: branches/gsoc_nfg/src/string/encoding/nfg.c
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/nfg.c Mon Jun 7 19:47:02 2010 (r47438)
+++ branches/gsoc_nfg/src/string/encoding/nfg.c Mon Jun 7 21:04:53 2010 (r47439)
@@ -32,41 +32,6 @@
}
#endif
-#if PARROT_HAS_ICU
-static void
-nfg_encode(PARROT_INTERP, STRING *dest, UINTVAL index, STRING *src,
- UINTVAL offs, UINTVAL len, UINTVAL graphemes)
-{
- UChar32 *buf = (UChar32 *) (dest->strstart);
- UINTVAL hash = 0xffff; // TODO: put a real seed here.
- UINTVAL aux;
-
- while (offs < len) {
- buf[index] = src->encoding->get_codepoint(interp, src, offs);
- aux = offs;
-
- while (ISCOMBINING(buf[index]) && offs < len) {
- hash += hash << 5;
- hash += buf[index];
- buf[index] = src->encoding->get_codepoint(interp, src, ++offs);
- }
-
- if (hash != 0xffff) {
- nfg_encode(interp, dest, index, src, offs, len, graphemes + 1);
- buf[index] = add_grapheme_from_substr(interp, dest->extra, src, aux, offs-aux, hash);
- return;
- }
- offs++;
- index++;
- }
-
- dest->extra = create_grapheme_table(interp, graphemes);
- dest->strlen = index;
- dest->bufused = index * sizeof (UChar32);
-
-}
-
-#endif
/* HEADERIZER HFILE: src/string/encoding/nfg.h */
@@ -224,6 +189,41 @@
#if PARROT_HAS_ICU
# include <unicode/ustring.h>
+
+static void
+nfg_encode(PARROT_INTERP, STRING *dest, UINTVAL index, STRING *src,
+ UINTVAL offs, UINTVAL len, UINTVAL graphemes)
+{
+ ASSERT_ARGS(nfg_encode)
+ UChar32 *buf = (UChar32 *) (dest->strstart);
+ UINTVAL hash = 0xffff; // TODO: put a real seed here.
+ UINTVAL aux;
+
+ while (offs < len) {
+ buf[index] = src->encoding->get_codepoint(interp, src, offs);
+ aux = offs;
+
+ while (ISCOMBINING(buf[index]) && offs < len) {
+ hash += hash << 5;
+ hash += buf[index];
+ buf[index] = src->encoding->get_codepoint(interp, src, ++offs);
+ }
+
+ if (hash != 0xffff) {
+ nfg_encode(interp, dest, index, src, offs, len, graphemes + 1);
+ buf[index] = add_grapheme_from_substr(interp, dest->extra, src, aux, offs-aux, hash);
+ return;
+ }
+ offs++;
+ index++;
+ }
+
+ dest->extra = create_grapheme_table(interp, graphemes);
+ dest->strlen = index;
+ dest->bufused = index * sizeof (UChar32);
+
+}
+
#endif
@@ -249,7 +249,7 @@
}
else {
/* Make sure we have NFC Unicode string. */
- STRING *from = Parrot_unicode_charset_ptr->compose(interp,
+ STRING *from = Parrot_unicode_charset_ptr->compose(interp,
Parrot_unicode_charset_ptr->to_charset(interp, src));
UINTVAL len = Parrot_str_length(interp, from);
STRING *to = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
Modified: branches/gsoc_nfg/src/string/grapheme.c
==============================================================================
--- branches/gsoc_nfg/src/string/grapheme.c Mon Jun 7 19:47:02 2010 (r47438)
+++ branches/gsoc_nfg/src/string/grapheme.c Mon Jun 7 21:04:53 2010 (r47439)
@@ -17,8 +17,9 @@
grapheme_table *
create_grapheme_table(PARROT_INTERP, UINTVAL n)
{
- UINTVAL entries = ( n > MIN_TABLE_LENGTH) ? n - MIN_TABLE_LENGTH : 0;
- grapheme_table *table = mem_sys_allocate(sizeof (grapheme_table)
+ ASSERT_ARGS(create_grapheme_table)
+ UINTVAL entries = (n > MIN_TABLE_LENGTH) ? n - MIN_TABLE_LENGTH : 0;
+ grapheme_table *table = mem_sys_allocate(sizeof (grapheme_table)
+ entries * sizeof (grapheme));
table->size = entries + MIN_TABLE_LENGTH;
table->used = 0;
@@ -30,10 +31,11 @@
add_grapheme_from_substr(PARROT_INTERP, grapheme_table *table, STRING *src,
UINTVAL start, UINTVAL len, UINTVAL hash)
{
+ ASSERT_ARGS(add_grapheme_from_substr)
int32_t i;
/* Check if it's in the table already... */
for (i = 0; i < table->used; i++) {
- if (table->graphemes[i].hash == hash )
+ if (table->graphemes[i].hash == hash)
return (UChar32) (-1 * (i + 1));
}
@@ -42,7 +44,7 @@
table->graphemes[table->used].hash = hash;
table->graphemes[table->used].codepoints = mem_gc_allocate_n_typed(interp, len, UChar32);
for (i = 0; i < len; i++){
- table->graphemes[table->used].codepoints[i] =
+ table->graphemes[table->used].codepoints[i] =
src->encoding->get_codepoint(interp, src, start + i);
};
}
Modified: branches/gsoc_nfg/src/string/grapheme.h
==============================================================================
--- branches/gsoc_nfg/src/string/grapheme.h Mon Jun 7 19:47:02 2010 (r47438)
+++ branches/gsoc_nfg/src/string/grapheme.h Mon Jun 7 21:04:53 2010 (r47439)
@@ -17,7 +17,7 @@
typedef struct grapheme_t grapheme;
-#define MIN_TABLE_LENGTH 1
+# define MIN_TABLE_LENGTH 1
struct grapheme_table_t {
UINTVAL size; /* Total table size, in graphemes. */
@@ -31,10 +31,10 @@
typedef struct grapheme_table_t grapheme_table;
-#define ISCOMBINING(c) ( (0x0299 < (c) && (c) < 0x0370)\
- || (0x1dbf < (c) && (c) < 0x1e00)\
- || (0x20cf < (c) && (c) < 0x2100)\
- || (0xfe1f < (c) && (c) < 0xfe30))
+# define ISCOMBINING(c) ( (0x0299 < (c) && (c) < 0x0370)\
+ || (0x1dbf < (c) && (c) < 0x1e00)\
+ || (0x20cf < (c) && (c) < 0x2100)\
+ || (0xfe1f < (c) && (c) < 0xfe30))
/* HEADERIZER BEGIN: src/string/grapheme.c */
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
More information about the parrot-commits mailing list