[svn:parrot] r47606 - in branches/gsoc_nfg/src/string: . charset
darbelo at svn.parrot.org
darbelo at svn.parrot.org
Sun Jun 13 22:06:39 UTC 2010
Author: darbelo
Date: Sun Jun 13 22:06:38 2010
New Revision: 47606
URL: https://trac.parrot.org/parrot/changeset/47606
Log:
Hack some NFG-awareness into the Unicode cclass functions.
For now just return the data for the first codepoint in the grapheme.
It should work out most of the time.
Modified:
branches/gsoc_nfg/src/string/charset/unicode.c
branches/gsoc_nfg/src/string/grapheme.c
branches/gsoc_nfg/src/string/grapheme.h
Modified: branches/gsoc_nfg/src/string/charset/unicode.c
==============================================================================
--- branches/gsoc_nfg/src/string/charset/unicode.c Sun Jun 13 16:54:36 2010 (r47605)
+++ branches/gsoc_nfg/src/string/charset/unicode.c Sun Jun 13 22:06:38 2010 (r47606)
@@ -192,6 +192,7 @@
# include <unicode/uchar.h>
# include <unicode/ustring.h>
# include <unicode/unorm.h>
+# include <../grapheme.h>
#endif
#define EXCEPTION(err, str) \
Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
@@ -849,6 +850,12 @@
codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
+#if PARROT_HAS_ICU
+ if (src->encoding == Parrot_nfg_encoding_ptr)
+ codepoint = get_grapheme_base(interp, (grapheme_table *)src->extra,
+ (int32_t) codepoint);
+#endif /* PARROT_HAS_ICU */
+
if (codepoint >= 256)
return u_iscclass(interp, codepoint, flags) != 0;
@@ -882,6 +889,12 @@
for (; pos < end; ++pos) {
codepoint = iter.get_and_advance(interp, &iter);
+#if PARROT_HAS_ICU
+ if (src->encoding == Parrot_nfg_encoding_ptr)
+ codepoint = get_grapheme_base(interp, (grapheme_table *)src->extra,
+ (int32_t) codepoint);
+#endif /* PARROT_HAS_ICU */
+
if (codepoint >= 256) {
if (u_iscclass(interp, codepoint, flags))
return pos;
@@ -933,6 +946,11 @@
for (; pos < end; ++pos) {
codepoint = iter.get_and_advance(interp, &iter);
+#if PARROT_HAS_ICU
+ if (src->encoding == Parrot_nfg_encoding_ptr)
+ codepoint = get_grapheme_base(interp, (grapheme_table *)src->extra,
+ (int32_t) codepoint);
+#endif /* PARROT_HAS_ICU */
if (codepoint >= 256) {
for (bit = enum_cclass_uppercase;
bit <= enum_cclass_word ; bit <<= 1) {
Modified: branches/gsoc_nfg/src/string/grapheme.c
==============================================================================
--- branches/gsoc_nfg/src/string/grapheme.c Sun Jun 13 16:54:36 2010 (r47605)
+++ branches/gsoc_nfg/src/string/grapheme.c Sun Jun 13 22:06:38 2010 (r47606)
@@ -33,7 +33,7 @@
{
ASSERT_ARGS(clone_grapheme_table)
if (src != NULL) {
- UINTVAL i;
+ INTVAL i;
grapheme_table * dst = create_grapheme_table(interp, src->used);
dst->used = src->used;
@@ -67,7 +67,7 @@
destroy_grapheme_table(PARROT_INTERP, grapheme_table *table)
{
ASSERT_ARGS(destroy_grapheme_table)
- UINTVAL i = 0;
+ INTVAL i = 0;
while (i < table->used) {
mem_gc_free(interp, table->graphemes[i++].codepoints);
}
@@ -158,6 +158,21 @@
return (UChar32) (-1 - i);
}
+/*
+ * get_grapheme_base
+ *
+ * Maps one element of an NFG string to a plain codepoint.
+ *
+ * Non-negative values are returned unchanged. A negative value is taken
+ * to be a reference into 'table', decoded as index = -1 - codepoint, and
+ * the first codepoint stored for that grapheme is returned.
+ *
+ * 'table' is only dereferenced when 'codepoint' is negative.
+ *
+ * Throws EXCEPTION_OUT_OF_BOUNDS when the decoded index is not below
+ * table->used.
+ */
+UINTVAL
+get_grapheme_base(PARROT_INTERP, grapheme_table *table, int32_t codepoint)
+{
+    ASSERT_ARGS(get_grapheme_base)
+    const int32_t index = -1 - codepoint;
+
+    /* U+0000 is an ordinary codepoint too: only strictly negative values
+     * are table references. Testing "> 0" here would send 0 to index -1. */
+    if (codepoint >= 0)
+        return (UINTVAL) codepoint;
+
+    if (index >= table->used)
+        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_OUT_OF_BOUNDS,
+            "Grapheme table index out of bounds!");
+
+    return table->graphemes[index].codepoints[0];
+}
+
#endif /* PARROT_HAS_ICU */
/*
Modified: branches/gsoc_nfg/src/string/grapheme.h
==============================================================================
--- branches/gsoc_nfg/src/string/grapheme.h Sun Jun 13 16:54:36 2010 (r47605)
+++ branches/gsoc_nfg/src/string/grapheme.h Sun Jun 13 22:06:38 2010 (r47606)
@@ -59,6 +59,11 @@
void destroy_grapheme_table(PARROT_INTERP, grapheme_table *table)
__attribute__nonnull__(1);
+UINTVAL get_grapheme_base(PARROT_INTERP,
+ grapheme_table *table,
+ int32_t codepoint)
+ __attribute__nonnull__(1);
+
grapheme_table * grow_grapheme_table(SHIM_INTERP,
grapheme_table *src,
UINTVAL n);
@@ -80,6 +85,8 @@
PARROT_ASSERT_ARG(interp))
#define ASSERT_ARGS_destroy_grapheme_table __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_get_grapheme_base __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp))
#define ASSERT_ARGS_grow_grapheme_table __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
#define ASSERT_ARGS_merge_tables_and_fixup_substring \
__attribute__unused__ int _ASSERT_ARGS_CHECK = (\
More information about the parrot-commits
mailing list