[svn:parrot] r49747 - in branches/string_checks: src/string/encoding t/op

nwellnhof at svn.parrot.org nwellnhof at svn.parrot.org
Sun Oct 31 14:58:41 UTC 2010


Author: nwellnhof
Date: Sun Oct 31 14:58:40 2010
New Revision: 49747
URL: https://trac.parrot.org/parrot/changeset/49747

Log:
[str] Merge some ASCII and Latin1 functions

Modified:
   branches/string_checks/src/string/encoding/ascii.c
   branches/string_checks/src/string/encoding/binary.c
   branches/string_checks/src/string/encoding/latin1.c
   branches/string_checks/src/string/encoding/shared.c
   branches/string_checks/src/string/encoding/shared.h
   branches/string_checks/src/string/encoding/tables.c
   branches/string_checks/src/string/encoding/tables.h
   branches/string_checks/t/op/string_cs.t
   branches/string_checks/t/op/stringu.t

Modified: branches/string_checks/src/string/encoding/ascii.c
==============================================================================
--- branches/string_checks/src/string/encoding/ascii.c	Sun Oct 31 14:42:55 2010	(r49746)
+++ branches/string_checks/src/string/encoding/ascii.c	Sun Oct 31 14:58:40 2010	(r49747)
@@ -40,31 +40,6 @@
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
-PARROT_WARN_UNUSED_RESULT
-static INTVAL ascii_find_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL ascii_find_not_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL ascii_is_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
 PARROT_CANNOT_RETURN_NULL
 static STRING* ascii_titlecase(PARROT_INTERP, ARGIN(const STRING *src))
         __attribute__nonnull__(1)
@@ -104,15 +79,6 @@
 #define ASSERT_ARGS_ascii_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ascii_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ascii_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ascii_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_ascii_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
@@ -150,47 +116,11 @@
 ascii_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
 {
     ASSERT_ARGS(ascii_to_encoding)
-    STRING        *dest;
-
-    if (STRING_max_bytes_per_codepoint(src) == 1) {
-        unsigned char * const src_buf  = (unsigned char *)src->strstart;
-        UINTVAL offs;
-
-        for (offs = 0; offs < src->strlen; ++offs) {
-            UINTVAL c = src_buf[offs];
-            if (c >= 0x80)
-                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
-                    "lossy conversion to ascii");
-        }
 
-        dest           = Parrot_str_clone(interp, src);
-        dest->encoding = Parrot_ascii_encoding_ptr;
-    }
-    else {
-        String_iter iter;
-        unsigned char *p;
-        const UINTVAL len = src->strlen;
-
-        dest = Parrot_str_new_init(interp, NULL, len,
-                Parrot_ascii_encoding_ptr, 0);
-        p    = (unsigned char *)dest->strstart;
-        STRING_ITER_INIT(interp, &iter);
-
-        while (iter.charpos < len) {
-            const UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
-            if (c >= 0x80)
-                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
-                        "can't convert unicode string to ascii");
-            *p++ = c;
-        }
-
-        dest->bufused = len;
-        dest->strlen  = len;
-    }
-
-    return dest;
+    return fixed8_to_encoding(interp, src, Parrot_ascii_encoding_ptr);
 }
 
+
 /*
 
 =item C<static STRING * ascii_chr(PARROT_INTERP, UINTVAL codepoint)>
@@ -243,94 +173,6 @@
 
 /*
 
-=item C<static INTVAL ascii_is_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL
-ascii_is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(ascii_is_cclass)
-    UINTVAL codepoint;
-
-    if (offset >= src->strlen)
-        return 0;
-    codepoint = STRING_ord(interp, src, offset);
-
-    if (codepoint >= sizeof (Parrot_ascii_typetable) / sizeof (Parrot_ascii_typetable[0])) {
-        return 0;
-    }
-    return (Parrot_ascii_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-/*
-
-=item C<static INTVAL ascii_find_cclass(PARROT_INTERP, INTVAL flags, const
-STRING *src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class.  Delegates to the find_cclass
-method of the encoding plugin.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL
-ascii_find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset,
-                    UINTVAL count)
-{
-    ASSERT_ARGS(ascii_find_cclass)
-    const unsigned char *contents = (const unsigned char *)src->strstart;
-    UINTVAL pos = offset;
-    UINTVAL end = offset + count;
-
-    end = src->strlen < end ? src->strlen : end;
-    for (; pos < end; ++pos) {
-        if ((Parrot_ascii_typetable[contents[pos]] & flags) != 0) {
-            return pos;
-        }
-    }
-    return end;
-}
-
-/*
-
-=item C<static INTVAL ascii_find_not_cclass(PARROT_INTERP, INTVAL flags, const
-STRING *src, UINTVAL offset, UINTVAL count)>
-
-Returns C<INTVAL>.
-
-=cut
-
-*/
-
-static INTVAL
-ascii_find_not_cclass(PARROT_INTERP,
-                INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(ascii_find_not_cclass)
-    const unsigned char *contents = (const unsigned char *)src->strstart;
-    UINTVAL pos = offset;
-    UINTVAL end = offset + count;
-
-    end = src->strlen < end ? src->strlen : end;
-    for (; pos < end; ++pos) {
-        if ((Parrot_ascii_typetable[contents[pos]] & flags) == 0) {
-            return pos;
-        }
-    }
-    return end;
-}
-
-/*
-
 =item C<static STRING* ascii_upcase(PARROT_INTERP, const STRING *src)>
 
 Converts the STRING C<src> to all uppercase.
@@ -520,9 +362,9 @@
     fixed8_ord,
     fixed_substr,
 
-    ascii_is_cclass,
-    ascii_find_cclass,
-    ascii_find_not_cclass,
+    fixed8_is_cclass,
+    fixed8_find_cclass,
+    fixed8_find_not_cclass,
 
     encoding_get_graphemes,
     fixed8_compose,

Modified: branches/string_checks/src/string/encoding/binary.c
==============================================================================
--- branches/string_checks/src/string/encoding/binary.c	Sun Oct 31 14:42:55 2010	(r49746)
+++ branches/string_checks/src/string/encoding/binary.c	Sun Oct 31 14:58:40 2010	(r49747)
@@ -92,14 +92,8 @@
 binary_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
 {
     ASSERT_ARGS(binary_to_encoding)
-    STRING      *dest;
 
-    dest           = Parrot_str_clone(interp, src);
-    dest->encoding = Parrot_binary_encoding_ptr;
-    dest->strlen   = dest->bufused;
-    dest->hashval  = 0;
-
-    return dest;
+    return fixed8_to_encoding(interp, src, Parrot_binary_encoding_ptr);
 }
 
 

Modified: branches/string_checks/src/string/encoding/latin1.c
==============================================================================
--- branches/string_checks/src/string/encoding/latin1.c	Sun Oct 31 14:42:55 2010	(r49746)
+++ branches/string_checks/src/string/encoding/latin1.c	Sun Oct 31 14:58:40 2010	(r49747)
@@ -40,29 +40,6 @@
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
-static INTVAL latin1_find_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL latin1_find_not_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL latin1_is_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
 PARROT_CANNOT_RETURN_NULL
 static STRING* latin1_titlecase(PARROT_INTERP, ARGIN(const STRING *src))
         __attribute__nonnull__(1)
@@ -102,15 +79,6 @@
 #define ASSERT_ARGS_latin1_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_latin1_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_latin1_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_latin1_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_latin1_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
@@ -149,35 +117,8 @@
 latin1_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
 {
     ASSERT_ARGS(latin1_to_encoding)
-    STRING      *dest;
 
-    if (STRING_max_bytes_per_codepoint(src) == 1) {
-        dest           = Parrot_str_clone(interp, src);
-        dest->encoding = Parrot_latin1_encoding_ptr;
-    }
-    else {
-        String_iter  iter;
-        unsigned char *p;
-        const UINTVAL len = src->strlen;
-
-        dest = Parrot_str_new_init(interp, NULL, len,
-                Parrot_latin1_encoding_ptr, 0);
-        p    = (unsigned char *)dest->strstart;
-        STRING_ITER_INIT(interp, &iter);
-
-        while (iter.charpos < len) {
-            const UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
-            if (c >= 0x100)
-                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
-                    "lossy conversion to iso-8559-1");
-            *p++ = c;
-        }
-
-        dest->bufused = len;
-        dest->strlen  = len;
-    }
-
-    return dest;
+    return fixed8_to_encoding(interp, src, Parrot_latin1_encoding_ptr);
 }
 
 
@@ -231,95 +172,6 @@
 
 /*
 
-=item C<static INTVAL latin1_is_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-static INTVAL
-latin1_is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(latin1_is_cclass)
-    UINTVAL codepoint;
-
-    if (offset >= src->strlen) return 0;
-    codepoint = STRING_ord(interp, src, offset);
-
-    if (codepoint >= sizeof (Parrot_ascii_typetable) /
-                     sizeof (Parrot_ascii_typetable[0])) {
-        return 0;
-    }
-    return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-
-/*
-
-=item C<static INTVAL latin1_find_cclass(PARROT_INTERP, INTVAL flags, const
-STRING *src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class.  Delegates to the find_cclass
-method of the encoding plugin.
-
-=cut
-
-*/
-
-static INTVAL
-latin1_find_cclass(PARROT_INTERP, INTVAL flags,
-                ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(latin1_find_cclass)
-    const unsigned char *contents = (const unsigned char *)src->strstart;
-    UINTVAL pos = offset;
-    UINTVAL end = offset + count;
-
-    end = src->strlen < end ? src->strlen : end;
-    for (; pos < end; ++pos) {
-        if ((Parrot_iso_8859_1_typetable[contents[pos]] & flags) != 0) {
-            return pos;
-        }
-    }
-    return end;
-}
-
-
-/*
-
-=item C<static INTVAL latin1_find_not_cclass(PARROT_INTERP, INTVAL flags, const
-STRING *src, UINTVAL offset, UINTVAL count)>
-
-Returns C<INTVAL>.
-
-=cut
-
-*/
-
-static INTVAL
-latin1_find_not_cclass(PARROT_INTERP, INTVAL flags,
-                ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(latin1_find_not_cclass)
-    const unsigned char *contents = (const unsigned char *)src->strstart;
-    UINTVAL pos = offset;
-    UINTVAL end = offset + count;
-
-    end = src->strlen < end ? src->strlen : end;
-    for (; pos < end; ++pos) {
-        if ((Parrot_iso_8859_1_typetable[contents[pos]] & flags) == 0) {
-            return pos;
-        }
-    }
-    return end;
-}
-
-
-/*
-
 =item C<static STRING* latin1_upcase(PARROT_INTERP, const STRING *src)>
 
 Convert all graphemes in the STRING C<src> to upper case, for those
@@ -547,9 +399,9 @@
     fixed8_ord,
     fixed_substr,
 
-    latin1_is_cclass,
-    latin1_find_cclass,
-    latin1_find_not_cclass,
+    fixed8_is_cclass,
+    fixed8_find_cclass,
+    fixed8_find_not_cclass,
 
     encoding_get_graphemes,
     fixed8_compose,

Modified: branches/string_checks/src/string/encoding/shared.c
==============================================================================
--- branches/string_checks/src/string/encoding/shared.c	Sun Oct 31 14:42:55 2010	(r49746)
+++ branches/string_checks/src/string/encoding/shared.c	Sun Oct 31 14:58:40 2010	(r49747)
@@ -597,6 +597,71 @@
 
 /*
 
+=item C<STRING * fixed8_to_encoding(PARROT_INTERP, const STRING *src, const
+STR_VTABLE *enc)>
+
+Converts STRING C<src> to fixed8 encoding C<enc>, throwing on codepoints that do not fit.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+STRING *
+fixed8_to_encoding(PARROT_INTERP, ARGIN(const STRING *src),
+        ARGIN(const STR_VTABLE *enc))
+{
+    ASSERT_ARGS(fixed8_to_encoding)
+    STRING        *dest;
+    const UINTVAL  limit = enc == Parrot_ascii_encoding_ptr ? 0x80 : 0x100;
+
+    if (STRING_max_bytes_per_codepoint(src) == 1) {
+        if (limit < 0x100) {
+            const unsigned char * const ptr = (unsigned char *)src->strstart;
+            UINTVAL i;
+
+            for (i = 0; i < src->strlen; ++i) {
+                if (ptr[i] >= limit)
+                    Parrot_ex_throw_from_c_args(interp, NULL,
+                        EXCEPTION_LOSSY_CONVERSION,
+                        "Lossy conversion to single byte encoding");
+            }
+        }
+
+        dest           = Parrot_str_copy(interp, src);
+        dest->encoding = enc;
+    }
+    else {
+        String_iter    iter;
+        unsigned char *ptr;
+        const UINTVAL  len = src->strlen;
+
+        dest  = Parrot_str_new_init(interp, NULL, len, enc, 0);
+        ptr   = (unsigned char *)dest->strstart;
+
+        STRING_ITER_INIT(interp, &iter);
+
+        while (iter.charpos < len) {
+            const UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
+
+            if (c >= limit)
+                Parrot_ex_throw_from_c_args(interp, NULL,
+                    EXCEPTION_LOSSY_CONVERSION,
+                    "Lossy conversion to single byte encoding");
+
+            *ptr++ = c;
+        }
+
+        dest->bufused = len;
+        dest->strlen  = len;
+    }
+
+    return dest;
+}
+
+
+/*
+
 =item C<INTVAL fixed8_equal(PARROT_INTERP, const STRING *lhs, const STRING
 *rhs)>
 
@@ -900,6 +965,98 @@
 
 /*
 
+=item C<INTVAL fixed8_is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
+UINTVAL offset)>
+
+Returns 1 if the character at C<offset> is in a class of C<flags>, 0 otherwise.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+fixed8_is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
+{
+    ASSERT_ARGS(fixed8_is_cclass)
+    const unsigned char * const ptr = (unsigned char *)src->strstart;
+    UINTVAL codepoint;
+
+    if (offset >= src->strlen) return 0;
+    codepoint = ptr[offset];
+
+    return Parrot_iso_8859_1_typetable[codepoint] & flags ? 1 : 0;
+}
+
+
+/*
+
+=item C<INTVAL fixed8_find_cclass(PARROT_INTERP, INTVAL flags, const STRING
+*src, UINTVAL offset, UINTVAL count)>
+
+Finds the first character in class C<flags> from C<offset>; returns the range end if none.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+fixed8_find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
+        UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(fixed8_find_cclass)
+    const unsigned char * const ptr = (const unsigned char *)src->strstart;
+    UINTVAL pos;
+    UINTVAL end = offset + count;
+
+    if (end > src->strlen)
+        end = src->strlen;
+
+    for (pos = offset; pos < end; ++pos) {
+        if (Parrot_iso_8859_1_typetable[ptr[pos]] & flags)
+            return pos;
+    }
+
+    return end;
+}
+
+
+/*
+
+=item C<INTVAL fixed8_find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
+*src, UINTVAL offset, UINTVAL count)>
+
+Finds the first character not in class C<flags> from C<offset>; returns the range end if none.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+fixed8_find_not_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
+        UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(fixed8_find_not_cclass)
+    const unsigned char * const ptr = (unsigned char *)src->strstart;
+    UINTVAL pos;
+    UINTVAL end = offset + count;
+
+    if (end > src->strlen)
+        end = src->strlen;
+
+    for (pos = offset; pos < end; ++pos) {
+        if ((Parrot_iso_8859_1_typetable[ptr[pos]] & flags) == 0)
+            return pos;
+    }
+
+    return end;
+}
+
+
+/*
+
 =item C<STRING* fixed8_compose(PARROT_INTERP, const STRING *src)>
 
 Can't compose ASCII strings, so performs a string copy on it and
@@ -935,7 +1092,11 @@
     ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset)
 {
     ASSERT_ARGS(fixed8_iter_get)
-    return fixed8_ord(interp, str, iter->charpos + offset);
+    const unsigned char * const ptr = (unsigned char *)str->strstart;
+
+    PARROT_ASSERT(iter->charpos + offset < str->bufused);
+
+    return ptr[iter->charpos + offset];
 }
 
 
@@ -955,9 +1116,11 @@
     ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip)
 {
     ASSERT_ARGS(fixed8_iter_skip)
+
     iter->bytepos += skip;
     iter->charpos += skip;
-    PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str));
+
+    PARROT_ASSERT(iter->bytepos <= str->bufused);
 }
 
 
@@ -977,8 +1140,13 @@
     ARGIN(const STRING *str), ARGMOD(String_iter *iter))
 {
     ASSERT_ARGS(fixed8_iter_get_and_advance)
-    const UINTVAL c = fixed8_ord(interp, str, iter->charpos++);
+    unsigned char * const ptr = (unsigned char *)str->strstart;
+    const UINTVAL         c   = ptr[iter->charpos++];
+
     iter->bytepos++;
+
+    PARROT_ASSERT(iter->bytepos <= str->bufused);
+
     return c;
 }
 
@@ -1000,9 +1168,18 @@
     ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c)
 {
     ASSERT_ARGS(fixed8_iter_set_and_advance)
-    unsigned char *buf = (unsigned char *)str->strstart;
-    buf[iter->charpos++] = c;
+    unsigned char * const ptr = (unsigned char *)str->strstart;
+
+    UINTVAL limit = str->encoding == Parrot_ascii_encoding_ptr ? 0x80 : 0x100;
+
+    if (c >= limit)
+        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
+            "Lossy conversion to single byte encoding");
+
+    ptr[iter->charpos++] = c;
     iter->bytepos++;
+
+    PARROT_ASSERT(iter->bytepos <= str->bufused);
 }
 
 
@@ -1022,8 +1199,9 @@
     ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos)
 {
     ASSERT_ARGS(fixed8_iter_set_position)
+    PARROT_ASSERT(pos <= str->bufused);
+
     iter->bytepos = iter->charpos = pos;
-    PARROT_ASSERT(pos <= Buffer_buflen(str));
 }
 
 

Modified: branches/string_checks/src/string/encoding/shared.h
==============================================================================
--- branches/string_checks/src/string/encoding/shared.h	Sun Oct 31 14:42:55 2010	(r49746)
+++ branches/string_checks/src/string/encoding/shared.h	Sun Oct 31 14:58:40 2010	(r49747)
@@ -130,6 +130,24 @@
         __attribute__nonnull__(3);
 
 PARROT_WARN_UNUSED_RESULT
+INTVAL fixed8_find_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL fixed8_find_not_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
 size_t fixed8_hash(SHIM_INTERP, ARGIN(const STRING *src), size_t hashval)
         __attribute__nonnull__(2);
 
@@ -142,6 +160,14 @@
         __attribute__nonnull__(2)
         __attribute__nonnull__(3);
 
+PARROT_WARN_UNUSED_RESULT
+INTVAL fixed8_is_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
 UINTVAL fixed8_iter_get(PARROT_INTERP,
     ARGIN(const STRING *str),
     ARGIN(const String_iter *iter),
@@ -203,6 +229,14 @@
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
+PARROT_CANNOT_RETURN_NULL
+STRING * fixed8_to_encoding(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    ARGIN(const STR_VTABLE *enc))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3);
+
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
 STRING * fixed_substr(PARROT_INTERP,
@@ -300,12 +334,21 @@
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(lhs) \
     , PARROT_ASSERT_ARG(rhs))
+#define ASSERT_ARGS_fixed8_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_fixed8_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_fixed8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_fixed8_index __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src) \
     , PARROT_ASSERT_ARG(search))
+#define ASSERT_ARGS_fixed8_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(str) \
@@ -334,6 +377,10 @@
 #define ASSERT_ARGS_fixed8_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_fixed8_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src) \
+    , PARROT_ASSERT_ARG(enc))
 #define ASSERT_ARGS_fixed_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))

Modified: branches/string_checks/src/string/encoding/tables.c
==============================================================================
--- branches/string_checks/src/string/encoding/tables.c	Sun Oct 31 14:42:55 2010	(r49746)
+++ branches/string_checks/src/string/encoding/tables.c	Sun Oct 31 14:58:40 2010	(r49747)
@@ -51,40 +51,6 @@
 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x04c0, /* 240-247 */
 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 248-255 */
 };
-const INTVAL Parrot_ascii_typetable[256] = {
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 0-7 */
-0x0200, 0x0320, 0x1220, 0x0220, 0x1220, 0x1220, 0x0200, 0x0200, /* 8-15 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 16-23 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 24-31 */
-0x0160, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 32-39 */
-0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 40-47 */
-0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, /* 48-55 */
-0x28d8, 0x28d8, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 56-63 */
-0x04c0, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28c5, /* 64-71 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 72-79 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 80-87 */
-0x28c5, 0x28c5, 0x28c5, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x24c0, /* 88-95 */
-0x04c0, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28c6, /* 96-103 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 104-111 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 112-119 */
-0x28c6, 0x28c6, 0x28c6, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x0200, /* 120-127 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1020, 0x0000, 0x0000, /* 128-135 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 136-143 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 144-151 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 152-159 */
-0x0020, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 160-167 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 168-175 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 176-183 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 184-191 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 192-199 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 200-207 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 208-215 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 216-223 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 224-231 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 232-239 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 240-247 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 248-255 */
-};
 /*
  * Local variables:
  *   c-file-style: "parrot"

Modified: branches/string_checks/src/string/encoding/tables.h
==============================================================================
--- branches/string_checks/src/string/encoding/tables.h	Sun Oct 31 14:42:55 2010	(r49746)
+++ branches/string_checks/src/string/encoding/tables.h	Sun Oct 31 14:58:40 2010	(r49747)
@@ -25,7 +25,6 @@
 #define PUNCTUATION enum_cclass_punctuation
 #define DIGIT       enum_cclass_numeric
 extern const INTVAL Parrot_iso_8859_1_typetable[256];
-extern const INTVAL Parrot_ascii_typetable[256];
 #endif /* PARROT_CHARSET_TABLES_H_GUARD */
 /*
  * Local variables:

Modified: branches/string_checks/t/op/string_cs.t
==============================================================================
--- branches/string_checks/t/op/string_cs.t	Sun Oct 31 14:42:55 2010	(r49746)
+++ branches/string_checks/t/op/string_cs.t	Sun Oct 31 14:58:40 2010	(r49747)
@@ -290,7 +290,7 @@
     print "never\n"
     end
 CODE
-/lossy conversion to ascii/
+/Lossy conversion/
 OUTPUT
 
 pasm_output_is( <<'CODE', <<OUTPUT, "trans_encoding_s_s_i iso-8859-1 to binary" );
@@ -376,7 +376,7 @@
     print "never\n"
     end
 CODE
-/lossy conversion to iso-8559-1/
+/Lossy conversion/
 OUTPUT
 
 pasm_error_output_like( <<'CODE', <<OUTPUT, "trans_encoding_s_s_i utf-8 to ascii - lossy" );
@@ -386,7 +386,7 @@
     print "never\n"
     end
 CODE
-/can't convert unicode string to ascii/
+/Lossy conversion/
 OUTPUT
 
 pir_output_is( <<'CODE', <<'OUTPUT', "bug #34661 literal" );

Modified: branches/string_checks/t/op/stringu.t
==============================================================================
--- branches/string_checks/t/op/stringu.t	Sun Oct 31 14:42:55 2010	(r49746)
+++ branches/string_checks/t/op/stringu.t	Sun Oct 31 14:58:40 2010	(r49747)
@@ -269,7 +269,7 @@
     print "\n"
     end
 CODE
-/Malformed string/
+/Lossy conversion/
 OUTPUT
 
 pasm_output_is( <<'CODE', <<OUTPUT, "substr with a UTF8 replacement #36794" );


More information about the parrot-commits mailing list