[svn:parrot] r49747 - in branches/string_checks: src/string/encoding t/op
nwellnhof at svn.parrot.org
nwellnhof at svn.parrot.org
Sun Oct 31 14:58:41 UTC 2010
Author: nwellnhof
Date: Sun Oct 31 14:58:40 2010
New Revision: 49747
URL: https://trac.parrot.org/parrot/changeset/49747
Log:
[str] Merge some ASCII and Latin1 functions
Modified:
branches/string_checks/src/string/encoding/ascii.c
branches/string_checks/src/string/encoding/binary.c
branches/string_checks/src/string/encoding/latin1.c
branches/string_checks/src/string/encoding/shared.c
branches/string_checks/src/string/encoding/shared.h
branches/string_checks/src/string/encoding/tables.c
branches/string_checks/src/string/encoding/tables.h
branches/string_checks/t/op/string_cs.t
branches/string_checks/t/op/stringu.t
Modified: branches/string_checks/src/string/encoding/ascii.c
==============================================================================
--- branches/string_checks/src/string/encoding/ascii.c Sun Oct 31 14:42:55 2010 (r49746)
+++ branches/string_checks/src/string/encoding/ascii.c Sun Oct 31 14:58:40 2010 (r49747)
@@ -40,31 +40,6 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-PARROT_WARN_UNUSED_RESULT
-static INTVAL ascii_find_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-static INTVAL ascii_find_not_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL ascii_is_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
PARROT_CANNOT_RETURN_NULL
static STRING* ascii_titlecase(PARROT_INTERP, ARGIN(const STRING *src))
__attribute__nonnull__(1)
@@ -104,15 +79,6 @@
#define ASSERT_ARGS_ascii_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ascii_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ascii_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ascii_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_ascii_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
@@ -150,47 +116,11 @@
ascii_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(ascii_to_encoding)
- STRING *dest;
-
- if (STRING_max_bytes_per_codepoint(src) == 1) {
- unsigned char * const src_buf = (unsigned char *)src->strstart;
- UINTVAL offs;
-
- for (offs = 0; offs < src->strlen; ++offs) {
- UINTVAL c = src_buf[offs];
- if (c >= 0x80)
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
- "lossy conversion to ascii");
- }
- dest = Parrot_str_clone(interp, src);
- dest->encoding = Parrot_ascii_encoding_ptr;
- }
- else {
- String_iter iter;
- unsigned char *p;
- const UINTVAL len = src->strlen;
-
- dest = Parrot_str_new_init(interp, NULL, len,
- Parrot_ascii_encoding_ptr, 0);
- p = (unsigned char *)dest->strstart;
- STRING_ITER_INIT(interp, &iter);
-
- while (iter.charpos < len) {
- const UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
- if (c >= 0x80)
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
- "can't convert unicode string to ascii");
- *p++ = c;
- }
-
- dest->bufused = len;
- dest->strlen = len;
- }
-
- return dest;
+ return fixed8_to_encoding(interp, src, Parrot_ascii_encoding_ptr);
}
+
/*
=item C<static STRING * ascii_chr(PARROT_INTERP, UINTVAL codepoint)>
@@ -243,94 +173,6 @@
/*
-=item C<static INTVAL ascii_is_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL
-ascii_is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
- ASSERT_ARGS(ascii_is_cclass)
- UINTVAL codepoint;
-
- if (offset >= src->strlen)
- return 0;
- codepoint = STRING_ord(interp, src, offset);
-
- if (codepoint >= sizeof (Parrot_ascii_typetable) / sizeof (Parrot_ascii_typetable[0])) {
- return 0;
- }
- return (Parrot_ascii_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-/*
-
-=item C<static INTVAL ascii_find_cclass(PARROT_INTERP, INTVAL flags, const
-STRING *src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class. Delegates to the find_cclass
-method of the encoding plugin.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL
-ascii_find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset,
- UINTVAL count)
-{
- ASSERT_ARGS(ascii_find_cclass)
- const unsigned char *contents = (const unsigned char *)src->strstart;
- UINTVAL pos = offset;
- UINTVAL end = offset + count;
-
- end = src->strlen < end ? src->strlen : end;
- for (; pos < end; ++pos) {
- if ((Parrot_ascii_typetable[contents[pos]] & flags) != 0) {
- return pos;
- }
- }
- return end;
-}
-
-/*
-
-=item C<static INTVAL ascii_find_not_cclass(PARROT_INTERP, INTVAL flags, const
-STRING *src, UINTVAL offset, UINTVAL count)>
-
-Returns C<INTVAL>.
-
-=cut
-
-*/
-
-static INTVAL
-ascii_find_not_cclass(PARROT_INTERP,
- INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(ascii_find_not_cclass)
- const unsigned char *contents = (const unsigned char *)src->strstart;
- UINTVAL pos = offset;
- UINTVAL end = offset + count;
-
- end = src->strlen < end ? src->strlen : end;
- for (; pos < end; ++pos) {
- if ((Parrot_ascii_typetable[contents[pos]] & flags) == 0) {
- return pos;
- }
- }
- return end;
-}
-
-/*
-
=item C<static STRING* ascii_upcase(PARROT_INTERP, const STRING *src)>
Converts the STRING C<src> to all uppercase.
@@ -520,9 +362,9 @@
fixed8_ord,
fixed_substr,
- ascii_is_cclass,
- ascii_find_cclass,
- ascii_find_not_cclass,
+ fixed8_is_cclass,
+ fixed8_find_cclass,
+ fixed8_find_not_cclass,
encoding_get_graphemes,
fixed8_compose,
Modified: branches/string_checks/src/string/encoding/binary.c
==============================================================================
--- branches/string_checks/src/string/encoding/binary.c Sun Oct 31 14:42:55 2010 (r49746)
+++ branches/string_checks/src/string/encoding/binary.c Sun Oct 31 14:58:40 2010 (r49747)
@@ -92,14 +92,8 @@
binary_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(binary_to_encoding)
- STRING *dest;
- dest = Parrot_str_clone(interp, src);
- dest->encoding = Parrot_binary_encoding_ptr;
- dest->strlen = dest->bufused;
- dest->hashval = 0;
-
- return dest;
+ return fixed8_to_encoding(interp, src, Parrot_binary_encoding_ptr);
}
Modified: branches/string_checks/src/string/encoding/latin1.c
==============================================================================
--- branches/string_checks/src/string/encoding/latin1.c Sun Oct 31 14:42:55 2010 (r49746)
+++ branches/string_checks/src/string/encoding/latin1.c Sun Oct 31 14:58:40 2010 (r49747)
@@ -40,29 +40,6 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
-static INTVAL latin1_find_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-static INTVAL latin1_find_not_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-static INTVAL latin1_is_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
PARROT_CANNOT_RETURN_NULL
static STRING* latin1_titlecase(PARROT_INTERP, ARGIN(const STRING *src))
__attribute__nonnull__(1)
@@ -102,15 +79,6 @@
#define ASSERT_ARGS_latin1_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_latin1_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_latin1_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_latin1_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_latin1_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
@@ -149,35 +117,8 @@
latin1_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
{
ASSERT_ARGS(latin1_to_encoding)
- STRING *dest;
- if (STRING_max_bytes_per_codepoint(src) == 1) {
- dest = Parrot_str_clone(interp, src);
- dest->encoding = Parrot_latin1_encoding_ptr;
- }
- else {
- String_iter iter;
- unsigned char *p;
- const UINTVAL len = src->strlen;
-
- dest = Parrot_str_new_init(interp, NULL, len,
- Parrot_latin1_encoding_ptr, 0);
- p = (unsigned char *)dest->strstart;
- STRING_ITER_INIT(interp, &iter);
-
- while (iter.charpos < len) {
- const UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
- if (c >= 0x100)
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
- "lossy conversion to iso-8559-1");
- *p++ = c;
- }
-
- dest->bufused = len;
- dest->strlen = len;
- }
-
- return dest;
+ return fixed8_to_encoding(interp, src, Parrot_latin1_encoding_ptr);
}
@@ -231,95 +172,6 @@
/*
-=item C<static INTVAL latin1_is_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-static INTVAL
-latin1_is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
- ASSERT_ARGS(latin1_is_cclass)
- UINTVAL codepoint;
-
- if (offset >= src->strlen) return 0;
- codepoint = STRING_ord(interp, src, offset);
-
- if (codepoint >= sizeof (Parrot_ascii_typetable) /
- sizeof (Parrot_ascii_typetable[0])) {
- return 0;
- }
- return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-
-/*
-
-=item C<static INTVAL latin1_find_cclass(PARROT_INTERP, INTVAL flags, const
-STRING *src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class. Delegates to the find_cclass
-method of the encoding plugin.
-
-=cut
-
-*/
-
-static INTVAL
-latin1_find_cclass(PARROT_INTERP, INTVAL flags,
- ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(latin1_find_cclass)
- const unsigned char *contents = (const unsigned char *)src->strstart;
- UINTVAL pos = offset;
- UINTVAL end = offset + count;
-
- end = src->strlen < end ? src->strlen : end;
- for (; pos < end; ++pos) {
- if ((Parrot_iso_8859_1_typetable[contents[pos]] & flags) != 0) {
- return pos;
- }
- }
- return end;
-}
-
-
-/*
-
-=item C<static INTVAL latin1_find_not_cclass(PARROT_INTERP, INTVAL flags, const
-STRING *src, UINTVAL offset, UINTVAL count)>
-
-Returns C<INTVAL>.
-
-=cut
-
-*/
-
-static INTVAL
-latin1_find_not_cclass(PARROT_INTERP, INTVAL flags,
- ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(latin1_find_not_cclass)
- const unsigned char *contents = (const unsigned char *)src->strstart;
- UINTVAL pos = offset;
- UINTVAL end = offset + count;
-
- end = src->strlen < end ? src->strlen : end;
- for (; pos < end; ++pos) {
- if ((Parrot_iso_8859_1_typetable[contents[pos]] & flags) == 0) {
- return pos;
- }
- }
- return end;
-}
-
-
-/*
-
=item C<static STRING* latin1_upcase(PARROT_INTERP, const STRING *src)>
Convert all graphemes in the STRING C<src> to upper case, for those
@@ -547,9 +399,9 @@
fixed8_ord,
fixed_substr,
- latin1_is_cclass,
- latin1_find_cclass,
- latin1_find_not_cclass,
+ fixed8_is_cclass,
+ fixed8_find_cclass,
+ fixed8_find_not_cclass,
encoding_get_graphemes,
fixed8_compose,
Modified: branches/string_checks/src/string/encoding/shared.c
==============================================================================
--- branches/string_checks/src/string/encoding/shared.c Sun Oct 31 14:42:55 2010 (r49746)
+++ branches/string_checks/src/string/encoding/shared.c Sun Oct 31 14:58:40 2010 (r49747)
@@ -597,6 +597,71 @@
/*
+=item C<STRING * fixed8_to_encoding(PARROT_INTERP, const STRING *src, const
+STR_VTABLE *enc)>
+
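+Converts STRING C<src> to a string with fixed8 encoding C<enc> and returns it. Every codepoint of C<src> must be below 0x80 if C<enc> is the ASCII encoding and below 0x100 otherwise; if one is not, an C<EXCEPTION_LOSSY_CONVERSION> exception is thrown.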
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+STRING *
+fixed8_to_encoding(PARROT_INTERP, ARGIN(const STRING *src),
+ ARGIN(const STR_VTABLE *enc))
+{
+ ASSERT_ARGS(fixed8_to_encoding)
+ STRING *dest;
+ const UINTVAL limit = enc == Parrot_ascii_encoding_ptr ? 0x80 : 0x100;
+
+ if (STRING_max_bytes_per_codepoint(src) == 1) {
+ if (limit < 0x100) {
+ const unsigned char * const ptr = (unsigned char *)src->strstart;
+ UINTVAL i;
+
+ for (i = 0; i < src->strlen; ++i) {
+ if (ptr[i] >= limit)
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_LOSSY_CONVERSION,
+ "Lossy conversion to single byte encoding");
+ }
+ }
+
+ dest = Parrot_str_copy(interp, src);
+ dest->encoding = enc;
+ }
+ else {
+ String_iter iter;
+ unsigned char *ptr;
+ const UINTVAL len = src->strlen;
+
+ dest = Parrot_str_new_init(interp, NULL, len, enc, 0);
+ ptr = (unsigned char *)dest->strstart;
+
+ STRING_ITER_INIT(interp, &iter);
+
+ while (iter.charpos < len) {
+ const UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
+
+ if (c >= limit)
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_LOSSY_CONVERSION,
+ "Lossy conversion to single byte encoding");
+
+ *ptr++ = c;
+ }
+
+ dest->bufused = len;
+ dest->strlen = len;
+ }
+
+ return dest;
+}
+
+
+/*
+
=item C<INTVAL fixed8_equal(PARROT_INTERP, const STRING *lhs, const STRING
*rhs)>
@@ -900,6 +965,98 @@
/*
+=item C<INTVAL fixed8_is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
+UINTVAL offset)>
+
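+Returns 1 if the character at C<offset> in C<src> belongs to any of the character classes in C<flags>, and 0 otherwise. Also returns 0 if C<offset> is not less than the length of the string.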
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+fixed8_is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
+{
+ ASSERT_ARGS(fixed8_is_cclass)
+ const unsigned char * const ptr = (unsigned char *)src->strstart;
+ UINTVAL codepoint;
+
+ if (offset >= src->strlen) return 0;
+ codepoint = ptr[offset];
+
+ return Parrot_iso_8859_1_typetable[codepoint] & flags ? 1 : 0;
+}
+
+
+/*
+
+=item C<INTVAL fixed8_find_cclass(PARROT_INTERP, INTVAL flags, const STRING
+*src, UINTVAL offset, UINTVAL count)>
+
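+Searches C<src> starting at C<offset>, for at most C<count> characters, and returns the position of the first character that belongs to any of the character classes in C<flags>. If there is none, returns the end of the searched range: C<offset + count> or the string length, whichever is smaller.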
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+fixed8_find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
+ UINTVAL offset, UINTVAL count)
+{
+ ASSERT_ARGS(fixed8_find_cclass)
+ const unsigned char * const ptr = (const unsigned char *)src->strstart;
+ UINTVAL pos;
+ UINTVAL end = offset + count;
+
+ if (end > src->strlen)
+ end = src->strlen;
+
+ for (pos = offset; pos < end; ++pos) {
+ if (Parrot_iso_8859_1_typetable[ptr[pos]] & flags)
+ return pos;
+ }
+
+ return end;
+}
+
+
+/*
+
+=item C<INTVAL fixed8_find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
+*src, UINTVAL offset, UINTVAL count)>
+
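+Searches C<src> starting at C<offset>, for at most C<count> characters, and returns the position of the first character that belongs to none of the character classes in C<flags>. If there is none, returns the end of the searched range: C<offset + count> or the string length, whichever is smaller.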
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+fixed8_find_not_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
+ UINTVAL offset, UINTVAL count)
+{
+ ASSERT_ARGS(fixed8_find_not_cclass)
+ const unsigned char * const ptr = (unsigned char *)src->strstart;
+ UINTVAL pos;
+ UINTVAL end = offset + count;
+
+ if (end > src->strlen)
+ end = src->strlen;
+
+ for (pos = offset; pos < end; ++pos) {
+ if ((Parrot_iso_8859_1_typetable[ptr[pos]] & flags) == 0)
+ return pos;
+ }
+
+ return end;
+}
+
+
+/*
+
=item C<STRING* fixed8_compose(PARROT_INTERP, const STRING *src)>
Can't compose ASCII strings, so performs a string copy on it and
@@ -935,7 +1092,11 @@
ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset)
{
ASSERT_ARGS(fixed8_iter_get)
- return fixed8_ord(interp, str, iter->charpos + offset);
+ const unsigned char * const ptr = (unsigned char *)str->strstart;
+
+ PARROT_ASSERT(iter->charpos + offset < str->bufused);
+
+ return ptr[iter->charpos + offset];
}
@@ -955,9 +1116,11 @@
ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip)
{
ASSERT_ARGS(fixed8_iter_skip)
+
iter->bytepos += skip;
iter->charpos += skip;
- PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str));
+
+ PARROT_ASSERT(iter->bytepos <= str->bufused);
}
@@ -977,8 +1140,13 @@
ARGIN(const STRING *str), ARGMOD(String_iter *iter))
{
ASSERT_ARGS(fixed8_iter_get_and_advance)
- const UINTVAL c = fixed8_ord(interp, str, iter->charpos++);
+ unsigned char * const ptr = (unsigned char *)str->strstart;
+ const UINTVAL c = ptr[iter->charpos++];
+
iter->bytepos++;
+
+ PARROT_ASSERT(iter->bytepos <= str->bufused);
+
return c;
}
@@ -1000,9 +1168,18 @@
ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c)
{
ASSERT_ARGS(fixed8_iter_set_and_advance)
- unsigned char *buf = (unsigned char *)str->strstart;
- buf[iter->charpos++] = c;
+ unsigned char * const ptr = (unsigned char *)str->strstart;
+
+ UINTVAL limit = str->encoding == Parrot_ascii_encoding_ptr ? 0x80 : 0x100;
+
+ if (c >= limit)
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
+ "Lossy conversion to single byte encoding");
+
+ ptr[iter->charpos++] = c;
iter->bytepos++;
+
+ PARROT_ASSERT(iter->bytepos <= str->bufused);
}
@@ -1022,8 +1199,9 @@
ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos)
{
ASSERT_ARGS(fixed8_iter_set_position)
+ PARROT_ASSERT(pos <= str->bufused);
+
iter->bytepos = iter->charpos = pos;
- PARROT_ASSERT(pos <= Buffer_buflen(str));
}
Modified: branches/string_checks/src/string/encoding/shared.h
==============================================================================
--- branches/string_checks/src/string/encoding/shared.h Sun Oct 31 14:42:55 2010 (r49746)
+++ branches/string_checks/src/string/encoding/shared.h Sun Oct 31 14:58:40 2010 (r49747)
@@ -130,6 +130,24 @@
__attribute__nonnull__(3);
PARROT_WARN_UNUSED_RESULT
+INTVAL fixed8_find_cclass(PARROT_INTERP,
+ INTVAL flags,
+ ARGIN(const STRING *src),
+ UINTVAL offset,
+ UINTVAL count)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL fixed8_find_not_cclass(PARROT_INTERP,
+ INTVAL flags,
+ ARGIN(const STRING *src),
+ UINTVAL offset,
+ UINTVAL count)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
size_t fixed8_hash(SHIM_INTERP, ARGIN(const STRING *src), size_t hashval)
__attribute__nonnull__(2);
@@ -142,6 +160,14 @@
__attribute__nonnull__(2)
__attribute__nonnull__(3);
+PARROT_WARN_UNUSED_RESULT
+INTVAL fixed8_is_cclass(PARROT_INTERP,
+ INTVAL flags,
+ ARGIN(const STRING *src),
+ UINTVAL offset)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(3);
+
UINTVAL fixed8_iter_get(PARROT_INTERP,
ARGIN(const STRING *str),
ARGIN(const String_iter *iter),
@@ -203,6 +229,14 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
+PARROT_CANNOT_RETURN_NULL
+STRING * fixed8_to_encoding(PARROT_INTERP,
+ ARGIN(const STRING *src),
+ ARGIN(const STR_VTABLE *enc))
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
STRING * fixed_substr(PARROT_INTERP,
@@ -300,12 +334,21 @@
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(lhs) \
, PARROT_ASSERT_ARG(rhs))
+#define ASSERT_ARGS_fixed8_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_fixed8_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_fixed8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_fixed8_index __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src) \
, PARROT_ASSERT_ARG(search))
+#define ASSERT_ARGS_fixed8_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(str) \
@@ -334,6 +377,10 @@
#define ASSERT_ARGS_fixed8_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_fixed8_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(src) \
+ , PARROT_ASSERT_ARG(enc))
#define ASSERT_ARGS_fixed_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
Modified: branches/string_checks/src/string/encoding/tables.c
==============================================================================
--- branches/string_checks/src/string/encoding/tables.c Sun Oct 31 14:42:55 2010 (r49746)
+++ branches/string_checks/src/string/encoding/tables.c Sun Oct 31 14:58:40 2010 (r49747)
@@ -51,40 +51,6 @@
0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x04c0, /* 240-247 */
0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 248-255 */
};
-const INTVAL Parrot_ascii_typetable[256] = {
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 0-7 */
-0x0200, 0x0320, 0x1220, 0x0220, 0x1220, 0x1220, 0x0200, 0x0200, /* 8-15 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 16-23 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 24-31 */
-0x0160, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 32-39 */
-0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 40-47 */
-0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, /* 48-55 */
-0x28d8, 0x28d8, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 56-63 */
-0x04c0, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28c5, /* 64-71 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 72-79 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 80-87 */
-0x28c5, 0x28c5, 0x28c5, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x24c0, /* 88-95 */
-0x04c0, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28c6, /* 96-103 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 104-111 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 112-119 */
-0x28c6, 0x28c6, 0x28c6, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x0200, /* 120-127 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1020, 0x0000, 0x0000, /* 128-135 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 136-143 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 144-151 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 152-159 */
-0x0020, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 160-167 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 168-175 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 176-183 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 184-191 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 192-199 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 200-207 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 208-215 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 216-223 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 224-231 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 232-239 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 240-247 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 248-255 */
-};
/*
* Local variables:
* c-file-style: "parrot"
Modified: branches/string_checks/src/string/encoding/tables.h
==============================================================================
--- branches/string_checks/src/string/encoding/tables.h Sun Oct 31 14:42:55 2010 (r49746)
+++ branches/string_checks/src/string/encoding/tables.h Sun Oct 31 14:58:40 2010 (r49747)
@@ -25,7 +25,6 @@
#define PUNCTUATION enum_cclass_punctuation
#define DIGIT enum_cclass_numeric
extern const INTVAL Parrot_iso_8859_1_typetable[256];
-extern const INTVAL Parrot_ascii_typetable[256];
#endif /* PARROT_CHARSET_TABLES_H_GUARD */
/*
* Local variables:
Modified: branches/string_checks/t/op/string_cs.t
==============================================================================
--- branches/string_checks/t/op/string_cs.t Sun Oct 31 14:42:55 2010 (r49746)
+++ branches/string_checks/t/op/string_cs.t Sun Oct 31 14:58:40 2010 (r49747)
@@ -290,7 +290,7 @@
print "never\n"
end
CODE
-/lossy conversion to ascii/
+/Lossy conversion/
OUTPUT
pasm_output_is( <<'CODE', <<OUTPUT, "trans_encoding_s_s_i iso-8859-1 to binary" );
@@ -376,7 +376,7 @@
print "never\n"
end
CODE
-/lossy conversion to iso-8559-1/
+/Lossy conversion/
OUTPUT
pasm_error_output_like( <<'CODE', <<OUTPUT, "trans_encoding_s_s_i utf-8 to ascii - lossy" );
@@ -386,7 +386,7 @@
print "never\n"
end
CODE
-/can't convert unicode string to ascii/
+/Lossy conversion/
OUTPUT
pir_output_is( <<'CODE', <<'OUTPUT', "bug #34661 literal" );
Modified: branches/string_checks/t/op/stringu.t
==============================================================================
--- branches/string_checks/t/op/stringu.t Sun Oct 31 14:42:55 2010 (r49746)
+++ branches/string_checks/t/op/stringu.t Sun Oct 31 14:58:40 2010 (r49747)
@@ -269,7 +269,7 @@
print "\n"
end
CODE
-/Malformed string/
+/Lossy conversion/
OUTPUT
pasm_output_is( <<'CODE', <<OUTPUT, "substr with a UTF8 replacement #36794" );
More information about the parrot-commits
mailing list