[svn:parrot] r38853 - in trunk: include/parrot src/string/charset src/string/encoding
Infinoid at svn.parrot.org
Infinoid at svn.parrot.org
Sat May 16 23:28:15 UTC 2009
Author: Infinoid
Date: Sat May 16 23:28:14 2009
New Revision: 38853
URL: https://trac.parrot.org/parrot/changeset/38853
Log:
[encoding] Implement chromatic++'s idea to push the Parrot_str_find_cclass() loop down into the encoding plugin, for the ascii and iso-8859-1 cases.
This saves several million function calls worth of overhead.
Modified:
trunk/include/parrot/encoding.h
trunk/src/string/charset/ascii.c
trunk/src/string/charset/iso-8859-1.c
trunk/src/string/encoding/fixed_8.c
trunk/src/string/encoding/ucs2.c
trunk/src/string/encoding/utf16.c
trunk/src/string/encoding/utf8.c
Modified: trunk/include/parrot/encoding.h
==============================================================================
--- trunk/include/parrot/encoding.h Sat May 16 23:28:06 2009 (r38852)
+++ trunk/include/parrot/encoding.h Sat May 16 23:28:14 2009 (r38853)
@@ -29,6 +29,7 @@
typedef void (*encoding_become_encoding_t)(PARROT_INTERP, STRING *src);
typedef UINTVAL (*encoding_codepoints_t)(PARROT_INTERP, STRING *src);
typedef UINTVAL (*encoding_bytes_t)(PARROT_INTERP, STRING *src);
+typedef UINTVAL (*encoding_find_cclass_t)(PARROT_INTERP, STRING *s, const INTVAL *typetable, INTVAL flags, UINTVAL offset, UINTVAL count);
/* iterator support */
@@ -55,6 +56,7 @@
encoding_codepoints_t codepoints;
encoding_bytes_t bytes;
encoding_iter_init_t iter_init;
+ encoding_find_cclass_t find_cclass;
};
typedef struct _encoding ENCODING;
@@ -218,6 +220,8 @@
((src)->encoding)->bytes((i), (src))
#define ENCODING_ITER_INIT(i, src, iter) \
((src)->encoding)->iter_init((i), (src), (iter))
+#define ENCODING_FIND_CCLASS(i, src, typetable, flags, pos, end) \
+ ((src)->encoding)->find_cclass((i), (src), (typetable), (flags), (pos), (end))
#endif /* PARROT_ENCODING_H_GUARD */
Modified: trunk/src/string/charset/ascii.c
==============================================================================
--- trunk/src/string/charset/ascii.c Sat May 16 23:28:06 2009 (r38852)
+++ trunk/src/string/charset/ascii.c Sat May 16 23:28:14 2009 (r38853)
@@ -766,6 +766,9 @@
=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, STRING
*source_string, UINTVAL offset, UINTVAL count)>
+Find a character in the given character class. Delegates to the find_cclass
+method of the encoding plugin.
+
=cut
*/
@@ -780,13 +783,8 @@
UINTVAL end = offset + count;
end = source_string->strlen < end ? source_string->strlen : end;
- for (; pos < end; ++pos) {
- const UINTVAL codepoint = ENCODING_GET_CODEPOINT(interp, source_string, pos);
- if ((Parrot_ascii_typetable[codepoint] & flags) != 0) {
- return pos;
- }
- }
- return end;
+ return ENCODING_FIND_CCLASS(interp, source_string, Parrot_ascii_typetable,
+ flags, pos, end);
}
/*
Modified: trunk/src/string/charset/iso-8859-1.c
==============================================================================
--- trunk/src/string/charset/iso-8859-1.c Sat May 16 23:28:06 2009 (r38852)
+++ trunk/src/string/charset/iso-8859-1.c Sat May 16 23:28:14 2009 (r38853)
@@ -591,6 +591,9 @@
=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, STRING
*source_string, UINTVAL offset, UINTVAL count)>
+Find a character in the given character class. Delegates to the find_cclass
+method of the encoding plugin.
+
=cut
*/
@@ -602,16 +605,10 @@
ASSERT_ARGS(find_cclass)
UINTVAL pos = offset;
UINTVAL end = offset + count;
- UINTVAL codepoint;
end = source_string->strlen < end ? source_string->strlen : end;
- for (; pos < end; ++pos) {
- codepoint = ENCODING_GET_CODEPOINT(interp, source_string, pos);
- if ((Parrot_iso_8859_1_typetable[codepoint] & flags) != 0) {
- return pos;
- }
- }
- return end;
+ return ENCODING_FIND_CCLASS(interp, source_string,
+ Parrot_iso_8859_1_typetable, flags, pos, end);
}
/*
Modified: trunk/src/string/encoding/fixed_8.c
==============================================================================
--- trunk/src/string/encoding/fixed_8.c Sat May 16 23:28:06 2009 (r38852)
+++ trunk/src/string/encoding/fixed_8.c Sat May 16 23:28:14 2009 (r38853)
@@ -34,6 +34,17 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL find_cclass(PARROT_INTERP,
+ ARGIN(STRING *s),
+ ARGIN(const INTVAL *typetable),
+ INTVAL flags,
+ UINTVAL pos,
+ UINTVAL end)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
static UINTVAL fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter))
__attribute__nonnull__(1)
__attribute__nonnull__(2)
@@ -163,6 +174,10 @@
#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = \
PARROT_ASSERT_ARG(interp) \
|| PARROT_ASSERT_ARG(source_string)
+#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = \
+ PARROT_ASSERT_ARG(interp) \
+ || PARROT_ASSERT_ARG(s) \
+ || PARROT_ASSERT_ARG(typetable)
#define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = \
PARROT_ASSERT_ARG(interp) \
|| PARROT_ASSERT_ARG(iter)
@@ -278,6 +293,33 @@
set_byte(interp, source_string, offset, codepoint);
}
+
+/*
+
+=item C<static UINTVAL find_cclass(PARROT_INTERP, STRING *s, const INTVAL
+*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
+
+Codepoints are single bytes in this encoding, so scan the byte buffer directly, testing each byte against the typetable.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL
+find_cclass(PARROT_INTERP, ARGIN(STRING *s), ARGIN(const INTVAL *typetable),
+INTVAL flags, UINTVAL pos, UINTVAL end)
+{
+ ASSERT_ARGS(find_cclass)
+ unsigned char *contents = (unsigned char *)s->strstart;
+ for (; pos < end; ++pos) {
+ if ((typetable[contents[pos]] & flags) != 0) {
+ return pos;
+ }
+ }
+ return end;
+}
+
/*
=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *source_string,
@@ -652,7 +694,8 @@
become_encoding,
codepoints,
bytes,
- iter_init
+ iter_init,
+ find_cclass
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Modified: trunk/src/string/encoding/ucs2.c
==============================================================================
--- trunk/src/string/encoding/ucs2.c Sat May 16 23:28:06 2009 (r38852)
+++ trunk/src/string/encoding/ucs2.c Sat May 16 23:28:14 2009 (r38853)
@@ -49,6 +49,17 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL find_cclass(PARROT_INTERP,
+ ARGIN(STRING *s),
+ ARGIN(const INTVAL *typetable),
+ INTVAL flags,
+ UINTVAL pos,
+ UINTVAL end)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
static UINTVAL get_byte(PARROT_INTERP,
SHIM(const STRING *src),
SHIM(UINTVAL offset))
@@ -167,6 +178,10 @@
#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = \
PARROT_ASSERT_ARG(interp) \
|| PARROT_ASSERT_ARG(src)
+#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = \
+ PARROT_ASSERT_ARG(interp) \
+ || PARROT_ASSERT_ARG(s) \
+ || PARROT_ASSERT_ARG(typetable)
#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = \
PARROT_ASSERT_ARG(interp)
#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = \
@@ -299,6 +314,27 @@
/*
+=item C<static UINTVAL find_cclass(PARROT_INTERP, STRING *s, const INTVAL
+*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
+
+Stub: always throws EXCEPTION_UNIMPLEMENTED, because the charset level handles this for unicode strings.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL
+find_cclass(PARROT_INTERP, ARGIN(STRING *s), ARGIN(const INTVAL *typetable),
+INTVAL flags, UINTVAL pos, UINTVAL end)
+{
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_UNIMPLEMENTED,
+ "No find_cclass support in unicode encoding plugins");
+}
+
+/*
+
=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *src, UINTVAL
offset)>
@@ -691,7 +727,8 @@
become_encoding,
codepoints,
bytes,
- iter_init
+ iter_init,
+ find_cclass
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Parrot_register_encoding(interp, "ucs2", return_encoding);
Modified: trunk/src/string/encoding/utf16.c
==============================================================================
--- trunk/src/string/encoding/utf16.c Sat May 16 23:28:06 2009 (r38852)
+++ trunk/src/string/encoding/utf16.c Sat May 16 23:28:14 2009 (r38853)
@@ -38,6 +38,17 @@
__attribute__nonnull__(1)
__attribute__nonnull__(2);
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL find_cclass(PARROT_INTERP,
+ ARGIN(STRING *s),
+ ARGIN(const INTVAL *typetable),
+ INTVAL flags,
+ UINTVAL pos,
+ UINTVAL end)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
static UINTVAL get_byte(SHIM_INTERP,
ARGIN(const STRING *src),
UINTVAL offset)
@@ -164,6 +175,10 @@
#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = \
PARROT_ASSERT_ARG(interp) \
|| PARROT_ASSERT_ARG(src)
+#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = \
+ PARROT_ASSERT_ARG(interp) \
+ || PARROT_ASSERT_ARG(s) \
+ || PARROT_ASSERT_ARG(typetable)
#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = \
PARROT_ASSERT_ARG(src)
#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = \
@@ -392,6 +407,27 @@
/*
+=item C<static UINTVAL find_cclass(PARROT_INTERP, STRING *s, const INTVAL
+*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
+
+Stub: always throws EXCEPTION_UNIMPLEMENTED, because the charset level handles this for unicode strings.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL
+find_cclass(PARROT_INTERP, ARGIN(STRING *s), ARGIN(const INTVAL *typetable),
+INTVAL flags, UINTVAL pos, UINTVAL end)
+{
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_UNIMPLEMENTED,
+ "No find_cclass support in unicode encoding plugins");
+}
+
+/*
+
=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *src, UINTVAL
offset)>
@@ -806,7 +842,8 @@
become_encoding,
codepoints,
bytes,
- iter_init
+ iter_init,
+ find_cclass
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Parrot_register_encoding(interp, "utf16", return_encoding);
Modified: trunk/src/string/encoding/utf8.c
==============================================================================
--- trunk/src/string/encoding/utf8.c Sat May 16 23:28:06 2009 (r38852)
+++ trunk/src/string/encoding/utf8.c Sat May 16 23:28:14 2009 (r38853)
@@ -39,6 +39,17 @@
__attribute__nonnull__(2)
FUNC_MODIFIES(*src);
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL find_cclass(PARROT_INTERP,
+ ARGIN(STRING *s),
+ ARGIN(const INTVAL *typetable),
+ INTVAL flags,
+ UINTVAL pos,
+ UINTVAL end)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
+
static UINTVAL get_byte(SHIM_INTERP,
ARGIN(const STRING *src),
UINTVAL offset)
@@ -181,6 +192,10 @@
#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = \
PARROT_ASSERT_ARG(interp) \
|| PARROT_ASSERT_ARG(src)
+#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = \
+ PARROT_ASSERT_ARG(interp) \
+ || PARROT_ASSERT_ARG(s) \
+ || PARROT_ASSERT_ARG(typetable)
#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = \
PARROT_ASSERT_ARG(src)
#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = \
@@ -683,6 +698,27 @@
/*
+=item C<static UINTVAL find_cclass(PARROT_INTERP, STRING *s, const INTVAL
+*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
+
+Stub: always throws EXCEPTION_UNIMPLEMENTED, because the charset level handles this for unicode strings.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL
+find_cclass(PARROT_INTERP, ARGIN(STRING *s), ARGIN(const INTVAL *typetable),
+INTVAL flags, UINTVAL pos, UINTVAL end)
+{
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_UNIMPLEMENTED,
+ "No find_cclass support in unicode encoding plugins");
+}
+
+/*
+
=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *src, UINTVAL
offset)>
@@ -1018,7 +1054,8 @@
become_encoding,
codepoints,
bytes,
- iter_init
+ iter_init,
+ find_cclass
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Parrot_register_encoding(interp, "utf8", return_encoding);
More information about the parrot-commits
mailing list