[svn:parrot] r48521 - in branches/substr_eq_at: include/parrot src/string src/string/charset
chromatic at svn.parrot.org
chromatic at svn.parrot.org
Mon Aug 16 04:11:35 UTC 2010
Author: chromatic
Date: Mon Aug 16 04:11:35 2010
New Revision: 48521
URL: https://trac.parrot.org/parrot/changeset/48521
Log:
[str] Added API to find substring in string.
Parrot_str_compare_offset() looks for STRING b in STRING a at offset i. This
may help PCT require far fewer new STRING headers.
Modified:
branches/substr_eq_at/include/parrot/charset.h
branches/substr_eq_at/include/parrot/string_funcs.h
branches/substr_eq_at/src/string/api.c
branches/substr_eq_at/src/string/charset/ascii.c
branches/substr_eq_at/src/string/charset/ascii.h
branches/substr_eq_at/src/string/charset/binary.c
branches/substr_eq_at/src/string/charset/unicode.c
Modified: branches/substr_eq_at/include/parrot/charset.h
==============================================================================
--- branches/substr_eq_at/include/parrot/charset.h Mon Aug 16 02:25:36 2010 (r48520)
+++ branches/substr_eq_at/include/parrot/charset.h Mon Aug 16 04:11:35 2010 (r48521)
@@ -40,7 +40,7 @@
typedef STRING * (*charset_upcase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
typedef STRING * (*charset_downcase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
typedef STRING * (*charset_titlecase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef INTVAL (*charset_compare_t)(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs));
+typedef INTVAL (*charset_compare_t)(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs), INTVAL offset);
typedef INTVAL (*charset_index_t)(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search_string), UINTVAL offset);
typedef INTVAL (*charset_rindex_t)(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search_string), UINTVAL offset);
typedef UINTVAL (*charset_validate_t)(PARROT_INTERP, ARGIN(const STRING *src));
@@ -246,7 +246,8 @@
#define CHARSET_UPCASE_FIRST(interp, source) ((source)->charset)->upcase_first((interp), (source))
#define CHARSET_DOWNCASE_FIRST(interp, source) ((source)->charset)->downcase_first((interp), (source))
#define CHARSET_TITLECASE_FIRST(interp, source) ((source)->charset)->titlecase_first((interp), (source))
-#define CHARSET_COMPARE(interp, lhs, rhs) ((const CHARSET *)(lhs)->charset)->compare((interp), (lhs), (rhs))
+#define CHARSET_COMPARE(interp, lhs, rhs) ((const CHARSET *)(lhs)->charset)->compare((interp), (lhs), (rhs), 0)
+#define CHARSET_COMPARE_OFFSET(interp, lhs, rhs, offset) ((const CHARSET *)(lhs)->charset)->compare((interp), (lhs), (rhs), offset)
#define CHARSET_INDEX(interp, source, search, offset) ((source)->charset)->index((interp), (source), (search), (offset))
#define CHARSET_RINDEX(interp, source, search, offset) ((source)->charset)->rindex((interp), (source), (search), (offset))
#define CHARSET_VALIDATE(interp, source) ((source)->charset)->validate((interp), (source))
Modified: branches/substr_eq_at/include/parrot/string_funcs.h
==============================================================================
--- branches/substr_eq_at/include/parrot/string_funcs.h Mon Aug 16 02:25:36 2010 (r48520)
+++ branches/substr_eq_at/include/parrot/string_funcs.h Mon Aug 16 04:11:35 2010 (r48521)
@@ -95,6 +95,16 @@
PARROT_EXPORT
PARROT_WARN_UNUSED_RESULT
+INTVAL Parrot_str_compare_offset(PARROT_INTERP,
+ ARGIN(const STRING *a),
+ INTVAL offset,
+ ARGIN(const STRING *b))
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(4);
+
+PARROT_EXPORT
+PARROT_WARN_UNUSED_RESULT
PARROT_CAN_RETURN_NULL
STRING * Parrot_str_compose(PARROT_INTERP, ARGIN_NULLOK(const STRING *src))
__attribute__nonnull__(1);
@@ -515,6 +525,10 @@
, PARROT_ASSERT_ARG(s))
#define ASSERT_ARGS_Parrot_str_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_Parrot_str_compare_offset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(a) \
+ , PARROT_ASSERT_ARG(b))
#define ASSERT_ARGS_Parrot_str_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp))
#define ASSERT_ARGS_Parrot_str_concat __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
Modified: branches/substr_eq_at/src/string/api.c
==============================================================================
--- branches/substr_eq_at/src/string/api.c Mon Aug 16 02:25:36 2010 (r48520)
+++ branches/substr_eq_at/src/string/api.c Mon Aug 16 04:11:35 2010 (r48521)
@@ -1326,6 +1326,44 @@
return CHARSET_COMPARE(interp, s1, s2);
}
+/*
+
+=item C<INTVAL Parrot_str_compare_offset(PARROT_INTERP, const STRING *a, INTVAL
+offset, const STRING *b)>
+
+Compares two strings to each other. If a is less than b, returns -1. If the
+strings are equal, returns 0. If a is greater than b, returns 1. This
+comparison uses the character set collation order of the strings for
+comparison. Any given offset (a positive value) will start the comparison that
+many characters from the start of a.
+
+=cut
+
+*/
+
+
+PARROT_EXPORT
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+Parrot_str_compare_offset(PARROT_INTERP, ARGIN(const STRING *a), INTVAL offset,
+ ARGIN(const STRING *b))
+{
+ ASSERT_ARGS(Parrot_str_compare_offset)
+
+ /* do these make sense? */
+ if (STRING_IS_NULL(b))
+ return a && (a->strlen != 0);
+
+ if (STRING_IS_NULL(a))
+ return -(b->strlen != 0);
+
+ ASSERT_STRING_SANITY(a);
+ ASSERT_STRING_SANITY(b);
+
+ /* XXX: sanitize offset */
+ return CHARSET_COMPARE_OFFSET(interp, a, b, offset);
+}
+
/*
Modified: branches/substr_eq_at/src/string/charset/ascii.c
==============================================================================
--- branches/substr_eq_at/src/string/charset/ascii.c Mon Aug 16 02:25:36 2010 (r48520)
+++ branches/substr_eq_at/src/string/charset/ascii.c Mon Aug 16 04:11:35 2010 (r48521)
@@ -468,10 +468,11 @@
/*
=item C<INTVAL ascii_compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
+*rhs, INTVAL offset)>
-Compares two strings as ASCII strings. If STRING C<lhs> > C<rhs>, returns
-1. If C<lhs> == C<rhs> returns 0. If STRING C<lhs> < C<rhs>, returns -1.
+Compares two strings as ASCII strings. If STRING C<lhs> > C<rhs>, returns 1. If
+C<lhs> == C<rhs> returns 0. If STRING C<lhs> < C<rhs>, returns -1. The offset
+represents the number of characters into lhs to start the comparison.
=cut
@@ -479,35 +480,39 @@
PARROT_WARN_UNUSED_RESULT
INTVAL
-ascii_compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
+ascii_compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs),
+ INTVAL offset)
{
ASSERT_ARGS(ascii_compare)
- const UINTVAL l_len = lhs->strlen;
- const UINTVAL r_len = rhs->strlen;
+ const UINTVAL l_len = lhs->strlen;
+ const UINTVAL r_len = rhs->strlen;
const UINTVAL min_len = l_len > r_len ? r_len : l_len;
+
String_iter iter;
if (lhs->encoding == rhs->encoding) {
- const int ret_val = memcmp(lhs->strstart, rhs->strstart, min_len);
+ const int ret_val = memcmp(lhs->strstart + offset,
+ rhs->strstart, min_len);
if (ret_val)
return ret_val < 0 ? -1 : 1;
}
else {
UINTVAL offs;
ENCODING_ITER_INIT(interp, rhs, &iter);
- for (offs = 0; offs < min_len; ++offs) {
+ for (offs = offset; offs < min_len; ++offs) {
const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, offs);
const UINTVAL cr = iter.get_and_advance(interp, &iter);
if (cl != cr)
return cl < cr ? -1 : 1;
}
}
- if (l_len < r_len) {
+
+ if (l_len < r_len)
return -1;
- }
- if (l_len > r_len) {
+
+ if (l_len > r_len)
return 1;
- }
+
return 0;
}
Modified: branches/substr_eq_at/src/string/charset/ascii.h
==============================================================================
--- branches/substr_eq_at/src/string/charset/ascii.h Mon Aug 16 02:25:36 2010 (r48520)
+++ branches/substr_eq_at/src/string/charset/ascii.h Mon Aug 16 04:11:35 2010 (r48521)
@@ -23,7 +23,8 @@
PARROT_WARN_UNUSED_RESULT
INTVAL ascii_compare(PARROT_INTERP,
ARGIN(const STRING *lhs),
- ARGIN(const STRING *rhs))
+ ARGIN(const STRING *rhs),
+ INTVAL offset)
__attribute__nonnull__(1)
__attribute__nonnull__(2)
__attribute__nonnull__(3);
Modified: branches/substr_eq_at/src/string/charset/binary.c
==============================================================================
--- branches/substr_eq_at/src/string/charset/binary.c Mon Aug 16 02:25:36 2010 (r48520)
+++ branches/substr_eq_at/src/string/charset/binary.c Mon Aug 16 04:11:35 2010 (r48521)
@@ -29,7 +29,8 @@
static INTVAL compare(SHIM_INTERP,
ARGIN(const STRING *lhs),
- ARGIN(const STRING *rhs))
+ ARGIN(const STRING *rhs),
+ INTVAL offset)
__attribute__nonnull__(2)
__attribute__nonnull__(3);
@@ -312,24 +313,27 @@
/*
=item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
+*rhs, INTVAL offset)>
-Compare the two buffers, first by size, then with memcmp.
+Compare the two buffers, first by size, then with memcmp. The offset
+represents the number of bytes within lhs to start the comparison.
=cut
*/
static INTVAL
-compare(SHIM_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
+compare(SHIM_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs),
+ INTVAL offset)
{
ASSERT_ARGS(compare)
- const UINTVAL l_len = lhs->strlen;
+ const UINTVAL l_len = lhs->strlen - offset;
const UINTVAL r_len = rhs->strlen;
+
if (l_len != r_len)
return l_len - r_len;
- return memcmp(lhs->strstart, rhs->strstart, l_len);
+ return memcmp(lhs->strstart + offset, rhs->strstart, l_len);
}
/*
Modified: branches/substr_eq_at/src/string/charset/unicode.c
==============================================================================
--- branches/substr_eq_at/src/string/charset/unicode.c Mon Aug 16 02:25:36 2010 (r48520)
+++ branches/substr_eq_at/src/string/charset/unicode.c Mon Aug 16 04:11:35 2010 (r48521)
@@ -28,7 +28,8 @@
static INTVAL compare(PARROT_INTERP,
ARGIN(const STRING *lhs),
- ARGIN(const STRING *rhs))
+ ARGIN(const STRING *rhs),
+ INTVAL offset)
__attribute__nonnull__(1)
__attribute__nonnull__(2)
__attribute__nonnull__(3);
@@ -637,17 +638,19 @@
/*
=item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
+*rhs, INTVAL offset)>
Compares two STRINGs, C<lhs> and C<rhs>. Returns -1 if C<lhs> < C<rhs>. Returns
-0 if C<lhs> = C<rhs>. Returns 1 if C<lhs> > C<rhs>.
+0 if C<lhs> = C<rhs>. Returns 1 if C<lhs> > C<rhs>. The offset represents the
+number of characters from the start of lhs from which to begin the comparison.
=cut
*/
static INTVAL
-compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
+compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs),
+ INTVAL offset)
{
ASSERT_ARGS(compare)
String_iter l_iter, r_iter;
@@ -657,12 +660,12 @@
ENCODING_ITER_INIT(interp, lhs, &l_iter);
ENCODING_ITER_INIT(interp, rhs, &r_iter);
- l_len = lhs->strlen;
- r_len = rhs->strlen;
+ l_len = lhs->strlen - offset;
+ r_len = rhs->strlen;
min_len = l_len > r_len ? r_len : l_len;
- for (offs = 0; offs < min_len; ++offs) {
+ for (offs = offset; offs < min_len; ++offs) {
cl = l_iter.get_and_advance(interp, &l_iter);
cr = r_iter.get_and_advance(interp, &r_iter);
More information about the parrot-commits
mailing list