[svn:parrot] r48521 - in branches/substr_eq_at: include/parrot src/string src/string/charset

chromatic at svn.parrot.org chromatic at svn.parrot.org
Mon Aug 16 04:11:35 UTC 2010


Author: chromatic
Date: Mon Aug 16 04:11:35 2010
New Revision: 48521
URL: https://trac.parrot.org/parrot/changeset/48521

Log:
[str] Added API to find substring in string.

Parrot_str_compare_offset() looks for STRING b in STRING a at offset i.  This
may help PCT require far fewer new STRING headers.

Modified:
   branches/substr_eq_at/include/parrot/charset.h
   branches/substr_eq_at/include/parrot/string_funcs.h
   branches/substr_eq_at/src/string/api.c
   branches/substr_eq_at/src/string/charset/ascii.c
   branches/substr_eq_at/src/string/charset/ascii.h
   branches/substr_eq_at/src/string/charset/binary.c
   branches/substr_eq_at/src/string/charset/unicode.c

Modified: branches/substr_eq_at/include/parrot/charset.h
==============================================================================
--- branches/substr_eq_at/include/parrot/charset.h	Mon Aug 16 02:25:36 2010	(r48520)
+++ branches/substr_eq_at/include/parrot/charset.h	Mon Aug 16 04:11:35 2010	(r48521)
@@ -40,7 +40,7 @@
 typedef STRING * (*charset_upcase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
 typedef STRING * (*charset_downcase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
 typedef STRING * (*charset_titlecase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef INTVAL   (*charset_compare_t)(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs));
+typedef INTVAL   (*charset_compare_t)(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs), INTVAL offset);
 typedef INTVAL   (*charset_index_t)(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search_string), UINTVAL offset);
 typedef INTVAL   (*charset_rindex_t)(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search_string), UINTVAL offset);
 typedef UINTVAL  (*charset_validate_t)(PARROT_INTERP, ARGIN(const STRING *src));
@@ -246,7 +246,8 @@
 #define CHARSET_UPCASE_FIRST(interp, source) ((source)->charset)->upcase_first((interp), (source))
 #define CHARSET_DOWNCASE_FIRST(interp, source) ((source)->charset)->downcase_first((interp), (source))
 #define CHARSET_TITLECASE_FIRST(interp, source) ((source)->charset)->titlecase_first((interp), (source))
-#define CHARSET_COMPARE(interp, lhs, rhs) ((const CHARSET *)(lhs)->charset)->compare((interp), (lhs), (rhs))
+#define CHARSET_COMPARE(interp, lhs, rhs) ((const CHARSET *)(lhs)->charset)->compare((interp), (lhs), (rhs), 0)
+#define CHARSET_COMPARE_OFFSET(interp, lhs, rhs, offset) ((const CHARSET *)(lhs)->charset)->compare((interp), (lhs), (rhs), offset)
 #define CHARSET_INDEX(interp, source, search, offset) ((source)->charset)->index((interp), (source), (search), (offset))
 #define CHARSET_RINDEX(interp, source, search, offset) ((source)->charset)->rindex((interp), (source), (search), (offset))
 #define CHARSET_VALIDATE(interp, source) ((source)->charset)->validate((interp), (source))

Modified: branches/substr_eq_at/include/parrot/string_funcs.h
==============================================================================
--- branches/substr_eq_at/include/parrot/string_funcs.h	Mon Aug 16 02:25:36 2010	(r48520)
+++ branches/substr_eq_at/include/parrot/string_funcs.h	Mon Aug 16 04:11:35 2010	(r48521)
@@ -95,6 +95,16 @@
 
 PARROT_EXPORT
 PARROT_WARN_UNUSED_RESULT
+INTVAL Parrot_str_compare_offset(PARROT_INTERP,
+    ARGIN(const STRING *a),
+    INTVAL offset,
+    ARGIN(const STRING *b))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(4);
+
+PARROT_EXPORT
+PARROT_WARN_UNUSED_RESULT
 PARROT_CAN_RETURN_NULL
 STRING * Parrot_str_compose(PARROT_INTERP, ARGIN_NULLOK(const STRING *src))
         __attribute__nonnull__(1);
@@ -515,6 +525,10 @@
     , PARROT_ASSERT_ARG(s))
 #define ASSERT_ARGS_Parrot_str_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_Parrot_str_compare_offset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(a) \
+    , PARROT_ASSERT_ARG(b))
 #define ASSERT_ARGS_Parrot_str_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_Parrot_str_concat __attribute__unused__ int _ASSERT_ARGS_CHECK = (\

Modified: branches/substr_eq_at/src/string/api.c
==============================================================================
--- branches/substr_eq_at/src/string/api.c	Mon Aug 16 02:25:36 2010	(r48520)
+++ branches/substr_eq_at/src/string/api.c	Mon Aug 16 04:11:35 2010	(r48521)
@@ -1326,6 +1326,44 @@
     return CHARSET_COMPARE(interp, s1, s2);
 }
 
+/*
+
+=item C<INTVAL Parrot_str_compare_offset(PARROT_INTERP, const STRING *a, INTVAL
+offset, const STRING *b)>
+
+Compares two strings to each other.  If C<a> is less than C<b>, returns -1.  If
+the strings are equal, returns 0.  If C<a> is greater than C<b>, returns 1.  This
+comparison uses the character set collation order of the strings for
+comparison.  Any given offset (a positive value) will start the comparison that
+many characters from the start of C<a>.
+
+=cut
+
+*/
+
+
+PARROT_EXPORT
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+Parrot_str_compare_offset(PARROT_INTERP, ARGIN(const STRING *a), INTVAL offset,
+    ARGIN(const STRING *b))
+{
+    ASSERT_ARGS(Parrot_str_compare_offset)
+
+    /* a NULL STRING is ordered like an empty one -- do these make sense? */
+    if (STRING_IS_NULL(b))
+        return a && (a->strlen != 0);
+
+    if (STRING_IS_NULL(a))
+        return -(b->strlen != 0);
+
+    ASSERT_STRING_SANITY(a);
+    ASSERT_STRING_SANITY(b);
+
+    /* XXX: sanitize offset; it reaches the charset compare unchecked */
+    return CHARSET_COMPARE_OFFSET(interp, a, b, offset);
+}
+
 
 /*
 

Modified: branches/substr_eq_at/src/string/charset/ascii.c
==============================================================================
--- branches/substr_eq_at/src/string/charset/ascii.c	Mon Aug 16 02:25:36 2010	(r48520)
+++ branches/substr_eq_at/src/string/charset/ascii.c	Mon Aug 16 04:11:35 2010	(r48521)
@@ -468,10 +468,11 @@
 /*
 
 =item C<INTVAL ascii_compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
+*rhs, INTVAL offset)>
 
-Compares two strings as ASCII strings. If STRING C<lhs> > C<rhs>, returns
-1. If C<lhs> == C<rhs> returns 0. If STRING C<lhs> < C<rhs>, returns  -1.
+Compares two strings as ASCII strings. If STRING C<lhs> > C<rhs>, returns 1. If
+C<lhs> == C<rhs> returns 0. If STRING C<lhs> < C<rhs>, returns -1.  The offset
+represents the number of characters into lhs to start the comparison.
 
 =cut
 
@@ -479,35 +480,39 @@
 
 PARROT_WARN_UNUSED_RESULT
 INTVAL
-ascii_compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
+ascii_compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs),
+    INTVAL offset)
 {
     ASSERT_ARGS(ascii_compare)
-    const UINTVAL l_len = lhs->strlen;
-    const UINTVAL r_len = rhs->strlen;
+    const UINTVAL l_len   = lhs->strlen;
+    const UINTVAL r_len   = rhs->strlen;
     const UINTVAL min_len = l_len > r_len ? r_len : l_len;
+
     String_iter iter;
 
     if (lhs->encoding == rhs->encoding) {
-        const int ret_val = memcmp(lhs->strstart, rhs->strstart, min_len);
+        const int ret_val = memcmp(lhs->strstart + offset,
+                                   rhs->strstart, min_len);
         if (ret_val)
             return ret_val < 0 ? -1 : 1;
     }
     else {
         UINTVAL offs;
         ENCODING_ITER_INIT(interp, rhs, &iter);
-        for (offs = 0; offs < min_len; ++offs) {
+        for (offs = offset; offs < min_len; ++offs) {
             const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, offs);
             const UINTVAL cr = iter.get_and_advance(interp, &iter);
             if (cl != cr)
                 return cl < cr ? -1 : 1;
         }
     }
-    if (l_len < r_len) {
+
+    if (l_len < r_len)
         return -1;
-    }
-    if (l_len > r_len) {
+
+    if (l_len > r_len)
         return 1;
-    }
+
     return 0;
 }
 

Modified: branches/substr_eq_at/src/string/charset/ascii.h
==============================================================================
--- branches/substr_eq_at/src/string/charset/ascii.h	Mon Aug 16 02:25:36 2010	(r48520)
+++ branches/substr_eq_at/src/string/charset/ascii.h	Mon Aug 16 04:11:35 2010	(r48521)
@@ -23,7 +23,8 @@
 PARROT_WARN_UNUSED_RESULT
 INTVAL ascii_compare(PARROT_INTERP,
     ARGIN(const STRING *lhs),
-    ARGIN(const STRING *rhs))
+    ARGIN(const STRING *rhs),
+    INTVAL offset)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2)
         __attribute__nonnull__(3);

Modified: branches/substr_eq_at/src/string/charset/binary.c
==============================================================================
--- branches/substr_eq_at/src/string/charset/binary.c	Mon Aug 16 02:25:36 2010	(r48520)
+++ branches/substr_eq_at/src/string/charset/binary.c	Mon Aug 16 04:11:35 2010	(r48521)
@@ -29,7 +29,8 @@
 
 static INTVAL compare(SHIM_INTERP,
     ARGIN(const STRING *lhs),
-    ARGIN(const STRING *rhs))
+    ARGIN(const STRING *rhs),
+    INTVAL offset)
         __attribute__nonnull__(2)
         __attribute__nonnull__(3);
 
@@ -312,24 +313,27 @@
 /*
 
 =item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
+*rhs, INTVAL offset)>
 
-Compare the two buffers, first by size, then with memcmp.
+Compare the two buffers, first by size, then with memcmp.  The offset
+represents the number of bytes within lhs to start the comparison.
 
 =cut
 
 */
 
 static INTVAL
-compare(SHIM_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
+compare(SHIM_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs),
+        INTVAL offset)
 {
     ASSERT_ARGS(compare)
-    const UINTVAL l_len = lhs->strlen;
+    const UINTVAL l_len = lhs->strlen - offset;
     const UINTVAL r_len = rhs->strlen;
+
     if (l_len != r_len)
         return l_len - r_len;
 
-    return memcmp(lhs->strstart, rhs->strstart, l_len);
+    return memcmp(lhs->strstart + offset, rhs->strstart, l_len);
 }
 
 /*

Modified: branches/substr_eq_at/src/string/charset/unicode.c
==============================================================================
--- branches/substr_eq_at/src/string/charset/unicode.c	Mon Aug 16 02:25:36 2010	(r48520)
+++ branches/substr_eq_at/src/string/charset/unicode.c	Mon Aug 16 04:11:35 2010	(r48521)
@@ -28,7 +28,8 @@
 
 static INTVAL compare(PARROT_INTERP,
     ARGIN(const STRING *lhs),
-    ARGIN(const STRING *rhs))
+    ARGIN(const STRING *rhs),
+    INTVAL offset)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2)
         __attribute__nonnull__(3);
@@ -637,17 +638,19 @@
 /*
 
 =item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
+*rhs, INTVAL offset)>
 
 Compares two STRINGs, C<lhs> and C<rhs>. Returns -1 if C<lhs> < C<rhs>. Returns
-0 if C<lhs> = C<rhs>. Returns 1 if C<lhs> > C<rhs>.
+0 if C<lhs> = C<rhs>. Returns 1 if C<lhs> > C<rhs>.  The offset represents the
+number of characters from the start of lhs from which to begin the comparison.
 
 =cut
 
 */
 
 static INTVAL
-compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
+compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs),
+        INTVAL offset)
 {
     ASSERT_ARGS(compare)
     String_iter l_iter, r_iter;
@@ -657,12 +660,12 @@
     ENCODING_ITER_INIT(interp, lhs, &l_iter);
     ENCODING_ITER_INIT(interp, rhs, &r_iter);
 
-    l_len = lhs->strlen;
-    r_len = rhs->strlen;
+    l_len   = lhs->strlen - offset;
+    r_len   = rhs->strlen;
 
     min_len = l_len > r_len ? r_len : l_len;
 
-    for (offs = 0; offs < min_len; ++offs) {
+    for (offs = offset; offs < min_len; ++offs) {
         cl = l_iter.get_and_advance(interp, &l_iter);
         cr = r_iter.get_and_advance(interp, &r_iter);
 


More information about the parrot-commits mailing list