[svn:parrot] r49221 - in branches/string_macros: include/parrot src src/io src/ops src/pmc src/string src/string/encoding t/op

nwellnhof at svn.parrot.org nwellnhof at svn.parrot.org
Wed Sep 22 01:17:14 UTC 2010


Author: nwellnhof
Date: Wed Sep 22 01:17:13 2010
New Revision: 49221
URL: https://trac.parrot.org/parrot/changeset/49221

Log:
[str] Switch to STRING_ord macro

Move the whole 'ord' logic into the string vtable functions.

Also modifies Parrot_str_indexed to accept negative indices. The old
string_ord function can finally be deprecated.

Modified:
   branches/string_macros/include/parrot/string.h
   branches/string_macros/include/parrot/string_funcs.h
   branches/string_macros/src/io/filehandle.c
   branches/string_macros/src/library.c
   branches/string_macros/src/ops/core_ops.c
   branches/string_macros/src/ops/string.ops
   branches/string_macros/src/pmc.c
   branches/string_macros/src/pmc/codestring.pmc
   branches/string_macros/src/pmc/nci.pmc
   branches/string_macros/src/pmc/string.pmc
   branches/string_macros/src/spf_render.c
   branches/string_macros/src/string/api.c
   branches/string_macros/src/string/encoding/shared.c
   branches/string_macros/src/string/encoding/shared.h
   branches/string_macros/src/string/encoding/ucs2.c
   branches/string_macros/src/string/encoding/ucs4.c
   branches/string_macros/src/string/encoding/utf16.c
   branches/string_macros/src/string/encoding/utf8.c
   branches/string_macros/src/warnings.c
   branches/string_macros/t/op/string.t

Modified: branches/string_macros/include/parrot/string.h
==============================================================================
--- branches/string_macros/include/parrot/string.h	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/include/parrot/string.h	Wed Sep 22 01:17:13 2010	(r49221)
@@ -96,7 +96,7 @@
 typedef UINTVAL  (*str_vtable_validate_t)(PARROT_INTERP, ARGIN(const STRING *src));
 
 typedef UINTVAL  (*str_vtable_scan_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef UINTVAL  (*str_vtable_ord_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
+typedef UINTVAL  (*str_vtable_ord_t)(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset);
 typedef STRING * (*str_vtable_substr_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
 
 /* character classes */

Modified: branches/string_macros/include/parrot/string_funcs.h
==============================================================================
--- branches/string_macros/include/parrot/string_funcs.h	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/include/parrot/string_funcs.h	Wed Sep 22 01:17:13 2010	(r49221)
@@ -201,9 +201,7 @@
 
 PARROT_EXPORT
 PARROT_WARN_UNUSED_RESULT
-INTVAL Parrot_str_indexed(PARROT_INTERP,
-    ARGIN(const STRING *s),
-    UINTVAL idx)
+INTVAL Parrot_str_indexed(PARROT_INTERP, ARGIN(const STRING *s), INTVAL idx)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 

Modified: branches/string_macros/src/io/filehandle.c
==============================================================================
--- branches/string_macros/src/io/filehandle.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/io/filehandle.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -52,7 +52,7 @@
     mode_len = Parrot_str_byte_length(interp, mode_str);
 
     for (i = 0; i < mode_len; ++i) {
-        const INTVAL s = Parrot_str_indexed(interp, mode_str, i);
+        const INTVAL s = STRING_ord(interp, mode_str, i);
         switch (s) {
           case 'r':
             flags |= PIO_F_READ;

Modified: branches/string_macros/src/library.c
==============================================================================
--- branches/string_macros/src/library.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/library.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -448,8 +448,8 @@
     STRING * const path_separator_string = string_chr(interp, path_separator);
 
     /* make sure the path has a trailing slash before appending the file */
-    if (Parrot_str_indexed(interp, path , path->strlen - 1)
-         != Parrot_str_indexed(interp, path_separator_string, 0))
+    if (STRING_ord(interp, path, -1)
+         != STRING_ord(interp, path_separator_string, 0))
         path = Parrot_str_concat(interp, path , path_separator_string);
 
     return path;

Modified: branches/string_macros/src/ops/core_ops.c
==============================================================================
--- branches/string_macros/src/ops/core_ops.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/ops/core_ops.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -22223,42 +22223,42 @@
 opcode_t *
 Parrot_ord_i_s(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    IREG(1) = string_ord(interp, SREG(2), 0);
+    IREG(1) = STRING_ord(interp, SREG(2), 0);
 
 return (opcode_t *)cur_opcode + 3;}
 
 opcode_t *
 Parrot_ord_i_sc(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    IREG(1) = string_ord(interp, CONST(2).u.string, 0);
+    IREG(1) = STRING_ord(interp, CONST(2).u.string, 0);
 
 return (opcode_t *)cur_opcode + 3;}
 
 opcode_t *
 Parrot_ord_i_s_i(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    IREG(1) = string_ord(interp, SREG(2), IREG(3));
+    IREG(1) = STRING_ord(interp, SREG(2), IREG(3));
 
 return (opcode_t *)cur_opcode + 4;}
 
 opcode_t *
 Parrot_ord_i_sc_i(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    IREG(1) = string_ord(interp, CONST(2).u.string, IREG(3));
+    IREG(1) = STRING_ord(interp, CONST(2).u.string, IREG(3));
 
 return (opcode_t *)cur_opcode + 4;}
 
 opcode_t *
 Parrot_ord_i_s_ic(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    IREG(1) = string_ord(interp, SREG(2), cur_opcode[3]);
+    IREG(1) = STRING_ord(interp, SREG(2), cur_opcode[3]);
 
 return (opcode_t *)cur_opcode + 4;}
 
 opcode_t *
 Parrot_ord_i_sc_ic(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    IREG(1) = string_ord(interp, CONST(2).u.string, cur_opcode[3]);
+    IREG(1) = STRING_ord(interp, CONST(2).u.string, cur_opcode[3]);
 
 return (opcode_t *)cur_opcode + 4;}
 

Modified: branches/string_macros/src/ops/string.ops
==============================================================================
--- branches/string_macros/src/ops/string.ops	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/ops/string.ops	Wed Sep 22 01:17:13 2010	(r49221)
@@ -40,11 +40,11 @@
 =cut
 
 inline op ord(out INT, in STR) :base_core {
-    $1 = string_ord(interp, $2, 0);
+    $1 = STRING_ord(interp, $2, 0);
 }
 
 inline op ord(out INT, in STR, in INT) :base_core {
-    $1 = string_ord(interp, $2, $3);
+    $1 = STRING_ord(interp, $2, $3);
 }
 
 

Modified: branches/string_macros/src/pmc.c
==============================================================================
--- branches/string_macros/src/pmc.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/pmc.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -1004,14 +1004,14 @@
         pos = idx;
         len = Parrot_str_byte_length(interp, role);
 
-        if (pos && (Parrot_str_indexed(interp, what, pos - 1) != 32)) {
+        if (pos && (STRING_ord(interp, what, pos - 1) != 32)) {
             pos += len;
             continue;
         }
 
         if (pos + len < length) {
             pos += len;
-            if (Parrot_str_indexed(interp, what, pos) != 32)
+            if (STRING_ord(interp, what, pos) != 32)
                 continue;
         }
 

Modified: branches/string_macros/src/pmc/codestring.pmc
==============================================================================
--- branches/string_macros/src/pmc/codestring.pmc	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/pmc/codestring.pmc	Wed Sep 22 01:17:13 2010	(r49221)
@@ -151,7 +151,8 @@
     VTABLE_set_string_keyed_int(INTERP, parts, 1, fmt);
 
     /* Add a newline if necessary */
-    if ('\n' != Parrot_str_indexed(INTERP, fmt, Parrot_str_length(INTERP, fmt) - 1))
+    if (STRING_length(fmt) > 0
+    &&  '\n' != STRING_ord(INTERP, fmt, -1))
         VTABLE_set_string_keyed_int(INTERP, parts, 2, newline);
 
     S1 = Parrot_str_join(INTERP, STRINGNULL, parts);
@@ -199,8 +200,8 @@
 
             /* treat \r\n as a single newline */
             if (jpos < eos
-            && string_ord(INTERP, str, jpos - 1)     == 13
-            && string_ord(INTERP, str, jpos)         == 10) {
+            && STRING_ord(INTERP, str, jpos - 1) == 13
+            && STRING_ord(INTERP, str, jpos)     == 10) {
                 ++jpos;
             }
             /* search for the next newline */

Modified: branches/string_macros/src/pmc/nci.pmc
==============================================================================
--- branches/string_macros/src/pmc/nci.pmc	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/pmc/nci.pmc	Wed Sep 22 01:17:13 2010	(r49221)
@@ -78,7 +78,7 @@
     size_t i;
 
     for (i = 0; i < sig_length; ++i) {
-        const INTVAL c = Parrot_str_indexed(interp, sig, i);
+        const INTVAL c = STRING_ord(interp, sig, i);
 
         PARROT_ASSERT(j < buf_length - 1);
 

Modified: branches/string_macros/src/pmc/string.pmc
==============================================================================
--- branches/string_macros/src/pmc/string.pmc	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/pmc/string.pmc	Wed Sep 22 01:17:13 2010	(r49221)
@@ -447,7 +447,7 @@
     VTABLE INTVAL get_integer_keyed_int(INTVAL pos) {
         STRING      *str_val;
         GET_ATTR_str_val(INTERP, SELF, str_val);
-        return string_ord(INTERP, str_val, pos);
+        return STRING_ord(INTERP, str_val, pos);
     }
 
     VTABLE PMC *get_pmc_keyed(PMC *key) {

Modified: branches/string_macros/src/spf_render.c
==============================================================================
--- branches/string_macros/src/spf_render.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/spf_render.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -147,12 +147,12 @@
     if (is_int_type) {
         if (info->flags & FLAG_PREC && info->prec == 0 &&
                 len == 1 &&
-                string_ord(interp, str, 0) == '0') {
+                STRING_ord(interp, str, 0) == '0') {
             str = Parrot_str_chopn(interp, str, len);
             len = 0;
         }
         /* +, space */
-        if (!len || string_ord(interp, str, 0) != '-') {
+        if (!len || STRING_ord(interp, str, 0) != '-') {
             if (info->flags & FLAG_PLUS) {
                 STRING * const cs = CONST_STRING(interp, "+");
                 str = Parrot_str_concat(interp, cs, str);
@@ -197,8 +197,8 @@
         else {                  /* right-align */
             /* signed and zero padded */
             if (info->flags & FLAG_ZERO
-                && (string_ord(interp, str, 0) == '-' ||
-                    string_ord(interp, str, 0) == '+')) {
+                && (STRING_ord(interp, str, 0) == '-' ||
+                    STRING_ord(interp, str, 0) == '+')) {
                 STRING *temp = NULL;
                 STRING *ignored;
                 temp = Parrot_str_substr(interp, str, 1, len-1);
@@ -429,7 +429,7 @@
     char tc[PARROT_SPRINTF_BUFFER_SIZE];
 
     for (i = 0; i < pat_len; ++i) {
-        if (string_ord(interp, pat, i) == '%') {        /* % */
+        if (STRING_ord(interp, pat, i) == '%') {        /* % */
             if (len) {
                 substr = Parrot_str_substr(interp, pat, old, len);
                 /* XXX This shouldn't modify targ the pointer */
@@ -437,7 +437,7 @@
             }
             len = 0;
             old = i;
-            if (string_ord(interp, pat, i + 1) == '%') {
+            if (STRING_ord(interp, pat, i + 1) == '%') {
                 /* skip this one, make next the first char
                  * of literal sequence, starting at old */
                 ++i;
@@ -548,7 +548,7 @@
  */
 
                 for (++i; i < pat_len && info.phase != PHASE_DONE; ++i) {
-                    const INTVAL ch = string_ord(interp, pat, i);
+                    const INTVAL ch = STRING_ord(interp, pat, i);
 
                     switch (info.phase) {
                     /*@fallthrough@ */ case PHASE_FLAGS:

Modified: branches/string_macros/src/string/api.c
==============================================================================
--- branches/string_macros/src/string/api.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/string/api.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -756,14 +756,13 @@
 
 /*
 
-=item C<INTVAL Parrot_str_indexed(PARROT_INTERP, const STRING *s, UINTVAL idx)>
+=item C<INTVAL Parrot_str_indexed(PARROT_INTERP, const STRING *s, INTVAL idx)>
 
-Returns the character (or glyph, depending upon the string's encoding).  This
-abstracts the process of finding the Nth character in a (possibly Unicode or
-JIS-encoded) string, the idea being that once the encoding functions are
-fleshed out, this function can do the right thing.
+Returns the codepoint at a given index into a string. Negative indexes are
+treated as counting from the end of the string. Throws an exception if C<s>
+is null or C<idx> is out of bounds.
 
-Note that this is not range-checked.
+Identical to the STRING_ord macro.
 
 =cut
 
@@ -772,11 +771,15 @@
 PARROT_EXPORT
 PARROT_WARN_UNUSED_RESULT
 INTVAL
-Parrot_str_indexed(PARROT_INTERP, ARGIN(const STRING *s), UINTVAL idx)
+Parrot_str_indexed(PARROT_INTERP, ARGIN(const STRING *s), INTVAL idx)
 {
     ASSERT_ARGS(Parrot_str_indexed)
-    ASSERT_STRING_SANITY(s);
-    return (INTVAL)STRING_ord(interp, s, idx);
+
+    if (s == NULL)
+        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNEXPECTED_NULL,
+            "Invalid operation on null string");
+
+    return STRING_ord(interp, s, idx);
 }
 
 
@@ -826,26 +829,10 @@
 string_ord(PARROT_INTERP, ARGIN(const STRING *s), INTVAL idx)
 {
     ASSERT_ARGS(string_ord)
-    const UINTVAL len        = STRING_length(s);
 
-    if (idx < 0)
-        idx += len;
-
-    if ((UINTVAL)idx >= len) {
-        const char *err_msg;
-
-        if (STRING_IS_NULL(s))
-            err_msg = "Cannot get character of NULL string";
-        else if (!len)
-            err_msg = "Cannot get character of empty string";
-        else if (idx >= 0)
-            err_msg = "Cannot get character past end of string";
-        else if (idx < 0)
-            err_msg = "Cannot get character before beginning of string";
-
-        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ORD_OUT_OF_STRING,
-            err_msg);
-    }
+    if (s == NULL)
+        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNEXPECTED_NULL,
+            "Invalid operation on null string");
 
     return STRING_ord(interp, s, idx);
 }
@@ -2847,7 +2834,7 @@
         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
             "increment only for length = 1 done");
 
-    o = (UINTVAL)string_ord(interp, s, 0);
+    o = (UINTVAL)STRING_ord(interp, s, 0);
 
     if ((o >= 'A' && o < 'Z') || (o >= 'a' && o < 'z')) {
         ++o;

Modified: branches/string_macros/src/string/encoding/shared.c
==============================================================================
--- branches/string_macros/src/string/encoding/shared.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/string/encoding/shared.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -348,6 +348,35 @@
 
 /*
 
+=item C<void encoding_ord_error(PARROT_INTERP, const STRING *s, INTVAL offset)>
+
+Throws the right exception if STRING_ord was called with a wrong index.
+C<offset> is the wrong offset into the string C<s>.
+
+=cut
+
+*/
+
+void
+encoding_ord_error(PARROT_INTERP, ARGIN(const STRING *s), INTVAL offset)
+{
+    const UINTVAL len = STRING_length(s);
+    const char   *err_msg;
+
+    if (!len)
+        err_msg = "Cannot get character of empty string";
+    else if (offset >= 0)
+        err_msg = "Cannot get character past end of string";
+    else if (offset < 0)
+        err_msg = "Cannot get character before beginning of string";
+
+    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ORD_OUT_OF_STRING,
+        err_msg);
+}
+
+
+/*
+
 =item C<STRING * encoding_substr(PARROT_INTERP, const STRING *src, UINTVAL
 offset, UINTVAL count)>
 
@@ -766,7 +795,7 @@
 
 /*
 
-=item C<UINTVAL fixed8_ord(PARROT_INTERP, const STRING *src, UINTVAL offset)>
+=item C<UINTVAL fixed8_ord(PARROT_INTERP, const STRING *src, INTVAL idx)>
 
 codepoints are bytes, so delegate
 
@@ -776,20 +805,18 @@
 
 PARROT_WARN_UNUSED_RESULT
 UINTVAL
-fixed8_ord(PARROT_INTERP, ARGIN(const STRING *src),
-        UINTVAL offset)
+fixed8_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
 {
     ASSERT_ARGS(fixed8_ord)
-    const unsigned char * const buf = (unsigned char *)src->strstart;
+    const UINTVAL len = STRING_length(src);
 
-    if (offset >= src->bufused) {
-/*        Parrot_ex_throw_from_c_args(interp, NULL, 0,
-                "fixed8_ord past the end of the buffer (%i of %i)",
-                offset, src->bufused); */
-        return 0;
-    }
+    if (idx < 0)
+        idx += len;
+
+    if ((UINTVAL)idx >= len)
+        encoding_ord_error(interp, src, idx);
 
-    return buf[offset];
+    return (unsigned char)src->strstart[idx];
 }
 
 

Modified: branches/string_macros/src/string/encoding/shared.h
==============================================================================
--- branches/string_macros/src/string/encoding/shared.h	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/string/encoding/shared.h	Wed Sep 22 01:17:13 2010	(r49221)
@@ -85,6 +85,12 @@
         __attribute__nonnull__(1)
         __attribute__nonnull__(3);
 
+void encoding_ord_error(PARROT_INTERP,
+    ARGIN(const STRING *s),
+    INTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
 PARROT_WARN_UNUSED_RESULT
 INTVAL encoding_rindex(PARROT_INTERP,
     SHIM(const STRING *src),
@@ -181,7 +187,7 @@
         FUNC_MODIFIES(*iter);
 
 PARROT_WARN_UNUSED_RESULT
-UINTVAL fixed8_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
+UINTVAL fixed8_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
@@ -277,6 +283,9 @@
 #define ASSERT_ARGS_encoding_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_encoding_ord_error __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(s))
 #define ASSERT_ARGS_encoding_rindex __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_encoding_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\

Modified: branches/string_macros/src/string/encoding/ucs2.c
==============================================================================
--- branches/string_macros/src/string/encoding/ucs2.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/string/encoding/ucs2.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -86,9 +86,7 @@
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
-static UINTVAL ucs2_ord(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
+static UINTVAL ucs2_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
@@ -212,8 +210,7 @@
 
 /*
 
-=item C<static UINTVAL ucs2_ord(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
+=item C<static UINTVAL ucs2_ord(PARROT_INTERP, const STRING *src, INTVAL idx)>
 
 Returns the codepoint in string C<src> at position C<offset>.
 
@@ -222,15 +219,24 @@
 */
 
 static UINTVAL
-ucs2_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
+ucs2_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
 {
     ASSERT_ARGS(ucs2_ord)
 #if PARROT_HAS_ICU
-    const UChar * const s = (const UChar*) src->strstart;
-    UNUSED(interp);
-    return s[offset];
+    const UINTVAL len = STRING_length(src);
+    const UChar  *s;
+
+    if (idx < 0)
+        idx += len;
+
+    if ((UINTVAL)idx >= len)
+        encoding_ord_error(interp, src, idx);
+
+    s = (const UChar *)src->strstart;
+
+    return s[idx];
 #else
-    UNUSED(offset);
+    UNUSED(idx);
     UNUSED(src);
     no_ICU_lib(interp);
 #endif

Modified: branches/string_macros/src/string/encoding/ucs4.c
==============================================================================
--- branches/string_macros/src/string/encoding/ucs4.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/string/encoding/ucs4.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -86,9 +86,7 @@
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
-static UINTVAL ucs4_ord(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
+static UINTVAL ucs4_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
@@ -225,8 +223,7 @@
 
 /*
 
-=item C<static UINTVAL ucs4_ord(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
+=item C<static UINTVAL ucs4_ord(PARROT_INTERP, const STRING *src, INTVAL idx)>
 
 Returns the codepoint in string C<src> at position C<offset>.
 
@@ -235,15 +232,24 @@
 */
 
 static UINTVAL
-ucs4_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
+ucs4_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
 {
     ASSERT_ARGS(ucs4_ord)
 #if PARROT_HAS_ICU
-    const UChar32 * const s = (const UChar32*) src->strstart;
-    UNUSED(interp);
-    return s[offset];
+    const UINTVAL  len = STRING_length(src);
+    const UChar32 *s;
+
+    if (idx < 0)
+        idx += len;
+
+    if ((UINTVAL)idx >= len)
+        encoding_ord_error(interp, src, idx);
+
+    s = (const UChar32 *)src->strstart;
+
+    return s[idx];
 #else
-    UNUSED(offset);
+    UNUSED(idx);
     UNUSED(src);
     no_ICU_lib(interp);
 #endif

Modified: branches/string_macros/src/string/encoding/utf16.c
==============================================================================
--- branches/string_macros/src/string/encoding/utf16.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/string/encoding/utf16.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -73,7 +73,7 @@
 
 static UINTVAL utf16_ord(PARROT_INTERP,
     ARGIN(const STRING *src),
-    UINTVAL offset)
+    INTVAL idx)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
@@ -264,8 +264,7 @@
 
 /*
 
-=item C<static UINTVAL utf16_ord(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
+=item C<static UINTVAL utf16_ord(PARROT_INTERP, const STRING *src, INTVAL idx)>
 
 Returns the codepoint in string C<src> at position C<offset>.
 
@@ -274,21 +273,29 @@
 */
 
 static UINTVAL
-utf16_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
+utf16_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
 {
     ASSERT_ARGS(utf16_ord)
 #if PARROT_HAS_ICU
-    const UChar * const s = (UChar*) src->strstart;
-    UINTVAL c, pos;
-    UNUSED(interp);
+    const UINTVAL len = STRING_length(src);
+    const UChar  *s;
+    UINTVAL       c, pos;
+
+    if (idx < 0)
+        idx += len;
+
+    if ((UINTVAL)idx >= len)
+        encoding_ord_error(interp, src, idx);
 
+    s   = (UChar *)src->strstart;
     pos = 0;
-    U16_FWD_N_UNSAFE(s, pos, offset);
+    U16_FWD_N_UNSAFE(s, pos, idx);
     U16_GET_UNSAFE(s, pos, c);
+
     return c;
 #else
     UNUSED(src);
-    UNUSED(offset);
+    UNUSED(idx);
 
     Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
         "no ICU lib loaded");

Modified: branches/string_macros/src/string/encoding/utf8.c
==============================================================================
--- branches/string_macros/src/string/encoding/utf8.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/string/encoding/utf8.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -78,9 +78,7 @@
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
-static UINTVAL utf8_ord(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
+static UINTVAL utf8_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
@@ -94,11 +92,11 @@
 
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
-static const void * utf8_skip_backward(ARGIN(const void *ptr), UINTVAL n)
+static const utf8_t * utf8_skip_backward(ARGIN(const void *ptr), UINTVAL n)
         __attribute__nonnull__(1);
 
 PARROT_CANNOT_RETURN_NULL
-static const void * utf8_skip_forward(ARGIN(const void *ptr), UINTVAL n)
+static const utf8_t * utf8_skip_forward(ARGIN(const void *ptr), UINTVAL n)
         __attribute__nonnull__(1);
 
 PARROT_CAN_RETURN_NULL
@@ -304,8 +302,7 @@
 
 /*
 
-=item C<static UINTVAL utf8_ord(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
+=item C<static UINTVAL utf8_ord(PARROT_INTERP, const STRING *src, INTVAL idx)>
 
 Returns the codepoint in string C<src> at position C<offset>.
 
@@ -314,10 +311,20 @@
 */
 
 static UINTVAL
-utf8_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
+utf8_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
 {
     ASSERT_ARGS(utf8_ord)
-    const utf8_t * const start = (const utf8_t *)utf8_skip_forward(src->strstart, offset);
+    const UINTVAL len = STRING_length(src);
+    const utf8_t *start;
+
+    if (idx < 0)
+        idx += len;
+
+    if ((UINTVAL)idx >= len)
+        encoding_ord_error(interp, src, idx);
+
+    start = utf8_skip_forward(src->strstart, idx);
+
     return utf8_decode(interp, start);
 }
 
@@ -407,7 +414,7 @@
 
 /*
 
-=item C<static const void * utf8_skip_forward(const void *ptr, UINTVAL n)>
+=item C<static const utf8_t * utf8_skip_forward(const void *ptr, UINTVAL n)>
 
 Moves C<ptr> C<n> characters forward.
 
@@ -416,7 +423,7 @@
 */
 
 PARROT_CANNOT_RETURN_NULL
-static const void *
+static const utf8_t *
 utf8_skip_forward(ARGIN(const void *ptr), UINTVAL n)
 {
     ASSERT_ARGS(utf8_skip_forward)
@@ -432,7 +439,7 @@
 
 /*
 
-=item C<static const void * utf8_skip_backward(const void *ptr, UINTVAL n)>
+=item C<static const utf8_t * utf8_skip_backward(const void *ptr, UINTVAL n)>
 
 Moves C<ptr> C<n> characters back.
 
@@ -444,7 +451,7 @@
 
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
-static const void *
+static const utf8_t *
 utf8_skip_backward(ARGIN(const void *ptr), UINTVAL n)
 {
     ASSERT_ARGS(utf8_skip_backward)
@@ -479,10 +486,10 @@
     const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos);
 
     if (offset > 0) {
-        u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, offset);
+        u8ptr = utf8_skip_forward(u8ptr, offset);
     }
     else if (offset < 0) {
-        u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -offset);
+        u8ptr = utf8_skip_backward(u8ptr, -offset);
     }
 
     return utf8_decode(interp, u8ptr);
@@ -508,10 +515,10 @@
     const utf8_t *u8ptr = (utf8_t *)((char *)str->strstart + i->bytepos);
 
     if (skip > 0) {
-        u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, skip);
+        u8ptr = utf8_skip_forward(u8ptr, skip);
     }
     else if (skip < 0) {
-        u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr, -skip);
+        u8ptr = utf8_skip_backward(u8ptr, -skip);
     }
 
     i->charpos += skip;
@@ -629,22 +636,22 @@
     if (pos < i->charpos) {
         if (pos <= (i->charpos >> 1)) {
             /* go forward from start */
-            u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr, pos);
+            u8ptr = utf8_skip_forward(u8ptr, pos);
         }
         else {
             /* go backward from current */
-            u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + i->bytepos, i->charpos - pos);
+            u8ptr = utf8_skip_backward(u8ptr + i->bytepos, i->charpos - pos);
         }
     }
     else {
         const UINTVAL  len = str->strlen;
         if (pos <= i->charpos + ((len - i->charpos) >> 1)) {
             /* go forward from current */
-            u8ptr = (const utf8_t *)utf8_skip_forward(u8ptr + i->bytepos, pos - i->charpos);
+            u8ptr = utf8_skip_forward(u8ptr + i->bytepos, pos - i->charpos);
         }
         else {
             /* go backward from end */
-            u8ptr = (const utf8_t *)utf8_skip_backward(u8ptr + str->bufused, len - pos);
+            u8ptr = utf8_skip_backward(u8ptr + str->bufused, len - pos);
         }
     }
 

Modified: branches/string_macros/src/warnings.c
==============================================================================
--- branches/string_macros/src/warnings.c	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/src/warnings.c	Wed Sep 22 01:17:13 2010	(r49221)
@@ -74,7 +74,7 @@
         Parrot_io_puts(interp, Parrot_io_STDERR(interp), "Unknown warning\n");
     else {
         Parrot_io_putps(interp, Parrot_io_STDERR(interp), msg);
-        if (string_ord(interp, msg, -1) != '\n')
+        if (STRING_ord(interp, msg, -1) != '\n')
             Parrot_io_eprintf(interp, "%c", '\n');
     }
     print_pbc_location(interp);

Modified: branches/string_macros/t/op/string.t
==============================================================================
--- branches/string_macros/t/op/string.t	Wed Sep 22 01:16:25 2010	(r49220)
+++ branches/string_macros/t/op/string.t	Wed Sep 22 01:17:13 2010	(r49221)
@@ -650,7 +650,7 @@
    ord $I0,$S0
    ok( 0, 'no exception: 2-param ord, empty string register' )
  handler:
-   .exception_is( 'Cannot get character of NULL string' )
+   .exception_is( 'Invalid operation on null string' )
 .end
 
 .sub exception_three_param_ord_empty_string
@@ -666,7 +666,7 @@
    ord $I0,$S0,0
    ok( 0, 'no exception: 3-param ord, empty string register' )
  handler:
-   .exception_is( 'Cannot get character of NULL string' )
+   .exception_is( 'Invalid operation on null string' )
 .end
 
 .sub two_param_ord_one_character_string


More information about the parrot-commits mailing list