[svn:parrot] r49417 - in trunk: include/parrot src src/dynpmc src/io src/ops src/pmc src/string src/string/encoding t/op

nwellnhof at svn.parrot.org nwellnhof at svn.parrot.org
Sat Oct 2 22:26:14 UTC 2010


Author: nwellnhof
Date: Sat Oct  2 22:26:13 2010
New Revision: 49417
URL: https://trac.parrot.org/parrot/changeset/49417

Log:
[str] Switch to STRING_substr macro

Move the whole 'substr' logic into the string vtable functions

Modified:
   trunk/include/parrot/string.h
   trunk/src/dynext.c
   trunk/src/dynpmc/os.pmc
   trunk/src/io/api.c
   trunk/src/io/buffer.c
   trunk/src/library.c
   trunk/src/ops/core_ops.c
   trunk/src/ops/string.ops
   trunk/src/packfile.c
   trunk/src/pmc/codestring.pmc
   trunk/src/pmc/scalar.pmc
   trunk/src/pmc/string.pmc
   trunk/src/pmc/stringbuilder.pmc
   trunk/src/spf_render.c
   trunk/src/spf_vtable.c
   trunk/src/string/api.c
   trunk/src/string/encoding/ascii.c
   trunk/src/string/encoding/binary.c
   trunk/src/string/encoding/latin1.c
   trunk/src/string/encoding/shared.c
   trunk/src/string/encoding/shared.h
   trunk/src/string/encoding/ucs2.c
   trunk/src/string/encoding/ucs4.c
   trunk/src/string/encoding/utf16.c
   trunk/t/op/string.t

Modified: trunk/include/parrot/string.h
==============================================================================
--- trunk/include/parrot/string.h	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/include/parrot/string.h	Sat Oct  2 22:26:13 2010	(r49417)
@@ -97,7 +97,7 @@
 
 typedef UINTVAL  (*str_vtable_scan_t)(PARROT_INTERP, ARGIN(const STRING *src));
 typedef UINTVAL  (*str_vtable_ord_t)(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset);
-typedef STRING * (*str_vtable_substr_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
+typedef STRING * (*str_vtable_substr_t)(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL count);
 
 /* character classes */
 typedef INTVAL   (*str_vtable_is_cclass_t)(PARROT_INTERP, INTVAL, ARGIN(const STRING *src), UINTVAL offset);

Modified: trunk/src/dynext.c
==============================================================================
--- trunk/src/dynext.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/dynext.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -355,7 +355,7 @@
         *handle = Parrot_dlopen((char *)lib->strstart + 3, 0);
 
         if (*handle)
-            return Parrot_str_substr(interp, lib, 3, lib->strlen - 3);
+            return STRING_substr(interp, lib, 3, lib->strlen - 3);
     }
 #endif
 
@@ -363,7 +363,7 @@
 #ifdef __CYGWIN__
     if (!STRING_length(lib) >= 3 && memcmp(lib->strstart, "lib", 3) == 0) {
         path = Parrot_str_concat(interp, CONST_STRING(interp, "cyg"),
-            Parrot_str_substr(interp, lib, 3, lib->strlen - 3));
+            STRING_substr(interp, lib, 3, lib->strlen - 3));
 
         *handle = dlopen_string(interp, flags, path);
 

Modified: trunk/src/dynpmc/os.pmc
==============================================================================
--- trunk/src/dynpmc/os.pmc	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/dynpmc/os.pmc	Sat Oct  2 22:26:13 2010	(r49417)
@@ -506,11 +506,8 @@
         HANDLE hFind = INVALID_HANDLE_VALUE;
 
         /* Add \* to the directory name and start search. */
-        STRING *last_char = Parrot_str_substr(INTERP, path,
-                                              Parrot_str_length(INTERP, path) - 1, 1, NULL, 0);
-        int trailing_slash = STRING_equal(INTERP, last_char, string_from_literal(INTERP, "\\"))
-                             ||
-                             STRING_equal(INTERP, last_char, string_from_literal(INTERP, "/"));
+        INTVAL last_char = STRING_ord(INTERP, path, -1);
+        int trailing_slash = last_char == '\\' || last_char == '/';
         cpath = Parrot_str_to_cstring(INTERP, Parrot_str_concat(INTERP,
                 path, string_from_literal(INTERP, trailing_slash ? "*" : "\\*"), 0));
         hFind = FindFirstFile(cpath, &file_find_data);

Modified: trunk/src/io/api.c
==============================================================================
--- trunk/src/io/api.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/io/api.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -364,7 +364,7 @@
             if (offset + read_length > orig_length)
                 read_length = orig_length - offset;
 
-            result = Parrot_str_substr(interp, string_orig, offset, read_length);
+            result = STRING_substr(interp, string_orig, offset, read_length);
             SETATTR_StringHandle_read_offset(interp, pmc, offset + read_length);
         }
     }
@@ -422,7 +422,7 @@
         else
             read_length = newline_pos - offset + 1; /* +1 to include the newline */
 
-        result = Parrot_str_substr(interp, result, offset, read_length);
+        result = STRING_substr(interp, result, offset, read_length);
         SETATTR_StringHandle_read_offset(interp, pmc, newline_pos + 1);
     }
     else

Modified: trunk/src/io/buffer.c
==============================================================================
--- trunk/src/io/buffer.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/io/buffer.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -338,7 +338,7 @@
             STRING *sf;
 
             s->strlen = s->bufused = current + len;
-            sf        = Parrot_str_substr(interp, s, current, len);
+            sf        = STRING_substr(interp, s, current, len);
             got       = PIO_READ(interp, filehandle, &sf);
             s->strlen = s->bufused = current + got;
 

Modified: trunk/src/library.c
==============================================================================
--- trunk/src/library.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/library.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -573,7 +573,7 @@
 
     if (!STRING_IS_NULL(test_path)) {
         if (Parrot_str_byte_length(interp, test_path) > 4) {
-            STRING *orig_ext = Parrot_str_substr(interp, test_path, -4, 4);
+            STRING *orig_ext = STRING_substr(interp, test_path, -4, 4);
             /* First try substituting .pbc for the .pir extension */
             if (STRING_equal(interp, orig_ext, pir_extension)) {
                 STRING * const without_ext = Parrot_str_chopn(interp, test_path, 4);
@@ -600,7 +600,7 @@
 
         /* Finally, try substituting .pbc for the .pasm extension. */
         if (Parrot_str_byte_length(interp, test_path) > 5) {
-            STRING * const orig_ext = Parrot_str_substr(interp, test_path, -5, 5);
+            STRING * const orig_ext = STRING_substr(interp, test_path, -5, 5);
             if (STRING_equal(interp, orig_ext, pasm_extension)) {
                 STRING * const without_ext = Parrot_str_chopn(interp, test_path, 5);
                 test_path = Parrot_str_concat(interp, without_ext, bytecode_extension);
@@ -866,17 +866,17 @@
     ++pos_dot;
     ++pos_sl;
     if (pos_sl && pos_dot) {
-        stem = Parrot_str_substr(interp, in, pos_sl, pos_dot - pos_sl - 1);
-        *wo_ext = Parrot_str_substr(interp, in, 0, pos_dot - 1);
-        *ext = Parrot_str_substr(interp, in, pos_dot, len - pos_dot);
+        stem = STRING_substr(interp, in, pos_sl, pos_dot - pos_sl - 1);
+        *wo_ext = STRING_substr(interp, in, 0, pos_dot - 1);
+        *ext = STRING_substr(interp, in, pos_dot, len - pos_dot);
     }
     else if (pos_dot) {
-        stem = Parrot_str_substr(interp, in, 0, pos_dot - 1);
+        stem = STRING_substr(interp, in, 0, pos_dot - 1);
         *wo_ext = stem;
-        *ext = Parrot_str_substr(interp, in, pos_dot, len - pos_dot);
+        *ext = STRING_substr(interp, in, pos_dot, len - pos_dot);
     }
     else if (pos_sl) {
-        stem = Parrot_str_substr(interp, in, pos_sl, len - pos_sl);
+        stem = STRING_substr(interp, in, pos_sl, len - pos_sl);
         *wo_ext = in;
         *ext = STRINGNULL;
     }

Modified: trunk/src/ops/core_ops.c
==============================================================================
--- trunk/src/ops/core_ops.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/ops/core_ops.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -22537,7 +22537,7 @@
 Parrot_substr_s_s_i(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
     const INTVAL len = Parrot_str_byte_length(interp, SREG(2));
-    SREG(1) = Parrot_str_substr(interp, SREG(2), IREG(3), len);
+    SREG(1) = STRING_substr(interp, SREG(2), IREG(3), len);
 
 return (opcode_t *)cur_opcode + 4;}
 
@@ -22545,7 +22545,7 @@
 Parrot_substr_s_sc_i(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
     const INTVAL len = Parrot_str_byte_length(interp, SCONST(2));
-    SREG(1) = Parrot_str_substr(interp, SCONST(2), IREG(3), len);
+    SREG(1) = STRING_substr(interp, SCONST(2), IREG(3), len);
 
 return (opcode_t *)cur_opcode + 4;}
 
@@ -22553,7 +22553,7 @@
 Parrot_substr_s_s_ic(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
     const INTVAL len = Parrot_str_byte_length(interp, SREG(2));
-    SREG(1) = Parrot_str_substr(interp, SREG(2), ICONST(3), len);
+    SREG(1) = STRING_substr(interp, SREG(2), ICONST(3), len);
 
 return (opcode_t *)cur_opcode + 4;}
 
@@ -22561,63 +22561,63 @@
 Parrot_substr_s_sc_ic(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
     const INTVAL len = Parrot_str_byte_length(interp, SCONST(2));
-    SREG(1) = Parrot_str_substr(interp, SCONST(2), ICONST(3), len);
+    SREG(1) = STRING_substr(interp, SCONST(2), ICONST(3), len);
 
 return (opcode_t *)cur_opcode + 4;}
 
 opcode_t *
 Parrot_substr_s_s_i_i(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    SREG(1) = Parrot_str_substr(interp, SREG(2), IREG(3), IREG(4));
+    SREG(1) = STRING_substr(interp, SREG(2), IREG(3), IREG(4));
 
 return (opcode_t *)cur_opcode + 5;}
 
 opcode_t *
 Parrot_substr_s_sc_i_i(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    SREG(1) = Parrot_str_substr(interp, SCONST(2), IREG(3), IREG(4));
+    SREG(1) = STRING_substr(interp, SCONST(2), IREG(3), IREG(4));
 
 return (opcode_t *)cur_opcode + 5;}
 
 opcode_t *
 Parrot_substr_s_s_ic_i(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    SREG(1) = Parrot_str_substr(interp, SREG(2), ICONST(3), IREG(4));
+    SREG(1) = STRING_substr(interp, SREG(2), ICONST(3), IREG(4));
 
 return (opcode_t *)cur_opcode + 5;}
 
 opcode_t *
 Parrot_substr_s_sc_ic_i(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    SREG(1) = Parrot_str_substr(interp, SCONST(2), ICONST(3), IREG(4));
+    SREG(1) = STRING_substr(interp, SCONST(2), ICONST(3), IREG(4));
 
 return (opcode_t *)cur_opcode + 5;}
 
 opcode_t *
 Parrot_substr_s_s_i_ic(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    SREG(1) = Parrot_str_substr(interp, SREG(2), IREG(3), ICONST(4));
+    SREG(1) = STRING_substr(interp, SREG(2), IREG(3), ICONST(4));
 
 return (opcode_t *)cur_opcode + 5;}
 
 opcode_t *
 Parrot_substr_s_sc_i_ic(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    SREG(1) = Parrot_str_substr(interp, SCONST(2), IREG(3), ICONST(4));
+    SREG(1) = STRING_substr(interp, SCONST(2), IREG(3), ICONST(4));
 
 return (opcode_t *)cur_opcode + 5;}
 
 opcode_t *
 Parrot_substr_s_s_ic_ic(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    SREG(1) = Parrot_str_substr(interp, SREG(2), ICONST(3), ICONST(4));
+    SREG(1) = STRING_substr(interp, SREG(2), ICONST(3), ICONST(4));
 
 return (opcode_t *)cur_opcode + 5;}
 
 opcode_t *
 Parrot_substr_s_sc_ic_ic(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    SREG(1) = Parrot_str_substr(interp, SCONST(2), ICONST(3), ICONST(4));
+    SREG(1) = STRING_substr(interp, SCONST(2), ICONST(3), ICONST(4));
 
 return (opcode_t *)cur_opcode + 5;}
 

Modified: trunk/src/ops/string.ops
==============================================================================
--- trunk/src/ops/string.ops	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/ops/string.ops	Sat Oct  2 22:26:13 2010	(r49417)
@@ -252,11 +252,11 @@
 
 inline op substr(out STR, in STR, in INT) :base_core {
     const INTVAL len = Parrot_str_byte_length(interp, $2);
-    $1 = Parrot_str_substr(interp, $2, $3, len);
+    $1 = STRING_substr(interp, $2, $3, len);
 }
 
 inline op substr(out STR, in STR, in INT, in INT) :base_core {
-    $1 = Parrot_str_substr(interp, $2, $3, $4);
+    $1 = STRING_substr(interp, $2, $3, $4);
 }
 
 inline op substr(out STR, invar PMC, in INT, in INT) :base_core {

Modified: trunk/src/packfile.c
==============================================================================
--- trunk/src/packfile.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/packfile.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -2903,7 +2903,7 @@
 
     /* find seg e.g. CODE_DB => CODE and attach it */
     str_len     = Parrot_str_length(interp, debug->base.name);
-    code_name   = Parrot_str_substr(interp, debug->base.name, 0, str_len - 3);
+    code_name   = STRING_substr(interp, debug->base.name, 0, str_len - 3);
     code        = (PackFile_ByteCode *)PackFile_find_segment(interp, self->dir, code_name, 0);
 
     if (!code || code->base.type != PF_BYTEC_SEG) {
@@ -3777,7 +3777,7 @@
 
     /* Need to associate this segment with the applicable code segment. */
     str_len     = Parrot_str_length(interp, self->base.name);
-    code_name   = Parrot_str_substr(interp, self->base.name, 0, str_len - 4);
+    code_name   = STRING_substr(interp, self->base.name, 0, str_len - 4);
     code        = (PackFile_ByteCode *)PackFile_find_segment(interp,
                                 self->base.dir, code_name, 0);
 
@@ -4235,7 +4235,7 @@
     /* Get the base path of the located module */
     parrot_split_path_ext(interp, path, &found_path, &found_ext);
     name_length = Parrot_str_length(interp, lang_name);
-    found_path = Parrot_str_substr(interp, found_path, 0,
+    found_path = STRING_substr(interp, found_path, 0,
             Parrot_str_length(interp, found_path)-name_length);
 
     Parrot_lib_add_path(interp, Parrot_str_concat(interp, found_path, CONST_STRING(interp, "include/")),

Modified: trunk/src/pmc/codestring.pmc
==============================================================================
--- trunk/src/pmc/codestring.pmc	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/pmc/codestring.pmc	Sat Oct  2 22:26:13 2010	(r49417)
@@ -119,7 +119,7 @@
         if (pos < 0)
             break;
 
-        key = Parrot_str_substr(INTERP, fmt, pos+1, 1);
+        key = STRING_substr(INTERP, fmt, pos+1, 1);
 
         if (VTABLE_exists_keyed_str(INTERP, hash, key)) {
             repl = VTABLE_get_string_keyed_str(INTERP, hash, key);

Modified: trunk/src/pmc/scalar.pmc
==============================================================================
--- trunk/src/pmc/scalar.pmc	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/pmc/scalar.pmc	Sat Oct  2 22:26:13 2010	(r49417)
@@ -831,7 +831,7 @@
 */
 
     VTABLE STRING *substr_str(INTVAL offset, INTVAL length) {
-        return Parrot_str_substr(INTERP, VTABLE_get_string(INTERP, SELF),
+        return STRING_substr(INTERP, VTABLE_get_string(INTERP, SELF),
                 offset, length);
     }
 

Modified: trunk/src/pmc/string.pmc
==============================================================================
--- trunk/src/pmc/string.pmc	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/pmc/string.pmc	Sat Oct  2 22:26:13 2010	(r49417)
@@ -365,7 +365,7 @@
     VTABLE void substr(INTVAL offset, INTVAL length, PMC *dest) {
         STRING *str_val, *s2;
         GET_ATTR_str_val(INTERP, SELF, str_val);
-        s2 = Parrot_str_substr(INTERP, str_val, offset, length);
+        s2 = STRING_substr(INTERP, str_val, offset, length);
         VTABLE_set_string_native(INTERP, dest, s2);
     }
 
@@ -382,7 +382,7 @@
     VTABLE STRING *substr_str(INTVAL offset, INTVAL length) {
         STRING *str_val;
         GET_ATTR_str_val(INTERP, SELF, str_val);
-        return Parrot_str_substr(INTERP, str_val, offset, length);
+        return STRING_substr(INTERP, str_val, offset, length);
     }
 
 /*
@@ -437,7 +437,7 @@
     VTABLE STRING *get_string_keyed_int(INTVAL pos) {
         STRING      *str_val;
         GET_ATTR_str_val(INTERP, SELF, str_val);
-        return Parrot_str_substr(INTERP, str_val, pos, 1);
+        return STRING_substr(INTERP, str_val, pos, 1);
     }
 
     VTABLE INTVAL get_integer_keyed(PMC *key) {

Modified: trunk/src/pmc/stringbuilder.pmc
==============================================================================
--- trunk/src/pmc/stringbuilder.pmc	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/pmc/stringbuilder.pmc	Sat Oct  2 22:26:13 2010	(r49417)
@@ -320,7 +320,7 @@
         /* We must clone here becase we can reallocate buffer behind the scene... */
         /* TODO Optimize it to avoid creation of redundant STRING */
         return Parrot_str_clone(INTERP,
-                Parrot_str_substr(INTERP, buffer, offset, length));
+                STRING_substr(INTERP, buffer, offset, length));
     }
 
 /*
@@ -369,7 +369,7 @@
                 else {
                     /* remaining string can be added as is. */
                     VTABLE_push_string(INTERP, stringbuilder,
-                        Parrot_str_substr(INTERP, fmt, pos,
+                        STRING_substr(INTERP, fmt, pos,
                             Parrot_str_length(INTERP, fmt) -pos));
                     }
                 break;
@@ -377,13 +377,13 @@
             else {
                 /* slurp up to just before the % sign... */
                 VTABLE_push_string(INTERP, stringbuilder,
-                    Parrot_str_substr(INTERP, fmt, pos, percentPos - pos));
+                    STRING_substr(INTERP, fmt, pos, percentPos - pos));
                 /* skip the % sign */
                 pos = percentPos + 1 ;
             }
 
             /* key is always a single character */
-            key = Parrot_str_substr(INTERP, fmt, pos++, 1);
+            key = STRING_substr(INTERP, fmt, pos++, 1);
 
             if (VTABLE_exists_keyed_str(INTERP, hash, key)) {
                 VTABLE_push_string(INTERP, stringbuilder,
@@ -411,7 +411,7 @@
             else {
                 /* %foo has no special meaning, pass it through unchanged */
                 VTABLE_push_string(INTERP, stringbuilder,
-                    Parrot_str_substr(INTERP, fmt, pos-2, 2));
+                    STRING_substr(INTERP, fmt, pos-2, 2));
             }
         }
 

Modified: trunk/src/spf_render.c
==============================================================================
--- trunk/src/spf_render.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/spf_render.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -201,7 +201,7 @@
                     STRING_ord(interp, str, 0) == '+')) {
                 STRING *temp = NULL;
                 STRING *ignored;
-                temp = Parrot_str_substr(interp, str, 1, len-1);
+                temp = STRING_substr(interp, str, 1, len-1);
                 str = Parrot_str_chopn(interp, str, -1);
                 str = Parrot_str_concat(interp, str, fill);
                 str = Parrot_str_concat(interp, str, temp);
@@ -431,7 +431,7 @@
     for (i = 0; i < pat_len; ++i) {
         if (STRING_ord(interp, pat, i) == '%') {        /* % */
             if (len) {
-                substr = Parrot_str_substr(interp, pat, old, len);
+                substr = STRING_substr(interp, pat, old, len);
                 /* XXX This shouldn't modify targ the pointer */
                 targ = Parrot_str_concat(interp, targ, substr);
             }
@@ -941,7 +941,7 @@
         }
     }
     if (len) {
-        substr = Parrot_str_substr(interp, pat, old, len);
+        substr = STRING_substr(interp, pat, old, len);
         targ = Parrot_str_concat(interp, targ, substr);
     }
 

Modified: trunk/src/spf_vtable.c
==============================================================================
--- trunk/src/spf_vtable.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/spf_vtable.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -452,7 +452,7 @@
 
     ++obj->index;
     s = VTABLE_get_string(interp, tmp);
-    return Parrot_str_substr(interp, s, 0, 1);
+    return STRING_substr(interp, s, 0, 1);
 }
 
 /*

Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/string/api.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -958,7 +958,12 @@
 offset, INTVAL length)>
 
 Returns substring of length C<length> from C<offset> from the specified
-Parrot string.
+Parrot string. If C<offset> is negative, it counts from the end of the
+string. Returns the empty string if C<offset> equals the length of the
+string. Throws an exception if C<src> is null or C<offset> is out of bounds.
+Truncates C<length> if it extends beyond the end of the string.
+
+Identical to the STRING_substr macro.
 
 =cut
 
@@ -972,35 +977,9 @@
         ARGIN_NULLOK(const STRING *src), INTVAL offset, INTVAL length)
 {
     ASSERT_ARGS(Parrot_str_substr)
-    const UINTVAL strlen = STRING_length(src);
-    UINTVAL       maxlen;
-
-    if (offset < 0)
-        offset += strlen;
-
-    if ((UINTVAL)offset >= strlen || length <= 0) {
-        if (STRING_IS_NULL(src))
-            Parrot_ex_throw_from_c_args(interp, NULL,
-                EXCEPTION_SUBSTR_OUT_OF_STRING, "Cannot substr on a null string");
-
-        /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
-        if ((UINTVAL)offset == strlen || length <= 0)
-            return Parrot_str_new_noinit(interp, 0);
-
-        Parrot_ex_throw_from_c_args(interp, NULL,
-            EXCEPTION_SUBSTR_OUT_OF_STRING,
-            "Cannot take substr outside string");
-    }
-
-    ASSERT_STRING_SANITY(src);
-
-    maxlen = strlen - offset;
-
-    if ((UINTVAL)length > maxlen)
-        length = maxlen;
 
-    if (length == strlen && !offset)
-        return (STRING *)src;
+    if (src == NULL)
+        src = STRINGNULL;
 
     return STRING_substr(interp, src, offset, length);
 }
@@ -1257,7 +1236,7 @@
     if (n >= 0)
         end += STRING_length(s);
 
-    return Parrot_str_substr(interp, s, 0, end);
+    return STRING_substr(interp, s, 0, end);
 }
 
 

Modified: trunk/src/string/encoding/ascii.c
==============================================================================
--- trunk/src/string/encoding/ascii.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/string/encoding/ascii.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -518,7 +518,7 @@
 
     fixed8_scan,
     fixed8_ord,
-    fixed8_substr,
+    fixed_substr,
 
     ascii_is_cclass,
     ascii_find_cclass,

Modified: trunk/src/string/encoding/binary.c
==============================================================================
--- trunk/src/string/encoding/binary.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/string/encoding/binary.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -240,7 +240,7 @@
 
     fixed8_scan,
     fixed8_ord,
-    fixed8_substr,
+    fixed_substr,
 
     binary_is_cclass,
     binary_find_cclass,

Modified: trunk/src/string/encoding/latin1.c
==============================================================================
--- trunk/src/string/encoding/latin1.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/string/encoding/latin1.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -545,7 +545,7 @@
 
     fixed8_scan,
     fixed8_ord,
-    fixed8_substr,
+    fixed_substr,
 
     latin1_is_cclass,
     latin1_find_cclass,

Modified: trunk/src/string/encoding/shared.c
==============================================================================
--- trunk/src/string/encoding/shared.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/string/encoding/shared.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -378,8 +378,8 @@
 
 /*
 
-=item C<STRING * encoding_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
+=item C<STRING * encoding_substr(PARROT_INTERP, const STRING *src, INTVAL
+offset, INTVAL length)>
 
 Returns the codepoints in string C<src> at position C<offset> and length
 C<count>.
@@ -390,28 +390,51 @@
 
 PARROT_CANNOT_RETURN_NULL
 STRING *
-encoding_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+encoding_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
 {
     ASSERT_ARGS(encoding_substr)
-
-    STRING * const return_string = Parrot_str_copy(interp, src);
+    const UINTVAL  strlen = STRING_length(src);
+    STRING        *return_string;
     String_iter    iter;
     UINTVAL        start;
 
+    if (offset < 0)
+        offset += strlen;
+
+    if ((UINTVAL)offset >= strlen || length <= 0) {
+        /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
+        if ((UINTVAL)offset == strlen || length <= 0)
+            return Parrot_str_new_constant(interp, "");
+
+        Parrot_ex_throw_from_c_args(interp, NULL,
+            EXCEPTION_SUBSTR_OUT_OF_STRING,
+            "Cannot take substr outside string");
+    }
+
+    return_string = Parrot_str_copy(interp, src);
+
+    if (offset == 0 && (UINTVAL)length >= strlen)
+        return return_string;
+
     STRING_ITER_INIT(interp, &iter);
 
     if (offset)
         STRING_iter_set_position(interp, src, &iter, offset);
 
-    start                   = iter.bytepos;
-    return_string->strstart = (char *)return_string->strstart + start;
+    start = iter.bytepos;
+    return_string->strstart += start;
 
-    if (count)
-        STRING_iter_set_position(interp, src, &iter, offset + count);
+    if ((UINTVAL)length >= strlen - (UINTVAL)offset) {
+        return_string->bufused -= start;
+        return_string->strlen  -= offset;
+    }
+    else {
+        STRING_iter_set_position(interp, src, &iter, offset + length);
+        return_string->bufused = iter.bytepos - start;
+        return_string->strlen  = length;
+    }
 
-    return_string->bufused  = iter.bytepos - start;
-    return_string->strlen   = count;
-    return_string->hashval  = 0;
+    return_string->hashval = 0;
 
     return return_string;
 }
@@ -823,11 +846,11 @@
 
 /*
 
-=item C<STRING * fixed8_substr(PARROT_INTERP, const STRING *src, UINTVAL offset,
-UINTVAL count)>
+=item C<STRING * fixed_substr(PARROT_INTERP, const STRING *src, INTVAL offset,
+INTVAL length)>
 
 Returns the codepoints in string C<src> at position C<offset> and length
-C<count>.
+C<length>. Works for all fixed-size encodings.
 
 =cut
 
@@ -836,16 +859,41 @@
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
 STRING *
-fixed8_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+fixed_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
 {
-    ASSERT_ARGS(fixed8_substr)
-    STRING * const return_string = Parrot_str_copy(interp, src);
+    ASSERT_ARGS(fixed_substr)
+    const UINTVAL  strlen = STRING_length(src);
+    STRING        *return_string;
+    UINTVAL        maxlen, bytes_per_codepoint;
+
+    if (offset < 0)
+        offset += strlen;
+
+    if ((UINTVAL)offset >= strlen || length <= 0) {
+        /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
+        if ((UINTVAL)offset == strlen || length <= 0)
+            return Parrot_str_new_constant(interp, "");
+
+        Parrot_ex_throw_from_c_args(interp, NULL,
+            EXCEPTION_SUBSTR_OUT_OF_STRING,
+            "Cannot take substr outside string");
+    }
+
+    return_string = Parrot_str_copy(interp, src);
+
+    if (offset == 0 && (UINTVAL)length >= strlen)
+        return return_string;
+
+    bytes_per_codepoint = src->encoding->max_bytes_per_codepoint;
+    maxlen              = strlen - offset;
+
+    if ((UINTVAL)length > maxlen)
+        length = maxlen;
 
-    return_string->encoding      = src->encoding;
-    return_string->strstart      = (char *)return_string->strstart + offset;
-    return_string->bufused       = count;
-    return_string->strlen        = count;
-    return_string->hashval       = 0;
+    return_string->strstart += offset * bytes_per_codepoint;
+    return_string->bufused   = length * bytes_per_codepoint;
+    return_string->strlen    = length;
+    return_string->hashval   = 0;
 
     return return_string;
 }

Modified: trunk/src/string/encoding/shared.h
==============================================================================
--- trunk/src/string/encoding/shared.h	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/string/encoding/shared.h	Sat Oct  2 22:26:13 2010	(r49417)
@@ -105,8 +105,8 @@
 PARROT_CANNOT_RETURN_NULL
 STRING * encoding_substr(PARROT_INTERP,
     ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
+    INTVAL offset,
+    INTVAL length)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
@@ -207,10 +207,10 @@
 
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
-STRING * fixed8_substr(PARROT_INTERP,
+STRING * fixed_substr(PARROT_INTERP,
     ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
+    INTVAL offset,
+    INTVAL length)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
@@ -339,7 +339,7 @@
 #define ASSERT_ARGS_fixed8_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_fixed8_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+#define ASSERT_ARGS_fixed_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_unicode_chr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\

Modified: trunk/src/string/encoding/ucs2.c
==============================================================================
--- trunk/src/string/encoding/ucs2.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/string/encoding/ucs2.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -97,15 +97,6 @@
 
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
-static STRING * ucs2_substr(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
 static STRING * ucs2_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
@@ -139,9 +130,6 @@
 #define ASSERT_ARGS_ucs2_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ucs2_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_ucs2_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
@@ -242,39 +230,6 @@
 #endif
 }
 
-/*
-
-=item C<static STRING * ucs2_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the codepoints in string C<src> at position C<offset> and length
-C<count>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-ucs2_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(ucs2_substr)
-    STRING * const return_string = Parrot_str_copy(interp, src);
-
-#if PARROT_HAS_ICU
-    return_string->strstart = (char*)src->strstart + offset * sizeof (UChar);
-    return_string->bufused  = count * sizeof (UChar);
-    return_string->strlen   = count;
-    return_string->hashval  = 0;
-    return return_string;
-#else
-    UNUSED(src);
-    UNUSED(offset);
-    UNUSED(count);
-    no_ICU_lib(interp);
-#endif
-}
 
 /*
 
@@ -471,7 +426,7 @@
 
     ucs2_scan,
     ucs2_ord,
-    ucs2_substr,
+    fixed_substr,
 
     encoding_is_cclass,
     encoding_find_cclass,

Modified: trunk/src/string/encoding/ucs4.c
==============================================================================
--- trunk/src/string/encoding/ucs4.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/string/encoding/ucs4.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -97,15 +97,6 @@
 
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
-static STRING * ucs4_substr(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
 static STRING * ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
@@ -139,9 +130,6 @@
 #define ASSERT_ARGS_ucs4_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ucs4_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_ucs4_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src))
@@ -258,36 +246,6 @@
 
 /*
 
-=item C<static STRING * ucs4_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the C<count> codepoints stored at position C<offset> in string
-C<src> as a new string.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-ucs4_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(ucs4_substr)
-#if PARROT_HAS_ICU
-    return Parrot_str_new_init(interp, (char*)src->strstart + offset * sizeof (UChar32),
-                               count * sizeof (UChar32), src->encoding, 0);
-#else
-    UNUSED(src);
-    UNUSED(offset);
-    UNUSED(count);
-    no_ICU_lib(interp);
-#endif
-}
-
-
-/*
-
 =item C<static UINTVAL ucs4_iter_get(PARROT_INTERP, const STRING *str, const
 String_iter *i, INTVAL offset)>
 
@@ -471,7 +429,7 @@
 
     ucs4_scan,
     ucs4_ord,
-    ucs4_substr,
+    fixed_substr,
 
     encoding_is_cclass,
     encoding_find_cclass,

Modified: trunk/src/string/encoding/utf16.c
==============================================================================
--- trunk/src/string/encoding/utf16.c	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/src/string/encoding/utf16.c	Sat Oct  2 22:26:13 2010	(r49417)
@@ -86,8 +86,8 @@
 PARROT_CANNOT_RETURN_NULL
 static STRING * utf16_substr(PARROT_INTERP,
     ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
+    INTVAL offset,
+    INTVAL length)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
@@ -304,8 +304,8 @@
 
 /*
 
-=item C<static STRING * utf16_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
+=item C<static STRING * utf16_substr(PARROT_INTERP, const STRING *src, INTVAL
+offset, INTVAL length)>
 
 Returns the codepoints in string C<src> at position C<offset> and length
 C<count>.
@@ -317,26 +317,55 @@
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
 static STRING *
-utf16_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+utf16_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
 {
     ASSERT_ARGS(utf16_substr)
 #if PARROT_HAS_ICU
-    UINTVAL pos = 0, start;
     const UChar * const s = (UChar*) src->strstart;
-    STRING * const return_string = Parrot_str_copy(interp, src);
+    const UINTVAL  strlen = STRING_length(src);
+    STRING        *return_string;
+    UINTVAL        pos = 0, start;
+
+    if (offset < 0)
+        offset += strlen;
+
+    if ((UINTVAL)offset >= strlen || length <= 0) {
+        /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
+        if ((UINTVAL)offset == strlen || length <= 0)
+            return Parrot_str_new_noinit(interp, 0);
+
+        Parrot_ex_throw_from_c_args(interp, NULL,
+            EXCEPTION_SUBSTR_OUT_OF_STRING,
+            "Cannot take substr outside string");
+    }
+
+    return_string = Parrot_str_copy(interp, src);
+
+    if (offset == 0 && (UINTVAL)length >= strlen)
+        return return_string;
 
     U16_FWD_N_UNSAFE(s, pos, offset);
+
     start = pos * sizeof (UChar);
-    return_string->strstart = (char *)return_string->strstart + start;
-    U16_FWD_N_UNSAFE(s, pos, count);
-    return_string->bufused = pos * sizeof (UChar) - start;
-    return_string->strlen = count;
+    return_string->strstart += start;
+
+    if ((UINTVAL)length >= strlen - (UINTVAL)offset) {
+        return_string->bufused -= start;
+        return_string->strlen  -= offset;
+    }
+    else {
+        U16_FWD_N_UNSAFE(s, pos, length);
+        return_string->bufused = pos * sizeof (UChar) - start;
+        return_string->strlen  = length;
+    }
+
     return_string->hashval = 0;
+
     return return_string;
 #else
     UNUSED(src);
     UNUSED(offset);
-    UNUSED(count);
+    UNUSED(length);
 
     Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
         "no ICU lib loaded");

Modified: trunk/t/op/string.t
==============================================================================
--- trunk/t/op/string.t	Sat Oct  2 22:25:15 2010	(r49416)
+++ trunk/t/op/string.t	Sat Oct  2 22:26:13 2010	(r49417)
@@ -318,7 +318,7 @@
     .local int r
     null s
     eh = new ['ExceptionHandler']
-    eh.'handle_types'(.EXCEPTION_SUBSTR_OUT_OF_STRING)
+    eh.'handle_types'(.EXCEPTION_UNEXPECTED_NULL)
     set_addr eh, handler
     push_eh eh
     r = 1


More information about the parrot-commits mailing list