[svn:parrot] r49417 - in trunk: include/parrot src src/dynpmc src/io src/ops src/pmc src/string src/string/encoding t/op
nwellnhof at svn.parrot.org
nwellnhof at svn.parrot.org
Sat Oct 2 22:26:14 UTC 2010
Author: nwellnhof
Date: Sat Oct 2 22:26:13 2010
New Revision: 49417
URL: https://trac.parrot.org/parrot/changeset/49417
Log:
[str] Switch to STRING_substr macro
Move the whole 'substr' logic into the string vtable functions
Modified:
trunk/include/parrot/string.h
trunk/src/dynext.c
trunk/src/dynpmc/os.pmc
trunk/src/io/api.c
trunk/src/io/buffer.c
trunk/src/library.c
trunk/src/ops/core_ops.c
trunk/src/ops/string.ops
trunk/src/packfile.c
trunk/src/pmc/codestring.pmc
trunk/src/pmc/scalar.pmc
trunk/src/pmc/string.pmc
trunk/src/pmc/stringbuilder.pmc
trunk/src/spf_render.c
trunk/src/spf_vtable.c
trunk/src/string/api.c
trunk/src/string/encoding/ascii.c
trunk/src/string/encoding/binary.c
trunk/src/string/encoding/latin1.c
trunk/src/string/encoding/shared.c
trunk/src/string/encoding/shared.h
trunk/src/string/encoding/ucs2.c
trunk/src/string/encoding/ucs4.c
trunk/src/string/encoding/utf16.c
trunk/t/op/string.t
Modified: trunk/include/parrot/string.h
==============================================================================
--- trunk/include/parrot/string.h Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/include/parrot/string.h Sat Oct 2 22:26:13 2010 (r49417)
@@ -97,7 +97,7 @@
typedef UINTVAL (*str_vtable_scan_t)(PARROT_INTERP, ARGIN(const STRING *src));
typedef UINTVAL (*str_vtable_ord_t)(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset);
-typedef STRING * (*str_vtable_substr_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
+typedef STRING * (*str_vtable_substr_t)(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL count);
/* character classes */
typedef INTVAL (*str_vtable_is_cclass_t)(PARROT_INTERP, INTVAL, ARGIN(const STRING *src), UINTVAL offset);
Modified: trunk/src/dynext.c
==============================================================================
--- trunk/src/dynext.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/dynext.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -355,7 +355,7 @@
*handle = Parrot_dlopen((char *)lib->strstart + 3, 0);
if (*handle)
- return Parrot_str_substr(interp, lib, 3, lib->strlen - 3);
+ return STRING_substr(interp, lib, 3, lib->strlen - 3);
}
#endif
@@ -363,7 +363,7 @@
#ifdef __CYGWIN__
if (!STRING_length(lib) >= 3 && memcmp(lib->strstart, "lib", 3) == 0) {
path = Parrot_str_concat(interp, CONST_STRING(interp, "cyg"),
- Parrot_str_substr(interp, lib, 3, lib->strlen - 3));
+ STRING_substr(interp, lib, 3, lib->strlen - 3));
*handle = dlopen_string(interp, flags, path);
Modified: trunk/src/dynpmc/os.pmc
==============================================================================
--- trunk/src/dynpmc/os.pmc Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/dynpmc/os.pmc Sat Oct 2 22:26:13 2010 (r49417)
@@ -506,11 +506,8 @@
HANDLE hFind = INVALID_HANDLE_VALUE;
/* Add \* to the directory name and start search. */
- STRING *last_char = Parrot_str_substr(INTERP, path,
- Parrot_str_length(INTERP, path) - 1, 1, NULL, 0);
- int trailing_slash = STRING_equal(INTERP, last_char, string_from_literal(INTERP, "\\"))
- ||
- STRING_equal(INTERP, last_char, string_from_literal(INTERP, "/"));
+ INTVAL last_char = STRING_ord(INTERP, path, -1);
+ int trailing_slash = last_char == '\\' || last_char == '/';
cpath = Parrot_str_to_cstring(INTERP, Parrot_str_concat(INTERP,
path, string_from_literal(INTERP, trailing_slash ? "*" : "\\*"), 0));
hFind = FindFirstFile(cpath, &file_find_data);
Modified: trunk/src/io/api.c
==============================================================================
--- trunk/src/io/api.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/io/api.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -364,7 +364,7 @@
if (offset + read_length > orig_length)
read_length = orig_length - offset;
- result = Parrot_str_substr(interp, string_orig, offset, read_length);
+ result = STRING_substr(interp, string_orig, offset, read_length);
SETATTR_StringHandle_read_offset(interp, pmc, offset + read_length);
}
}
@@ -422,7 +422,7 @@
else
read_length = newline_pos - offset + 1; /* +1 to include the newline */
- result = Parrot_str_substr(interp, result, offset, read_length);
+ result = STRING_substr(interp, result, offset, read_length);
SETATTR_StringHandle_read_offset(interp, pmc, newline_pos + 1);
}
else
Modified: trunk/src/io/buffer.c
==============================================================================
--- trunk/src/io/buffer.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/io/buffer.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -338,7 +338,7 @@
STRING *sf;
s->strlen = s->bufused = current + len;
- sf = Parrot_str_substr(interp, s, current, len);
+ sf = STRING_substr(interp, s, current, len);
got = PIO_READ(interp, filehandle, &sf);
s->strlen = s->bufused = current + got;
Modified: trunk/src/library.c
==============================================================================
--- trunk/src/library.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/library.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -573,7 +573,7 @@
if (!STRING_IS_NULL(test_path)) {
if (Parrot_str_byte_length(interp, test_path) > 4) {
- STRING *orig_ext = Parrot_str_substr(interp, test_path, -4, 4);
+ STRING *orig_ext = STRING_substr(interp, test_path, -4, 4);
/* First try substituting .pbc for the .pir extension */
if (STRING_equal(interp, orig_ext, pir_extension)) {
STRING * const without_ext = Parrot_str_chopn(interp, test_path, 4);
@@ -600,7 +600,7 @@
/* Finally, try substituting .pbc for the .pasm extension. */
if (Parrot_str_byte_length(interp, test_path) > 5) {
- STRING * const orig_ext = Parrot_str_substr(interp, test_path, -5, 5);
+ STRING * const orig_ext = STRING_substr(interp, test_path, -5, 5);
if (STRING_equal(interp, orig_ext, pasm_extension)) {
STRING * const without_ext = Parrot_str_chopn(interp, test_path, 5);
test_path = Parrot_str_concat(interp, without_ext, bytecode_extension);
@@ -866,17 +866,17 @@
++pos_dot;
++pos_sl;
if (pos_sl && pos_dot) {
- stem = Parrot_str_substr(interp, in, pos_sl, pos_dot - pos_sl - 1);
- *wo_ext = Parrot_str_substr(interp, in, 0, pos_dot - 1);
- *ext = Parrot_str_substr(interp, in, pos_dot, len - pos_dot);
+ stem = STRING_substr(interp, in, pos_sl, pos_dot - pos_sl - 1);
+ *wo_ext = STRING_substr(interp, in, 0, pos_dot - 1);
+ *ext = STRING_substr(interp, in, pos_dot, len - pos_dot);
}
else if (pos_dot) {
- stem = Parrot_str_substr(interp, in, 0, pos_dot - 1);
+ stem = STRING_substr(interp, in, 0, pos_dot - 1);
*wo_ext = stem;
- *ext = Parrot_str_substr(interp, in, pos_dot, len - pos_dot);
+ *ext = STRING_substr(interp, in, pos_dot, len - pos_dot);
}
else if (pos_sl) {
- stem = Parrot_str_substr(interp, in, pos_sl, len - pos_sl);
+ stem = STRING_substr(interp, in, pos_sl, len - pos_sl);
*wo_ext = in;
*ext = STRINGNULL;
}
Modified: trunk/src/ops/core_ops.c
==============================================================================
--- trunk/src/ops/core_ops.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/ops/core_ops.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -22537,7 +22537,7 @@
Parrot_substr_s_s_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
const INTVAL len = Parrot_str_byte_length(interp, SREG(2));
- SREG(1) = Parrot_str_substr(interp, SREG(2), IREG(3), len);
+ SREG(1) = STRING_substr(interp, SREG(2), IREG(3), len);
return (opcode_t *)cur_opcode + 4;}
@@ -22545,7 +22545,7 @@
Parrot_substr_s_sc_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
const INTVAL len = Parrot_str_byte_length(interp, SCONST(2));
- SREG(1) = Parrot_str_substr(interp, SCONST(2), IREG(3), len);
+ SREG(1) = STRING_substr(interp, SCONST(2), IREG(3), len);
return (opcode_t *)cur_opcode + 4;}
@@ -22553,7 +22553,7 @@
Parrot_substr_s_s_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
const INTVAL len = Parrot_str_byte_length(interp, SREG(2));
- SREG(1) = Parrot_str_substr(interp, SREG(2), ICONST(3), len);
+ SREG(1) = STRING_substr(interp, SREG(2), ICONST(3), len);
return (opcode_t *)cur_opcode + 4;}
@@ -22561,63 +22561,63 @@
Parrot_substr_s_sc_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
const INTVAL len = Parrot_str_byte_length(interp, SCONST(2));
- SREG(1) = Parrot_str_substr(interp, SCONST(2), ICONST(3), len);
+ SREG(1) = STRING_substr(interp, SCONST(2), ICONST(3), len);
return (opcode_t *)cur_opcode + 4;}
opcode_t *
Parrot_substr_s_s_i_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SREG(2), IREG(3), IREG(4));
+ SREG(1) = STRING_substr(interp, SREG(2), IREG(3), IREG(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_sc_i_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SCONST(2), IREG(3), IREG(4));
+ SREG(1) = STRING_substr(interp, SCONST(2), IREG(3), IREG(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_s_ic_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SREG(2), ICONST(3), IREG(4));
+ SREG(1) = STRING_substr(interp, SREG(2), ICONST(3), IREG(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_sc_ic_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SCONST(2), ICONST(3), IREG(4));
+ SREG(1) = STRING_substr(interp, SCONST(2), ICONST(3), IREG(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_s_i_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SREG(2), IREG(3), ICONST(4));
+ SREG(1) = STRING_substr(interp, SREG(2), IREG(3), ICONST(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_sc_i_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SCONST(2), IREG(3), ICONST(4));
+ SREG(1) = STRING_substr(interp, SCONST(2), IREG(3), ICONST(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_s_ic_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SREG(2), ICONST(3), ICONST(4));
+ SREG(1) = STRING_substr(interp, SREG(2), ICONST(3), ICONST(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_sc_ic_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SCONST(2), ICONST(3), ICONST(4));
+ SREG(1) = STRING_substr(interp, SCONST(2), ICONST(3), ICONST(4));
return (opcode_t *)cur_opcode + 5;}
Modified: trunk/src/ops/string.ops
==============================================================================
--- trunk/src/ops/string.ops Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/ops/string.ops Sat Oct 2 22:26:13 2010 (r49417)
@@ -252,11 +252,11 @@
inline op substr(out STR, in STR, in INT) :base_core {
const INTVAL len = Parrot_str_byte_length(interp, $2);
- $1 = Parrot_str_substr(interp, $2, $3, len);
+ $1 = STRING_substr(interp, $2, $3, len);
}
inline op substr(out STR, in STR, in INT, in INT) :base_core {
- $1 = Parrot_str_substr(interp, $2, $3, $4);
+ $1 = STRING_substr(interp, $2, $3, $4);
}
inline op substr(out STR, invar PMC, in INT, in INT) :base_core {
Modified: trunk/src/packfile.c
==============================================================================
--- trunk/src/packfile.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/packfile.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -2903,7 +2903,7 @@
/* find seg e.g. CODE_DB => CODE and attach it */
str_len = Parrot_str_length(interp, debug->base.name);
- code_name = Parrot_str_substr(interp, debug->base.name, 0, str_len - 3);
+ code_name = STRING_substr(interp, debug->base.name, 0, str_len - 3);
code = (PackFile_ByteCode *)PackFile_find_segment(interp, self->dir, code_name, 0);
if (!code || code->base.type != PF_BYTEC_SEG) {
@@ -3777,7 +3777,7 @@
/* Need to associate this segment with the applicable code segment. */
str_len = Parrot_str_length(interp, self->base.name);
- code_name = Parrot_str_substr(interp, self->base.name, 0, str_len - 4);
+ code_name = STRING_substr(interp, self->base.name, 0, str_len - 4);
code = (PackFile_ByteCode *)PackFile_find_segment(interp,
self->base.dir, code_name, 0);
@@ -4235,7 +4235,7 @@
/* Get the base path of the located module */
parrot_split_path_ext(interp, path, &found_path, &found_ext);
name_length = Parrot_str_length(interp, lang_name);
- found_path = Parrot_str_substr(interp, found_path, 0,
+ found_path = STRING_substr(interp, found_path, 0,
Parrot_str_length(interp, found_path)-name_length);
Parrot_lib_add_path(interp, Parrot_str_concat(interp, found_path, CONST_STRING(interp, "include/")),
Modified: trunk/src/pmc/codestring.pmc
==============================================================================
--- trunk/src/pmc/codestring.pmc Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/pmc/codestring.pmc Sat Oct 2 22:26:13 2010 (r49417)
@@ -119,7 +119,7 @@
if (pos < 0)
break;
- key = Parrot_str_substr(INTERP, fmt, pos+1, 1);
+ key = STRING_substr(INTERP, fmt, pos+1, 1);
if (VTABLE_exists_keyed_str(INTERP, hash, key)) {
repl = VTABLE_get_string_keyed_str(INTERP, hash, key);
Modified: trunk/src/pmc/scalar.pmc
==============================================================================
--- trunk/src/pmc/scalar.pmc Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/pmc/scalar.pmc Sat Oct 2 22:26:13 2010 (r49417)
@@ -831,7 +831,7 @@
*/
VTABLE STRING *substr_str(INTVAL offset, INTVAL length) {
- return Parrot_str_substr(INTERP, VTABLE_get_string(INTERP, SELF),
+ return STRING_substr(INTERP, VTABLE_get_string(INTERP, SELF),
offset, length);
}
Modified: trunk/src/pmc/string.pmc
==============================================================================
--- trunk/src/pmc/string.pmc Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/pmc/string.pmc Sat Oct 2 22:26:13 2010 (r49417)
@@ -365,7 +365,7 @@
VTABLE void substr(INTVAL offset, INTVAL length, PMC *dest) {
STRING *str_val, *s2;
GET_ATTR_str_val(INTERP, SELF, str_val);
- s2 = Parrot_str_substr(INTERP, str_val, offset, length);
+ s2 = STRING_substr(INTERP, str_val, offset, length);
VTABLE_set_string_native(INTERP, dest, s2);
}
@@ -382,7 +382,7 @@
VTABLE STRING *substr_str(INTVAL offset, INTVAL length) {
STRING *str_val;
GET_ATTR_str_val(INTERP, SELF, str_val);
- return Parrot_str_substr(INTERP, str_val, offset, length);
+ return STRING_substr(INTERP, str_val, offset, length);
}
/*
@@ -437,7 +437,7 @@
VTABLE STRING *get_string_keyed_int(INTVAL pos) {
STRING *str_val;
GET_ATTR_str_val(INTERP, SELF, str_val);
- return Parrot_str_substr(INTERP, str_val, pos, 1);
+ return STRING_substr(INTERP, str_val, pos, 1);
}
VTABLE INTVAL get_integer_keyed(PMC *key) {
Modified: trunk/src/pmc/stringbuilder.pmc
==============================================================================
--- trunk/src/pmc/stringbuilder.pmc Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/pmc/stringbuilder.pmc Sat Oct 2 22:26:13 2010 (r49417)
@@ -320,7 +320,7 @@
/* We must clone here becase we can reallocate buffer behind the scene... */
/* TODO Optimize it to avoid creation of redundant STRING */
return Parrot_str_clone(INTERP,
- Parrot_str_substr(INTERP, buffer, offset, length));
+ STRING_substr(INTERP, buffer, offset, length));
}
/*
@@ -369,7 +369,7 @@
else {
/* remaining string can be added as is. */
VTABLE_push_string(INTERP, stringbuilder,
- Parrot_str_substr(INTERP, fmt, pos,
+ STRING_substr(INTERP, fmt, pos,
Parrot_str_length(INTERP, fmt) -pos));
}
break;
@@ -377,13 +377,13 @@
else {
/* slurp up to just before the % sign... */
VTABLE_push_string(INTERP, stringbuilder,
- Parrot_str_substr(INTERP, fmt, pos, percentPos - pos));
+ STRING_substr(INTERP, fmt, pos, percentPos - pos));
/* skip the % sign */
pos = percentPos + 1 ;
}
/* key is always a single character */
- key = Parrot_str_substr(INTERP, fmt, pos++, 1);
+ key = STRING_substr(INTERP, fmt, pos++, 1);
if (VTABLE_exists_keyed_str(INTERP, hash, key)) {
VTABLE_push_string(INTERP, stringbuilder,
@@ -411,7 +411,7 @@
else {
/* %foo has no special meaning, pass it through unchanged */
VTABLE_push_string(INTERP, stringbuilder,
- Parrot_str_substr(INTERP, fmt, pos-2, 2));
+ STRING_substr(INTERP, fmt, pos-2, 2));
}
}
Modified: trunk/src/spf_render.c
==============================================================================
--- trunk/src/spf_render.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/spf_render.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -201,7 +201,7 @@
STRING_ord(interp, str, 0) == '+')) {
STRING *temp = NULL;
STRING *ignored;
- temp = Parrot_str_substr(interp, str, 1, len-1);
+ temp = STRING_substr(interp, str, 1, len-1);
str = Parrot_str_chopn(interp, str, -1);
str = Parrot_str_concat(interp, str, fill);
str = Parrot_str_concat(interp, str, temp);
@@ -431,7 +431,7 @@
for (i = 0; i < pat_len; ++i) {
if (STRING_ord(interp, pat, i) == '%') { /* % */
if (len) {
- substr = Parrot_str_substr(interp, pat, old, len);
+ substr = STRING_substr(interp, pat, old, len);
/* XXX This shouldn't modify targ the pointer */
targ = Parrot_str_concat(interp, targ, substr);
}
@@ -941,7 +941,7 @@
}
}
if (len) {
- substr = Parrot_str_substr(interp, pat, old, len);
+ substr = STRING_substr(interp, pat, old, len);
targ = Parrot_str_concat(interp, targ, substr);
}
Modified: trunk/src/spf_vtable.c
==============================================================================
--- trunk/src/spf_vtable.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/spf_vtable.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -452,7 +452,7 @@
++obj->index;
s = VTABLE_get_string(interp, tmp);
- return Parrot_str_substr(interp, s, 0, 1);
+ return STRING_substr(interp, s, 0, 1);
}
/*
Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/string/api.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -958,7 +958,12 @@
offset, INTVAL length)>
Returns substring of length C<length> from C<offset> from the specified
-Parrot string.
+Parrot string. If C<offset> is negative, it counts from the end of the
+string. Returns the empty string if C<offset> equals the length of the
+string. Throws an exception if C<src> is null or C<offset> is out of bounds.
+Truncates C<length> if it extends beyond the end of the string.
+
+Identical to the STRING_substr macro.
=cut
@@ -972,35 +977,9 @@
ARGIN_NULLOK(const STRING *src), INTVAL offset, INTVAL length)
{
ASSERT_ARGS(Parrot_str_substr)
- const UINTVAL strlen = STRING_length(src);
- UINTVAL maxlen;
-
- if (offset < 0)
- offset += strlen;
-
- if ((UINTVAL)offset >= strlen || length <= 0) {
- if (STRING_IS_NULL(src))
- Parrot_ex_throw_from_c_args(interp, NULL,
- EXCEPTION_SUBSTR_OUT_OF_STRING, "Cannot substr on a null string");
-
- /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
- if ((UINTVAL)offset == strlen || length <= 0)
- return Parrot_str_new_noinit(interp, 0);
-
- Parrot_ex_throw_from_c_args(interp, NULL,
- EXCEPTION_SUBSTR_OUT_OF_STRING,
- "Cannot take substr outside string");
- }
-
- ASSERT_STRING_SANITY(src);
-
- maxlen = strlen - offset;
-
- if ((UINTVAL)length > maxlen)
- length = maxlen;
- if (length == strlen && !offset)
- return (STRING *)src;
+ if (src == NULL)
+ src = STRINGNULL;
return STRING_substr(interp, src, offset, length);
}
@@ -1257,7 +1236,7 @@
if (n >= 0)
end += STRING_length(s);
- return Parrot_str_substr(interp, s, 0, end);
+ return STRING_substr(interp, s, 0, end);
}
Modified: trunk/src/string/encoding/ascii.c
==============================================================================
--- trunk/src/string/encoding/ascii.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/string/encoding/ascii.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -518,7 +518,7 @@
fixed8_scan,
fixed8_ord,
- fixed8_substr,
+ fixed_substr,
ascii_is_cclass,
ascii_find_cclass,
Modified: trunk/src/string/encoding/binary.c
==============================================================================
--- trunk/src/string/encoding/binary.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/string/encoding/binary.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -240,7 +240,7 @@
fixed8_scan,
fixed8_ord,
- fixed8_substr,
+ fixed_substr,
binary_is_cclass,
binary_find_cclass,
Modified: trunk/src/string/encoding/latin1.c
==============================================================================
--- trunk/src/string/encoding/latin1.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/string/encoding/latin1.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -545,7 +545,7 @@
fixed8_scan,
fixed8_ord,
- fixed8_substr,
+ fixed_substr,
latin1_is_cclass,
latin1_find_cclass,
Modified: trunk/src/string/encoding/shared.c
==============================================================================
--- trunk/src/string/encoding/shared.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/string/encoding/shared.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -378,8 +378,8 @@
/*
-=item C<STRING * encoding_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
+=item C<STRING * encoding_substr(PARROT_INTERP, const STRING *src, INTVAL
+offset, INTVAL length)>
Returns the codepoints in string C<src> at position C<offset> and length
C<count>.
@@ -390,28 +390,51 @@
PARROT_CANNOT_RETURN_NULL
STRING *
-encoding_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+encoding_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
{
ASSERT_ARGS(encoding_substr)
-
- STRING * const return_string = Parrot_str_copy(interp, src);
+ const UINTVAL strlen = STRING_length(src);
+ STRING *return_string;
String_iter iter;
UINTVAL start;
+ if (offset < 0)
+ offset += strlen;
+
+ if ((UINTVAL)offset >= strlen || length <= 0) {
+ /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
+ if ((UINTVAL)offset == strlen || length <= 0)
+ return Parrot_str_new_constant(interp, "");
+
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_SUBSTR_OUT_OF_STRING,
+ "Cannot take substr outside string");
+ }
+
+ return_string = Parrot_str_copy(interp, src);
+
+ if (offset == 0 && (UINTVAL)length >= strlen)
+ return return_string;
+
STRING_ITER_INIT(interp, &iter);
if (offset)
STRING_iter_set_position(interp, src, &iter, offset);
- start = iter.bytepos;
- return_string->strstart = (char *)return_string->strstart + start;
+ start = iter.bytepos;
+ return_string->strstart += start;
- if (count)
- STRING_iter_set_position(interp, src, &iter, offset + count);
+ if ((UINTVAL)length >= strlen - (UINTVAL)offset) {
+ return_string->bufused -= start;
+ return_string->strlen -= offset;
+ }
+ else {
+ STRING_iter_set_position(interp, src, &iter, offset + length);
+ return_string->bufused = iter.bytepos - start;
+ return_string->strlen = length;
+ }
- return_string->bufused = iter.bytepos - start;
- return_string->strlen = count;
- return_string->hashval = 0;
+ return_string->hashval = 0;
return return_string;
}
@@ -823,11 +846,11 @@
/*
-=item C<STRING * fixed8_substr(PARROT_INTERP, const STRING *src, UINTVAL offset,
-UINTVAL count)>
+=item C<STRING * fixed_substr(PARROT_INTERP, const STRING *src, INTVAL offset,
+INTVAL length)>
Returns the codepoints in string C<src> at position C<offset> and length
-C<count>.
+C<count>. Works for all fixed size encodings.
=cut
@@ -836,16 +859,41 @@
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
STRING *
-fixed8_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+fixed_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
{
- ASSERT_ARGS(fixed8_substr)
- STRING * const return_string = Parrot_str_copy(interp, src);
+ ASSERT_ARGS(fixed_substr)
+ const UINTVAL strlen = STRING_length(src);
+ STRING *return_string;
+ UINTVAL maxlen, bytes_per_codepoint;
+
+ if (offset < 0)
+ offset += strlen;
+
+ if ((UINTVAL)offset >= strlen || length <= 0) {
+ /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
+ if ((UINTVAL)offset == strlen || length <= 0)
+ return Parrot_str_new_constant(interp, "");
+
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_SUBSTR_OUT_OF_STRING,
+ "Cannot take substr outside string");
+ }
+
+ return_string = Parrot_str_copy(interp, src);
+
+ if (offset == 0 && (UINTVAL)length >= strlen)
+ return return_string;
+
+ bytes_per_codepoint = src->encoding->max_bytes_per_codepoint;
+ maxlen = strlen - offset;
+
+ if ((UINTVAL)length > maxlen)
+ length = maxlen;
- return_string->encoding = src->encoding;
- return_string->strstart = (char *)return_string->strstart + offset;
- return_string->bufused = count;
- return_string->strlen = count;
- return_string->hashval = 0;
+ return_string->strstart += offset * bytes_per_codepoint;
+ return_string->bufused = length * bytes_per_codepoint;
+ return_string->strlen = length;
+ return_string->hashval = 0;
return return_string;
}
Modified: trunk/src/string/encoding/shared.h
==============================================================================
--- trunk/src/string/encoding/shared.h Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/string/encoding/shared.h Sat Oct 2 22:26:13 2010 (r49417)
@@ -105,8 +105,8 @@
PARROT_CANNOT_RETURN_NULL
STRING * encoding_substr(PARROT_INTERP,
ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
+ INTVAL offset,
+ INTVAL length)
__attribute__nonnull__(1)
__attribute__nonnull__(2);
@@ -207,10 +207,10 @@
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
-STRING * fixed8_substr(PARROT_INTERP,
+STRING * fixed_substr(PARROT_INTERP,
ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
+ INTVAL offset,
+ INTVAL length)
__attribute__nonnull__(1)
__attribute__nonnull__(2);
@@ -339,7 +339,7 @@
#define ASSERT_ARGS_fixed8_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_fixed8_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+#define ASSERT_ARGS_fixed_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_unicode_chr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
Modified: trunk/src/string/encoding/ucs2.c
==============================================================================
--- trunk/src/string/encoding/ucs2.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/string/encoding/ucs2.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -97,15 +97,6 @@
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
-static STRING * ucs2_substr(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
static STRING * ucs2_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
__attribute__nonnull__(1)
__attribute__nonnull__(2);
@@ -139,9 +130,6 @@
#define ASSERT_ARGS_ucs2_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ucs2_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_ucs2_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
@@ -242,39 +230,6 @@
#endif
}
-/*
-
-=item C<static STRING * ucs2_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the codepoints in string C<src> at position C<offset> and length
-C<count>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-ucs2_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(ucs2_substr)
- STRING * const return_string = Parrot_str_copy(interp, src);
-
-#if PARROT_HAS_ICU
- return_string->strstart = (char*)src->strstart + offset * sizeof (UChar);
- return_string->bufused = count * sizeof (UChar);
- return_string->strlen = count;
- return_string->hashval = 0;
- return return_string;
-#else
- UNUSED(src);
- UNUSED(offset);
- UNUSED(count);
- no_ICU_lib(interp);
-#endif
-}
/*
@@ -471,7 +426,7 @@
ucs2_scan,
ucs2_ord,
- ucs2_substr,
+ fixed_substr,
encoding_is_cclass,
encoding_find_cclass,
Modified: trunk/src/string/encoding/ucs4.c
==============================================================================
--- trunk/src/string/encoding/ucs4.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/string/encoding/ucs4.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -97,15 +97,6 @@
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
-static STRING * ucs4_substr(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
static STRING * ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
__attribute__nonnull__(1)
__attribute__nonnull__(2);
@@ -139,9 +130,6 @@
#define ASSERT_ARGS_ucs4_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ucs4_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_ucs4_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
@@ -258,36 +246,6 @@
/*
-=item C<static STRING * ucs4_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the C<count> codepoints stored at position C<offset> in string
-C<src> as a new string.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-ucs4_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(ucs4_substr)
-#if PARROT_HAS_ICU
- return Parrot_str_new_init(interp, (char*)src->strstart + offset * sizeof (UChar32),
- count * sizeof (UChar32), src->encoding, 0);
-#else
- UNUSED(src);
- UNUSED(offset);
- UNUSED(count);
- no_ICU_lib(interp);
-#endif
-}
-
-
-/*
-
=item C<static UINTVAL ucs4_iter_get(PARROT_INTERP, const STRING *str, const
String_iter *i, INTVAL offset)>
@@ -471,7 +429,7 @@
ucs4_scan,
ucs4_ord,
- ucs4_substr,
+ fixed_substr,
encoding_is_cclass,
encoding_find_cclass,
Modified: trunk/src/string/encoding/utf16.c
==============================================================================
--- trunk/src/string/encoding/utf16.c Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/src/string/encoding/utf16.c Sat Oct 2 22:26:13 2010 (r49417)
@@ -86,8 +86,8 @@
PARROT_CANNOT_RETURN_NULL
static STRING * utf16_substr(PARROT_INTERP,
ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
+ INTVAL offset,
+ INTVAL length)
__attribute__nonnull__(1)
__attribute__nonnull__(2);
@@ -304,8 +304,8 @@
/*
-=item C<static STRING * utf16_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
+=item C<static STRING * utf16_substr(PARROT_INTERP, const STRING *src, INTVAL
+offset, INTVAL length)>
Returns the codepoints in string C<src> at position C<offset> and length
C<count>.
@@ -317,26 +317,55 @@
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
-utf16_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+utf16_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
{
ASSERT_ARGS(utf16_substr)
#if PARROT_HAS_ICU
- UINTVAL pos = 0, start;
const UChar * const s = (UChar*) src->strstart;
- STRING * const return_string = Parrot_str_copy(interp, src);
+ const UINTVAL strlen = STRING_length(src);
+ STRING *return_string;
+ UINTVAL pos = 0, start;
+
+ if (offset < 0)
+ offset += strlen;
+
+ if ((UINTVAL)offset >= strlen || length <= 0) {
+ /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
+ if ((UINTVAL)offset == strlen || length <= 0)
+ return Parrot_str_new_noinit(interp, 0);
+
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_SUBSTR_OUT_OF_STRING,
+ "Cannot take substr outside string");
+ }
+
+ return_string = Parrot_str_copy(interp, src);
+
+ if (offset == 0 && (UINTVAL)length >= strlen)
+ return return_string;
U16_FWD_N_UNSAFE(s, pos, offset);
+
start = pos * sizeof (UChar);
- return_string->strstart = (char *)return_string->strstart + start;
- U16_FWD_N_UNSAFE(s, pos, count);
- return_string->bufused = pos * sizeof (UChar) - start;
- return_string->strlen = count;
+ return_string->strstart += start;
+
+ if ((UINTVAL)length >= strlen - (UINTVAL)offset) {
+ return_string->bufused -= start;
+ return_string->strlen -= offset;
+ }
+ else {
+ U16_FWD_N_UNSAFE(s, pos, length);
+ return_string->bufused = pos * sizeof (UChar) - start;
+ return_string->strlen = length;
+ }
+
return_string->hashval = 0;
+
return return_string;
#else
UNUSED(src);
UNUSED(offset);
- UNUSED(count);
+ UNUSED(length);
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
"no ICU lib loaded");
Modified: trunk/t/op/string.t
==============================================================================
--- trunk/t/op/string.t Sat Oct 2 22:25:15 2010 (r49416)
+++ trunk/t/op/string.t Sat Oct 2 22:26:13 2010 (r49417)
@@ -318,7 +318,7 @@
.local int r
null s
eh = new ['ExceptionHandler']
- eh.'handle_types'(.EXCEPTION_SUBSTR_OUT_OF_STRING)
+ eh.'handle_types'(.EXCEPTION_UNEXPECTED_NULL)
set_addr eh, handler
push_eh eh
r = 1
More information about the parrot-commits mailing list