[svn:parrot] r49223 - in branches/string_macros: include/parrot src src/dynpmc src/io src/ops src/pmc src/string src/string/encoding t/op
nwellnhof at svn.parrot.org
nwellnhof at svn.parrot.org
Wed Sep 22 01:19:27 UTC 2010
Author: nwellnhof
Date: Wed Sep 22 01:19:27 2010
New Revision: 49223
URL: https://trac.parrot.org/parrot/changeset/49223
Log:
[str] Switch to STRING_substr macro
Move the whole 'substr' logic into the string vtable functions
Modified:
branches/string_macros/include/parrot/string.h
branches/string_macros/src/dynext.c
branches/string_macros/src/dynpmc/os.pmc
branches/string_macros/src/io/api.c
branches/string_macros/src/library.c
branches/string_macros/src/ops/core_ops.c
branches/string_macros/src/ops/string.ops
branches/string_macros/src/packfile.c
branches/string_macros/src/pmc/codestring.pmc
branches/string_macros/src/pmc/scalar.pmc
branches/string_macros/src/pmc/string.pmc
branches/string_macros/src/pmc/stringbuilder.pmc
branches/string_macros/src/spf_render.c
branches/string_macros/src/spf_vtable.c
branches/string_macros/src/string/api.c
branches/string_macros/src/string/encoding/ascii.c
branches/string_macros/src/string/encoding/binary.c
branches/string_macros/src/string/encoding/latin1.c
branches/string_macros/src/string/encoding/shared.c
branches/string_macros/src/string/encoding/shared.h
branches/string_macros/src/string/encoding/ucs2.c
branches/string_macros/src/string/encoding/ucs4.c
branches/string_macros/src/string/encoding/utf16.c
branches/string_macros/t/op/string.t
Modified: branches/string_macros/include/parrot/string.h
==============================================================================
--- branches/string_macros/include/parrot/string.h Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/include/parrot/string.h Wed Sep 22 01:19:27 2010 (r49223)
@@ -97,7 +97,7 @@
typedef UINTVAL (*str_vtable_scan_t)(PARROT_INTERP, ARGIN(const STRING *src));
typedef UINTVAL (*str_vtable_ord_t)(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset);
-typedef STRING * (*str_vtable_substr_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
+typedef STRING * (*str_vtable_substr_t)(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL count);
/* character classes */
typedef INTVAL (*str_vtable_is_cclass_t)(PARROT_INTERP, INTVAL, ARGIN(const STRING *src), UINTVAL offset);
Modified: branches/string_macros/src/dynext.c
==============================================================================
--- branches/string_macros/src/dynext.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/dynext.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -355,7 +355,7 @@
*handle = Parrot_dlopen((char *)lib->strstart + 3, 0);
if (*handle)
- return Parrot_str_substr(interp, lib, 3, lib->strlen - 3);
+ return STRING_substr(interp, lib, 3, lib->strlen - 3);
}
#endif
@@ -363,7 +363,7 @@
#ifdef __CYGWIN__
if (!STRING_length(lib) >= 3 && memcmp(lib->strstart, "lib", 3) == 0) {
path = Parrot_str_concat(interp, CONST_STRING(interp, "cyg"),
- Parrot_str_substr(interp, lib, 3, lib->strlen - 3));
+ STRING_substr(interp, lib, 3, lib->strlen - 3));
*handle = dlopen_string(interp, flags, path);
Modified: branches/string_macros/src/dynpmc/os.pmc
==============================================================================
--- branches/string_macros/src/dynpmc/os.pmc Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/dynpmc/os.pmc Wed Sep 22 01:19:27 2010 (r49223)
@@ -506,11 +506,8 @@
HANDLE hFind = INVALID_HANDLE_VALUE;
/* Add \* to the directory name and start search. */
- STRING *last_char = Parrot_str_substr(INTERP, path,
- Parrot_str_length(INTERP, path) - 1, 1, NULL, 0);
- int trailing_slash = STRING_equal(INTERP, last_char, string_from_literal(INTERP, "\\"))
- ||
- STRING_equal(INTERP, last_char, string_from_literal(INTERP, "/"));
+ INTVAL last_char = STRING_ord(INTERP, path, -1);
+ int trailing_slash = last_char == '\\' || last_char == '/';
cpath = Parrot_str_to_cstring(INTERP, Parrot_str_concat(INTERP,
path, string_from_literal(INTERP, trailing_slash ? "*" : "\\*"), 0));
hFind = FindFirstFile(cpath, &file_find_data);
Modified: branches/string_macros/src/io/api.c
==============================================================================
--- branches/string_macros/src/io/api.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/io/api.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -364,7 +364,7 @@
if (offset + read_length > orig_length)
read_length = orig_length - offset;
- result = Parrot_str_substr(interp, string_orig, offset, read_length);
+ result = STRING_substr(interp, string_orig, offset, read_length);
SETATTR_StringHandle_read_offset(interp, pmc, offset + read_length);
}
}
@@ -422,7 +422,7 @@
else
read_length = newline_pos - offset + 1; /* +1 to include the newline */
- result = Parrot_str_substr(interp, result, offset, read_length);
+ result = STRING_substr(interp, result, offset, read_length);
SETATTR_StringHandle_read_offset(interp, pmc, newline_pos + 1);
}
else
Modified: branches/string_macros/src/library.c
==============================================================================
--- branches/string_macros/src/library.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/library.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -573,7 +573,7 @@
if (!STRING_IS_NULL(test_path)) {
if (Parrot_str_byte_length(interp, test_path) > 4) {
- STRING *orig_ext = Parrot_str_substr(interp, test_path, -4, 4);
+ STRING *orig_ext = STRING_substr(interp, test_path, -4, 4);
/* First try substituting .pbc for the .pir extension */
if (STRING_equal(interp, orig_ext, pir_extension)) {
STRING * const without_ext = Parrot_str_chopn(interp, test_path, 4);
@@ -600,7 +600,7 @@
/* Finally, try substituting .pbc for the .pasm extension. */
if (Parrot_str_byte_length(interp, test_path) > 5) {
- STRING * const orig_ext = Parrot_str_substr(interp, test_path, -5, 5);
+ STRING * const orig_ext = STRING_substr(interp, test_path, -5, 5);
if (STRING_equal(interp, orig_ext, pasm_extension)) {
STRING * const without_ext = Parrot_str_chopn(interp, test_path, 5);
test_path = Parrot_str_concat(interp, without_ext, bytecode_extension);
@@ -866,17 +866,17 @@
++pos_dot;
++pos_sl;
if (pos_sl && pos_dot) {
- stem = Parrot_str_substr(interp, in, pos_sl, pos_dot - pos_sl - 1);
- *wo_ext = Parrot_str_substr(interp, in, 0, pos_dot - 1);
- *ext = Parrot_str_substr(interp, in, pos_dot, len - pos_dot);
+ stem = STRING_substr(interp, in, pos_sl, pos_dot - pos_sl - 1);
+ *wo_ext = STRING_substr(interp, in, 0, pos_dot - 1);
+ *ext = STRING_substr(interp, in, pos_dot, len - pos_dot);
}
else if (pos_dot) {
- stem = Parrot_str_substr(interp, in, 0, pos_dot - 1);
+ stem = STRING_substr(interp, in, 0, pos_dot - 1);
*wo_ext = stem;
- *ext = Parrot_str_substr(interp, in, pos_dot, len - pos_dot);
+ *ext = STRING_substr(interp, in, pos_dot, len - pos_dot);
}
else if (pos_sl) {
- stem = Parrot_str_substr(interp, in, pos_sl, len - pos_sl);
+ stem = STRING_substr(interp, in, pos_sl, len - pos_sl);
*wo_ext = in;
*ext = STRINGNULL;
}
Modified: branches/string_macros/src/ops/core_ops.c
==============================================================================
--- branches/string_macros/src/ops/core_ops.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/ops/core_ops.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -22534,7 +22534,7 @@
Parrot_substr_s_s_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
const INTVAL len = Parrot_str_byte_length(interp, SREG(2));
- SREG(1) = Parrot_str_substr(interp, SREG(2), IREG(3), len);
+ SREG(1) = STRING_substr(interp, SREG(2), IREG(3), len);
return (opcode_t *)cur_opcode + 4;}
@@ -22542,7 +22542,7 @@
Parrot_substr_s_sc_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
const INTVAL len = Parrot_str_byte_length(interp, CONST(2).u.string);
- SREG(1) = Parrot_str_substr(interp, CONST(2).u.string, IREG(3), len);
+ SREG(1) = STRING_substr(interp, CONST(2).u.string, IREG(3), len);
return (opcode_t *)cur_opcode + 4;}
@@ -22550,7 +22550,7 @@
Parrot_substr_s_s_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
const INTVAL len = Parrot_str_byte_length(interp, SREG(2));
- SREG(1) = Parrot_str_substr(interp, SREG(2), cur_opcode[3], len);
+ SREG(1) = STRING_substr(interp, SREG(2), cur_opcode[3], len);
return (opcode_t *)cur_opcode + 4;}
@@ -22558,63 +22558,63 @@
Parrot_substr_s_sc_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
const INTVAL len = Parrot_str_byte_length(interp, CONST(2).u.string);
- SREG(1) = Parrot_str_substr(interp, CONST(2).u.string, cur_opcode[3], len);
+ SREG(1) = STRING_substr(interp, CONST(2).u.string, cur_opcode[3], len);
return (opcode_t *)cur_opcode + 4;}
opcode_t *
Parrot_substr_s_s_i_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SREG(2), IREG(3), IREG(4));
+ SREG(1) = STRING_substr(interp, SREG(2), IREG(3), IREG(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_sc_i_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, CONST(2).u.string, IREG(3), IREG(4));
+ SREG(1) = STRING_substr(interp, CONST(2).u.string, IREG(3), IREG(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_s_ic_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SREG(2), cur_opcode[3], IREG(4));
+ SREG(1) = STRING_substr(interp, SREG(2), cur_opcode[3], IREG(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_sc_ic_i(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, CONST(2).u.string, cur_opcode[3], IREG(4));
+ SREG(1) = STRING_substr(interp, CONST(2).u.string, cur_opcode[3], IREG(4));
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_s_i_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SREG(2), IREG(3), cur_opcode[4]);
+ SREG(1) = STRING_substr(interp, SREG(2), IREG(3), cur_opcode[4]);
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_sc_i_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, CONST(2).u.string, IREG(3), cur_opcode[4]);
+ SREG(1) = STRING_substr(interp, CONST(2).u.string, IREG(3), cur_opcode[4]);
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_s_ic_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, SREG(2), cur_opcode[3], cur_opcode[4]);
+ SREG(1) = STRING_substr(interp, SREG(2), cur_opcode[3], cur_opcode[4]);
return (opcode_t *)cur_opcode + 5;}
opcode_t *
Parrot_substr_s_sc_ic_ic(opcode_t *cur_opcode, PARROT_INTERP) {
const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
- SREG(1) = Parrot_str_substr(interp, CONST(2).u.string, cur_opcode[3], cur_opcode[4]);
+ SREG(1) = STRING_substr(interp, CONST(2).u.string, cur_opcode[3], cur_opcode[4]);
return (opcode_t *)cur_opcode + 5;}
Modified: branches/string_macros/src/ops/string.ops
==============================================================================
--- branches/string_macros/src/ops/string.ops Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/ops/string.ops Wed Sep 22 01:19:27 2010 (r49223)
@@ -252,11 +252,11 @@
inline op substr(out STR, in STR, in INT) :base_core {
const INTVAL len = Parrot_str_byte_length(interp, $2);
- $1 = Parrot_str_substr(interp, $2, $3, len);
+ $1 = STRING_substr(interp, $2, $3, len);
}
inline op substr(out STR, in STR, in INT, in INT) :base_core {
- $1 = Parrot_str_substr(interp, $2, $3, $4);
+ $1 = STRING_substr(interp, $2, $3, $4);
}
inline op substr(out STR, invar PMC, in INT, in INT) :base_core {
Modified: branches/string_macros/src/packfile.c
==============================================================================
--- branches/string_macros/src/packfile.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/packfile.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -3040,7 +3040,7 @@
/* find seg e.g. CODE_DB => CODE and attach it */
str_len = Parrot_str_length(interp, debug->base.name);
- code_name = Parrot_str_substr(interp, debug->base.name, 0, str_len - 3);
+ code_name = STRING_substr(interp, debug->base.name, 0, str_len - 3);
code = (PackFile_ByteCode *)PackFile_find_segment(interp, self->dir, code_name, 0);
if (!code || code->base.type != PF_BYTEC_SEG) {
@@ -4473,7 +4473,7 @@
/* Need to associate this segment with the applicable code segment. */
str_len = Parrot_str_length(interp, self->base.name);
- code_name = Parrot_str_substr(interp, self->base.name, 0, str_len - 4);
+ code_name = STRING_substr(interp, self->base.name, 0, str_len - 4);
code = (PackFile_ByteCode *)PackFile_find_segment(interp,
self->base.dir, code_name, 0);
@@ -4930,7 +4930,7 @@
/* Get the base path of the located module */
parrot_split_path_ext(interp, path, &found_path, &found_ext);
name_length = Parrot_str_length(interp, lang_name);
- found_path = Parrot_str_substr(interp, found_path, 0,
+ found_path = STRING_substr(interp, found_path, 0,
Parrot_str_length(interp, found_path)-name_length);
Parrot_lib_add_path(interp, Parrot_str_concat(interp, found_path, CONST_STRING(interp, "include/")),
Modified: branches/string_macros/src/pmc/codestring.pmc
==============================================================================
--- branches/string_macros/src/pmc/codestring.pmc Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/pmc/codestring.pmc Wed Sep 22 01:19:27 2010 (r49223)
@@ -119,7 +119,7 @@
if (pos < 0)
break;
- key = Parrot_str_substr(INTERP, fmt, pos+1, 1);
+ key = STRING_substr(INTERP, fmt, pos+1, 1);
if (VTABLE_exists_keyed_str(INTERP, hash, key)) {
repl = VTABLE_get_string_keyed_str(INTERP, hash, key);
Modified: branches/string_macros/src/pmc/scalar.pmc
==============================================================================
--- branches/string_macros/src/pmc/scalar.pmc Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/pmc/scalar.pmc Wed Sep 22 01:19:27 2010 (r49223)
@@ -831,7 +831,7 @@
*/
VTABLE STRING *substr_str(INTVAL offset, INTVAL length) {
- return Parrot_str_substr(INTERP, VTABLE_get_string(INTERP, SELF),
+ return STRING_substr(INTERP, VTABLE_get_string(INTERP, SELF),
offset, length);
}
Modified: branches/string_macros/src/pmc/string.pmc
==============================================================================
--- branches/string_macros/src/pmc/string.pmc Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/pmc/string.pmc Wed Sep 22 01:19:27 2010 (r49223)
@@ -365,7 +365,7 @@
VTABLE void substr(INTVAL offset, INTVAL length, PMC *dest) {
STRING *str_val, *s2;
GET_ATTR_str_val(INTERP, SELF, str_val);
- s2 = Parrot_str_substr(INTERP, str_val, offset, length);
+ s2 = STRING_substr(INTERP, str_val, offset, length);
VTABLE_set_string_native(INTERP, dest, s2);
}
@@ -382,7 +382,7 @@
VTABLE STRING *substr_str(INTVAL offset, INTVAL length) {
STRING *str_val;
GET_ATTR_str_val(INTERP, SELF, str_val);
- return Parrot_str_substr(INTERP, str_val, offset, length);
+ return STRING_substr(INTERP, str_val, offset, length);
}
/*
@@ -437,7 +437,7 @@
VTABLE STRING *get_string_keyed_int(INTVAL pos) {
STRING *str_val;
GET_ATTR_str_val(INTERP, SELF, str_val);
- return Parrot_str_substr(INTERP, str_val, pos, 1);
+ return STRING_substr(INTERP, str_val, pos, 1);
}
VTABLE INTVAL get_integer_keyed(PMC *key) {
Modified: branches/string_macros/src/pmc/stringbuilder.pmc
==============================================================================
--- branches/string_macros/src/pmc/stringbuilder.pmc Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/pmc/stringbuilder.pmc Wed Sep 22 01:19:27 2010 (r49223)
@@ -320,7 +320,7 @@
/* We must clone here because we can reallocate buffer behind the scene... */
/* TODO Optimize it to avoid creation of redundant STRING */
return Parrot_str_clone(INTERP,
- Parrot_str_substr(INTERP, buffer, offset, length));
+ STRING_substr(INTERP, buffer, offset, length));
}
/*
@@ -369,7 +369,7 @@
else {
/* remaining string can be added as is. */
VTABLE_push_string(INTERP, stringbuilder,
- Parrot_str_substr(INTERP, fmt, pos,
+ STRING_substr(INTERP, fmt, pos,
Parrot_str_length(INTERP, fmt) -pos));
}
break;
@@ -377,13 +377,13 @@
else {
/* slurp up to just before the % sign... */
VTABLE_push_string(INTERP, stringbuilder,
- Parrot_str_substr(INTERP, fmt, pos, percentPos - pos));
+ STRING_substr(INTERP, fmt, pos, percentPos - pos));
/* skip the % sign */
pos = percentPos + 1 ;
}
/* key is always a single character */
- key = Parrot_str_substr(INTERP, fmt, pos++, 1);
+ key = STRING_substr(INTERP, fmt, pos++, 1);
if (VTABLE_exists_keyed_str(INTERP, hash, key)) {
VTABLE_push_string(INTERP, stringbuilder,
@@ -411,7 +411,7 @@
else {
/* %foo has no special meaning, pass it through unchanged */
VTABLE_push_string(INTERP, stringbuilder,
- Parrot_str_substr(INTERP, fmt, pos-2, 2));
+ STRING_substr(INTERP, fmt, pos-2, 2));
}
}
Modified: branches/string_macros/src/spf_render.c
==============================================================================
--- branches/string_macros/src/spf_render.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/spf_render.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -201,7 +201,7 @@
STRING_ord(interp, str, 0) == '+')) {
STRING *temp = NULL;
STRING *ignored;
- temp = Parrot_str_substr(interp, str, 1, len-1);
+ temp = STRING_substr(interp, str, 1, len-1);
str = Parrot_str_chopn(interp, str, -1);
str = Parrot_str_concat(interp, str, fill);
str = Parrot_str_concat(interp, str, temp);
@@ -431,7 +431,7 @@
for (i = 0; i < pat_len; ++i) {
if (STRING_ord(interp, pat, i) == '%') { /* % */
if (len) {
- substr = Parrot_str_substr(interp, pat, old, len);
+ substr = STRING_substr(interp, pat, old, len);
/* XXX This shouldn't modify targ the pointer */
targ = Parrot_str_concat(interp, targ, substr);
}
@@ -941,7 +941,7 @@
}
}
if (len) {
- substr = Parrot_str_substr(interp, pat, old, len);
+ substr = STRING_substr(interp, pat, old, len);
targ = Parrot_str_concat(interp, targ, substr);
}
Modified: branches/string_macros/src/spf_vtable.c
==============================================================================
--- branches/string_macros/src/spf_vtable.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/spf_vtable.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -452,7 +452,7 @@
++obj->index;
s = VTABLE_get_string(interp, tmp);
- return Parrot_str_substr(interp, s, 0, 1);
+ return STRING_substr(interp, s, 0, 1);
}
/*
Modified: branches/string_macros/src/string/api.c
==============================================================================
--- branches/string_macros/src/string/api.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/string/api.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -960,7 +960,12 @@
offset, INTVAL length)>
Returns substring of length C<length> from C<offset> from the specified
-Parrot string.
+Parrot string. If C<offset> is negative, it counts from the end of the
+string. Returns the empty string if C<offset> equals the length of the
+string. Throws an exception if C<src> is null or C<offset> is out of bounds.
+Truncates C<length> if it extends beyond the end of the string.
+
+Identical to the STRING_substr macro.
=cut
@@ -974,35 +979,10 @@
ARGIN_NULLOK(const STRING *src), INTVAL offset, INTVAL length)
{
ASSERT_ARGS(Parrot_str_substr)
- const UINTVAL strlen = STRING_length(src);
- UINTVAL maxlen;
-
- if (offset < 0)
- offset += strlen;
-
- if ((UINTVAL)offset >= strlen || length <= 0) {
- if (STRING_IS_NULL(src))
- Parrot_ex_throw_from_c_args(interp, NULL,
- EXCEPTION_SUBSTR_OUT_OF_STRING, "Cannot substr on a null string");
-
- /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
- if ((UINTVAL)offset == strlen || length <= 0)
- return Parrot_str_new_noinit(interp, 0);
-
- Parrot_ex_throw_from_c_args(interp, NULL,
- EXCEPTION_SUBSTR_OUT_OF_STRING,
- "Cannot take substr outside string");
- }
-
- ASSERT_STRING_SANITY(src);
- maxlen = strlen - offset;
-
- if ((UINTVAL)length > maxlen)
- length = maxlen;
-
- if (length == strlen && !offset)
- return (STRING *)src;
+ if (src == NULL)
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNEXPECTED_NULL,
+ "Invalid operation on null string");
return STRING_substr(interp, src, offset, length);
}
@@ -1259,7 +1239,7 @@
if (n >= 0)
end += STRING_length(s);
- return Parrot_str_substr(interp, s, 0, end);
+ return STRING_substr(interp, s, 0, end);
}
Modified: branches/string_macros/src/string/encoding/ascii.c
==============================================================================
--- branches/string_macros/src/string/encoding/ascii.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/string/encoding/ascii.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -518,7 +518,7 @@
fixed8_scan,
fixed8_ord,
- fixed8_substr,
+ fixed_substr,
ascii_is_cclass,
ascii_find_cclass,
Modified: branches/string_macros/src/string/encoding/binary.c
==============================================================================
--- branches/string_macros/src/string/encoding/binary.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/string/encoding/binary.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -240,7 +240,7 @@
fixed8_scan,
fixed8_ord,
- fixed8_substr,
+ fixed_substr,
binary_is_cclass,
binary_find_cclass,
Modified: branches/string_macros/src/string/encoding/latin1.c
==============================================================================
--- branches/string_macros/src/string/encoding/latin1.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/string/encoding/latin1.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -545,7 +545,7 @@
fixed8_scan,
fixed8_ord,
- fixed8_substr,
+ fixed_substr,
latin1_is_cclass,
latin1_find_cclass,
Modified: branches/string_macros/src/string/encoding/shared.c
==============================================================================
--- branches/string_macros/src/string/encoding/shared.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/string/encoding/shared.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -377,8 +377,8 @@
/*
-=item C<STRING * encoding_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
+=item C<STRING * encoding_substr(PARROT_INTERP, const STRING *src, INTVAL
+offset, INTVAL length)>
Returns the codepoints in string C<src> at position C<offset> and length
C<count>.
@@ -389,28 +389,51 @@
PARROT_CANNOT_RETURN_NULL
STRING *
-encoding_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+encoding_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
{
ASSERT_ARGS(encoding_substr)
-
- STRING * const return_string = Parrot_str_copy(interp, src);
+ const UINTVAL strlen = STRING_length(src);
+ STRING *return_string;
String_iter iter;
UINTVAL start;
+ if (offset < 0)
+ offset += strlen;
+
+ if ((UINTVAL)offset >= strlen || length <= 0) {
+ /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
+ if ((UINTVAL)offset == strlen || length <= 0)
+ return Parrot_str_new_constant(interp, "");
+
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_SUBSTR_OUT_OF_STRING,
+ "Cannot take substr outside string");
+ }
+
+ return_string = Parrot_str_copy(interp, src);
+
+ if (offset == 0 && (UINTVAL)length >= strlen)
+ return return_string;
+
STRING_ITER_INIT(interp, &iter);
if (offset)
STRING_iter_set_position(interp, src, &iter, offset);
- start = iter.bytepos;
- return_string->strstart = (char *)return_string->strstart + start;
+ start = iter.bytepos;
+ return_string->strstart += start;
- if (count)
- STRING_iter_set_position(interp, src, &iter, offset + count);
+ if ((UINTVAL)length >= strlen - (UINTVAL)offset) {
+ return_string->bufused -= start;
+ return_string->strlen -= offset;
+ }
+ else {
+ STRING_iter_set_position(interp, src, &iter, offset + length);
+ return_string->bufused = iter.bytepos - start;
+ return_string->strlen = length;
+ }
- return_string->bufused = iter.bytepos - start;
- return_string->strlen = count;
- return_string->hashval = 0;
+ return_string->hashval = 0;
return return_string;
}
@@ -822,11 +845,11 @@
/*
-=item C<STRING * fixed8_substr(PARROT_INTERP, const STRING *src, UINTVAL offset,
-UINTVAL count)>
+=item C<STRING * fixed_substr(PARROT_INTERP, const STRING *src, INTVAL offset,
+INTVAL length)>
Returns the codepoints in string C<src> at position C<offset> and length
-C<count>.
+C<count>. Works for all fixed size encodings.
=cut
@@ -835,16 +858,41 @@
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
STRING *
-fixed8_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+fixed_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
{
- ASSERT_ARGS(fixed8_substr)
- STRING * const return_string = Parrot_str_copy(interp, src);
+ ASSERT_ARGS(fixed_substr)
+ const UINTVAL strlen = STRING_length(src);
+ STRING *return_string;
+ UINTVAL maxlen, bytes_per_codepoint;
+
+ if (offset < 0)
+ offset += strlen;
+
+ if ((UINTVAL)offset >= strlen || length <= 0) {
+ /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
+ if ((UINTVAL)offset == strlen || length <= 0)
+ return Parrot_str_new_constant(interp, "");
+
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_SUBSTR_OUT_OF_STRING,
+ "Cannot take substr outside string");
+ }
+
+ return_string = Parrot_str_copy(interp, src);
+
+ if (offset == 0 && (UINTVAL)length >= strlen)
+ return return_string;
+
+ bytes_per_codepoint = src->encoding->max_bytes_per_codepoint;
+ maxlen = strlen - offset;
+
+ if ((UINTVAL)length > maxlen)
+ length = maxlen;
- return_string->encoding = src->encoding;
- return_string->strstart = (char *)return_string->strstart + offset;
- return_string->bufused = count;
- return_string->strlen = count;
- return_string->hashval = 0;
+ return_string->strstart += offset * bytes_per_codepoint;
+ return_string->bufused = length * bytes_per_codepoint;
+ return_string->strlen = length;
+ return_string->hashval = 0;
return return_string;
}
Modified: branches/string_macros/src/string/encoding/shared.h
==============================================================================
--- branches/string_macros/src/string/encoding/shared.h Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/string/encoding/shared.h Wed Sep 22 01:19:27 2010 (r49223)
@@ -105,8 +105,8 @@
PARROT_CANNOT_RETURN_NULL
STRING * encoding_substr(PARROT_INTERP,
ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
+ INTVAL offset,
+ INTVAL length)
__attribute__nonnull__(1)
__attribute__nonnull__(2);
@@ -207,10 +207,10 @@
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
-STRING * fixed8_substr(PARROT_INTERP,
+STRING * fixed_substr(PARROT_INTERP,
ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
+ INTVAL offset,
+ INTVAL length)
__attribute__nonnull__(1)
__attribute__nonnull__(2);
@@ -339,7 +339,7 @@
#define ASSERT_ARGS_fixed8_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_fixed8_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+#define ASSERT_ARGS_fixed_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_unicode_chr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
Modified: branches/string_macros/src/string/encoding/ucs2.c
==============================================================================
--- branches/string_macros/src/string/encoding/ucs2.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/string/encoding/ucs2.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -97,15 +97,6 @@
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
-static STRING * ucs2_substr(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
static STRING * ucs2_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
__attribute__nonnull__(1)
__attribute__nonnull__(2);
@@ -139,9 +130,6 @@
#define ASSERT_ARGS_ucs2_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ucs2_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_ucs2_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
@@ -242,39 +230,6 @@
#endif
}
-/*
-
-=item C<static STRING * ucs2_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the codepoints in string C<src> at position C<offset> and length
-C<count>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-ucs2_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(ucs2_substr)
- STRING * const return_string = Parrot_str_copy(interp, src);
-
-#if PARROT_HAS_ICU
- return_string->strstart = (char*)src->strstart + offset * sizeof (UChar);
- return_string->bufused = count * sizeof (UChar);
- return_string->strlen = count;
- return_string->hashval = 0;
- return return_string;
-#else
- UNUSED(src);
- UNUSED(offset);
- UNUSED(count);
- no_ICU_lib(interp);
-#endif
-}
/*
@@ -471,7 +426,7 @@
ucs2_scan,
ucs2_ord,
- ucs2_substr,
+ fixed_substr,
encoding_is_cclass,
encoding_find_cclass,
Modified: branches/string_macros/src/string/encoding/ucs4.c
==============================================================================
--- branches/string_macros/src/string/encoding/ucs4.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/string/encoding/ucs4.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -97,15 +97,6 @@
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
-static STRING * ucs4_substr(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
static STRING * ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
__attribute__nonnull__(1)
__attribute__nonnull__(2);
@@ -139,9 +130,6 @@
#define ASSERT_ARGS_ucs4_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ucs4_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
#define ASSERT_ARGS_ucs4_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(src))
@@ -258,36 +246,6 @@
/*
-=item C<static STRING * ucs4_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the C<count> codepoints stored at position C<offset> in string
-C<src> as a new string.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-ucs4_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(ucs4_substr)
-#if PARROT_HAS_ICU
- return Parrot_str_new_init(interp, (char*)src->strstart + offset * sizeof (UChar32),
- count * sizeof (UChar32), src->encoding, 0);
-#else
- UNUSED(src);
- UNUSED(offset);
- UNUSED(count);
- no_ICU_lib(interp);
-#endif
-}
-
-
-/*
-
=item C<static UINTVAL ucs4_iter_get(PARROT_INTERP, const STRING *str, const
String_iter *i, INTVAL offset)>
@@ -471,7 +429,7 @@
ucs4_scan,
ucs4_ord,
- ucs4_substr,
+ fixed_substr,
encoding_is_cclass,
encoding_find_cclass,
Modified: branches/string_macros/src/string/encoding/utf16.c
==============================================================================
--- branches/string_macros/src/string/encoding/utf16.c Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/src/string/encoding/utf16.c Wed Sep 22 01:19:27 2010 (r49223)
@@ -86,8 +86,8 @@
PARROT_CANNOT_RETURN_NULL
static STRING * utf16_substr(PARROT_INTERP,
ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
+ INTVAL offset,
+ INTVAL length)
__attribute__nonnull__(1)
__attribute__nonnull__(2);
@@ -304,8 +304,8 @@
/*
-=item C<static STRING * utf16_substr(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
+=item C<static STRING * utf16_substr(PARROT_INTERP, const STRING *src, INTVAL
+offset, INTVAL length)>
Returns the codepoints in string C<src> at position C<offset> and length
C<count>.
@@ -317,21 +317,50 @@
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
static STRING *
-utf16_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+utf16_substr(PARROT_INTERP, ARGIN(const STRING *src), INTVAL offset, INTVAL length)
{
ASSERT_ARGS(utf16_substr)
#if PARROT_HAS_ICU
- UINTVAL pos = 0, start;
const UChar * const s = (UChar*) src->strstart;
- STRING * const return_string = Parrot_str_copy(interp, src);
+ const UINTVAL strlen = STRING_length(src);
+ STRING *return_string;
+ UINTVAL pos = 0, start;
+
+ if (offset < 0)
+ offset += strlen;
+
+ if ((UINTVAL)offset >= strlen || length <= 0) {
+ /* Allow regexes to return $' easily for "aaa" =~ /aaa/ */
+ if ((UINTVAL)offset == strlen || length <= 0)
+ return Parrot_str_new_noinit(interp, 0);
+
+ Parrot_ex_throw_from_c_args(interp, NULL,
+ EXCEPTION_SUBSTR_OUT_OF_STRING,
+ "Cannot take substr outside string");
+ }
+
+ return_string = Parrot_str_copy(interp, src);
+
+ if (offset == 0 && (UINTVAL)length >= strlen)
+ return return_string;
U16_FWD_N_UNSAFE(s, pos, offset);
+
start = pos * sizeof (UChar);
- return_string->strstart = (char *)return_string->strstart + start;
- U16_FWD_N_UNSAFE(s, pos, count);
- return_string->bufused = pos * sizeof (UChar) - start;
- return_string->strlen = count;
+ return_string->strstart += start;
+
+ if ((UINTVAL)length >= strlen - (UINTVAL)offset) {
+ return_string->bufused -= start;
+ return_string->strlen -= offset;
+ }
+ else {
+ U16_FWD_N_UNSAFE(s, pos, length);
+ return_string->bufused = pos * sizeof (UChar) - start;
+ return_string->strlen = length;
+ }
+
return_string->hashval = 0;
+
return return_string;
#else
UNUSED(src);
Modified: branches/string_macros/t/op/string.t
==============================================================================
--- branches/string_macros/t/op/string.t Wed Sep 22 01:18:29 2010 (r49222)
+++ branches/string_macros/t/op/string.t Wed Sep 22 01:19:27 2010 (r49223)
@@ -318,7 +318,7 @@
.local int r
null s
eh = new ['ExceptionHandler']
- eh.'handle_types'(.EXCEPTION_SUBSTR_OUT_OF_STRING)
+ eh.'handle_types'(.EXCEPTION_UNEXPECTED_NULL)
set_addr eh, handler
push_eh eh
r = 1
More information about the parrot-commits
mailing list