[svn:parrot] r45573 - branches/immutable_strings_part1/src/string
chromatic at svn.parrot.org
chromatic at svn.parrot.org
Sun Apr 11 20:13:29 UTC 2010
Author: chromatic
Date: Sun Apr 11 20:13:28 2010
New Revision: 45573
URL: https://trac.parrot.org/parrot/changeset/45573
Log:
[str] Revised Parrot_str_join() to avoid creating a new PMC and transcoding
strings unnecessarily. This is much faster than before, but it could use
prettifying.
Modified:
branches/immutable_strings_part1/src/string/api.c
Modified: branches/immutable_strings_part1/src/string/api.c
==============================================================================
--- branches/immutable_strings_part1/src/string/api.c Sun Apr 11 17:23:45 2010 (r45572)
+++ branches/immutable_strings_part1/src/string/api.c Sun Apr 11 20:13:28 2010 (r45573)
@@ -2971,50 +2971,74 @@
Parrot_str_join(PARROT_INTERP, ARGIN_NULLOK(STRING *j), ARGIN(PMC *ar))
{
ASSERT_ARGS(Parrot_str_join)
- STRING *res;
- STRING *s;
- const int ar_len = VTABLE_elements(interp, ar);
- int i;
- int total_length = 0;
- PMC *chunks;
- char *pos;
+ STRING **chunks;
+ STRING *res;
+ STRING *s;
+ char *pos;
+ const int ar_len = VTABLE_elements(interp, ar);
+ int total_length = 0;
+ int transcoded = 0;
+ int i;
if (ar_len == 0)
return Parrot_str_new_noinit(interp, enum_stringrep_one, 0);
- /* FIXME It's very-very bad implementation of C<join>. */
+ if (!j || STRING_IS_NULL(j))
+ j = Parrot_str_new_noinit(interp, enum_stringrep_one, 0);
- /* Allocate new RSA. Gather all strings in same encoding. And join them */
- chunks = pmc_new(interp, enum_class_ResizableStringArray);
- j = Parrot_utf8_encoding_ptr->to_encoding(interp, j);
+ chunks = (STRING **)Parrot_gc_allocate_fixed_size_storage(interp,
+ ar_len * sizeof (STRING *));
for (i = 0; i < ar_len; ++i) {
- STRING * next = VTABLE_get_string_keyed_int(interp, ar, i);
+ STRING *next = VTABLE_get_string_keyed_int(interp, ar, i);
- next = Parrot_utf8_encoding_ptr->to_encoding(interp, next);
+ if (next->encoding != j->encoding) {
+ ENCODING *e = j->encoding;
+ CHARSET *c = string_rep_compatible(interp, next, j, &e);
+ if (e == Parrot_fixed_8_encoding_ptr)
+ e = Parrot_utf8_encoding_ptr;
+ j = e->to_encoding(interp, j);
+ transcoded = 1;
+ }
+
+ chunks[i] = next;
total_length += next->bufused;
- VTABLE_push_string(interp, chunks, next);
}
- total_length += (ar_len - 1) * j->bufused;
+ /* with the right charset, transcode any strings if necessary*/
+ if (transcoded) {
+ CHARSET *c = j->charset;
+ ENCODING *e = j->encoding;
+
+ for (i = 0; i < ar_len; ++i) {
+ STRING *s = chunks[i];
+ if (s->encoding != e || s->charset != c) {
+ STRING *new = e->to_encoding(interp, s);
+ chunks[i] = new;
+ total_length += s->bufused - new->bufused;
+ }
+ }
+ }
+
+ /* add the length of the separator, now that it's transcoded */
+ total_length += j->bufused * ar_len;
res = Parrot_gc_new_string_header(interp, 0);
Parrot_gc_allocate_string_storage(interp, res, total_length);
- res->charset = Parrot_unicode_charset_ptr;
- res->encoding = Parrot_utf8_encoding_ptr;
- res->bufused = total_length;
+ res->charset = j->charset;
+ res->encoding = j->encoding;
/* Iterate over chunks and append it to res */
pos = res->strstart;
/* Copy first chunk */
- s = VTABLE_get_string_keyed_int(interp, chunks, 0);
+ s = chunks[0];
mem_sys_memcopy(pos, s->strstart, s->bufused);
pos += s->bufused;
for (i = 1; i < ar_len; ++i) {
- STRING *next = VTABLE_get_string_keyed_int(interp, chunks, i);
+ STRING *next = chunks[i];
mem_sys_memcopy(pos, j->strstart, j->bufused);
pos += j->bufused;
@@ -3025,7 +3049,11 @@
PARROT_ASSERT(pos <= res->strstart + Buffer_buflen(res));
}
+ res->bufused = pos - res->strstart;
+
(void)Parrot_str_length(interp, res);
+ Parrot_gc_free_fixed_size_storage(interp, ar_len * sizeof (STRING *),
+ chunks);
return res;
}
More information about the parrot-commits
mailing list