[svn:parrot] r48823 - in trunk: src/pmc t/pmc
nwellnhof at svn.parrot.org
nwellnhof at svn.parrot.org
Tue Sep 7 18:33:21 UTC 2010
Author: nwellnhof
Date: Tue Sep 7 18:33:21 2010
New Revision: 48823
URL: https://trac.parrot.org/parrot/changeset/48823
Log:
[pmc] Various fixes to StringBuilder PMC
- Optimize reallocation
- Use Parrot_str_clone again, it should be safe now
- Keep encoding if all strings have the same encoding
Modified:
trunk/src/pmc/stringbuilder.pmc
trunk/t/pmc/stringbuilder.t
Modified: trunk/src/pmc/stringbuilder.pmc
==============================================================================
--- trunk/src/pmc/stringbuilder.pmc Tue Sep 7 18:33:00 2010 (r48822)
+++ trunk/src/pmc/stringbuilder.pmc Tue Sep 7 18:33:21 2010 (r48823)
@@ -24,10 +24,7 @@
/* HEADERIZER BEGIN: static */
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-static size_t calculate_capacity(SHIM_INTERP,
- size_t current,
- size_t additional);
-
+static size_t calculate_capacity(SHIM_INTERP, size_t needed);
#define ASSERT_ARGS_calculate_capacity __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
/* HEADERIZER END: static */
@@ -143,16 +140,11 @@
*/
VTABLE STRING *get_string() {
- STRING *buffer, *result;
+ STRING *buffer;
GET_ATTR_buffer(INTERP, SELF, buffer);
/* We need to build a new string because outside of StringBuilder
- * strings are immutable. Don't use Parrot_str_clone, the buffer
- * is not a STRING valid enough for that. */
- result = Parrot_str_new_init(INTERP,
- buffer->strstart, buffer->bufused,
- buffer->encoding, buffer->charset,
- 0);
- return result;
+ * strings are immutable. */
+ return Parrot_str_clone(INTERP, buffer);
}
/*
@@ -177,37 +169,59 @@
GET_ATTR_buffer(INTERP, SELF, buffer);
- /* If strings are incompatible - convert them */
- /* TODO Ask chromatic why in Parrot_str_join he ignored charset */
- cs = Parrot_str_rep_compatible(interp, buffer, s, &enc);
- if (cs) {
- buffer->charset = cs;
- buffer->encoding = enc;
+ if (buffer->bufused == 0) {
+ /* Always copy the encoding of the first string. The IO functions
+ assume that the concatenation of utf8 strings doesn't change
+ the encoding. */
+ buffer->charset = s->charset;
+ buffer->encoding = s->encoding;
}
else {
- if (buffer->encoding != Parrot_utf8_encoding_ptr) {
- /* Create new temporary string */
- STRING * const new_buffer = Parrot_utf8_encoding_ptr->to_encoding(interp, buffer);
- mem_gc_free(INTERP, buffer->_bufstart);
- STRUCT_COPY(buffer, new_buffer);
- buffer->flags = PObj_is_string_FLAG | PObj_external_FLAG;
-
- buffer->_bufstart = buffer->strstart = mem_gc_allocate_n_typed(INTERP,
- new_buffer->_buflen, char);
- mem_sys_memcopy(buffer->_bufstart, new_buffer->_bufstart, new_buffer->_buflen);
+ cs = Parrot_str_rep_compatible(interp, buffer, s, &enc);
- SET_ATTR_buffer(INTERP, SELF, buffer);
+ if (cs) {
+ buffer->charset = cs;
+ buffer->encoding = enc;
}
+ else {
+ /* If strings are incompatible - convert them to utf8 */
+
+ if (s->encoding != Parrot_utf8_encoding_ptr)
+ s = Parrot_utf8_encoding_ptr->to_encoding(interp, s);
+
+ if (buffer->encoding != Parrot_utf8_encoding_ptr) {
+ /* Create new temporary string */
+ STRING * new_buffer;
+
+ new_buffer = Parrot_utf8_encoding_ptr->to_encoding(interp, buffer);
+ total_size = new_buffer->bufused + s->bufused;
+
+ if (total_size > buffer->_buflen) {
+ /* Reallocate */
+ mem_gc_free(INTERP, buffer->_bufstart);
+ total_size = calculate_capacity(INTERP, total_size);
+ buffer->_bufstart = buffer->strstart
+ = mem_gc_allocate_n_typed(INTERP, total_size, char);
+ buffer->_buflen = total_size;
+ }
- if (s->encoding != Parrot_utf8_encoding_ptr)
- s = Parrot_utf8_encoding_ptr->to_encoding(interp, s);
+ buffer->bufused = new_buffer->bufused;
+ buffer->charset = new_buffer->charset;
+ buffer->encoding = new_buffer->encoding;
+
+ mem_sys_memcopy(buffer->strstart, new_buffer->strstart,
+ new_buffer->bufused);
+ }
+ }
}
- /* Calculate (possibly new) total size */
- total_size = calculate_capacity(INTERP, buffer->bufused, s->bufused);
+ total_size = buffer->bufused + s->bufused;
/* Reallocate if necessary */
- if (total_size > Buffer_buflen(buffer)) {
+ if (total_size > buffer->_buflen) {
+ /* Calculate (possibly new) total size */
+ total_size = calculate_capacity(INTERP, total_size);
+
/* Parrot_unicode_charset_ptr can produce NULL buffer */
buffer->_bufstart = buffer->strstart = mem_gc_realloc_n_typed(INTERP,
buffer->_bufstart, total_size, char);
@@ -265,7 +279,7 @@
STRING * buffer;
/* Calculate (possibly new) total size */
- size_t total_size = calculate_capacity(INTERP, 0, s->bufused);
+ size_t total_size = calculate_capacity(INTERP, s->bufused);
GET_ATTR_buffer(INTERP, SELF, buffer);
@@ -462,17 +476,9 @@
/*
-=item C<static size_t calculate_capacity(PARROT_INTERP, size_t current, size_t
-additional)>
+=item C<static size_t calculate_capacity(PARROT_INTERP, size_t needed)>
-Calculate capacity for string. Usually StringBuilders used for "large"
-strings. So capacity rounded up by next algorithm:
- - By 128 bytes if total capacity less then 1KB
- - By 1KB if total less than 4KB
- - By 4KB if total less than 1MB
- - By 1MB otherwise.
-
-This function is subject for tuning on real-world usage scenarios.
+Calculate capacity for string. We allocate double the amount needed.
=back
@@ -481,22 +487,15 @@
*/
static size_t
-calculate_capacity(SHIM_INTERP, size_t current, size_t additional)
+calculate_capacity(SHIM_INTERP, size_t needed)
{
ASSERT_ARGS(calculate_capacity)
- size_t total_size = current + additional;
- size_t chunk_size = 1024*1024;
-
- if (total_size < 1024)
- chunk_size = 128;
- else if (total_size < 4096)
- chunk_size = 1024;
- else if (total_size < 1024*1024)
- chunk_size = 4096;
- total_size = (total_size / chunk_size + 1) * chunk_size;
+ needed *= 2;
+ /* round up to 16 */
+ needed = (needed + 15) & ~15;
- return total_size;
+ return needed;
}
/*
Modified: trunk/t/pmc/stringbuilder.t
==============================================================================
--- trunk/t/pmc/stringbuilder.t Tue Sep 7 18:33:00 2010 (r48822)
+++ trunk/t/pmc/stringbuilder.t Tue Sep 7 18:33:21 2010 (r48823)
@@ -77,9 +77,6 @@
is( $S0, "foo", "... without clobbering first string")
- $I0 = sb
- is( $I0, 128, "... and capacity still 128" )
-
$I0 = sb.'get_string_length'()
is( $I0, 6, "... and string length is correct")
@@ -91,9 +88,6 @@
$S1 = sb
is( $S0, $S1, "Push 128 chars string works")
- $I0 = sb
- is( $I0, 256, "... and capacity increased" )
-
$S99 = repeat "x", 1000
push sb, $S99
@@ -101,9 +95,6 @@
$S1 = sb
is( $S0, $S1, "Push 1000 chars string works")
- $I0 = sb
- is( $I0, 2048, "... and capacity increased" )
-
$S99 = repeat "x", 12000
push sb, $S99
@@ -111,13 +102,11 @@
$S1 = sb
is( $S0, $S1, "Push 10000 chars string works")
- $I0 = sb
- is( $I0, 16384, "... and capacity increased" )
+ null $S99
+ push sb, $S99
- null $S0
- push sb, $S0
- $I0 = sb
- is( $I0, 16384, "push a null string does nothing" )
+ $S1 = sb
+ is( $S0, $S1, "push a null string does nothing" )
.end
.sub 'test_push_pmc'
More information about the parrot-commits
mailing list