[svn:parrot] r48823 - in trunk: src/pmc t/pmc

nwellnhof at svn.parrot.org nwellnhof at svn.parrot.org
Tue Sep 7 18:33:21 UTC 2010


Author: nwellnhof
Date: Tue Sep  7 18:33:21 2010
New Revision: 48823
URL: https://trac.parrot.org/parrot/changeset/48823

Log:
[pmc] Various fixes to StringBuilder PMC

- Optimize reallocation
- Use Parrot_str_clone again, it should be safe now
- Keep encoding if all strings have the same encoding

Modified:
   trunk/src/pmc/stringbuilder.pmc
   trunk/t/pmc/stringbuilder.t

Modified: trunk/src/pmc/stringbuilder.pmc
==============================================================================
--- trunk/src/pmc/stringbuilder.pmc	Tue Sep  7 18:33:00 2010	(r48822)
+++ trunk/src/pmc/stringbuilder.pmc	Tue Sep  7 18:33:21 2010	(r48823)
@@ -24,10 +24,7 @@
 /* HEADERIZER BEGIN: static */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-static size_t calculate_capacity(SHIM_INTERP,
-    size_t current,
-    size_t additional);
-
+static size_t calculate_capacity(SHIM_INTERP, size_t needed);
 #define ASSERT_ARGS_calculate_capacity __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: static */
@@ -143,16 +140,11 @@
 */
 
     VTABLE STRING *get_string() {
-        STRING *buffer, *result;
+        STRING *buffer;
         GET_ATTR_buffer(INTERP, SELF, buffer);
         /* We need to build a new string because outside of StringBuilder
-         * strings are immutable. Don't use Parrot_str_clone, the buffer
-         * is not a STRING valid enough for that. */
-        result = Parrot_str_new_init(INTERP,
-            buffer->strstart, buffer->bufused,
-            buffer->encoding, buffer->charset,
-            0);
-        return result;
+         * strings are immutable. */
+        return Parrot_str_clone(INTERP, buffer);
     }
 
 /*
@@ -177,37 +169,59 @@
 
         GET_ATTR_buffer(INTERP, SELF, buffer);
 
-        /* If strings are incompatible - convert them */
-        /* TODO Ask chromatic why in Parrot_str_join he ignored charset */
-        cs = Parrot_str_rep_compatible(interp, buffer, s, &enc);
-        if (cs) {
-            buffer->charset = cs;
-            buffer->encoding = enc;
+        if (buffer->bufused == 0) {
+            /* Always copy the encoding of the first string. The IO functions
+               assume that the concatenation of utf8 strings doesn't change
+               the encoding. */
+            buffer->charset  = s->charset;
+            buffer->encoding = s->encoding;
         }
         else {
-            if (buffer->encoding != Parrot_utf8_encoding_ptr) {
-                /* Create new temporary string */
-                STRING * const new_buffer = Parrot_utf8_encoding_ptr->to_encoding(interp, buffer);
-                mem_gc_free(INTERP, buffer->_bufstart);
-                STRUCT_COPY(buffer, new_buffer);
-                buffer->flags     = PObj_is_string_FLAG | PObj_external_FLAG;
-
-                buffer->_bufstart = buffer->strstart = mem_gc_allocate_n_typed(INTERP,
-                                                        new_buffer->_buflen, char);
-                mem_sys_memcopy(buffer->_bufstart, new_buffer->_bufstart, new_buffer->_buflen);
+            cs = Parrot_str_rep_compatible(interp, buffer, s, &enc);
 
-                SET_ATTR_buffer(INTERP, SELF, buffer);
+            if (cs) {
+                buffer->charset = cs;
+                buffer->encoding = enc;
             }
+            else {
+                /* If strings are incompatible - convert them to utf8 */
+
+                if (s->encoding != Parrot_utf8_encoding_ptr)
+                    s = Parrot_utf8_encoding_ptr->to_encoding(interp, s);
+
+                if (buffer->encoding != Parrot_utf8_encoding_ptr) {
+                    /* Create new temporary string */
+                    STRING * new_buffer;
+
+                    new_buffer = Parrot_utf8_encoding_ptr->to_encoding(interp, buffer);
+                    total_size = new_buffer->bufused + s->bufused;
+
+                    if (total_size > buffer->_buflen) {
+                        /* Reallocate */
+                        mem_gc_free(INTERP, buffer->_bufstart);
+                        total_size = calculate_capacity(INTERP, total_size);
+                        buffer->_bufstart = buffer->strstart
+                                          = mem_gc_allocate_n_typed(INTERP, total_size, char);
+                        buffer->_buflen   = total_size;
+                    }
 
-            if (s->encoding != Parrot_utf8_encoding_ptr)
-                s = Parrot_utf8_encoding_ptr->to_encoding(interp, s);
+                    buffer->bufused  = new_buffer->bufused;
+                    buffer->charset  = new_buffer->charset;
+                    buffer->encoding = new_buffer->encoding;
+
+                    mem_sys_memcopy(buffer->strstart, new_buffer->strstart,
+                            new_buffer->bufused);
+                }
+            }
         }
 
-        /* Calculate (possibly new) total size */
-        total_size = calculate_capacity(INTERP, buffer->bufused, s->bufused);
+        total_size = buffer->bufused + s->bufused;
 
         /* Reallocate if necessary */
-        if (total_size > Buffer_buflen(buffer)) {
+        if (total_size > buffer->_buflen) {
+            /* Calculate (possibly new) total size */
+            total_size = calculate_capacity(INTERP, total_size);
+
             /* Parrot_unicode_charset_ptr can produce NULL buffer */
             buffer->_bufstart = buffer->strstart = mem_gc_realloc_n_typed(INTERP,
                 buffer->_bufstart, total_size, char);
@@ -265,7 +279,7 @@
         STRING * buffer;
 
         /* Calculate (possibly new) total size */
-        size_t total_size = calculate_capacity(INTERP, 0, s->bufused);
+        size_t total_size = calculate_capacity(INTERP, s->bufused);
 
         GET_ATTR_buffer(INTERP, SELF, buffer);
 
@@ -462,17 +476,9 @@
 
 /*
 
-=item C<static size_t calculate_capacity(PARROT_INTERP, size_t current, size_t
-additional)>
+=item C<static size_t calculate_capacity(PARROT_INTERP, size_t needed)>
 
-Calculate capacity for string. Usually StringBuilders used for "large"
-strings. So capacity rounded up by next algorithm:
-  - By 128 bytes if total capacity less then 1KB
-  - By 1KB if total less than 4KB
-  - By 4KB if total less than 1MB
-  - By 1MB otherwise.
-
-This function is subject for tuning on real-world usage scenarios.
+Calculate capacity for string. We allocate double the amount needed.
 
 =back
 
@@ -481,22 +487,15 @@
 */
 
 static size_t
-calculate_capacity(SHIM_INTERP, size_t current, size_t additional)
+calculate_capacity(SHIM_INTERP, size_t needed)
 {
     ASSERT_ARGS(calculate_capacity)
-    size_t total_size = current + additional;
-    size_t chunk_size = 1024*1024;
-
-    if (total_size < 1024)
-        chunk_size = 128;
-    else if (total_size < 4096)
-        chunk_size = 1024;
-    else if (total_size < 1024*1024)
-        chunk_size = 4096;
 
-    total_size = (total_size / chunk_size + 1) * chunk_size;
+    needed *= 2;
+    /* round up to 16 */
+    needed  = (needed + 15) & ~15;
 
-    return total_size;
+    return needed;
 }
 
 /*

Modified: trunk/t/pmc/stringbuilder.t
==============================================================================
--- trunk/t/pmc/stringbuilder.t	Tue Sep  7 18:33:00 2010	(r48822)
+++ trunk/t/pmc/stringbuilder.t	Tue Sep  7 18:33:21 2010	(r48823)
@@ -77,9 +77,6 @@
 
     is( $S0, "foo", "... without clobbering first string")
 
-    $I0 = sb
-    is( $I0, 128, "... and capacity still 128" )
-
     $I0 = sb.'get_string_length'()
     is( $I0, 6,   "... and string length is correct")
 
@@ -91,9 +88,6 @@
     $S1 = sb
     is( $S0, $S1, "Push 128 chars string works")
 
-    $I0 = sb
-    is( $I0, 256, "... and capacity increased" )
-
     $S99 = repeat "x", 1000
     push sb, $S99
 
@@ -101,9 +95,6 @@
     $S1 = sb
     is( $S0, $S1, "Push 1000 chars string works")
 
-    $I0 = sb
-    is( $I0, 2048, "... and capacity increased" )
-
     $S99 = repeat "x", 12000
     push sb, $S99
 
@@ -111,13 +102,11 @@
     $S1 = sb
     is( $S0, $S1, "Push 10000 chars string works")
 
-    $I0 = sb
-    is( $I0, 16384, "... and capacity increased" )
+    null $S99
+    push sb, $S99
 
-    null $S0
-    push sb, $S0
-    $I0 = sb
-    is( $I0, 16384, "push a null string does nothing" )
+    $S1 = sb
+    is( $S0, $S1, "push a null string does nothing" )
 .end
 
 .sub 'test_push_pmc'


More information about the parrot-commits mailing list