[svn:parrot] r39286 - in trunk: src src/string t/op

bacek at svn.parrot.org bacek at svn.parrot.org
Sun May 31 11:47:52 UTC 2009


Author: bacek
Date: Sun May 31 11:47:51 2009
New Revision: 39286
URL: https://trac.parrot.org/parrot/changeset/39286

Log:
[core] Don't rely on CHARSET to compute the STRING hash value. Closes TT#24

It is always a bad idea to break the contract with Hash. So steal the code
from unicode/compute_hash and use it for all strings.

Modified:
   trunk/src/hash.c
   trunk/src/string/api.c
   trunk/t/op/stringu.t

Modified: trunk/src/hash.c
==============================================================================
--- trunk/src/hash.c	Sun May 31 07:54:01 2009	(r39285)
+++ trunk/src/hash.c	Sun May 31 11:47:51 2009	(r39286)
@@ -202,7 +202,7 @@
     if (!s2)
         return 1;
 
-    if (s1->charset == s2->charset && s1->hashval != s2->hashval)
+    if (s1->hashval != s2->hashval)
         return 1;
 
     /* COWed strings */

Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c	Sun May 31 07:54:01 2009	(r39285)
+++ trunk/src/string/api.c	Sun May 31 11:47:51 2009	(r39286)
@@ -2405,19 +2405,27 @@
 Parrot_str_to_hashval(PARROT_INTERP, ARGMOD_NULLOK(STRING *s))
 {
     ASSERT_ARGS(Parrot_str_to_hashval)
-    register size_t h;
-    const UINTVAL seed = interp->hash_seed;
+    String_iter iter;
+    UINTVAL     offs;
+    size_t      hashval = interp->hash_seed;
 
     if (!s)
-        return seed;
+        return hashval;
 
     /* ZZZZZ workaround for something not setting up encodings right */
     saneify_string(s);
 
-    h          = CHARSET_COMPUTE_HASH(interp, s, seed);
-    s->hashval = h;
+    ENCODING_ITER_INIT(interp, s, &iter);
+
+    for (offs = 0; offs < s->strlen; ++offs) {
+        const UINTVAL c = iter.get_and_advance(interp, &iter);
+        hashval += hashval << 5;
+        hashval += c;
+    }
+
+    s->hashval = hashval;
 
-    return h;
+    return hashval;
 }
 
 

Modified: trunk/t/op/stringu.t
==============================================================================
--- trunk/t/op/stringu.t	Sun May 31 07:54:01 2009	(r39285)
+++ trunk/t/op/stringu.t	Sun May 31 11:47:51 2009	(r39286)
@@ -462,7 +462,7 @@
 hello
 OUTPUT
 
-pir_output_is( <<'CODE', <<OUTPUT, "UTF-8 and Unicode hash keys, full bucket", 'todo' => 'TT #24');
+pir_output_is( <<'CODE', <<OUTPUT, "UTF-8 and Unicode hash keys, full bucket" );
 .sub 'main'
     .local string str0, str1
     str0 = unicode:"infix:\u00b1"
@@ -483,13 +483,13 @@
   fill_done:
 
     $I0 = iseq str0, str1
-    print "iseq str0, str1               => "
+    #print "iseq str0, str1               => "
     say $I0
 
     $S0 = hash[str0]
     $S1 = hash[str1]
     $I0 = iseq $S0, $S1
-    print "iseq hash[str0], hash[str1]   => "
+    #print "iseq hash[str0], hash[str1]   => "
     say $I0
     say $S0
     say $S1


More information about the parrot-commits mailing list