[svn:parrot] r39286 - in trunk: src src/string t/op
bacek at svn.parrot.org
bacek at svn.parrot.org
Sun May 31 11:47:52 UTC 2009
Author: bacek
Date: Sun May 31 11:47:51 2009
New Revision: 39286
URL: https://trac.parrot.org/parrot/changeset/39286
Log:
[core] Don't rely on CHARSET to compute the STRING hashvalue. Closes TT #24.
It is always a bad idea to break the contract with Hash, so steal the code
from unicode/compute_hash and use it for all strings.
Modified:
trunk/src/hash.c
trunk/src/string/api.c
trunk/t/op/stringu.t
Modified: trunk/src/hash.c
==============================================================================
--- trunk/src/hash.c Sun May 31 07:54:01 2009 (r39285)
+++ trunk/src/hash.c Sun May 31 11:47:51 2009 (r39286)
@@ -202,7 +202,7 @@
if (!s2)
return 1;
- if (s1->charset == s2->charset && s1->hashval != s2->hashval)
+ if (s1->hashval != s2->hashval)
return 1;
/* COWed strings */
Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c Sun May 31 07:54:01 2009 (r39285)
+++ trunk/src/string/api.c Sun May 31 11:47:51 2009 (r39286)
@@ -2405,19 +2405,27 @@
Parrot_str_to_hashval(PARROT_INTERP, ARGMOD_NULLOK(STRING *s))
{
ASSERT_ARGS(Parrot_str_to_hashval)
- register size_t h;
- const UINTVAL seed = interp->hash_seed;
+ String_iter iter;
+ UINTVAL offs;
+ size_t hashval = interp->hash_seed;
if (!s)
- return seed;
+ return hashval;
/* ZZZZZ workaround for something not setting up encodings right */
saneify_string(s);
- h = CHARSET_COMPUTE_HASH(interp, s, seed);
- s->hashval = h;
+ ENCODING_ITER_INIT(interp, s, &iter);
+
+ for (offs = 0; offs < s->strlen; ++offs) {
+ const UINTVAL c = iter.get_and_advance(interp, &iter);
+ hashval += hashval << 5;
+ hashval += c;
+ }
+
+ s->hashval = hashval;
- return h;
+ return hashval;
}
Modified: trunk/t/op/stringu.t
==============================================================================
--- trunk/t/op/stringu.t Sun May 31 07:54:01 2009 (r39285)
+++ trunk/t/op/stringu.t Sun May 31 11:47:51 2009 (r39286)
@@ -462,7 +462,7 @@
hello
OUTPUT
-pir_output_is( <<'CODE', <<OUTPUT, "UTF-8 and Unicode hash keys, full bucket", 'todo' => 'TT #24');
+pir_output_is( <<'CODE', <<OUTPUT, "UTF-8 and Unicode hash keys, full bucket" );
.sub 'main'
.local string str0, str1
str0 = unicode:"infix:\u00b1"
@@ -483,13 +483,13 @@
fill_done:
$I0 = iseq str0, str1
- print "iseq str0, str1 => "
+ #print "iseq str0, str1 => "
say $I0
$S0 = hash[str0]
$S1 = hash[str1]
$I0 = iseq $S0, $S1
- print "iseq hash[str0], hash[str1] => "
+ #print "iseq hash[str0], hash[str1] => "
say $I0
say $S0
say $S1
More information about the parrot-commits
mailing list