[svn:parrot] r48209 - branches/gsoc_nfg/src/string/encoding

darbelo at svn.parrot.org darbelo at svn.parrot.org
Wed Jul 28 22:45:25 UTC 2010


Author: darbelo
Date: Wed Jul 28 22:45:25 2010
New Revision: 48209
URL: https://trac.parrot.org/parrot/changeset/48209

Log:
Add utf hashing. We can't fall back to the charset anymore.

Modified:
   branches/gsoc_nfg/src/string/encoding/utf16.c
   branches/gsoc_nfg/src/string/encoding/utf8.c
   branches/gsoc_nfg/src/string/encoding/utf8.h

Modified: branches/gsoc_nfg/src/string/encoding/utf16.c
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/utf16.c	Wed Jul 28 22:45:11 2010	(r48208)
+++ branches/gsoc_nfg/src/string/encoding/utf16.c	Wed Jul 28 22:45:25 2010	(r48209)
@@ -20,6 +20,7 @@
 
 #include "parrot/parrot.h"
 #include "../unicode.h"
+#include "utf8.h"
 
 /* HEADERIZER HFILE: src/string/encoding/utf16.h */
 
@@ -558,7 +559,7 @@
         bytes,
         iter_init,
         find_cclass,
-        NULL
+        utf_hash
     };
     STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
     Parrot_register_encoding(interp, "utf16", return_encoding);

Modified: branches/gsoc_nfg/src/string/encoding/utf8.c
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/utf8.c	Wed Jul 28 22:45:11 2010	(r48208)
+++ branches/gsoc_nfg/src/string/encoding/utf8.c	Wed Jul 28 22:45:25 2010	(r48209)
@@ -759,6 +759,35 @@
 
 /*
 
+=item C<size_t utf_hash(PARROT_INTERP, const STRING *src, size_t seed)>
+
+Computes the hash of the given STRING C<src> with starting seed value C<seed>.
+
+=cut
+
+*/
+
+size_t
+utf_hash(PARROT_INTERP, ARGIN(const STRING *src), size_t seed)
+{
+    ASSERT_ARGS(utf_hash)
+    String_iter iter;
+    UINTVAL     offs;
+    size_t      hashval = seed;  /* running hash, starts from the caller's seed */
+
+    ENCODING_ITER_INIT(interp, src, &iter);
+
+    for (offs = 0; offs < src->strlen; ++offs) {  /* exactly src->strlen iterator reads */
+        const UINTVAL c = iter.get_and_advance(interp, &iter);  /* NOTE(review): presumably the next character of src -- confirm */
+        hashval += hashval << 5;  /* same as hashval *= 33; size_t arithmetic wraps on overflow */
+        hashval += c;
+    }
+
+    return hashval;
+}
+
+/*
+
 =item C<void Parrot_encoding_utf8_init(PARROT_INTERP)>
 
 Initializes the UTF-8 encoding.
@@ -785,7 +814,7 @@
         bytes,
         iter_init,
         find_cclass,
-        NULL
+        utf_hash
     };
     STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
     Parrot_register_encoding(interp, "utf8", return_encoding);

Modified: branches/gsoc_nfg/src/string/encoding/utf8.h
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/utf8.h	Wed Jul 28 22:45:11 2010	(r48208)
+++ branches/gsoc_nfg/src/string/encoding/utf8.h	Wed Jul 28 22:45:25 2010	(r48209)
@@ -19,8 +19,15 @@
 void Parrot_encoding_utf8_init(PARROT_INTERP)
         __attribute__nonnull__(1);
 
+size_t utf_hash(PARROT_INTERP, ARGIN(const STRING *src), size_t seed)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
 #define ASSERT_ARGS_Parrot_encoding_utf8_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_utf_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: src/string/encoding/utf8.c */
 


More information about the parrot-commits mailing list