[svn:parrot] r45626 - in trunk: include/parrot src/string src/string/encoding

chromatic at svn.parrot.org chromatic at svn.parrot.org
Tue Apr 13 08:32:59 UTC 2010


Author: chromatic
Date: Tue Apr 13 08:32:58 2010
New Revision: 45626
URL: https://trac.parrot.org/parrot/changeset/45626

Log:
[encoding] Added ->hash member to ENCODING struct.  This allows encodings to
violate all sorts of encapsulation when calculating the hash value of a string,
as they know the width of the characters and don't have to dance with string
iterators.  This speeds up startup by 2.785% and improves runtime performance.
Note that currently only the fixed_8 encoding provides a hashing function; other
encodings could implement their own hash functions for similar speedups.

Modified:
   trunk/include/parrot/encoding.h
   trunk/src/string/api.c
   trunk/src/string/encoding/fixed_8.c
   trunk/src/string/encoding/ucs2.c
   trunk/src/string/encoding/utf16.c
   trunk/src/string/encoding/utf8.c

Modified: trunk/include/parrot/encoding.h
==============================================================================
--- trunk/include/parrot/encoding.h	Tue Apr 13 08:22:26 2010	(r45625)
+++ trunk/include/parrot/encoding.h	Tue Apr 13 08:32:58 2010	(r45626)
@@ -30,6 +30,7 @@
 typedef UINTVAL (*encoding_codepoints_t)(PARROT_INTERP, STRING *src);
 typedef UINTVAL (*encoding_bytes_t)(PARROT_INTERP, STRING *src);
 typedef UINTVAL (*encoding_find_cclass_t)(PARROT_INTERP, STRING *s, const INTVAL *typetable, INTVAL flags, UINTVAL offset, UINTVAL count);
+typedef size_t (*encoding_hash_t)(PARROT_INTERP, const STRING *s, size_t hashval);
 
 /* iterator support */
 
@@ -57,6 +58,7 @@
     encoding_bytes_t                    bytes;
     encoding_iter_init_t                iter_init;
     encoding_find_cclass_t              find_cclass;
+    encoding_hash_t                     hash;
 };
 
 typedef struct _encoding ENCODING;
@@ -234,6 +236,8 @@
     ((src)->encoding)->iter_init((i), (src), (iter))
 #define ENCODING_FIND_CCLASS(i, src, typetable, flags, pos, end) \
     ((src)->encoding)->find_cclass((i), (src), (typetable), (flags), (pos), (end))
+#define ENCODING_HASH(i, src, seed) \
+    ((src)->encoding)->hash((i), (src), (seed))
 
 #endif /* PARROT_ENCODING_H_GUARD */
 

Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c	Tue Apr 13 08:22:26 2010	(r45625)
+++ trunk/src/string/api.c	Tue Apr 13 08:32:58 2010	(r45626)
@@ -2656,15 +2656,19 @@
     if (STRING_IS_NULL(s) || !s->strlen)
         return hashval;
 
-    /* ZZZZZ workaround for something not setting up encodings right */
-    saneify_string(s);
+    if (s->encoding->hash)
+        hashval = ENCODING_HASH(interp, s, hashval);
+    else {
+        /* ZZZZZ workaround for something not setting up encodings right */
+        saneify_string(s);
 
-    ENCODING_ITER_INIT(interp, s, &iter);
+        ENCODING_ITER_INIT(interp, s, &iter);
 
-    for (offs = 0; offs < s->strlen; ++offs) {
-        const UINTVAL c = iter.get_and_advance(interp, &iter);
-        hashval += hashval << 5;
-        hashval += c;
+        for (offs = 0; offs < s->strlen; ++offs) {
+            const UINTVAL c = iter.get_and_advance(interp, &iter);
+            hashval += hashval << 5;
+            hashval += c;
+        }
     }
 
     s->hashval = hashval;

Modified: trunk/src/string/encoding/fixed_8.c
==============================================================================
--- trunk/src/string/encoding/fixed_8.c	Tue Apr 13 08:22:26 2010	(r45625)
+++ trunk/src/string/encoding/fixed_8.c	Tue Apr 13 08:32:58 2010	(r45626)
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2004-2009, Parrot Foundation.
+Copyright (C) 2004-2010, Parrot Foundation.
 $Id$
 
 =head1 NAME
@@ -63,6 +63,12 @@
         __attribute__nonnull__(2)
         FUNC_MODIFIES(*iter);
 
+static size_t fixed_8_hash(PARROT_INTERP,
+    ARGIN(const STRING *s),
+    size_t hashval)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
 PARROT_WARN_UNUSED_RESULT
 static UINTVAL get_byte(PARROT_INTERP,
     ARGIN(const STRING *source_string),
@@ -186,6 +192,9 @@
     , PARROT_ASSERT_ARG(iter))
 #define ASSERT_ARGS_fixed8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(iter))
+#define ASSERT_ARGS_fixed_8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(s))
 #define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(source_string))
@@ -660,6 +669,34 @@
     iter->set_position    = fixed8_set_position;
 }
 
+
+/*
+
+=item C<static size_t fixed_8_hash(PARROT_INTERP, const STRING *s, size_t
+hashval)>
+
+Returns the hashed value of the string, given a seed in hashval.
+
+=cut
+
+*/
+
+static size_t
+fixed_8_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
+{
+    ASSERT_ARGS(fixed_8_hash)
+    const unsigned char *pos = (const unsigned char *)s->strstart; /* read-only byte view */
+    UINTVAL              len = s->strlen; /* number of bytes folded into the hash */
+
+    while (len--) {
+        hashval += hashval << 5;    /* hashval *= 33 */
+        hashval += *(pos++);        /* add the next byte as an unsigned value */
+    }
+
+    return hashval;
+}
+
+
 /*
 
 =item C<ENCODING * Parrot_encoding_fixed_8_init(PARROT_INTERP)>
@@ -695,7 +732,8 @@
         codepoints,
         bytes,
         iter_init,
-        find_cclass
+        find_cclass,
+        fixed_8_hash
 
     };
     STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);

Modified: trunk/src/string/encoding/ucs2.c
==============================================================================
--- trunk/src/string/encoding/ucs2.c	Tue Apr 13 08:22:26 2010	(r45625)
+++ trunk/src/string/encoding/ucs2.c	Tue Apr 13 08:32:58 2010	(r45626)
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2001-2009, Parrot Foundation.
+Copyright (C) 2001-2010, Parrot Foundation.
 $Id$
 
 =head1 NAME
@@ -729,7 +729,8 @@
         codepoints,
         bytes,
         iter_init,
-        find_cclass
+        find_cclass,
+        NULL
     };
     STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
     Parrot_register_encoding(interp, "ucs2", return_encoding);

Modified: trunk/src/string/encoding/utf16.c
==============================================================================
--- trunk/src/string/encoding/utf16.c	Tue Apr 13 08:22:26 2010	(r45625)
+++ trunk/src/string/encoding/utf16.c	Tue Apr 13 08:32:58 2010	(r45626)
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2001-2009, Parrot Foundation.
+Copyright (C) 2001-2010, Parrot Foundation.
 $Id$
 
 =head1 NAME
@@ -843,7 +843,8 @@
         codepoints,
         bytes,
         iter_init,
-        find_cclass
+        find_cclass,
+        NULL
     };
     STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
     Parrot_register_encoding(interp, "utf16", return_encoding);

Modified: trunk/src/string/encoding/utf8.c
==============================================================================
--- trunk/src/string/encoding/utf8.c	Tue Apr 13 08:22:26 2010	(r45625)
+++ trunk/src/string/encoding/utf8.c	Tue Apr 13 08:32:58 2010	(r45626)
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2001-2009, Parrot Foundation.
+Copyright (C) 2001-2010, Parrot Foundation.
 $Id$
 
 =head1 NAME
@@ -1058,7 +1058,8 @@
         codepoints,
         bytes,
         iter_init,
-        find_cclass
+        find_cclass,
+        NULL
     };
     STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
     Parrot_register_encoding(interp, "utf8", return_encoding);


More information about the parrot-commits mailing list