[svn:parrot] r45626 - in trunk: include/parrot src/string src/string/encoding
chromatic at svn.parrot.org
chromatic at svn.parrot.org
Tue Apr 13 08:32:59 UTC 2010
Author: chromatic
Date: Tue Apr 13 08:32:58 2010
New Revision: 45626
URL: https://trac.parrot.org/parrot/changeset/45626
Log:
[encoding] Added ->hash member to ENCODING struct. This allows encodings to
violate all sorts of encapsulation when calculating the hash value of a string,
as they know the width of the characters and don't have to dance with string
iterators. This speeds up startup by 2.785% and improves runtime performance.
Note that currently only the fixed_8 encoding has a hashing function; other
encodings could use them for similar speedups.
Modified:
trunk/include/parrot/encoding.h
trunk/src/string/api.c
trunk/src/string/encoding/fixed_8.c
trunk/src/string/encoding/ucs2.c
trunk/src/string/encoding/utf16.c
trunk/src/string/encoding/utf8.c
Modified: trunk/include/parrot/encoding.h
==============================================================================
--- trunk/include/parrot/encoding.h Tue Apr 13 08:22:26 2010 (r45625)
+++ trunk/include/parrot/encoding.h Tue Apr 13 08:32:58 2010 (r45626)
@@ -30,6 +30,7 @@
typedef UINTVAL (*encoding_codepoints_t)(PARROT_INTERP, STRING *src);
typedef UINTVAL (*encoding_bytes_t)(PARROT_INTERP, STRING *src);
typedef UINTVAL (*encoding_find_cclass_t)(PARROT_INTERP, STRING *s, const INTVAL *typetable, INTVAL flags, UINTVAL offset, UINTVAL count);
+typedef size_t (*encoding_hash_t)(PARROT_INTERP, const STRING *s, size_t hashval);
/* iterator support */
@@ -57,6 +58,7 @@
encoding_bytes_t bytes;
encoding_iter_init_t iter_init;
encoding_find_cclass_t find_cclass;
+ encoding_hash_t hash;
};
typedef struct _encoding ENCODING;
@@ -234,6 +236,8 @@
((src)->encoding)->iter_init((i), (src), (iter))
#define ENCODING_FIND_CCLASS(i, src, typetable, flags, pos, end) \
((src)->encoding)->find_cclass((i), (src), (typetable), (flags), (pos), (end))
+#define ENCODING_HASH(i, src, seed) \
+ ((src)->encoding)->hash((i), (src), (seed))
#endif /* PARROT_ENCODING_H_GUARD */
Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c Tue Apr 13 08:22:26 2010 (r45625)
+++ trunk/src/string/api.c Tue Apr 13 08:32:58 2010 (r45626)
@@ -2656,15 +2656,19 @@
if (STRING_IS_NULL(s) || !s->strlen)
return hashval;
- /* ZZZZZ workaround for something not setting up encodings right */
- saneify_string(s);
+ if (s->encoding->hash)
+ hashval = ENCODING_HASH(interp, s, hashval);
+ else {
+ /* ZZZZZ workaround for something not setting up encodings right */
+ saneify_string(s);
- ENCODING_ITER_INIT(interp, s, &iter);
+ ENCODING_ITER_INIT(interp, s, &iter);
- for (offs = 0; offs < s->strlen; ++offs) {
- const UINTVAL c = iter.get_and_advance(interp, &iter);
- hashval += hashval << 5;
- hashval += c;
+ for (offs = 0; offs < s->strlen; ++offs) {
+ const UINTVAL c = iter.get_and_advance(interp, &iter);
+ hashval += hashval << 5;
+ hashval += c;
+ }
}
s->hashval = hashval;
Modified: trunk/src/string/encoding/fixed_8.c
==============================================================================
--- trunk/src/string/encoding/fixed_8.c Tue Apr 13 08:22:26 2010 (r45625)
+++ trunk/src/string/encoding/fixed_8.c Tue Apr 13 08:32:58 2010 (r45626)
@@ -1,5 +1,5 @@
/*
-Copyright (C) 2004-2009, Parrot Foundation.
+Copyright (C) 2004-2010, Parrot Foundation.
$Id$
=head1 NAME
@@ -63,6 +63,12 @@
__attribute__nonnull__(2)
FUNC_MODIFIES(*iter);
+static size_t fixed_8_hash(PARROT_INTERP,
+ ARGIN(const STRING *s),
+ size_t hashval)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2);
+
PARROT_WARN_UNUSED_RESULT
static UINTVAL get_byte(PARROT_INTERP,
ARGIN(const STRING *source_string),
@@ -186,6 +192,9 @@
, PARROT_ASSERT_ARG(iter))
#define ASSERT_ARGS_fixed8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(iter))
+#define ASSERT_ARGS_fixed_8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(s))
#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(source_string))
@@ -660,6 +669,34 @@
iter->set_position = fixed8_set_position;
}
+
+/*
+
+=item C<static size_t fixed_8_hash(PARROT_INTERP, const STRING *s, size_t
+hashval)>
+
+Returns the hashed value of the string C<s>, given a seed in C<hashval>.
+
+Starting from the seed, each of the C<s-E<gt>strlen> bytes beginning at
+C<s-E<gt>strstart> is folded into the running value as
+C<hashval = hashval * 33 + byte>. The string is only read, never modified.
+
+=cut
+
+*/
+
+static size_t
+fixed_8_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
+{
+    ASSERT_ARGS(fixed_8_hash)
+    /* The buffer is only read, so walk it through a pointer-to-const. The
+     * explicit cast makes the conversion to unsigned bytes intentional
+     * instead of relying on an implicit pointer conversion, and guarantees
+     * that bytes >= 0x80 are added as positive values. */
+    const unsigned char *pos = (const unsigned char *)s->strstart;
+    UINTVAL              len = s->strlen;
+
+    while (len--) {
+        hashval += hashval << 5;    /* hashval *= 33 */
+        hashval += *pos++;
+    }
+
+    return hashval;
+}
+
+
/*
=item C<ENCODING * Parrot_encoding_fixed_8_init(PARROT_INTERP)>
@@ -695,7 +732,8 @@
codepoints,
bytes,
iter_init,
- find_cclass
+ find_cclass,
+ fixed_8_hash
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Modified: trunk/src/string/encoding/ucs2.c
==============================================================================
--- trunk/src/string/encoding/ucs2.c Tue Apr 13 08:22:26 2010 (r45625)
+++ trunk/src/string/encoding/ucs2.c Tue Apr 13 08:32:58 2010 (r45626)
@@ -1,5 +1,5 @@
/*
-Copyright (C) 2001-2009, Parrot Foundation.
+Copyright (C) 2001-2010, Parrot Foundation.
$Id$
=head1 NAME
@@ -729,7 +729,8 @@
codepoints,
bytes,
iter_init,
- find_cclass
+ find_cclass,
+ NULL
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Parrot_register_encoding(interp, "ucs2", return_encoding);
Modified: trunk/src/string/encoding/utf16.c
==============================================================================
--- trunk/src/string/encoding/utf16.c Tue Apr 13 08:22:26 2010 (r45625)
+++ trunk/src/string/encoding/utf16.c Tue Apr 13 08:32:58 2010 (r45626)
@@ -1,5 +1,5 @@
/*
-Copyright (C) 2001-2009, Parrot Foundation.
+Copyright (C) 2001-2010, Parrot Foundation.
$Id$
=head1 NAME
@@ -843,7 +843,8 @@
codepoints,
bytes,
iter_init,
- find_cclass
+ find_cclass,
+ NULL
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Parrot_register_encoding(interp, "utf16", return_encoding);
Modified: trunk/src/string/encoding/utf8.c
==============================================================================
--- trunk/src/string/encoding/utf8.c Tue Apr 13 08:22:26 2010 (r45625)
+++ trunk/src/string/encoding/utf8.c Tue Apr 13 08:32:58 2010 (r45626)
@@ -1,5 +1,5 @@
/*
-Copyright (C) 2001-2009, Parrot Foundation.
+Copyright (C) 2001-2010, Parrot Foundation.
$Id$
=head1 NAME
@@ -1058,7 +1058,8 @@
codepoints,
bytes,
iter_init,
- find_cclass
+ find_cclass,
+ NULL
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
Parrot_register_encoding(interp, "utf8", return_encoding);
More information about the parrot-commits
mailing list