[svn:parrot] r48833 - in trunk: . compilers/imcc config/gen/makefiles config/inter examples/config/file include/parrot lib/Parrot/Configure/Step src src/dynpmc src/io src/ops src/packfile src/pmc src/string src/string/charset src/string/encoding t/op t/pmc tools/dev

nwellnhof at svn.parrot.org nwellnhof at svn.parrot.org
Tue Sep 7 22:58:41 UTC 2010


Author: nwellnhof
Date: Tue Sep  7 22:58:38 2010
New Revision: 48833
URL: https://trac.parrot.org/parrot/changeset/48833

Log:
Merge branch charset_massacre

Added:
   trunk/src/string/encoding/ascii.c
   trunk/src/string/encoding/ascii.h
   trunk/src/string/encoding/binary.c
   trunk/src/string/encoding/binary.h
      - copied, changed from r48832, trunk/src/string/charset/binary.h
   trunk/src/string/encoding/latin1.c
   trunk/src/string/encoding/latin1.h
   trunk/src/string/encoding/shared.c
      - copied, changed from r48832, trunk/src/string/charset/unicode.c
   trunk/src/string/encoding/shared.h
   trunk/src/string/encoding/tables.c
      - copied, changed from r48832, trunk/src/string/charset/tables.c
   trunk/src/string/encoding/tables.h
      - copied, changed from r48832, trunk/src/string/charset/tables.h
Deleted:
   trunk/config/inter/charset.pm
   trunk/config/inter/encoding.pm
   trunk/include/parrot/charset.h
   trunk/src/string/charset.c
   trunk/src/string/charset/ascii.c
   trunk/src/string/charset/ascii.h
   trunk/src/string/charset/binary.c
   trunk/src/string/charset/binary.h
   trunk/src/string/charset/iso-8859-1.c
   trunk/src/string/charset/iso-8859-1.h
   trunk/src/string/charset/tables.c
   trunk/src/string/charset/tables.h
   trunk/src/string/charset/unicode.c
   trunk/src/string/charset/unicode.h
   trunk/src/string/encoding/fixed_8.c
   trunk/src/string/encoding/fixed_8.h
Modified:
   trunk/Configure.pl
   trunk/MANIFEST
   trunk/compilers/imcc/pbc.c
   trunk/config/gen/makefiles/root.in
   trunk/examples/config/file/configcompiler
   trunk/examples/config/file/configwithfatalstep
   trunk/include/parrot/encoding.h
   trunk/include/parrot/parrot.h
   trunk/include/parrot/pobj.h
   trunk/include/parrot/string.h
   trunk/include/parrot/string_funcs.h
   trunk/lib/Parrot/Configure/Step/List.pm
   trunk/src/dynext.c
   trunk/src/dynpmc/Defines.in
   trunk/src/global_setup.c
   trunk/src/hash.c
   trunk/src/io/buffer.c
   trunk/src/io/utf8.c
   trunk/src/library.c
   trunk/src/ops/core_ops.c
   trunk/src/ops/string.ops
   trunk/src/packdump.c
   trunk/src/packfile.c
   trunk/src/packfile/pf_items.c
   trunk/src/packout.c
   trunk/src/pmc/bytebuffer.pmc
   trunk/src/pmc/packfile.pmc
   trunk/src/pmc/packfilefixupentry.pmc
   trunk/src/pmc/string.pmc
   trunk/src/pmc/stringbuilder.pmc
   trunk/src/pmc/stringiterator.pmc
   trunk/src/spf_vtable.c
   trunk/src/string/api.c
   trunk/src/string/encoding.c
   trunk/src/string/encoding/ucs2.c
   trunk/src/string/encoding/ucs2.h
   trunk/src/string/encoding/ucs4.c
   trunk/src/string/encoding/ucs4.h
   trunk/src/string/encoding/utf16.c
   trunk/src/string/encoding/utf16.h
   trunk/src/string/encoding/utf8.c
   trunk/src/string/encoding/utf8.h
   trunk/src/string/primitives.c
   trunk/t/op/string_cs.t
   trunk/t/pmc/bytebuffer.t
   trunk/t/pmc/filehandle.t
   trunk/t/pmc/io.t
   trunk/tools/dev/gen_charset_tables.pl

Modified: trunk/Configure.pl
==============================================================================
--- trunk/Configure.pl	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/Configure.pl	Tue Sep  7 22:58:38 2010	(r48833)
@@ -604,8 +604,6 @@
     init::optimize
     inter::shlibs
     inter::libparrot
-    inter::charset
-    inter::encoding
     inter::types
     auto::ops
     auto::alignptrs

Modified: trunk/MANIFEST
==============================================================================
--- trunk/MANIFEST	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/MANIFEST	Tue Sep  7 22:58:38 2010	(r48833)
@@ -348,8 +348,6 @@
 config/init/install.pm                                      []
 config/init/manifest.pm                                     []
 config/init/optimize.pm                                     []
-config/inter/charset.pm                                     []
-config/inter/encoding.pm                                    []
 config/inter/lex.pm                                         []
 config/inter/libparrot.pm                                   []
 config/inter/make.pm                                        []
@@ -947,7 +945,6 @@
 include/parrot/caches.h                                     [main]include
 include/parrot/call.h                                       [main]include
 include/parrot/cclass.h                                     [main]include
-include/parrot/charset.h                                    [main]include
 include/parrot/compiler.h                                   [main]include
 include/parrot/context.h                                    [main]include
 include/parrot/core_types.h                                 [main]include
@@ -1444,20 +1441,17 @@
 src/spf_render.c                                            []
 src/spf_vtable.c                                            []
 src/string/api.c                                            []
-src/string/charset.c                                        []
-src/string/charset/ascii.c                                  []
-src/string/charset/ascii.h                                  []
-src/string/charset/binary.c                                 []
-src/string/charset/binary.h                                 []
-src/string/charset/iso-8859-1.c                             []
-src/string/charset/iso-8859-1.h                             []
-src/string/charset/tables.c                                 []
-src/string/charset/tables.h                                 []
-src/string/charset/unicode.c                                []
-src/string/charset/unicode.h                                []
 src/string/encoding.c                                       []
-src/string/encoding/fixed_8.c                               []
-src/string/encoding/fixed_8.h                               []
+src/string/encoding/ascii.c                                 []
+src/string/encoding/ascii.h                                 []
+src/string/encoding/binary.c                                []
+src/string/encoding/binary.h                                []
+src/string/encoding/latin1.c                                []
+src/string/encoding/latin1.h                                []
+src/string/encoding/shared.c                                []
+src/string/encoding/shared.h                                []
+src/string/encoding/tables.c                                []
+src/string/encoding/tables.h                                []
 src/string/encoding/ucs2.c                                  []
 src/string/encoding/ucs2.h                                  []
 src/string/encoding/ucs4.c                                  []

Modified: trunk/compilers/imcc/pbc.c
==============================================================================
--- trunk/compilers/imcc/pbc.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/compilers/imcc/pbc.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -973,9 +973,8 @@
          * get first part as charset, rest as string
          */
         STRING     *s;
-        const CHARSET *s_charset;
-        const ENCODING *s_encoding = NULL;
-        const ENCODING *src_encoding;
+        const STR_VTABLE *s_encoding;
+        const STR_VTABLE *src_encoding;
         #define MAX_NAME 31
         char charset_name[MAX_NAME + 1];
         char encoding_name[MAX_NAME + 1];
@@ -983,38 +982,38 @@
         char * p2 = strchr(r->name, ':');
         PARROT_ASSERT(p && p[-1] == ':');
         if (p2 < p -1) {
+            /* Handle the old 'encoding:charset' format by trying
+             * encoding as well as charset */
             strncpy(encoding_name, buf, p2 - buf);
             encoding_name[p2-buf] = '\0';
             strncpy(charset_name, p2 +1, p - p2 - 2);
             charset_name[p- p2 - 2] = '\0';
             /*fprintf(stderr, "%s:%s\n", charset_name, encoding_name);*/
-            s_charset = Parrot_find_charset(interp, charset_name);
-            if (s_charset == NULL)
-                Parrot_ex_throw_from_c_args(interp, NULL,
-                        EXCEPTION_INVALID_STRING_REPRESENTATION,
-                        "Unknown charset '%s'", charset_name);
             s_encoding = Parrot_find_encoding(interp, encoding_name);
-            if (s_encoding == NULL)
-                Parrot_ex_throw_from_c_args(interp, NULL,
-                        EXCEPTION_INVALID_STRING_REPRESENTATION,
-                        "Unknown encoding '%s'", encoding_name);
+            if (s_encoding == NULL) {
+                s_encoding = Parrot_find_encoding(interp, charset_name);
+                if (s_encoding == NULL)
+                    Parrot_ex_throw_from_c_args(interp, NULL,
+                            EXCEPTION_INVALID_STRING_REPRESENTATION,
+                            "Unknown encoding '%s:%s'",
+                            encoding_name, charset_name);
+            }
         }
         else {
-            strncpy(charset_name, buf, p - buf - 1);
-            charset_name[p - buf - 1] = '\0';
-            /*fprintf(stderr, "%s\n", charset_name);*/
-            s_charset = Parrot_find_charset(interp, charset_name);
-            if (s_charset == NULL)
+            strncpy(encoding_name, buf, p - buf - 1);
+            encoding_name[p - buf - 1] = '\0';
+            charset_name[0] = '\0';
+            /*fprintf(stderr, "%s\n", encoding_name);*/
+            s_encoding = Parrot_find_encoding(interp, encoding_name);
+            if (s_encoding == NULL)
                 Parrot_ex_throw_from_c_args(interp, NULL,
                         EXCEPTION_INVALID_STRING_REPRESENTATION,
-                        "Unknown charset '%s'", charset_name);
+                        "Unknown encoding '%s'", encoding_name);
         }
-        if (strcmp(charset_name, "unicode") == 0)
-            src_encoding = Parrot_utf8_encoding_ptr;
+        if (s_encoding->max_bytes_per_codepoint == 1)
+            src_encoding = Parrot_ascii_encoding_ptr;
         else
-            src_encoding = Parrot_fixed_8_encoding_ptr;
-        if (s_encoding == NULL)
-            s_encoding = src_encoding;
+            src_encoding = Parrot_utf8_encoding_ptr;
 
         /* past delim */
         buf     = p + 1;
@@ -1032,10 +1031,10 @@
             }
             {
                 STRING * aux = Parrot_str_new_init(interp, buf, p - buf,
-                        src_encoding, s_charset, 0);
+                        src_encoding, 0);
                 s = Parrot_str_unescape_string(interp, aux,
-                        s_charset, s_encoding, PObj_constant_FLAG);
-                if (!CHARSET_VALIDATE(interp, s))
+                        s_encoding, PObj_constant_FLAG);
+                if (!STRING_validate(interp, s))
                        Parrot_ex_throw_from_c_args(interp, NULL,
                                EXCEPTION_INVALID_STRING_REPRESENTATION,
                                "Malformed string");
@@ -1882,7 +1881,7 @@
     char   *src, *chr, *start;
     int     base;
 
-    if (s->encoding != Parrot_fixed_8_encoding_ptr)
+    if (STRING_max_bytes_per_codepoint(s) != 1)
         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
             "unhandled string encoding in FixedIntegerArray initialization");
 

Modified: trunk/config/gen/makefiles/root.in
==============================================================================
--- trunk/config/gen/makefiles/root.in	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/config/gen/makefiles/root.in	Tue Sep  7 22:58:38 2010	(r48833)
@@ -368,7 +368,6 @@
 	$(INC_DIR)/interpreter.h \
 	$(INC_DIR)/datatypes.h \
 	$(INC_DIR)/encoding.h \
-	$(INC_DIR)/charset.h \
 	$(INC_DIR)/string.h \
 	$(INC_DIR)/string_primitives.h \
 	$(INC_DIR)/hash.h \
@@ -417,11 +416,20 @@
 GENERAL_H_FILES   = $(NONGEN_HEADERS) $(GEN_HEADERS) \
 
 
-CHARSET_O_FILES   = @TEMP_charset_o@
 CLASS_PMC_FILES   = @TEMP_pmc_classes_pmc@
 CLASS_O_FILES     = @TEMP_pmc_classes_o@
 CLASS_STR_FILES   = @TEMP_pmc_classes_str@
-ENCODING_O_FILES  = @TEMP_encoding_o@
+
+ENCODING_O_FILES = \
+    src/string/encoding/shared$(O) \
+    src/string/encoding/tables$(O) \
+    src/string/encoding/ascii$(O) \
+    src/string/encoding/latin1$(O) \
+    src/string/encoding/binary$(O) \
+    src/string/encoding/utf8$(O) \
+    src/string/encoding/utf16$(O) \
+    src/string/encoding/ucs2$(O) \
+    src/string/encoding/ucs4$(O)
 
 IO_O_FILES = \
     src/io/core$(O) \
@@ -440,7 +448,6 @@
     src/string/api$(O) \
     src/ops/core_ops$(O) \
 #IF(i386_has_gcc_cmpxchg):    src/atomic/gcc_x86$(O) \
-    src/string/charset$(O) \
     src/core_pmcs$(O) \
     src/datatypes$(O) \
     src/debug$(O) \
@@ -934,7 +941,6 @@
 
 O_FILES = \
     $(INTERP_O_FILES) \
-    $(CHARSET_O_FILES) \
     $(IO_O_FILES) \
     $(CLASS_O_FILES) \
     $(ENCODING_O_FILES) \
@@ -1511,18 +1517,40 @@
 
 src/spf_vtable$(O) : $(PARROT_H_HEADERS) src/spf_vtable.str src/spf_vtable.c
 
-src/string/encoding$(O) : $(PARROT_H_HEADERS) src/string/encoding.c
+src/string/encoding$(O) : $(PARROT_H_HEADERS)
 
-src/string/charset$(O) : $(PARROT_H_HEADERS) src/string/charset.c \
-	src/string/encoding/fixed_8.h \
-	src/string/encoding/utf8.h \
-	src/string/encoding/utf16.h \
-	src/string/encoding/ucs2.h \
-	src/string/encoding/ucs4.h \
-	src/string/charset/ascii.h \
-	src/string/charset/binary.h \
-	src/string/charset/iso-8859-1.h \
-	src/string/charset/unicode.h
+src/string/encoding/tables$(O) : $(PARROT_H_HEADERS) \
+    src/string/encoding/tables.h
+src/string/encoding/shared$(O) : $(PARROT_H_HEADERS) \
+    src/string/encoding/shared.h \
+    src/string/encoding/tables.h
+src/string/encoding/ascii$(O) : $(PARROT_H_HEADERS) \
+    src/string/encoding/ascii.h \
+    src/string/encoding/shared.h \
+    src/string/encoding/tables.h
+src/string/encoding/latin1$(O) : $(PARROT_H_HEADERS) \
+    src/string/encoding/latin1.h \
+    src/string/encoding/shared.h \
+    src/string/encoding/tables.h
+src/string/encoding/binary$(O) : $(PARROT_H_HEADERS) \
+    src/string/encoding/binary.h \
+    src/string/encoding/shared.h
+src/string/encoding/utf8$(O) : $(PARROT_H_HEADERS) \
+    src/string/encoding/utf8.h \
+    src/string/encoding/shared.h \
+    src/string/unicode.h
+src/string/encoding/utf16$(O) : $(PARROT_H_HEADERS) \
+    src/string/encoding/utf16.h \
+    src/string/encoding/shared.h \
+    src/string/unicode.h
+src/string/encoding/ucs2$(O) : $(PARROT_H_HEADERS) \
+    src/string/encoding/ucs2.h \
+    src/string/encoding/shared.h \
+    src/string/unicode.h
+src/string/encoding/ucs4$(O) : $(PARROT_H_HEADERS) \
+    src/string/encoding/ucs4.h \
+    src/string/encoding/shared.h \
+    src/string/unicode.h
 
 src/pbc_merge$(O) : $(INC_DIR)/embed.h src/pbc_merge.c \
 	include/pmc/pmc_sub.h $(INC_DIR)/oplib/ops.h $(PARROT_H_HEADERS)
@@ -1553,10 +1581,6 @@
 
 @TEMP_pmc_build@
 
-@TEMP_charset_build@
-
-@TEMP_encoding_build@
-
 # $(CONFIGURE_GENERATED_FILES) : Configure.pl
 #	$(PERL) Configure.pl
 
@@ -2276,7 +2300,6 @@
     src/pmc \
     src/runcore \
     src/string \
-    src/string/charset \
     src/string/encoding \
     $(BUILD_DIR) \
     $(BUILD_DIR)/t/perl \

Deleted: trunk/config/inter/charset.pm
==============================================================================
--- trunk/config/inter/charset.pm	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,96 +0,0 @@
-# Copyright (C) 2001-2003, Parrot Foundation.
-# $Id$
-
-=head1 NAME
-
-config/inter/charset.pm - charset files
-
-=head1 DESCRIPTION
-
-Asks the user to select which charset files to include.
-
-=cut
-
-package inter::charset;
-
-use strict;
-use warnings;
-
-use File::Basename qw/basename/;
-
-use base qw(Parrot::Configure::Step);
-
-use Parrot::Configure::Utils ':inter';
-
-
-sub _init {
-    my $self = shift;
-    my %data;
-    $data{description} = q{Which charset files should be compiled in};
-    $data{result}      = q{};
-    return \%data;
-}
-
-my @charsets_defaults =
-    defined( $ENV{TEST_CHARSET} )
-    ? $ENV{TEST_CHARSET}
-    : sort map { basename($_) } glob "./src/string/charset/*.c";
-
-sub runstep {
-    my ( $self, $conf ) = @_;
-
-    my @charset = @charsets_defaults;
-
-    my $charset_list = join ( ' ', grep { defined $_ } @charset );
-
-    if ( $conf->options->get('ask') ) {
-        print <<"END";
-
-
-The following charsets are available:
-  @charset
-END
-        $charset_list = prompt(
-            'Which charsets would you like?',
-            $charset_list
-        );
-    }
-
-    # names of class files for src/pmc/Makefile
-    ( my $TEMP_charset_o = $charset_list ) =~ s/\.c/\$(O)/g;
-
-    my $TEMP_charset_build = <<"E_NOTE";
-
-# the following part of the Makefile was built by 'config/inter/charset.pm'
-
-E_NOTE
-
-    foreach my $charset ( split( /\s+/, $charset_list ) ) {
-        $charset =~ s/\.c$//;
-        $TEMP_charset_build .= <<END
-src/string/charset/$charset\$(O): src/string/charset/$charset.h src/string/charset/ascii.h src/string/charset/$charset.c src/string/charset/tables.h \$(NONGEN_HEADERS)
-
-
-END
-    }
-
-    # build list of libraries for link line in Makefile
-    $TEMP_charset_o =~ s{^| }{ src/string/charset/}g;
-
-    $conf->data->set(
-        charset            => $charset_list,
-        TEMP_charset_o     => $TEMP_charset_o,
-        TEMP_charset_build => $TEMP_charset_build,
-    );
-
-    return 1;
-}
-
-1;
-
-# Local Variables:
-#   mode: cperl
-#   cperl-indent-level: 4
-#   fill-column: 100
-# End:
-# vim: expandtab shiftwidth=4:

Deleted: trunk/config/inter/encoding.pm
==============================================================================
--- trunk/config/inter/encoding.pm	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,96 +0,0 @@
-# Copyright (C) 2001-2003, Parrot Foundation.
-# $Id$
-
-=head1 NAME
-
-config/inter/encoding.pm - encoding files
-
-=head1 DESCRIPTION
-
-Asks the user to select which encoding files to include.
-
-=cut
-
-package inter::encoding;
-
-use strict;
-use warnings;
-
-use base qw(Parrot::Configure::Step);
-
-use File::Basename qw/basename/;
-
-use Parrot::Configure::Utils ':inter';
-
-
-sub _init {
-    my $self = shift;
-    my %data;
-    $data{description} = q{Which encoding files should be compiled in};
-    $data{result}      = q{};
-    return \%data;
-}
-
-my @encodings_defaults =
-    defined( $ENV{TEST_ENCODING} )
-    ? $ENV{TEST_ENCODING}
-    : sort map { basename($_) } glob "./src/string/encoding/*.c";
-
-sub runstep {
-    my ( $self, $conf ) = @_;
-
-    my @encodings = @encodings_defaults;
-
-    my $encoding_list = join( ' ', grep { defined $_ } @encodings );
-
-    if ( $conf->options->get('ask') ) {
-        print <<"END";
-
-
-The following encodings are available:
-  @encodings
-END
-        $encoding_list = prompt(
-            'Which encodings would you like?',
-            $encoding_list
-        );
-    }
-
-    # names of class files for src/pmc/Makefile
-    ( my $TEMP_encoding_o = $encoding_list ) =~ s/\.c/\$(O)/g;
-
-    my $TEMP_encoding_build = <<"E_NOTE";
-
-# the following part of the Makefile was built by 'config/inter/encoding.pm'
-
-E_NOTE
-
-    foreach my $encoding ( split( /\s+/, $encoding_list ) ) {
-        $encoding =~ s/\.c$//;
-        $TEMP_encoding_build .= <<END
-src/string/encoding/$encoding\$(O): src/string/encoding/$encoding.h src/string/encoding/$encoding.c src/string/unicode.h \$(NONGEN_HEADERS)
-
-
-END
-    }
-
-    # build list of libraries for link line in Makefile
-    $TEMP_encoding_o =~ s{^| }{ src/string/encoding/}g;
-
-    $conf->data->set(
-        encoding            => $encoding_list,
-        TEMP_encoding_o     => $TEMP_encoding_o,
-        TEMP_encoding_build => $TEMP_encoding_build,
-    );
-
-    return 1;
-}
-
-1;
-
-# Local Variables:
-#   mode: cperl
-#   cperl-indent-level: 4
-#   fill-column: 100
-# End:
-# vim: expandtab shiftwidth=4:

Modified: trunk/examples/config/file/configcompiler
==============================================================================
--- trunk/examples/config/file/configcompiler	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/examples/config/file/configcompiler	Tue Sep  7 22:58:38 2010	(r48833)
@@ -34,8 +34,6 @@
 init::optimize
 inter::shlibs
 inter::libparrot
-inter::charset
-inter::encoding
 inter::types
 auto::ops
 auto::pmc

Modified: trunk/examples/config/file/configwithfatalstep
==============================================================================
--- trunk/examples/config/file/configwithfatalstep	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/examples/config/file/configwithfatalstep	Tue Sep  7 22:58:38 2010	(r48833)
@@ -26,8 +26,6 @@
 init::optimize
 inter::shlibs
 inter::libparrot
-inter::charset
-inter::encoding
 inter::types
 auto::ops
 auto::pmc

Deleted: trunk/include/parrot/charset.h
==============================================================================
--- trunk/include/parrot/charset.h	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,277 +0,0 @@
-/* charset.h
- *  Copyright (C) 2004-2010, Parrot Foundation.
- *  SVN Info
- *     $Id$
- *  Overview:
- *     This is the header for the 8-bit fixed-width encoding
- */
-
-#ifndef PARROT_CHARSET_H_GUARD
-#define PARROT_CHARSET_H_GUARD
-
-
-#include "parrot/encoding.h"
-#include "parrot/cclass.h"
-
-struct _charset;
-typedef struct _charset CHARSET;
-
-
-#if !defined PARROT_NO_EXTERN_CHARSET_PTRS
-PARROT_DATA CHARSET *Parrot_iso_8859_1_charset_ptr;
-PARROT_DATA CHARSET *Parrot_binary_charset_ptr;
-PARROT_DATA CHARSET *Parrot_default_charset_ptr;
-PARROT_DATA CHARSET *Parrot_unicode_charset_ptr;
-PARROT_DATA CHARSET *Parrot_ascii_charset_ptr;
-#endif
-
-#define PARROT_DEFAULT_CHARSET Parrot_ascii_charset_ptr
-#define PARROT_BINARY_CHARSET Parrot_binary_charset_ptr
-#define PARROT_UNICODE_CHARSET Parrot_unicode_charset_ptr
-
-typedef STRING * (*charset_get_graphemes_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
-typedef STRING * (*charset_to_charset_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef STRING * (*charset_from_unicode_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef STRING * (*charset_compose_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef STRING * (*charset_decompose_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef STRING * (*charset_upcase_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef STRING * (*charset_downcase_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef STRING * (*charset_titlecase_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef STRING * (*charset_upcase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef STRING * (*charset_downcase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef STRING * (*charset_titlecase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef INTVAL   (*charset_compare_t)(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs));
-typedef INTVAL   (*charset_index_t)(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search_string), UINTVAL offset);
-typedef INTVAL   (*charset_rindex_t)(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search_string), UINTVAL offset);
-typedef UINTVAL  (*charset_validate_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef INTVAL   (*charset_is_cclass_t)(PARROT_INTERP, INTVAL, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_cclass_t)(PARROT_INTERP, INTVAL, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
-typedef INTVAL   (*charset_find_not_cclass_t)(PARROT_INTERP, INTVAL, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
-typedef INTVAL   (*charset_is_wordchar_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_wordchar_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_not_wordchar_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_is_whitespace_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_whitespace_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_not_whitespace_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_is_digit_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_digit_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_not_digit_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_is_punctuation_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_punctuation_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_not_punctuation_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_is_newline_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_newline_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_not_newline_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef INTVAL   (*charset_find_word_boundary_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef STRING * (*charset_string_from_codepoint_t)(PARROT_INTERP, UINTVAL codepoint);
-typedef size_t   (*charset_compute_hash_t)(PARROT_INTERP, ARGIN(const STRING *src), size_t seed);
-
-typedef STRING * (*charset_converter_t)(PARROT_INTERP, ARGIN(const STRING *src));
-
-/* HEADERIZER BEGIN: src/string/charset.c */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_CAN_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-const char * Parrot_charset_c_name(SHIM_INTERP, INTVAL number_of_charset);
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_CAN_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-STRING * Parrot_charset_name(SHIM_INTERP, INTVAL number_of_charset);
-
-PARROT_EXPORT
-PARROT_WARN_UNUSED_RESULT
-INTVAL Parrot_charset_number(PARROT_INTERP,
-    ARGIN(const STRING *charsetname))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_WARN_UNUSED_RESULT
-INTVAL Parrot_charset_number_of_str(SHIM_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(2);
-
-PARROT_EXPORT
-void Parrot_charsets_encodings_deinit(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-PARROT_EXPORT
-void Parrot_charsets_encodings_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_WARN_UNUSED_RESULT
-PARROT_CAN_RETURN_NULL
-const CHARSET * Parrot_default_charset(SHIM_INTERP);
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_CAN_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-const CHARSET * Parrot_find_charset(SHIM_INTERP,
-    ARGIN(const char *charsetname))
-        __attribute__nonnull__(2);
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_WARN_UNUSED_RESULT
-PARROT_CAN_RETURN_NULL
-charset_converter_t Parrot_find_charset_converter(SHIM_INTERP,
-    ARGIN(const CHARSET *lhs),
-    ARGIN(const CHARSET *rhs))
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_CAN_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-const CHARSET * Parrot_get_charset(SHIM_INTERP, INTVAL number_of_charset);
-
-PARROT_EXPORT
-PARROT_CAN_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-const CHARSET * Parrot_load_charset(PARROT_INTERP,
-    ARGIN(const char *charsetname))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_EXPORT
-INTVAL Parrot_make_default_charset(SHIM_INTERP,
-    SHIM(const char *charsetname),
-    ARGIN(const CHARSET *charset))
-        __attribute__nonnull__(3);
-
-PARROT_EXPORT
-PARROT_CANNOT_RETURN_NULL
-PARROT_MALLOC
-CHARSET * Parrot_new_charset(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-PARROT_EXPORT
-INTVAL Parrot_register_charset(PARROT_INTERP,
-    ARGIN(const char *charsetname),
-    ARGIN(CHARSET *charset))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_EXPORT
-void Parrot_register_charset_converter(PARROT_INTERP,
-    ARGIN(const CHARSET *lhs),
-    ARGIN(const CHARSET *rhs),
-    ARGIN(charset_converter_t func))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        __attribute__nonnull__(4);
-
-#define ASSERT_ARGS_Parrot_charset_c_name __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_Parrot_charset_name __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_Parrot_charset_number __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(charsetname))
-#define ASSERT_ARGS_Parrot_charset_number_of_str __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_Parrot_charsets_encodings_deinit \
-     __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_Parrot_charsets_encodings_init \
-     __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_Parrot_default_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_Parrot_find_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(charsetname))
-#define ASSERT_ARGS_Parrot_find_charset_converter __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(lhs) \
-    , PARROT_ASSERT_ARG(rhs))
-#define ASSERT_ARGS_Parrot_get_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_Parrot_load_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(charsetname))
-#define ASSERT_ARGS_Parrot_make_default_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(charset))
-#define ASSERT_ARGS_Parrot_new_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_Parrot_register_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(charsetname) \
-    , PARROT_ASSERT_ARG(charset))
-#define ASSERT_ARGS_Parrot_register_charset_converter \
-     __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(lhs) \
-    , PARROT_ASSERT_ARG(rhs) \
-    , PARROT_ASSERT_ARG(func))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: src/string/charset.c */
-
-struct _charset {
-    const char *name;
-    charset_get_graphemes_t get_graphemes;
-    charset_to_charset_t to_charset;
-    charset_compose_t compose;
-    charset_decompose_t decompose;
-    charset_upcase_t upcase;
-    charset_downcase_t downcase;
-    charset_titlecase_t titlecase;
-    charset_upcase_first_t upcase_first;
-    charset_downcase_first_t downcase_first;
-    charset_titlecase_first_t titlecase_first;
-    charset_compare_t compare;
-    charset_index_t index;
-    charset_rindex_t rindex;
-    charset_validate_t validate;
-    charset_is_cclass_t is_cclass;
-    charset_find_cclass_t find_cclass;
-    charset_find_not_cclass_t find_not_cclass;
-    charset_string_from_codepoint_t string_from_codepoint;
-    charset_compute_hash_t compute_hash;
-    const ENCODING *preferred_encoding;
-};
-
-#define CHARSET_GET_GRAPEMES(interp, source, offset, count) ((source)->charset)->get_graphemes((interp), (source), (offset), (count))
-#define CHARSET_TO_UNICODE(interp, source, dest) ((source)->charset)->to_unicode((interp), (source), (dest))
-#define CHARSET_COMPOSE(interp, source) ((source)->charset)->compose((interp), (source))
-#define CHARSET_DECOMPOSE(interp, source) ((source)->charset)->decompose((interp), (source))
-#define CHARSET_UPCASE(interp, source) ((source)->charset)->upcase((interp), (source))
-#define CHARSET_DOWNCASE(interp, source) ((source)->charset)->downcase((interp), (source))
-#define CHARSET_TITLECASE(interp, source) ((source)->charset)->titlecase((interp), (source))
-#define CHARSET_UPCASE_FIRST(interp, source) ((source)->charset)->upcase_first((interp), (source))
-#define CHARSET_DOWNCASE_FIRST(interp, source) ((source)->charset)->downcase_first((interp), (source))
-#define CHARSET_TITLECASE_FIRST(interp, source) ((source)->charset)->titlecase_first((interp), (source))
-#define CHARSET_COMPARE(interp, lhs, rhs) ((const CHARSET *)(lhs)->charset)->compare((interp), (lhs), (rhs))
-#define CHARSET_INDEX(interp, source, search, offset) ((source)->charset)->index((interp), (source), (search), (offset))
-#define CHARSET_RINDEX(interp, source, search, offset) ((source)->charset)->rindex((interp), (source), (search), (offset))
-#define CHARSET_VALIDATE(interp, source) ((source)->charset)->validate((interp), (source))
-#define CHARSET_IS_CCLASS(interp, flags, source, offset) ((source)->charset)->is_cclass((interp), (flags), (source), (offset))
-#define CHARSET_FIND_CCLASS(interp, flags, source, offset, count) ((source)->charset)->find_cclass((interp), (flags), (source), (offset), (count))
-#define CHARSET_FIND_NOT_CCLASS(interp, flags, source, offset, count) ((source)->charset)->find_not_cclass((interp), (flags), (source), (offset), (count))
-#define CHARSET_COMPUTE_HASH(interp, source, seed) ((source)->charset)->compute_hash((interp), (source), (seed))
-#define CHARSET_GET_PREFERRED_ENCODING(interp, source) ((source)->charset)->preferred_encoding
-
-#define CHARSET_TO_ENCODING(interp, source) ((source)->encoding)->to_encoding((interp), (source))
-#define CHARSET_COPY_TO_ENCODING(interp, source) ((source)->encoding)->copy_to_encoding((interp), (source))
-#define CHARSET_GET_CODEPOINT(interp, source, offset) ((source)->encoding)->get_codepoint((interp), (source), (offset))
-#define CHARSET_GET_BYTE(interp, source, offset) ((source)->encoding)->get_byte((interp), (source), (offset))
-#define CHARSET_SET_BYTE(interp, source, offset, value) ((source)->encoding)->set_byte((interp), (source), (offset), (value))
-#define CHARSET_GET_CODEPOINTS(interp, source, offset, count) ((source)->encoding)->get_codepoints((interp), (source), (offset), (count))
-#define CHARSET_GET_BYTES(interp, source, offset, count) ((source)->encoding)->get_bytes((interp), (source), (offset), (count))
-#define CHARSET_CODEPOINTS(interp, source) ((source)->encoding)->codepoints((interp), (source))
-#define CHARSET_BYTES(interp, source) ((source)->encoding)->bytes((interp), (source))
-
-
-#endif /* PARROT_CHARSET_H_GUARD */
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Modified: trunk/include/parrot/encoding.h
==============================================================================
--- trunk/include/parrot/encoding.h	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/include/parrot/encoding.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -15,68 +15,15 @@
 
 #include "parrot/parrot.h"
 
-typedef STRING * (*encoding_to_encoding_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef UINTVAL  (*encoding_get_codepoint_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef UINTVAL  (*encoding_get_byte_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
-typedef void     (*encoding_set_byte_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
-typedef STRING * (*encoding_get_codepoints_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
-typedef STRING * (*encoding_get_bytes_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
-typedef UINTVAL  (*encoding_codepoints_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef UINTVAL  (*encoding_bytes_t)(PARROT_INTERP, ARGIN(const STRING *src));
-typedef UINTVAL  (*encoding_find_cclass_t)(PARROT_INTERP, ARGIN(const STRING *s), ARGIN(const INTVAL *typetable), INTVAL flags, UINTVAL offset, UINTVAL count);
-typedef size_t   (*encoding_hash_t)(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval);
-
-/* iterator support */
-
-struct string_iterator_t;       /* s. parrot/string.h */
-
-typedef UINTVAL (*encoding_iter_get_t)(
-    PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL  offset);
-typedef void    (*encoding_iter_skip_t)(
-    PARROT_INTERP, const STRING *str,       String_iter *i, INTVAL  skip);
-typedef UINTVAL (*encoding_iter_get_and_advance_t)(
-    PARROT_INTERP, const STRING *str,       String_iter *i);
-typedef void    (*encoding_iter_set_and_advance_t)(
-    PARROT_INTERP,       STRING *str,       String_iter *i, UINTVAL c);
-typedef void    (*encoding_iter_set_position_t)(
-    PARROT_INTERP, const STRING *str,       String_iter *i, UINTVAL pos);
-
-struct _encoding {
-    ARGIN(const char *name);
-    UINTVAL max_bytes_per_codepoint;
-    encoding_to_encoding_t              to_encoding;
-    encoding_get_codepoint_t            get_codepoint;
-    encoding_get_byte_t                 get_byte;
-    encoding_set_byte_t                 set_byte;
-    encoding_get_codepoints_t           get_codepoints;
-    encoding_get_bytes_t                get_bytes;
-    encoding_codepoints_t               codepoints;
-    encoding_bytes_t                    bytes;
-    encoding_find_cclass_t              find_cclass;
-    encoding_hash_t                     hash;
-    encoding_iter_get_t                 iter_get;
-    encoding_iter_skip_t                iter_skip;
-    encoding_iter_get_and_advance_t     iter_get_and_advance;
-    encoding_iter_set_and_advance_t     iter_set_and_advance;
-    encoding_iter_set_position_t        iter_set_position;
-};
-
-typedef struct _encoding ENCODING;
-
-#if !defined PARROT_NO_EXTERN_ENCODING_PTRS
-PARROT_DATA ENCODING *Parrot_fixed_8_encoding_ptr;
-PARROT_DATA ENCODING *Parrot_utf8_encoding_ptr;
-PARROT_DATA ENCODING *Parrot_utf16_encoding_ptr;
-PARROT_DATA ENCODING *Parrot_ucs2_encoding_ptr;
-PARROT_DATA ENCODING *Parrot_ucs4_encoding_ptr;
-PARROT_DATA ENCODING *Parrot_default_encoding_ptr;
-#endif
-
-#define PARROT_DEFAULT_ENCODING Parrot_fixed_8_encoding_ptr
-#define PARROT_FIXED_8_ENCODING Parrot_fixed_8_encoding_ptr
-#define PARROT_DEFAULT_FOR_UNICODE_ENCODING NULL
+PARROT_DATA STR_VTABLE *Parrot_ascii_encoding_ptr;
+PARROT_DATA STR_VTABLE *Parrot_latin1_encoding_ptr;
+PARROT_DATA STR_VTABLE *Parrot_binary_encoding_ptr;
+PARROT_DATA STR_VTABLE *Parrot_utf8_encoding_ptr;
+PARROT_DATA STR_VTABLE *Parrot_utf16_encoding_ptr;
+PARROT_DATA STR_VTABLE *Parrot_ucs2_encoding_ptr;
+PARROT_DATA STR_VTABLE *Parrot_ucs4_encoding_ptr;
 
-typedef INTVAL (*encoding_converter_t)(PARROT_INTERP, ENCODING *lhs, ENCODING *rhs);
+PARROT_DATA STR_VTABLE *Parrot_default_encoding_ptr;
 
 /* HEADERIZER BEGIN: src/string/encoding.c */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
@@ -85,7 +32,7 @@
 PARROT_PURE_FUNCTION
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
-const ENCODING * Parrot_default_encoding(SHIM_INTERP);
+const STR_VTABLE * Parrot_default_encoding(SHIM_INTERP);
 
 PARROT_EXPORT
 PARROT_PURE_FUNCTION
@@ -114,32 +61,28 @@
         __attribute__nonnull__(2);
 
 PARROT_EXPORT
+void Parrot_encodings_init(PARROT_INTERP)
+        __attribute__nonnull__(1);
+
+PARROT_EXPORT
 PARROT_PURE_FUNCTION
 PARROT_WARN_UNUSED_RESULT
 PARROT_CAN_RETURN_NULL
-const ENCODING * Parrot_find_encoding(SHIM_INTERP,
+const STR_VTABLE * Parrot_find_encoding(SHIM_INTERP,
     ARGIN(const char *encodingname))
         __attribute__nonnull__(2);
 
 PARROT_EXPORT
-PARROT_DOES_NOT_RETURN
-encoding_converter_t Parrot_find_encoding_converter(PARROT_INTERP,
-    ARGIN(ENCODING *lhs),
-    ARGIN(ENCODING *rhs))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_EXPORT
 PARROT_PURE_FUNCTION
 PARROT_WARN_UNUSED_RESULT
 PARROT_CAN_RETURN_NULL
-const ENCODING* Parrot_get_encoding(SHIM_INTERP, INTVAL number_of_encoding);
+const STR_VTABLE* Parrot_get_encoding(SHIM_INTERP,
+    INTVAL number_of_encoding);
 
 PARROT_EXPORT
 PARROT_DOES_NOT_RETURN
 PARROT_CANNOT_RETURN_NULL
-const ENCODING * Parrot_load_encoding(PARROT_INTERP,
+const STR_VTABLE * Parrot_load_encoding(PARROT_INTERP,
     ARGIN(const char *encodingname))
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
@@ -147,24 +90,21 @@
 PARROT_EXPORT
 INTVAL Parrot_make_default_encoding(SHIM_INTERP,
     SHIM(const char *encodingname),
-    ARGIN(ENCODING *encoding))
+    ARGIN(STR_VTABLE *encoding))
         __attribute__nonnull__(3);
 
 PARROT_EXPORT
 PARROT_MALLOC
 PARROT_CANNOT_RETURN_NULL
-ENCODING * Parrot_new_encoding(PARROT_INTERP)
+STR_VTABLE * Parrot_new_encoding(PARROT_INTERP)
         __attribute__nonnull__(1);
 
 PARROT_EXPORT
-INTVAL Parrot_register_encoding(PARROT_INTERP,
-    ARGIN(const char *encodingname),
-    ARGIN(ENCODING *encoding))
+INTVAL Parrot_register_encoding(PARROT_INTERP, ARGIN(STR_VTABLE *encoding))
         __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
+        __attribute__nonnull__(2);
 
-void parrot_deinit_encodings(PARROT_INTERP)
+void Parrot_deinit_encodings(PARROT_INTERP)
         __attribute__nonnull__(1);
 
 void Parrot_str_internal_register_encoding_names(PARROT_INTERP)
@@ -178,13 +118,10 @@
     , PARROT_ASSERT_ARG(encodingname))
 #define ASSERT_ARGS_Parrot_encoding_number_of_str __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_Parrot_encodings_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_Parrot_find_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(encodingname))
-#define ASSERT_ARGS_Parrot_find_encoding_converter \
-     __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(lhs) \
-    , PARROT_ASSERT_ARG(rhs))
 #define ASSERT_ARGS_Parrot_get_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
 #define ASSERT_ARGS_Parrot_load_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
@@ -195,9 +132,8 @@
        PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_Parrot_register_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(encodingname) \
     , PARROT_ASSERT_ARG(encoding))
-#define ASSERT_ARGS_parrot_deinit_encodings __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+#define ASSERT_ARGS_Parrot_deinit_encodings __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_Parrot_str_internal_register_encoding_names \
      __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
@@ -205,27 +141,6 @@
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: src/string/encoding.c */
 
-#define ENCODING_MAX_BYTES_PER_CODEPOINT(i, src) \
-    ((src)->encoding)->max_bytes_per_codepoint
-#define ENCODING_GET_CODEPOINT(i, src, offset) \
-    ((src)->encoding)->get_codepoint((i), (src), (offset))
-#define ENCODING_GET_BYTE(i, src, offset) \
-    ((src)->encoding)->get_byte((i), (src), (offset))
-#define ENCODING_SET_BYTE(i, src, offset, value) \
-    ((src)->encoding)->set_byte((i), (src), (offset), (value))
-#define ENCODING_GET_CODEPOINTS(i, src, offset, count) \
-    ((src)->encoding)->get_codepoints((i), (src), (offset), (count))
-#define ENCODING_GET_BYTES(i, src, offset, count) \
-    ((src)->encoding)->get_bytes((i), (src), (offset), (count))
-#define ENCODING_CODEPOINTS(i, src) \
-    ((src)->encoding)->codepoints((i), (src))
-#define ENCODING_BYTES(i, src) \
-    ((src)->encoding)->bytes((i), (src))
-#define ENCODING_FIND_CCLASS(i, src, typetable, flags, pos, end) \
-    ((src)->encoding)->find_cclass((i), (src), (typetable), (flags), (pos), (end))
-#define ENCODING_HASH(i, src, seed) \
-    ((src)->encoding)->hash((i), (src), (seed))
-
 #endif /* PARROT_ENCODING_H_GUARD */
 
 /*

Modified: trunk/include/parrot/parrot.h
==============================================================================
--- trunk/include/parrot/parrot.h	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/include/parrot/parrot.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -255,7 +255,6 @@
 #include "parrot/interpreter.h"
 #include "parrot/datatypes.h"
 #include "parrot/encoding.h"
-#include "parrot/charset.h"
 #include "parrot/string.h"
 #include "parrot/string_primitives.h"
 #include "parrot/hash.h"

Modified: trunk/include/parrot/pobj.h
==============================================================================
--- trunk/include/parrot/pobj.h	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/include/parrot/pobj.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -31,6 +31,11 @@
     size_t     _buflen;                 /* Length of buffer data. */
 } Buffer;
 
+typedef enum Forward_flag {
+    Buffer_moved_FLAG   = 1 << 0,
+    Buffer_shared_FLAG  = 1 << 1
+} Forward_flags;
+
 /* Use these macros to access the two buffer header slots. */
 
 #define Buffer_bufstart(buffer)    (buffer)->_bufstart
@@ -96,8 +101,7 @@
     UINTVAL     hashval;                /* Cached hash value. */
 
     /*    parrot_string_representation_t representation;*/
-    const struct _encoding *encoding;   /* Pointer to encoding structure. */
-    const struct _charset  *charset;    /* Pointer to charset structure. */
+    const struct _str_vtable *encoding; /* Pointer to string vtable. */
 };
 
 /* Here is the Parrot PMC object, "inheriting" from PObj. */

Modified: trunk/include/parrot/string.h
==============================================================================
--- trunk/include/parrot/string.h	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/include/parrot/string.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -18,38 +18,51 @@
 
 #ifdef PARROT_IN_CORE
 
+#include "parrot/compiler.h"
 #include "parrot/pobj.h"
+#include "parrot/cclass.h"
 #include "parrot/parrot.h"
 
-typedef struct parrot_string_t STRING;
-
-typedef enum Forward_flag {
-    Buffer_moved_FLAG   = 1 << 0,
-    Buffer_shared_FLAG  = 1 << 1
-} Forward_flags;
-
-/* String iterator */
-typedef struct string_iterator_t {
-    UINTVAL bytepos;
-    UINTVAL charpos;
-} String_iter;
-
-#define STRING_ITER_INIT(i, iter) \
-    (iter)->charpos = (iter)->bytepos = 0
-#define STRING_ITER_GET(i, str, iter, offset) \
-    ((str)->encoding)->iter_get((i), (str), (iter), (offset))
-#define STRING_ITER_SKIP(i, str, iter, skip) \
-    ((str)->encoding)->iter_skip((i), (str), (iter), (skip))
-#define STRING_ITER_GET_AND_ADVANCE(i, str, iter) \
-    ((str)->encoding)->iter_get_and_advance((i), (str), (iter))
-#define STRING_ITER_SET_AND_ADVANCE(i, str, iter, c) \
-    ((str)->encoding)->iter_set_and_advance((i), (str), (iter), (c))
-#define STRING_ITER_SET_POSITION(i, str, iter, pos) \
-    ((str)->encoding)->iter_set_position((i), (str), (iter), (pos))
-
 #define STREQ(x, y)  (strcmp((x), (y))==0)
 #define STRNEQ(x, y) (strcmp((x), (y))!=0)
 
+#define STRING_length(src) ((src) ? (src)->strlen : 0U)
+#define STRING_byte_length(src) ((src) ? (src)->bufused : 0U)
+#define STRING_max_bytes_per_codepoint(src) ((src)->encoding)->max_bytes_per_codepoint
+
+#define STRING_equal(interp, lhs, rhs) ((lhs)->encoding)->equal((interp), (lhs), (rhs))
+#define STRING_compare(interp, lhs, rhs) ((lhs)->encoding)->compare((interp), (lhs), (rhs))
+#define STRING_index(interp, src, search, offset) ((src)->encoding)->index((interp), (src), (search), (offset))
+#define STRING_rindex(interp, src, search, offset) ((src)->encoding)->rindex((interp), (src), (search), (offset))
+#define STRING_hash(i, src, seed) ((src)->encoding)->hash((i), (src), (seed))
+#define STRING_validate(interp, src) ((src)->encoding)->validate((interp), (src))
+
+#define STRING_scan(i, src) ((src)->encoding)->scan((i), (src))
+#define STRING_ord(i, src, offset) ((src)->encoding)->ord((i), (src), (offset))
+#define STRING_substr(i, src, offset, count) ((src)->encoding)->substr((i), (src), (offset), (count))
+
+#define STRING_is_cclass(interp, flags, src, offset) ((src)->encoding)->is_cclass((interp), (flags), (src), (offset))
+#define STRING_find_cclass(interp, flags, src, offset, count) ((src)->encoding)->find_cclass((interp), (flags), (src), (offset), (count))
+#define STRING_find_not_cclass(interp, flags, src, offset, count) ((src)->encoding)->find_not_cclass((interp), (flags), (src), (offset), (count))
+
+#define STRING_get_graphemes(interp, src, offset, count) ((src)->encoding)->get_graphemes((interp), (src), (offset), (count))
+#define STRING_compose(interp, src) ((src)->encoding)->compose((interp), (src))
+#define STRING_decompose(interp, src) ((src)->encoding)->decompose((interp), (src))
+
+#define STRING_upcase(interp, src) ((src)->encoding)->upcase((interp), (src))
+#define STRING_downcase(interp, src) ((src)->encoding)->downcase((interp), (src))
+#define STRING_titlecase(interp, src) ((src)->encoding)->titlecase((interp), (src))
+#define STRING_upcase_first(interp, src) ((src)->encoding)->upcase_first((interp), (src))
+#define STRING_downcase_first(interp, src) ((src)->encoding)->downcase_first((interp), (src))
+#define STRING_titlecase_first(interp, src) ((src)->encoding)->titlecase_first((interp), (src))
+
+#define STRING_ITER_INIT(i, iter) (iter)->charpos = (iter)->bytepos = 0
+#define STRING_iter_get(i, str, iter, offset) ((str)->encoding)->iter_get((i), (str), (iter), (offset))
+#define STRING_iter_skip(i, str, iter, skip) ((str)->encoding)->iter_skip((i), (str), (iter), (skip))
+#define STRING_iter_get_and_advance(i, str, iter) ((str)->encoding)->iter_get_and_advance((i), (str), (iter))
+#define STRING_iter_set_and_advance(i, str, iter, c) ((str)->encoding)->iter_set_and_advance((i), (str), (iter), (c))
+#define STRING_iter_set_position(i, str, iter, pos) ((str)->encoding)->iter_set_position((i), (str), (iter), (pos))
+
 /* stringinfo parameters */
 
 /* &gen_from_def(stringinfo.pasm) */
@@ -63,6 +76,98 @@
 
 /* &end_gen */
 
+typedef struct parrot_string_t STRING;
+
+/* String iterator */
+typedef struct string_iterator_t {
+    UINTVAL bytepos;
+    UINTVAL charpos;
+} String_iter;
+
+/* constructors */
+typedef STRING * (*str_vtable_to_encoding_t)(PARROT_INTERP, ARGIN(const STRING *src));
+typedef STRING * (*str_vtable_chr_t)(PARROT_INTERP, UINTVAL codepoint);
+
+typedef INTVAL   (*str_vtable_equal_t)(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs));
+typedef INTVAL   (*str_vtable_compare_t)(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs));
+typedef INTVAL   (*str_vtable_index_t)(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search_string), UINTVAL offset);
+typedef INTVAL   (*str_vtable_rindex_t)(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search_string), UINTVAL offset);
+typedef size_t   (*str_vtable_hash_t)(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval);
+typedef UINTVAL  (*str_vtable_validate_t)(PARROT_INTERP, ARGIN(const STRING *src));
+
+typedef UINTVAL  (*str_vtable_scan_t)(PARROT_INTERP, ARGIN(const STRING *src));
+typedef UINTVAL  (*str_vtable_ord_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset);
+typedef STRING * (*str_vtable_substr_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
+
+/* character classes */
+typedef INTVAL   (*str_vtable_is_cclass_t)(PARROT_INTERP, INTVAL, ARGIN(const STRING *src), UINTVAL offset);
+typedef INTVAL   (*str_vtable_find_cclass_t)(PARROT_INTERP, INTVAL, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
+typedef INTVAL   (*str_vtable_find_not_cclass_t)(PARROT_INTERP, INTVAL, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
+
+/* graphemes */
+typedef STRING * (*str_vtable_get_graphemes_t)(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count);
+typedef STRING * (*str_vtable_compose_t)(PARROT_INTERP, ARGIN(const STRING *src));
+typedef STRING * (*str_vtable_decompose_t)(PARROT_INTERP, ARGIN(const STRING *src));
+
+/* case conversion, TODO: move to single function with a flag */
+typedef STRING * (*str_vtable_upcase_t)(PARROT_INTERP, ARGIN(const STRING *src));
+typedef STRING * (*str_vtable_downcase_t)(PARROT_INTERP, ARGIN(const STRING *src));
+typedef STRING * (*str_vtable_titlecase_t)(PARROT_INTERP, ARGIN(const STRING *src));
+typedef STRING * (*str_vtable_upcase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
+typedef STRING * (*str_vtable_downcase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
+typedef STRING * (*str_vtable_titlecase_first_t)(PARROT_INTERP, ARGIN(const STRING *src));
+
+/* iterator functions */
+typedef UINTVAL  (*str_vtable_iter_get_t)(PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset);
+typedef void     (*str_vtable_iter_skip_t)(PARROT_INTERP, const STRING *str, String_iter *i, INTVAL skip);
+typedef UINTVAL  (*str_vtable_iter_get_and_advance_t)(PARROT_INTERP, const STRING *str, String_iter *i);
+typedef void     (*str_vtable_iter_set_and_advance_t)(PARROT_INTERP, STRING *str, String_iter *i, UINTVAL c);
+typedef void     (*str_vtable_iter_set_position_t)(PARROT_INTERP, const STRING *str, String_iter *i, UINTVAL pos);
+
+struct _str_vtable {
+    int         num;
+    const char *name;
+    STRING     *name_str;
+    UINTVAL     max_bytes_per_codepoint;
+
+    str_vtable_to_encoding_t            to_encoding;
+    str_vtable_chr_t                    chr;
+
+    str_vtable_equal_t                  equal;
+    str_vtable_compare_t                compare;
+    str_vtable_index_t                  index;
+    str_vtable_rindex_t                 rindex;
+    str_vtable_hash_t                   hash;
+    str_vtable_validate_t               validate;
+
+    str_vtable_scan_t                   scan;
+    str_vtable_ord_t                    ord;
+    str_vtable_substr_t                 substr;
+
+    str_vtable_is_cclass_t              is_cclass;
+    str_vtable_find_cclass_t            find_cclass;
+    str_vtable_find_not_cclass_t        find_not_cclass;
+
+    str_vtable_get_graphemes_t          get_graphemes;
+    str_vtable_compose_t                compose;
+    str_vtable_decompose_t              decompose;
+
+    str_vtable_upcase_t                 upcase;
+    str_vtable_downcase_t               downcase;
+    str_vtable_titlecase_t              titlecase;
+    str_vtable_upcase_first_t           upcase_first;
+    str_vtable_downcase_first_t         downcase_first;
+    str_vtable_titlecase_first_t        titlecase_first;
+
+    str_vtable_iter_get_t               iter_get;
+    str_vtable_iter_skip_t              iter_skip;
+    str_vtable_iter_get_and_advance_t   iter_get_and_advance;
+    str_vtable_iter_set_and_advance_t   iter_set_and_advance;
+    str_vtable_iter_set_position_t      iter_set_position;
+};
+
+typedef struct _str_vtable STR_VTABLE;
+
 #endif /* PARROT_IN_CORE */
 #endif /* PARROT_STRING_H_GUARD */
 

Modified: trunk/include/parrot/string_funcs.h
==============================================================================
--- trunk/include/parrot/string_funcs.h	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/include/parrot/string_funcs.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -297,12 +297,10 @@
 STRING * Parrot_str_new_init(PARROT_INTERP,
     ARGIN_NULLOK(const char *buffer),
     UINTVAL len,
-    ARGIN(const ENCODING *encoding),
-    ARGIN(const CHARSET *charset),
+    ARGIN(const STR_VTABLE *encoding),
     UINTVAL flags)
         __attribute__nonnull__(1)
-        __attribute__nonnull__(4)
-        __attribute__nonnull__(5);
+        __attribute__nonnull__(4);
 
 PARROT_EXPORT
 PARROT_CANNOT_RETURN_NULL
@@ -326,15 +324,12 @@
 PARROT_EXPORT
 PARROT_IGNORABLE_RESULT
 PARROT_CAN_RETURN_NULL
-const CHARSET * Parrot_str_rep_compatible(PARROT_INTERP,
+const STR_VTABLE * Parrot_str_rep_compatible(PARROT_INTERP,
     ARGIN(const STRING *a),
-    ARGIN(const STRING *b),
-    ARGOUT(const ENCODING **e))
+    ARGIN(const STRING *b))
         __attribute__nonnull__(1)
         __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        __attribute__nonnull__(4)
-        FUNC_MODIFIES(*e);
+        __attribute__nonnull__(3);
 
 PARROT_EXPORT
 PARROT_CANNOT_RETURN_NULL
@@ -415,13 +410,11 @@
 PARROT_CANNOT_RETURN_NULL
 STRING * Parrot_str_unescape_string(PARROT_INTERP,
     ARGIN(const STRING *src),
-    ARGIN(const CHARSET *charset),
-    ARGIN(const ENCODING *encoding),
+    ARGIN(const STR_VTABLE *encoding),
     UINTVAL flags)
         __attribute__nonnull__(1)
         __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        __attribute__nonnull__(4);
+        __attribute__nonnull__(3);
 
 PARROT_EXPORT
 void Parrot_str_unpin(PARROT_INTERP, ARGMOD(STRING *s))
@@ -454,17 +447,7 @@
 STRING * string_make(PARROT_INTERP,
     ARGIN_NULLOK(const char *buffer),
     UINTVAL len,
-    ARGIN_NULLOK(const char *charset_name),
-    UINTVAL flags)
-        __attribute__nonnull__(1);
-
-PARROT_EXPORT
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-STRING * string_make_from_charset(PARROT_INTERP,
-    ARGIN_NULLOK(const char *buffer),
-    UINTVAL len,
-    INTVAL charset_nr,
+    ARGIN_NULLOK(const char *encoding_name),
     UINTVAL flags)
         __attribute__nonnull__(1);
 
@@ -608,8 +591,7 @@
     , PARROT_ASSERT_ARG(buffer))
 #define ASSERT_ARGS_Parrot_str_new_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(encoding) \
-    , PARROT_ASSERT_ARG(charset))
+    , PARROT_ASSERT_ARG(encoding))
 #define ASSERT_ARGS_Parrot_str_new_noinit __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_Parrot_str_not_equal __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
@@ -619,8 +601,7 @@
 #define ASSERT_ARGS_Parrot_str_rep_compatible __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(a) \
-    , PARROT_ASSERT_ARG(b) \
-    , PARROT_ASSERT_ARG(e))
+    , PARROT_ASSERT_ARG(b))
 #define ASSERT_ARGS_Parrot_str_repeat __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(s))
@@ -649,7 +630,6 @@
 #define ASSERT_ARGS_Parrot_str_unescape_string __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(src) \
-    , PARROT_ASSERT_ARG(charset) \
     , PARROT_ASSERT_ARG(encoding))
 #define ASSERT_ARGS_Parrot_str_unpin __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
@@ -663,8 +643,6 @@
     , PARROT_ASSERT_ARG(s))
 #define ASSERT_ARGS_string_make __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_string_make_from_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
 #define ASSERT_ARGS_string_max_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(s))
 #define ASSERT_ARGS_string_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\

Modified: trunk/lib/Parrot/Configure/Step/List.pm
==============================================================================
--- trunk/lib/Parrot/Configure/Step/List.pm	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/lib/Parrot/Configure/Step/List.pm	Tue Sep  7 22:58:38 2010	(r48833)
@@ -28,8 +28,6 @@
     init::optimize
     inter::shlibs
     inter::libparrot
-    inter::charset
-    inter::encoding
     inter::types
     auto::ops
     auto::pmc

Modified: trunk/src/dynext.c
==============================================================================
--- trunk/src/dynext.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/dynext.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -546,7 +546,7 @@
     char   * const  raw_str = Parrot_str_to_cstring(s, orig);
     STRING * const  ret     =
         Parrot_str_new_init(d, raw_str, strlen(raw_str),
-            PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET,
+            Parrot_default_encoding_ptr,
             PObj_constant_FLAG);
     Parrot_str_free_cstring(raw_str);
     return ret;

Modified: trunk/src/dynpmc/Defines.in
==============================================================================
--- trunk/src/dynpmc/Defines.in	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/dynpmc/Defines.in	Tue Sep  7 22:58:38 2010	(r48833)
@@ -24,7 +24,6 @@
     include/parrot/caches.h \
     include/parrot/call.h \
     include/parrot/cclass.h \
-    include/parrot/charset.h \
     include/parrot/compiler.h \
     include/parrot/config.h \
     include/parrot/context.h \

Modified: trunk/src/global_setup.c
==============================================================================
--- trunk/src/global_setup.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/global_setup.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -98,7 +98,7 @@
         STRING * const config_string =
             Parrot_str_new_init(interp,
                                (const char *)parrot_config_stored, parrot_config_size_stored,
-                               PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET,
+                               Parrot_default_encoding_ptr,
                                PObj_external_FLAG|PObj_constant_FLAG);
 
         config_hash = Parrot_thaw(interp, config_string);

Modified: trunk/src/hash.c
==============================================================================
--- trunk/src/hash.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/hash.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -165,7 +165,7 @@
     if (s1->hashval != s2->hashval)
         return 1;
 
-    return CHARSET_COMPARE(interp, s1, s2);
+    return STRING_compare(interp, s1, s2);
 }
 
 
@@ -188,11 +188,8 @@
     STRING const *s1 = (STRING const *)search_key;
     STRING const *s2 = (STRING const *)bucket_key;
 
-    if (s1 && s2 && (
-            s1->charset != s2->charset ||
-            s1->encoding != s2->encoding)) {
+    if (s1 && s2 && s1->encoding != s2->encoding)
         return 1;
-    }
 
     return hash_compare_string(interp, search_key, bucket_key);
 }
@@ -1174,7 +1171,7 @@
 
             if (s == s2
             || (hashval == s2->hashval
-            &&  CHARSET_COMPARE(interp, s, s2) == 0))
+            &&  STRING_compare(interp, s, s2) == 0))
                 return bucket;
 
             bucket = bucket->next;
@@ -1284,7 +1281,7 @@
 
             if (s == s2
             || (hashval == s2->hashval
-            &&  CHARSET_COMPARE(interp, s, s2) == 0))
+            &&  STRING_compare(interp, s, s2) == 0))
                 break;
 
             bucket = bucket->next;

Modified: trunk/src/io/buffer.c
==============================================================================
--- trunk/src/io/buffer.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/io/buffer.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -232,7 +232,7 @@
     char    *buf  = (char *) Parrot_io_get_buffer_start(interp, filehandle);
     size_t   size = Parrot_io_get_buffer_size(interp, filehandle);
     STRING  *s    = Parrot_str_new_init(interp, buf, size,
-                        PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET,
+                        Parrot_default_encoding_ptr,
                         PObj_external_FLAG);
     size_t   got  = PIO_READ(interp, filehandle, &s);
 
@@ -338,7 +338,7 @@
 
         if (len >= Parrot_io_get_buffer_size(interp, filehandle)) {
             STRING *sf = Parrot_str_new_init(interp, (char *)out_buf, len,
-                PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET,
+                Parrot_default_encoding_ptr,
                 PObj_external_FLAG);
             got                 = PIO_READ(interp, filehandle, &sf);
             s->strlen           = s->bufused = current + got;

Modified: trunk/src/io/utf8.c
==============================================================================
--- trunk/src/io/utf8.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/io/utf8.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -53,7 +53,6 @@
 
     size_t len  = Parrot_io_read_buffer(interp, filehandle, buf);
     s           = *buf;
-    s->charset  = Parrot_unicode_charset_ptr;
     s->encoding = Parrot_utf8_encoding_ptr;
 
     /* count chars, verify utf8 */
@@ -74,8 +73,8 @@
 
                 /* need len - 1 more chars */
                 --len2;
-                s2 = Parrot_str_new_init(interp, NULL, len2, Parrot_utf8_encoding_ptr,
-                                         Parrot_unicode_charset_ptr, 0);
+                s2 = Parrot_str_new_init(interp, NULL, len2,
+                        Parrot_utf8_encoding_ptr, 0);
                 s2->bufused  = len2;
 
                 read = Parrot_io_read_buffer(interp, filehandle, &s2);

Modified: trunk/src/library.c
==============================================================================
--- trunk/src/library.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/library.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -345,7 +345,7 @@
     const char * const file_name = (const char *)file->strstart;
     if (file->strlen <= 1)
         return 0;
-    PARROT_ASSERT(file->encoding == Parrot_fixed_8_encoding_ptr ||
+    PARROT_ASSERT(STRING_max_bytes_per_codepoint(file) == 1 ||
             file->encoding == Parrot_utf8_encoding_ptr);
 
     /* XXX  ../foo, ./bar */
@@ -887,23 +887,21 @@
     /* This is a quick fix for TT #65
      * TODO: redo it with the string reimplementation
      */
-    const char *   charset = Parrot_charset_c_name(interp,
-            Parrot_charset_number_of_str(interp, in));
-    STRING * const slash1  = string_make(interp, "/", 1, charset,
-            PObj_external_FLAG|PObj_constant_FLAG);
-    STRING * const slash2  = string_make(interp, "\\", 1, charset,
-            PObj_external_FLAG|PObj_constant_FLAG);
-    STRING * const dot     = string_make(interp, ".", 1, charset,
-            PObj_external_FLAG|PObj_constant_FLAG);
+    STRING * const slash1 = Parrot_str_new_init(interp, "/", 1,
+            in->encoding, PObj_external_FLAG|PObj_constant_FLAG);
+    STRING * const slash2 = Parrot_str_new_init(interp, "\\", 1,
+            in->encoding, PObj_external_FLAG|PObj_constant_FLAG);
+    STRING * const dot    = Parrot_str_new_init(interp, ".", 1,
+            in->encoding, PObj_external_FLAG|PObj_constant_FLAG);
 
     const INTVAL len = Parrot_str_byte_length(interp, in);
     STRING *stem;
     INTVAL pos_sl, pos_dot;
 
-    pos_sl = CHARSET_RINDEX(interp, in, slash1, len);
+    pos_sl = STRING_rindex(interp, in, slash1, len);
     if (pos_sl == -1)
-        pos_sl = CHARSET_RINDEX(interp, in, slash2, len);
-    pos_dot = CHARSET_RINDEX(interp, in, dot, len);
+        pos_sl = STRING_rindex(interp, in, slash2, len);
+    pos_dot = STRING_rindex(interp, in, dot, len);
 
     /* ignore dot in directory name */
     if (pos_dot != -1 && pos_dot < pos_sl)

Modified: trunk/src/ops/core_ops.c
==============================================================================
--- trunk/src/ops/core_ops.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/ops/core_ops.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -23103,35 +23103,35 @@
 opcode_t *
 Parrot_charset_i_s(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    IREG(1) = Parrot_charset_number_of_str(interp, SREG(2));
+    IREG(1) = Parrot_encoding_number_of_str(interp, SREG(2));
 
 return (opcode_t *)cur_opcode + 3;}
 
 opcode_t *
 Parrot_charset_i_sc(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    IREG(1) = Parrot_charset_number_of_str(interp, CONST(2).u.string);
+    IREG(1) = Parrot_encoding_number_of_str(interp, CONST(2).u.string);
 
 return (opcode_t *)cur_opcode + 3;}
 
 opcode_t *
 Parrot_charsetname_s_i(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    SREG(1) = Parrot_charset_name(interp, IREG(2));
+    SREG(1) = Parrot_encoding_name(interp, IREG(2));
 
 return (opcode_t *)cur_opcode + 3;}
 
 opcode_t *
 Parrot_charsetname_s_ic(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    SREG(1) = Parrot_charset_name(interp, cur_opcode[2]);
+    SREG(1) = Parrot_encoding_name(interp, cur_opcode[2]);
 
 return (opcode_t *)cur_opcode + 3;}
 
 opcode_t *
 Parrot_find_charset_i_s(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    const INTVAL n = Parrot_charset_number(interp, SREG(2));
+    const INTVAL n = Parrot_encoding_number(interp, SREG(2));
     if (n < 0) {
         opcode_t *handler = Parrot_ex_throw_from_op_args(interp, NULL,
                 EXCEPTION_INVALID_CHARTYPE,
@@ -23144,7 +23144,7 @@
 opcode_t *
 Parrot_find_charset_i_sc(opcode_t *cur_opcode, PARROT_INTERP)  {
     const Parrot_Context * const CUR_CTX = Parrot_pcc_get_context_struct(interp, interp->ctx);
-    const INTVAL n = Parrot_charset_number(interp, CONST(2).u.string);
+    const INTVAL n = Parrot_encoding_number(interp, CONST(2).u.string);
     if (n < 0) {
         opcode_t *handler = Parrot_ex_throw_from_op_args(interp, NULL,
                 EXCEPTION_INVALID_CHARTYPE,

Modified: trunk/src/ops/string.ops
==============================================================================
--- trunk/src/ops/string.ops	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/ops/string.ops	Tue Sep  7 22:58:38 2010	(r48833)
@@ -474,15 +474,15 @@
 =cut
 
 op charset(out INT, in STR) :base_core {
-    $1 = Parrot_charset_number_of_str(interp, $2);
+    $1 = Parrot_encoding_number_of_str(interp, $2);
 }
 
 op charsetname(out STR, in INT) :base_core {
-    $1 = Parrot_charset_name(interp, $2);
+    $1 = Parrot_encoding_name(interp, $2);
 }
 
 op find_charset(out INT, in STR) :base_core {
-    const INTVAL n = Parrot_charset_number(interp, $2);
+    const INTVAL n = Parrot_encoding_number(interp, $2);
     if (n < 0) {
         opcode_t *handler = Parrot_ex_throw_from_op_args(interp, NULL,
                 EXCEPTION_INVALID_CHARTYPE,

Modified: trunk/src/packdump.c
==============================================================================
--- trunk/src/packdump.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/packdump.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -169,8 +169,8 @@
       case PFC_STRING:
         Parrot_io_printf(interp, "    [ 'PFC_STRING', {\n");
         pobj_flag_dump(interp, (long)PObj_get_FLAGS(self->u.string));
-        Parrot_io_printf(interp, "        CHARSET  => %ld,\n",
-                   self->u.string->charset);
+        Parrot_io_printf(interp, "        ENCODING => %ld,\n",
+                   self->u.string->encoding);
         i = self->u.string->bufused;
         Parrot_io_printf(interp, "        SIZE     => %ld,\n",
                    (long)i);

Modified: trunk/src/packfile.c
==============================================================================
--- trunk/src/packfile.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/packfile.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -3202,7 +3202,7 @@
         /* Set up new entry and insert it. */
         PackFile_DebugFilenameMapping *mapping = debug->mappings + insert_pos;
         STRING *namestr = Parrot_str_new_init(interp, filename, strlen(filename),
-                PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET, 0);
+                Parrot_default_encoding_ptr, 0);
         size_t count = ct->const_count;
         size_t i;
 
@@ -3228,7 +3228,7 @@
             fnconst           = &ct->constants[ct->const_count - 1];
             fnconst->type     = PFC_STRING;
             fnconst->u.string = Parrot_str_new_init(interp, filename, strlen(filename),
-                    PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET,
+                    Parrot_default_encoding_ptr,
                     PObj_constant_FLAG);
         }
 

Modified: trunk/src/packfile/pf_items.c
==============================================================================
--- trunk/src/packfile/pf_items.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/packfile/pf_items.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -1217,7 +1217,7 @@
     const int wordsize = pf ? pf->header->wordsize : sizeof (opcode_t);
     size_t  size       = PF_fetch_opcode(pf, cursor);
     STRING *s          = Parrot_str_new_init(interp, (const char *)*cursor, size,
-                            Parrot_fixed_8_encoding_ptr, Parrot_binary_charset_ptr,
+                            Parrot_binary_encoding_ptr,
                             PObj_external_FLAG);
     *((const unsigned char **)(cursor)) += ROUND_UP_B(size, wordsize);
     return s;
@@ -1244,8 +1244,7 @@
     ASSERT_ARGS(PF_store_buf)
     const int  wordsize = sizeof (opcode_t);
 
-    PARROT_ASSERT(s->encoding == Parrot_fixed_8_encoding_ptr);
-    PARROT_ASSERT(s->charset  == Parrot_binary_charset_ptr);
+    PARROT_ASSERT(s->encoding == Parrot_binary_encoding_ptr);
 
     *cursor++ = s->bufused;
 
@@ -1315,9 +1314,7 @@
     STRING   *s;
     UINTVAL   flags;
     UINTVAL   encoding_nr;
-    UINTVAL   charset_nr;
-    const ENCODING *encoding;
-    const CHARSET  *charset;
+    const STR_VTABLE *encoding;
     size_t    size;
     const int wordsize          = pf ? pf->header->wordsize : sizeof (opcode_t);
     opcode_t  flag_charset_word = PF_fetch_opcode(pf, cursor);
@@ -1326,30 +1323,24 @@
         return STRINGNULL;
 
     /* decode flags, charset and encoding */
-    flags         = (flag_charset_word & 0x1 ? PObj_constant_FLAG : 0) |
-                    (flag_charset_word & 0x2 ? PObj_private7_FLAG : 0) ;
-    encoding_nr   = (flag_charset_word >> 16);
-    charset_nr    = (flag_charset_word >> 8) & 0xFF;
+    flags       = (flag_charset_word & 0x1 ? PObj_constant_FLAG : 0) |
+                  (flag_charset_word & 0x2 ? PObj_private7_FLAG : 0) ;
+    encoding_nr = (flag_charset_word >> 8) & 0xFF;
 
 
     size = (size_t)PF_fetch_opcode(pf, cursor);
 
     TRACE_PRINTF(("PF_fetch_string(): flags=0x%04x, ", flags));
     TRACE_PRINTF(("encoding_nr=%ld, ", encoding_nr));
-    TRACE_PRINTF(("charset_nr=%ld, ", charset_nr));
     TRACE_PRINTF(("size=%ld.\n", size));
 
     encoding = Parrot_get_encoding(interp, encoding_nr);
-    charset  = Parrot_get_charset(interp, charset_nr);
     if (!encoding)
             Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
                     "Invalid encoding number '%d' specified", encoding_nr);
-    if (!charset)
-            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-                    "Invalid charset number '%d' specified", charset_nr);
 
     s = Parrot_str_new_init(interp, (const char *)*cursor, size,
-            encoding, charset, flags);
+            encoding, flags);
 
     /* print only printable characters */
     TRACE_PRINTF_VAL(("PF_fetch_string(): string is '%s' at 0x%x\n",
@@ -1411,8 +1402,7 @@
      */
 
     /* encode charset_nr, encoding_nr and flags into the same word */
-    *cursor++ = (Parrot_encoding_number_of_str(NULL, s) << 16)       |
-                (Parrot_charset_number_of_str(NULL, s) << 8)         |
+    *cursor++ = (Parrot_encoding_number_of_str(NULL, s) << 8)         |
                 (PObj_get_FLAGS(s) & PObj_constant_FLAG ? 0x1 : 0x0) |
                 (PObj_get_FLAGS(s) & PObj_private7_FLAG ? 0x2 : 0x0) ;
     *cursor++ = s->bufused;

Modified: trunk/src/packout.c
==============================================================================
--- trunk/src/packout.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/packout.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -290,10 +290,7 @@
             if (constant->type == PFC_STRING) {
                 STRING * const sc = constant->u.string;
                 if (Parrot_str_equal(interp, key_str, sc)
-                &&  Parrot_charset_number_of_str(interp, key_str)
-                ==  Parrot_charset_number_of_str(interp, sc)
-                &&  Parrot_encoding_number_of_str(interp, key_str)
-                ==  Parrot_encoding_number_of_str(interp, sc)) {
+                &&  key_str->encoding == sc->encoding) {
                     return i;
                 }
             }

Modified: trunk/src/pmc/bytebuffer.pmc
==============================================================================
--- trunk/src/pmc/bytebuffer.pmc	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/pmc/bytebuffer.pmc	Tue Sep  7 22:58:38 2010	(r48833)
@@ -23,8 +23,7 @@
 static STRING * build_string(PARROT_INTERP,
     ARGIN(const unsigned char *content),
     INTVAL size,
-    ARGIN_NULLOK(const CHARSET *charset),
-    ARGIN_NULLOK(const ENCODING *encoding))
+    ARGIN_NULLOK(const STR_VTABLE *encoding))
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
 
@@ -315,17 +314,15 @@
 
 */
 
-    METHOD get_string(STRING *charsetname, STRING *encodingname) {
+    METHOD get_string(STRING *encodingname) {
         STRING *result;
         unsigned char *content;
         INTVAL size;
-        const CHARSET *charset = Parrot_get_charset(INTERP,
-            Parrot_charset_number(INTERP, charsetname));
-        const ENCODING *encoding = Parrot_get_encoding(INTERP,
+        const STR_VTABLE *encoding = Parrot_get_encoding(INTERP,
             Parrot_encoding_number(INTERP, encodingname));
         GET_ATTR_content(INTERP, SELF, content);
         GET_ATTR_size(INTERP, SELF, size);
-        result = build_string(INTERP, content, size, charset, encoding);
+        result = build_string(INTERP, content, size, encoding);
         RETURN(STRING *result);
     }
 
@@ -344,11 +341,11 @@
         STRING *result;
         unsigned char *content;
         INTVAL size;
-        const CHARSET* charset = STRING_IS_NULL(as) ? PARROT_DEFAULT_CHARSET : as->charset;
-        const ENCODING *encoding = STRING_IS_NULL(as) ? PARROT_DEFAULT_ENCODING : as->encoding;
+        const STR_VTABLE *encoding = STRING_IS_NULL(as) ?
+                Parrot_default_encoding_ptr : as->encoding;
         GET_ATTR_content(INTERP, SELF, content);
         GET_ATTR_size(INTERP, SELF, size);
-        result = build_string(INTERP, content, size, charset, encoding);
+        result = build_string(INTERP, content, size, encoding);
         RETURN(STRING *result);
     }
 
@@ -368,7 +365,7 @@
 decrease the number of reallocations.
 
 =item C<static STRING * build_string(PARROT_INTERP, const unsigned char
-*content, INTVAL size, const CHARSET *charset, const ENCODING *encoding)>
+*content, INTVAL size, const STR_VTABLE *encoding)>
 
 Build a string from the buffer content with the charset and encoding specified.
 
@@ -393,19 +390,15 @@
 static STRING *
 build_string(PARROT_INTERP, ARGIN(const unsigned char *content),
         INTVAL size,
-        ARGIN_NULLOK(const CHARSET *charset),
-        ARGIN_NULLOK(const ENCODING *encoding))
+        ARGIN_NULLOK(const STR_VTABLE *encoding))
 {
     ASSERT_ARGS(build_string)
     STRING *result;
-    if (charset == NULL)
-        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
-                "Invalid charset");
     if (encoding == NULL)
         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
                 "Invalid encoding");
-    result = Parrot_str_new_init(interp, (const char *)content, size, encoding, charset, 0);
-    if (!CHARSET_VALIDATE(interp, result))
+    result = Parrot_str_new_init(interp, (const char *)content, size, encoding, 0);
+    if (!STRING_validate(interp, result))
         Parrot_ex_throw_from_c_args(interp, NULL,
                 EXCEPTION_INVALID_STRING_REPRESENTATION,
                 "Invalid buffer content");

Modified: trunk/src/pmc/packfile.pmc
==============================================================================
--- trunk/src/pmc/packfile.pmc	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/pmc/packfile.pmc	Tue Sep  7 22:58:38 2010	(r48833)
@@ -160,7 +160,7 @@
         PackFile_pack(INTERP, pf, ptr);
 
         str = Parrot_str_new_init(INTERP, (const char*)ptr, length,
-                PARROT_FIXED_8_ENCODING, PARROT_BINARY_CHARSET, 0);
+                Parrot_binary_encoding_ptr, 0);
         Parrot_gc_free_memory_chunk(INTERP, ptr);
 
         PackFile_destroy(INTERP, pf);

Modified: trunk/src/pmc/packfilefixupentry.pmc
==============================================================================
--- trunk/src/pmc/packfilefixupentry.pmc	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/pmc/packfilefixupentry.pmc	Tue Sep  7 22:58:38 2010	(r48833)
@@ -98,8 +98,7 @@
 
         attrs->type     = entry->type;
         attrs->name     = Parrot_str_new_init(INTERP, entry->name,
-                strlen(entry->name), PARROT_FIXED_8_ENCODING,
-                PARROT_BINARY_CHARSET, 0);
+                strlen(entry->name), Parrot_binary_encoding_ptr, 0);
         attrs->offset   = entry->offset;
     }
 

Modified: trunk/src/pmc/string.pmc
==============================================================================
--- trunk/src/pmc/string.pmc	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/pmc/string.pmc	Tue Sep  7 22:58:38 2010	(r48833)
@@ -193,7 +193,7 @@
         if (PObj_constant_TEST(SELF) && !PObj_constant_TEST(value)) {
             char * const copy = Parrot_str_to_cstring(INTERP, value);
             value             = Parrot_str_new_init(INTERP, copy, strlen(copy),
-                PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET,
+                Parrot_default_encoding_ptr,
                 PObj_constant_FLAG);
             Parrot_str_free_cstring(copy);
         }
@@ -692,7 +692,7 @@
         if (!len)
             RETURN(STRING src);
 
-        if (src->charset != Parrot_ascii_charset_ptr)
+        if (src->encoding != Parrot_ascii_encoding_ptr)
             Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_INVALID_ENCODING,
                 "Can't translate non-ascii");
 
@@ -730,7 +730,7 @@
         if (!len)
             RETURN(INTVAL 0);
 
-        if (src->encoding != Parrot_fixed_8_encoding_ptr)
+        if (STRING_max_bytes_per_codepoint(src) != 1)
             Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_INVALID_ENCODING,
                 "Can't is_integer non fixed_8");
 
@@ -794,7 +794,7 @@
         if (!Parrot_str_length(INTERP, substring))
             RETURN(INTVAL res);
 
-        res = CHARSET_RINDEX(INTERP, src, substring, (UINTVAL)start);
+        res = STRING_rindex(INTERP, src, substring, (UINTVAL)start);
         RETURN(INTVAL res);
     }
 
@@ -808,15 +808,13 @@
 
 */
 
-    METHOD unescape(STRING *charsetname, STRING *encodingname)
+    METHOD unescape(STRING *encodingname)
     {
-        const CHARSET *charset = Parrot_get_charset(INTERP,
-            Parrot_charset_number(INTERP, charsetname));
-        const ENCODING *encoding = Parrot_get_encoding(INTERP,
+        const STR_VTABLE *encoding = Parrot_get_encoding(INTERP,
             Parrot_encoding_number(INTERP, encodingname));
         STRING * const src = VTABLE_get_string(INTERP, SELF);
         STRING * const dest = Parrot_str_unescape_string(INTERP, src,
-                charset, encoding, 0);
+                encoding, 0);
         RETURN(STRING *dest);
     }
 

Modified: trunk/src/pmc/stringbuilder.pmc
==============================================================================
--- trunk/src/pmc/stringbuilder.pmc	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/pmc/stringbuilder.pmc	Tue Sep  7 22:58:38 2010	(r48833)
@@ -64,7 +64,6 @@
         STRING * const buffer = mem_gc_allocate_zeroed_typed(INTERP, STRING);
 
         buffer->encoding      = Parrot_default_encoding_ptr;
-        buffer->charset       = Parrot_default_charset_ptr;
         buffer->_buflen       = initial_size;
         buffer->_bufstart     = buffer->strstart
                               = mem_gc_allocate_n_typed(INTERP,
@@ -158,10 +157,9 @@
 */
 
     VTABLE void push_string(STRING *s) {
-        STRING          *buffer;
-        size_t           total_size;
-        const CHARSET   *cs;
-        const ENCODING  *enc = NULL;
+        STRING           *buffer;
+        size_t            total_size;
+        const STR_VTABLE *enc;
 
         /* Early return on NULL strings */
         if (STRING_IS_NULL(s))
@@ -173,14 +171,12 @@
             /* Always copy the encoding of the first string. The IO functions
                assume that the concatenation of utf8 strings doesn't change
                the encoding. */
-            buffer->charset  = s->charset;
             buffer->encoding = s->encoding;
         }
         else {
-            cs = Parrot_str_rep_compatible(interp, buffer, s, &enc);
+            enc = Parrot_str_rep_compatible(interp, buffer, s);
 
-            if (cs) {
-                buffer->charset = cs;
+            if (enc) {
                 buffer->encoding = enc;
             }
             else {
@@ -206,7 +202,6 @@
                     }
 
                     buffer->bufused  = new_buffer->bufused;
-                    buffer->charset  = new_buffer->charset;
                     buffer->encoding = new_buffer->encoding;
 
                     mem_sys_memcopy(buffer->strstart, new_buffer->strstart,
@@ -222,7 +217,6 @@
             /* Calculate (possibly new) total size */
             total_size = calculate_capacity(INTERP, total_size);
 
-            /* Parrot_unicode_charset_ptr can produce NULL buffer */
             buffer->_bufstart = buffer->strstart = mem_gc_realloc_n_typed(INTERP,
                 buffer->_bufstart, total_size, char);
             buffer->_buflen   = total_size;
@@ -298,7 +292,6 @@
         buffer->bufused  = s->bufused;
         buffer->strlen   = Parrot_str_length(INTERP, s);
         buffer->encoding = s->encoding;
-        buffer->charset  = s->charset;
     }
 
     VTABLE void set_pmc(PMC *s) {

Modified: trunk/src/pmc/stringiterator.pmc
==============================================================================
--- trunk/src/pmc/stringiterator.pmc	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/pmc/stringiterator.pmc	Tue Sep  7 22:58:38 2010	(r48833)
@@ -156,11 +156,11 @@
         GET_ATTR_str_val(INTERP, SELF, str_val);
         if (value == ITERATE_FROM_START) {
             SET_ATTR_reverse(INTERP, SELF, 0);
-            STRING_ITER_SET_POSITION(INTERP, str_val, iter, 0);
+            STRING_iter_set_position(INTERP, str_val, iter, 0);
         }
         else if (value == ITERATE_FROM_END) {
             SET_ATTR_reverse(INTERP, SELF, 1);
-            STRING_ITER_SET_POSITION(INTERP, str_val, iter, str_val->strlen);
+            STRING_iter_set_position(INTERP, str_val, iter, str_val->strlen);
         }
         else {
             Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_INVALID_OPERATION,
@@ -209,7 +209,7 @@
                 "StopIteration");
 
         ret = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String));
-        STRING_ITER_SKIP(INTERP, str_val, iter, 1);
+        STRING_iter_skip(INTERP, str_val, iter, 1);
         substr = Parrot_str_iter_substr(INTERP, str_val, &old_iter, iter);
         VTABLE_set_string_native(INTERP, ret, substr);
         return ret;
@@ -234,7 +234,7 @@
             Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
                 "StopIteration");
 
-        STRING_ITER_SKIP(INTERP, str_val, iter, 1);
+        STRING_iter_skip(INTERP, str_val, iter, 1);
         return Parrot_str_iter_substr(INTERP, str_val, &old_iter, iter);
     }
 
@@ -256,7 +256,7 @@
             Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
                 "StopIteration");
 
-        return STRING_ITER_GET_AND_ADVANCE(INTERP, str_val, iter);
+        return STRING_iter_get_and_advance(INTERP, str_val, iter);
     }
 
 /*
@@ -281,7 +281,7 @@
                 "StopIteration");
 
         ret = Parrot_pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp, enum_class_String));
-        STRING_ITER_SKIP(INTERP, str_val, iter, -1);
+        STRING_iter_skip(INTERP, str_val, iter, -1);
         substr = Parrot_str_iter_substr(INTERP, str_val, iter, &old_iter);
         VTABLE_set_string_native(INTERP, ret, substr);
         return ret;
@@ -307,7 +307,7 @@
             Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
                 "StopIteration");
 
-        STRING_ITER_SKIP(INTERP, str_val, iter, -1);
+        STRING_iter_skip(INTERP, str_val, iter, -1);
         return Parrot_str_iter_substr(INTERP, str_val, iter, &old_iter);
     }
 
@@ -330,8 +330,8 @@
             Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
                 "StopIteration");
 
-        STRING_ITER_SKIP(INTERP, str_val, iter, -1);
-        return STRING_ITER_GET(INTERP, str_val, iter, 0);
+        STRING_iter_skip(INTERP, str_val, iter, -1);
+        return STRING_iter_get(INTERP, str_val, iter, 0);
     }
 
 /*
@@ -354,7 +354,7 @@
             Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_OUT_OF_BOUNDS,
                 "StopIteration");
 
-        return STRING_ITER_GET(INTERP, str_val, iter, idx);
+        return STRING_iter_get(INTERP, str_val, iter, idx);
     }
 
 /*
@@ -379,9 +379,9 @@
                 "StopIteration");
 
         if (idx != 0)
-            STRING_ITER_SKIP(INTERP, str_val, &iter, idx);
+            STRING_iter_skip(INTERP, str_val, &iter, idx);
         next_iter = iter;
-        STRING_ITER_SKIP(INTERP, str_val, &next_iter, 1);
+        STRING_iter_skip(INTERP, str_val, &next_iter, 1);
 
         return Parrot_str_iter_substr(INTERP, str_val, &iter, &next_iter);
     }

Modified: trunk/src/spf_vtable.c
==============================================================================
--- trunk/src/spf_vtable.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/spf_vtable.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -452,7 +452,6 @@
 
     ++obj->index;
     s = VTABLE_get_string(interp, tmp);
-    /* XXX Parrot_str_copy like below? + adjusting bufused */
     return Parrot_str_substr(interp, s, 0, 1);
 }
 

Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/api.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -35,7 +35,6 @@
 #define nonnull_encoding_name(s) (s) ? (s)->encoding->name : "null string"
 #define ASSERT_STRING_SANITY(s) \
     PARROT_ASSERT((s)->encoding); \
-    PARROT_ASSERT((s)->charset); \
     PARROT_ASSERT(!PObj_on_free_list_TEST(s))
 
 /* HEADERIZER HFILE: include/parrot/string_funcs.h */
@@ -46,14 +45,11 @@
 PARROT_INLINE
 PARROT_IGNORABLE_RESULT
 PARROT_CAN_RETURN_NULL
-static const CHARSET * string_rep_compatible(SHIM_INTERP,
+static const STR_VTABLE * string_rep_compatible(SHIM_INTERP,
     ARGIN(const STRING *a),
-    ARGIN(const STRING *b),
-    ARGOUT(const ENCODING **e))
+    ARGIN(const STRING *b))
         __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        __attribute__nonnull__(4)
-        FUNC_MODIFIES(*e);
+        __attribute__nonnull__(3);
 
 PARROT_DOES_NOT_RETURN
 PARROT_COLD
@@ -62,8 +58,7 @@
 
 #define ASSERT_ARGS_string_rep_compatible __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(a) \
-    , PARROT_ASSERT_ARG(b) \
-    , PARROT_ASSERT_ARG(e))
+    , PARROT_ASSERT_ARG(b))
 #define ASSERT_ARGS_throw_illegal_escape __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
@@ -137,15 +132,15 @@
         return;
     }
 
-    /* Set up the cstring cache, then load the basic encodings and charsets */
+    /* Set up the cstring cache, then load the basic encodings */
     const_cstring_hash          = parrot_new_cstring_hash(interp);
     interp->const_cstring_hash  = const_cstring_hash;
-    Parrot_charsets_encodings_init(interp);
+    Parrot_encodings_init(interp);
 
 #if PARROT_CATCH_NULL
     /* initialize STRINGNULL, but not in the constant table */
     STRINGNULL = Parrot_str_new_init(interp, NULL, 0,
-                       PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET,
+                       Parrot_default_encoding_ptr,
                        PObj_constant_FLAG);
 #endif
 
@@ -158,7 +153,7 @@
             Parrot_str_new_init(interp,
                 parrot_cstrings[i].string,
                 parrot_cstrings[i].len,
-                PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET,
+                Parrot_default_encoding_ptr,
                 PObj_external_FLAG|PObj_constant_FLAG);
         parrot_hash_put(interp, const_cstring_hash,
             PARROT_const_cast(char *, parrot_cstrings[i].string), (void *)s);
@@ -187,7 +182,7 @@
     if (!interp->parent_interpreter) {
         mem_internal_free(interp->const_cstring_table);
         interp->const_cstring_table = NULL;
-        Parrot_charsets_encodings_deinit(interp);
+        Parrot_deinit_encodings(interp);
         parrot_hash_destroy(interp, interp->const_cstring_hash);
     }
 }
@@ -218,8 +213,7 @@
         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_CHARTYPE,
             "Unsupported representation");
 
-    s->charset  = PARROT_DEFAULT_CHARSET;
-    s->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, s);
+    s->encoding = Parrot_default_encoding_ptr;
 
     Parrot_gc_allocate_string_storage(interp, s,
         (size_t)string_max_bytes(interp, s, capacity));
@@ -230,10 +224,10 @@
 
 /*
 
-=item C<static const CHARSET * string_rep_compatible(PARROT_INTERP, const STRING
-*a, const STRING *b, const ENCODING **e)>
+=item C<static const STR_VTABLE * string_rep_compatible(PARROT_INTERP, const
+STRING *a, const STRING *b)>
 
-Find the "lowest" possible charset and encoding for the given string. E.g.
+Find the "lowest" possible encoding for the given string. E.g.
 
   ascii <op> utf8 => utf8
                   => ascii, B<if> C<STRING *b> has ascii chars only.
@@ -247,66 +241,55 @@
 PARROT_INLINE
 PARROT_IGNORABLE_RESULT
 PARROT_CAN_RETURN_NULL
-static const CHARSET *
+static const STR_VTABLE *
 string_rep_compatible(SHIM_INTERP,
-    ARGIN(const STRING *a), ARGIN(const STRING *b), ARGOUT(const ENCODING **e))
+    ARGIN(const STRING *a), ARGIN(const STRING *b))
 {
     ASSERT_ARGS(string_rep_compatible)
 
-    if (a->encoding == b->encoding && a->charset == b->charset) {
-        *e = a->encoding;
-        return a->charset;
+    if (a->encoding == b->encoding) {
+        return a->encoding;
     }
 
     /* a table could possibly simplify the logic */
     if (a->encoding == Parrot_utf8_encoding_ptr
-    &&  b->charset  == Parrot_ascii_charset_ptr) {
+    &&  b->encoding == Parrot_ascii_encoding_ptr) {
         if (a->strlen == a->bufused) {
-            *e = Parrot_fixed_8_encoding_ptr;
-            return b->charset;
+            return b->encoding;
         }
-        *e = a->encoding;
-        return a->charset;
+        return a->encoding;
     }
 
     if (b->encoding == Parrot_utf8_encoding_ptr
-    &&  a->charset  == Parrot_ascii_charset_ptr) {
+    &&  a->encoding == Parrot_ascii_encoding_ptr) {
         if (b->strlen == b->bufused) {
-            *e = Parrot_fixed_8_encoding_ptr;
-            return a->charset;
+            return a->encoding;
         }
-        *e = b->encoding;
-        return b->charset;
+        return b->encoding;
     }
 
-    if (a->encoding != b->encoding)
+    if (STRING_max_bytes_per_codepoint(a) != 1 ||
+        STRING_max_bytes_per_codepoint(b) != 1)
         return NULL;
 
-    if (a->encoding != Parrot_fixed_8_encoding_ptr)
-        return NULL;
-
-    *e = Parrot_fixed_8_encoding_ptr;
-
-    if (a->charset == b->charset)
-        return a->charset;
-    if (b->charset == Parrot_ascii_charset_ptr)
-        return a->charset;
-    if (a->charset == Parrot_ascii_charset_ptr)
-        return b->charset;
-    if (a->charset == Parrot_binary_charset_ptr)
-        return a->charset;
-    if (b->charset == Parrot_binary_charset_ptr)
-        return b->charset;
+    if (b->encoding == Parrot_ascii_encoding_ptr)
+        return a->encoding;
+    if (a->encoding == Parrot_ascii_encoding_ptr)
+        return b->encoding;
+    if (a->encoding == Parrot_binary_encoding_ptr)
+        return a->encoding;
+    if (b->encoding == Parrot_binary_encoding_ptr)
+        return b->encoding;
 
     return NULL;
 }
 
 /*
 
-=item C<const CHARSET * Parrot_str_rep_compatible(PARROT_INTERP, const STRING
-*a, const STRING *b, const ENCODING **e)>
+=item C<const STR_VTABLE * Parrot_str_rep_compatible(PARROT_INTERP, const STRING
+*a, const STRING *b)>
 
-Find the "lowest" possible charset and encoding for the given string. E.g.
+Find the "lowest" possible encoding for the given string. E.g.
 
   ascii <op> utf8 => utf8
                   => ascii, B<if> C<STRING *b> has ascii chars only.
@@ -320,12 +303,12 @@
 PARROT_EXPORT
 PARROT_IGNORABLE_RESULT
 PARROT_CAN_RETURN_NULL
-const CHARSET *
+const STR_VTABLE *
 Parrot_str_rep_compatible(PARROT_INTERP,
-    ARGIN(const STRING *a), ARGIN(const STRING *b), ARGOUT(const ENCODING **e))
+    ARGIN(const STRING *a), ARGIN(const STRING *b))
 {
     ASSERT_ARGS(Parrot_str_rep_compatible)
-    return string_rep_compatible(interp, a, b, e);
+    return string_rep_compatible(interp, a, b);
 }
 
 /*
@@ -358,7 +341,6 @@
     result->bufused  = s->bufused;
     result->hashval  = s->hashval;
     result->encoding = s->encoding;
-    result->charset  = s->charset;
 
     return result;
 }
@@ -427,30 +409,28 @@
             ARGIN_NULLOK(const STRING *b))
 {
     ASSERT_ARGS(Parrot_str_concat)
-    const CHARSET   *cs;
-    const ENCODING  *enc = NULL;
-    STRING          *dest;
-    UINTVAL          total_length;
-
-    /* XXX should this be a CHARSET method? */
-
-    /* If B isn't real, we just bail */
-    const UINTVAL b_len = b ? Parrot_str_length(interp, b) : 0;
-    if (!b_len)
-        return STRING_IS_NULL(a) ? STRINGNULL : Parrot_str_copy(interp, a);
-
-    /* Is A real? */
-    if (STRING_IS_NULL(a) || Buffer_bufstart(a) == NULL)
-        return Parrot_str_copy(interp, b);
+    const STR_VTABLE *enc;
+    STRING           *dest;
+    UINTVAL           total_length;
+
+    if (STRING_IS_NULL(a)) {
+        if (STRING_IS_NULL(b))
+            return STRINGNULL;
+        else
+            return Parrot_str_copy(interp, b);
+    }
+    else {
+        if (STRING_IS_NULL(b))
+            return Parrot_str_copy(interp, a);
+    }
 
     ASSERT_STRING_SANITY(a);
     ASSERT_STRING_SANITY(b);
 
-    cs = string_rep_compatible(interp, a, b, &enc);
+    enc = string_rep_compatible(interp, a, b);
 
-    if (!cs) {
+    if (!enc) {
         /* upgrade strings for concatenation */
-        cs = Parrot_unicode_charset_ptr;
         if (a->encoding == Parrot_ucs4_encoding_ptr
             || b->encoding == Parrot_ucs4_encoding_ptr)
             enc = Parrot_ucs4_encoding_ptr;
@@ -462,22 +442,15 @@
         else
             enc = Parrot_utf8_encoding_ptr;
 
-        a = Parrot_unicode_charset_ptr->to_charset(interp, a);
-        b = Parrot_unicode_charset_ptr->to_charset(interp, b);
-
-        if (a->encoding != enc)
-            a = enc->to_encoding(interp, a);
-        if (b->encoding != enc)
-            b = enc->to_encoding(interp, b);
+        a = enc->to_encoding(interp, a);
+        b = enc->to_encoding(interp, b);
     }
     /* calc usable and total bytes */
     total_length = a->bufused + b->bufused;
 
     dest = Parrot_str_new_noinit(interp, enum_stringrep_one, total_length);
     PARROT_ASSERT(enc);
-    PARROT_ASSERT(cs);
     dest->encoding = enc;
-    dest->charset  = cs;
 
     /* Copy A first */
     mem_sys_memcopy(dest->strstart, a->strstart, a->bufused);
@@ -487,7 +460,7 @@
             b->strstart, b->bufused);
 
     dest->bufused = a->bufused + b->bufused;
-    dest->strlen  = a->strlen + b_len;
+    dest->strlen  = a->strlen + b->strlen;
 
     return dest;
 }
@@ -516,7 +489,7 @@
     const UINTVAL buff_length = (len > 0) ? len : buffer ? strlen(buffer) : 0;
 
     return Parrot_str_new_init(interp, buffer, buff_length,
-        PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET, 0);
+        Parrot_default_encoding_ptr, 0);
 }
 
 
@@ -549,8 +522,7 @@
     result->strstart        = (char *)Buffer_bufstart(result);
     result->bufused         = len;
     result->strlen          = len;
-    result->encoding        = Parrot_fixed_8_encoding_ptr;
-    result->charset         = Parrot_binary_charset_ptr;
+    result->encoding      = Parrot_binary_encoding_ptr;
 
     Buffer_buflen(buffer)   = 0;
     Buffer_bufstart(buffer) = NULL;
@@ -616,7 +588,7 @@
         return s;
 
     s = Parrot_str_new_init(interp, buffer, strlen(buffer),
-                       PARROT_DEFAULT_ENCODING, PARROT_DEFAULT_CHARSET,
+                       Parrot_default_encoding_ptr,
                        PObj_external_FLAG|PObj_constant_FLAG);
 
     parrot_hash_put(interp, cstring_cache,
@@ -629,12 +601,12 @@
 /*
 
 =item C<STRING * string_make(PARROT_INTERP, const char *buffer, UINTVAL len,
-const char *charset_name, UINTVAL flags)>
+const char *encoding_name, UINTVAL flags)>
 
 Creates and returns a new Parrot string using C<len> bytes of string data read
 from C<buffer>.
 
-The value of C<charset_name> specifies the string's representation.
+The value of C<encoding_name> specifies the string's representation.
 The currently recognised values are:
 
     'iso-8859-1'
@@ -645,7 +617,7 @@
 The encoding is implicitly guessed; C<unicode> implies the C<utf-8> encoding,
 and the other three assume C<fixed-8> encoding.
 
-If C<charset> is unspecified, the default charset 'ascii' will be used.
+If C<encoding_name> is unspecified, the default encoding 'ascii' will be used.
 
 The value of C<flags> is optionally one or more C<PObj_*> flags C<OR>-ed
 together.
@@ -659,74 +631,28 @@
 PARROT_CANNOT_RETURN_NULL
 STRING *
 string_make(PARROT_INTERP, ARGIN_NULLOK(const char *buffer),
-        UINTVAL len, ARGIN_NULLOK(const char *charset_name), UINTVAL flags)
+        UINTVAL len, ARGIN_NULLOK(const char *encoding_name), UINTVAL flags)
 {
     ASSERT_ARGS(string_make)
-    const CHARSET *charset;
+    const STR_VTABLE *encoding;
 
-    if (charset_name) {
-        charset = Parrot_find_charset(interp, charset_name);
-        if (!charset)
+    if (encoding_name) {
+        encoding = Parrot_find_encoding(interp, encoding_name);
+        if (!encoding)
             Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-                "Can't make '%s' charset strings", charset_name);
+                "Can't make '%s' encoding strings", encoding_name);
     }
     else
-        charset = Parrot_get_charset(interp, 0);
-
-    return Parrot_str_new_init(interp, buffer, len,
-        charset->preferred_encoding, charset, flags);
-}
-
-
-/*
-
-=item C<STRING * string_make_from_charset(PARROT_INTERP, const char *buffer,
-UINTVAL len, INTVAL charset_nr, UINTVAL flags)>
-
-Creates and returns a new Parrot string using C<len> bytes of string data read
-from C<buffer>.
-
-The value of C<charset_name> specifies the string's representation.  It must be
-a valid charset identifier.
-
-    'iso-8859-1'
-    'ascii'
-    'binary'
-    'unicode'
-
-The encoding is implicitly guessed; C<unicode> implies the C<utf-8> encoding,
-and the other three assume C<fixed-8> encoding.
-
-The value of C<flags> is optionally one or more C<PObj_*> flags C<OR>-ed
-together.
-
-=cut
+        encoding = Parrot_default_encoding_ptr;
 
-*/
-
-PARROT_EXPORT
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-STRING *
-string_make_from_charset(PARROT_INTERP, ARGIN_NULLOK(const char *buffer),
-    UINTVAL len, INTVAL charset_nr, UINTVAL flags)
-{
-    ASSERT_ARGS(string_make_from_charset)
-    const CHARSET *charset = Parrot_get_charset(interp, charset_nr);
-
-    if (!charset)
-        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-            "Invalid charset number '%d' specified", charset_nr);
-
-    return Parrot_str_new_init(interp, buffer, len,
-        charset->preferred_encoding, charset, flags);
+    return Parrot_str_new_init(interp, buffer, len, encoding, flags);
 }
 
 
 /*
 
 =item C<STRING * Parrot_str_new_init(PARROT_INTERP, const char *buffer, UINTVAL
-len, const ENCODING *encoding, const CHARSET *charset, UINTVAL flags)>
+len, const STR_VTABLE *encoding, UINTVAL flags)>
 
 Given a buffer, its length, an encoding, a character set, and STRING flags,
 creates and returns a new string.  Don't call this directly.
@@ -740,13 +666,12 @@
 PARROT_CANNOT_RETURN_NULL
 STRING *
 Parrot_str_new_init(PARROT_INTERP, ARGIN_NULLOK(const char *buffer), UINTVAL len,
-        ARGIN(const ENCODING *encoding), ARGIN(const CHARSET *charset), UINTVAL flags)
+        ARGIN(const STR_VTABLE *encoding), UINTVAL flags)
 {
     ASSERT_ARGS(Parrot_str_new_init)
     DECL_CONST_CAST;
     STRING * const s = Parrot_gc_new_string_header(interp, flags);
     s->encoding      = encoding;
-    s->charset       = charset;
 
     if (flags & PObj_external_FLAG) {
         /*
@@ -761,10 +686,10 @@
         Buffer_bufstart(s) = s->strstart = PARROT_const_cast(char *, buffer);
         Buffer_buflen(s)   = s->bufused  = len;
 
-        if (encoding == Parrot_fixed_8_encoding_ptr)
+        if (encoding->max_bytes_per_codepoint == 1)
             s->strlen = len;
         else
-            s->strlen = CHARSET_CODEPOINTS(interp, s);
+            s->strlen = STRING_scan(interp, s);
 
         return s;
     }
@@ -774,10 +699,10 @@
     if (buffer) {
         mem_sys_memcopy(s->strstart, buffer, len);
         s->bufused = len;
-        if (encoding == Parrot_fixed_8_encoding_ptr)
+        if (encoding->max_bytes_per_codepoint == 1)
             s->strlen = len;
         else
-            s->strlen = CHARSET_CODEPOINTS(interp, s);
+            s->strlen = STRING_scan(interp, s);
     }
     else
         s->strlen = s->bufused = 0;
@@ -835,7 +760,7 @@
 {
     ASSERT_ARGS(Parrot_str_indexed)
     ASSERT_STRING_SANITY(s);
-    return (INTVAL)CHARSET_GET_CODEPOINT(interp, s, idx);
+    return (INTVAL)STRING_ord(interp, s, idx);
 }
 
 
@@ -883,7 +808,7 @@
         STRING *src    = PARROT_const_cast(STRING *, s);
         STRING *search = PARROT_const_cast(STRING *, s2);
 
-        return CHARSET_INDEX(interp, src, search, (UINTVAL)start);
+        return STRING_index(interp, src, search, (UINTVAL)start);
     }
 }
 
@@ -951,17 +876,13 @@
 string_chr(PARROT_INTERP, UINTVAL character)
 {
     ASSERT_ARGS(string_chr)
-    if (character > 0xff)
-        return Parrot_unicode_charset_ptr->string_from_codepoint(interp,
-                character);
 
+    if (character > 0xff)
+        return Parrot_utf8_encoding_ptr->chr(interp, character);
     else if (character > 0x7f)
-        return Parrot_iso_8859_1_charset_ptr->string_from_codepoint(interp,
-                character);
-
+        return Parrot_latin1_encoding_ptr->chr(interp, character);
     else
-        return Parrot_ascii_charset_ptr->string_from_codepoint(interp,
-                 character);
+        return Parrot_ascii_encoding_ptr->chr(interp, character);
 }
 
 
@@ -1012,7 +933,7 @@
 {
     ASSERT_ARGS(string_max_bytes)
     PARROT_ASSERT(s->encoding);
-    return ENCODING_MAX_BYTES_PER_CODEPOINT(interp, s) * nchars;
+    return STRING_max_bytes_per_codepoint(s) * nchars;
 }
 
 
@@ -1034,7 +955,7 @@
     ASSERT_ARGS(Parrot_str_repeat)
     STRING * const dest = Parrot_str_new_init(interp, NULL,
                         s->bufused * num,
-                        s->encoding, s->charset, 0);
+                        s->encoding, 0);
     if (num > 0) {
         /* copy s into dest num times */
         UINTVAL length = s->bufused;
@@ -1102,7 +1023,7 @@
     if (true_length > (src->strlen - true_offset))
         true_length = (UINTVAL)(src->strlen - true_offset);
 
-    return CHARSET_GET_CODEPOINTS(interp, src, true_offset, true_length);
+    return STRING_substr(interp, src, true_offset, true_length);
 }
 
 /*
@@ -1176,12 +1097,12 @@
     }
 
     STRING_ITER_INIT(interp, &search_iter);
-    c0 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter);
+    c0 = STRING_iter_get_and_advance(interp, search, &search_iter);
     search_start = search_iter;
     next_start = *start;
 
     while (start->charpos + len <= src->strlen) {
-        UINTVAL c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, &next_start);
+        UINTVAL c1 = STRING_iter_get_and_advance(interp, src, &next_start);
 
         if (c1 == c0) {
             UINTVAL c2;
@@ -1190,8 +1111,8 @@
             do {
                 if (search_iter.charpos >= len)
                     return start->charpos;
-                c1 = STRING_ITER_GET_AND_ADVANCE(interp, src, end);
-                c2 = STRING_ITER_GET_AND_ADVANCE(interp, search, &search_iter);
+                c1 = STRING_iter_get_and_advance(interp, src, end);
+                c2 = STRING_iter_get_and_advance(interp, search, &search_iter);
             } while (c1 == c2);
 
             search_iter = search_start;
@@ -1237,12 +1158,11 @@
     INTVAL offset, INTVAL length, ARGIN(const STRING *rep))
 {
     ASSERT_ARGS(Parrot_str_replace)
-    String_iter     iter;
-    const CHARSET  *cs;
-    const ENCODING *enc;
-    STRING         *dest        = NULL;
-    UINTVAL         true_offset = (UINTVAL)offset;
-    UINTVAL         true_length = (UINTVAL)length;
+    String_iter       iter;
+    const STR_VTABLE *enc;
+    STRING           *dest        = NULL;
+    UINTVAL           true_offset = (UINTVAL)offset;
+    UINTVAL           true_length = (UINTVAL)length;
 
     UINTVAL         start_byte, end_byte, start_char, end_char;
     INTVAL          buf_size;
@@ -1269,24 +1189,23 @@
         true_length = (UINTVAL)(src->strlen - true_offset);
 
     /* may have different reps..... */
-    cs = string_rep_compatible(interp, src, rep, &enc);
+    enc = string_rep_compatible(interp, src, rep);
 
-    if (!cs) {
+    if (!enc) {
         src = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
         rep = Parrot_utf16_encoding_ptr->to_encoding(interp, rep);
-        /* Remember selected charset and encoding */
+        /* Remember selected encoding */
         enc = src->encoding;
-        cs  = src->charset;
     }
 
     /* get byte position of the part that will be replaced */
     STRING_ITER_INIT(interp, &iter);
 
-    STRING_ITER_SET_POSITION(interp, src, &iter, true_offset);
+    STRING_iter_set_position(interp, src, &iter, true_offset);
     start_byte = iter.bytepos;
     start_char = iter.charpos;
 
-    STRING_ITER_SKIP(interp, src, &iter, true_length);
+    STRING_iter_skip(interp, src, &iter, true_length);
     end_byte   = iter.bytepos;
     end_char   = iter.charpos;
 
@@ -1299,9 +1218,8 @@
     /* Now do the replacement */
     dest = Parrot_gc_new_string_header(interp, 0);
 
-    /* Set encoding and charset to compatible */
+    /* Set encoding to compatible */
     dest->encoding = enc;
-    dest->charset  = cs;
 
     /* Clear COW flag. We own buffer */
     PObj_get_FLAGS(dest) = PObj_is_string_FLAG
@@ -1373,7 +1291,7 @@
         return chopped;
     }
 
-    if (chopped->encoding == Parrot_fixed_8_encoding_ptr) {
+    if (STRING_max_bytes_per_codepoint(chopped) == 1) {
         chopped->bufused = new_length;
     }
     else if (chopped->encoding == Parrot_ucs2_encoding_ptr) {
@@ -1384,7 +1302,7 @@
         String_iter iter;
 
         STRING_ITER_INIT(interp, &iter);
-        STRING_ITER_SET_POSITION(interp, s, &iter, new_length);
+        STRING_iter_set_position(interp, s, &iter, new_length);
         chopped->bufused = iter.bytepos;
     }
 
@@ -1423,7 +1341,7 @@
     ASSERT_STRING_SANITY(s1);
     ASSERT_STRING_SANITY(s2);
 
-    return CHARSET_COMPARE(interp, s1, s2);
+    return STRING_compare(interp, s1, s2);
 }
 
 
@@ -1496,7 +1414,7 @@
      * both strings are non-null
      * both strings have same length
      */
-    return CHARSET_COMPARE(interp, s1, s2) == 0;
+    return STRING_compare(interp, s1, s2) == 0;
 }
 
 
@@ -1522,13 +1440,13 @@
     STRING *res;
     size_t  minlen;
 
-    /* we could also trans_charset to iso-8859-1 */
-    if (s1 && s1->encoding != Parrot_fixed_8_encoding_ptr)
+    /* we could also trans_encoding to iso-8859-1 */
+    if (s1 && STRING_max_bytes_per_codepoint(s1) != 1)
         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
             "string bitwise_and (%s/%s) unsupported",
             s1->encoding->name, nonnull_encoding_name(s2));
 
-    if (s2 && s2->encoding != Parrot_fixed_8_encoding_ptr)
+    if (s2 && STRING_max_bytes_per_codepoint(s2) != 1)
         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
             "string bitwise_and (%s/%s) unsupported",
             nonnull_encoding_name(s1), s2->encoding->name);
@@ -1540,7 +1458,7 @@
         minlen = 0;
 
     res = Parrot_str_new_init(interp, NULL, minlen,
-            Parrot_fixed_8_encoding_ptr, Parrot_binary_charset_ptr, 0);
+            Parrot_binary_encoding_ptr, 0);
 
     if (STRING_IS_NULL(s1) || STRING_IS_NULL(s2)) {
         res->bufused = 0;
@@ -1664,7 +1582,7 @@
     size_t  maxlen = 0;
 
     if (!STRING_IS_NULL(s1)) {
-        if (s1->encoding != Parrot_fixed_8_encoding_ptr)
+        if (STRING_max_bytes_per_codepoint(s1) != 1)
             Parrot_ex_throw_from_c_args(interp, NULL,
                 EXCEPTION_INVALID_ENCODING,
                 "string bitwise_or (%s/%s) unsupported",
@@ -1674,7 +1592,7 @@
     }
 
     if (!STRING_IS_NULL(s2)) {
-        if (s2->encoding != Parrot_fixed_8_encoding_ptr)
+        if (STRING_max_bytes_per_codepoint(s2) != 1)
             Parrot_ex_throw_from_c_args(interp, NULL,
                 EXCEPTION_INVALID_ENCODING,
                 "string bitwise_or (%s/%s) unsupported",
@@ -1685,7 +1603,7 @@
     }
 
     res = Parrot_str_new_init(interp, NULL, maxlen,
-            Parrot_fixed_8_encoding_ptr, Parrot_binary_charset_ptr, 0);
+            Parrot_binary_encoding_ptr, 0);
 
     if (!maxlen) {
         res->bufused = 0;
@@ -1730,7 +1648,7 @@
     size_t  maxlen = 0;
 
     if (!STRING_IS_NULL(s1)) {
-        if (s1->encoding != Parrot_fixed_8_encoding_ptr)
+        if (STRING_max_bytes_per_codepoint(s1) != 1)
             Parrot_ex_throw_from_c_args(interp, NULL,
                 EXCEPTION_INVALID_ENCODING,
                 "string bitwise_xor (%s/%s) unsupported",
@@ -1740,7 +1658,7 @@
     }
 
     if (!STRING_IS_NULL(s2)) {
-        if (s2->encoding != Parrot_fixed_8_encoding_ptr)
+        if (STRING_max_bytes_per_codepoint(s2) != 1)
             Parrot_ex_throw_from_c_args(interp, NULL,
                 EXCEPTION_INVALID_ENCODING,
                 "string bitwise_xor (%s/%s) unsupported",
@@ -1751,7 +1669,7 @@
     }
 
     res = Parrot_str_new_init(interp, NULL, maxlen,
-            Parrot_fixed_8_encoding_ptr, Parrot_binary_charset_ptr, 0);
+            Parrot_binary_encoding_ptr, 0);
 
     if (!maxlen) {
         res->bufused = 0;
@@ -1807,11 +1725,11 @@
     size_t  len;
 
     if (!STRING_IS_NULL(s)) {
-        if (s->encoding != Parrot_fixed_8_encoding_ptr)
+        if (STRING_max_bytes_per_codepoint(s) != 1)
             Parrot_ex_throw_from_c_args(interp, NULL,
                 EXCEPTION_INVALID_ENCODING,
-                "string bitwise_not (%s/%s) unsupported",
-                s->encoding->name, s->encoding->name);
+                "string bitwise_not (%s) unsupported",
+                s->encoding->name);
 
         len = s->bufused;
     }
@@ -1819,7 +1737,7 @@
         len = 0;
 
     res = Parrot_str_new_init(interp, NULL, len,
-            Parrot_fixed_8_encoding_ptr, Parrot_binary_charset_ptr, 0);
+            Parrot_binary_encoding_ptr, 0);
 
     if (!len) {
         res->bufused = 0;
@@ -1965,7 +1883,7 @@
         STRING_ITER_INIT(interp, &iter);
 
         while (state != parse_end && iter.charpos < s->strlen) {
-            const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter);
+            const UINTVAL c = STRING_iter_get_and_advance(interp, s, &iter);
             /* Check for overflow */
             if (c > 255)
                 break;
@@ -2064,7 +1982,7 @@
 
     /* Handcrafted FSM to read float value */
     while (state != parse_end && iter.charpos < s->strlen) {
-        const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, s, &iter);
+        const UINTVAL c = STRING_iter_get_and_advance(interp, s, &iter);
         /* Check for overflow */
         if (c > 255)
             break;
@@ -2431,15 +2349,8 @@
     size_t hashval = interp->hash_seed;
 
     if (!STRING_IS_NULL(s)) {
-        if (s->strlen) {
-            if (s->encoding->hash)
-                hashval = ENCODING_HASH(interp, s, hashval);
-            else if (s->charset->compute_hash)
-                hashval = CHARSET_COMPUTE_HASH(interp, s, hashval);
-            else {
-                exit_fatal(1, "String subsystem not properly initialized");
-            }
-        }
+        if (s->strlen)
+            hashval = STRING_hash(interp, s, hashval);
 
         s->hashval = hashval;
     }
@@ -2512,14 +2423,14 @@
 
     /* create ascii result */
     result = Parrot_str_new_init(interp, NULL, charlen,
-            Parrot_fixed_8_encoding_ptr, Parrot_ascii_charset_ptr, 0);
+            Parrot_ascii_encoding_ptr, 0);
 
     /* more work TODO */
     STRING_ITER_INIT(interp, &iter);
     dp = (unsigned char *)result->strstart;
 
     for (i = 0; len > 0; --len) {
-        UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
+        UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
         if (c < 0x7f) {
             /* process ASCII chars */
             if (i >= charlen - 2) {
@@ -2622,12 +2533,11 @@
 /*
 
 =item C<STRING * Parrot_str_unescape_string(PARROT_INTERP, const STRING *src,
-const CHARSET *charset, const ENCODING *encoding, UINTVAL flags)>
+const STR_VTABLE *encoding, UINTVAL flags)>
 
 EXPERIMENTAL, see TT #1628
 
-Unescapes the src string returnning a new string with the charset
-and encoding specified.
+Unescapes the src string returning a new string with the encoding specified.
 
 
 =cut
@@ -2638,8 +2548,7 @@
 PARROT_CANNOT_RETURN_NULL
 STRING *
 Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
-        ARGIN(const CHARSET *charset),
-        ARGIN(const ENCODING *encoding),
+        ARGIN(const STR_VTABLE *encoding),
         UINTVAL flags)
 {
     ASSERT_ARGS(Parrot_str_unescape_string)
@@ -2653,7 +2562,6 @@
     char digbuf[9];
     int pending;
 
-    result->charset = charset;
     result->encoding = encoding;
     reserved = string_max_bytes(interp, result, srclen);
     Parrot_gc_allocate_string_storage(interp, result, reserved);
@@ -2662,14 +2570,14 @@
     STRING_ITER_INIT(interp, &itersrc);
     STRING_ITER_INIT(interp, &iterdest);
     while (itersrc.bytepos < srclen) {
-        INTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
+        INTVAL c = STRING_iter_get_and_advance(interp, src, &itersrc);
         INTVAL next;
 
         do {
             pending = 0;
             next = c;
             if (c == '\\') {
-                c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
+                c = STRING_iter_get_and_advance(interp, src, &itersrc);
                 switch (c) {
                 /* Common one char sequences */
                 case 'a': next = '\a'; break;
@@ -2682,7 +2590,7 @@
                 case 'e': next = '\x1B'; break;
                 /* Escape character */
                 case 'c':
-                    c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
+                    c = STRING_iter_get_and_advance(interp, src, &itersrc);
                     /* This assumes ascii-alike encoding */
                     if (c < 'A' || c > 'Z')
                         throw_illegal_escape(interp);
@@ -2690,11 +2598,11 @@
                     break;
                 case 'x':
                     digcount = 0;
-                    c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
+                    c = STRING_iter_get_and_advance(interp, src, &itersrc);
                     if (c == '{') {
                         /* \x{h..h} 1..8 hex digits */
                         while (itersrc.bytepos < srclen) {
-                            c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
+                            c = STRING_iter_get_and_advance(interp, src, &itersrc);
                             if (c == '}')
                                 break;
                             if (!isxdigit(c))
@@ -2718,7 +2626,7 @@
                                 pending = 0;
                                 break;
                             }
-                            c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
+                            c = STRING_iter_get_and_advance(interp, src, &itersrc);
                         }
                     }
                     if (digcount == 0)
@@ -2729,7 +2637,7 @@
                 case 'u':
                     /* \uhhhh 4 hex digits */
                     for (digcount = 0; digcount < 4; ++digcount) {
-                        c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
+                        c = STRING_iter_get_and_advance(interp, src, &itersrc);
                         if (!isxdigit(c))
                             throw_illegal_escape(interp);
                         digbuf[digcount] = c;
@@ -2740,7 +2648,7 @@
                 case 'U':
                     /* \Uhhhhhhhh 8 hex digits */
                     for (digcount = 0; digcount < 8; ++digcount) {
-                        c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
+                        c = STRING_iter_get_and_advance(interp, src, &itersrc);
                         if (!isxdigit(c))
                             throw_illegal_escape(interp);
                         digbuf[digcount] = c;
@@ -2753,7 +2661,7 @@
                     /* \ooo 1..3 oct digits */
                     digbuf[0] = c;
                     for (digcount = 1; digcount < 3; ++digcount) {
-                        c = STRING_ITER_GET_AND_ADVANCE(interp, src, &itersrc);
+                        c = STRING_iter_get_and_advance(interp, src, &itersrc);
                         if (c < '0' || c > '7')
                             break;
                         digbuf[digcount] = c;
@@ -2767,7 +2675,7 @@
                     next = c;
                 }
             }
-            STRING_ITER_SET_AND_ADVANCE(interp, result, &iterdest, next);
+            STRING_iter_set_and_advance(interp, result, &iterdest, next);
         } while (pending);
     }
     result->bufused = iterdest.bytepos;
@@ -2802,9 +2710,8 @@
 {
     ASSERT_ARGS(Parrot_str_unescape)
 
-    STRING         *result;
-    const CHARSET  *charset;
-    const ENCODING *encoding = NULL;
+    STRING           *result;
+    const STR_VTABLE *encoding;
 
     /* does the encoding have a character set? */
     const char     *p        = enc_char ? strchr(enc_char, ':') : NULL;
@@ -2823,25 +2730,17 @@
         #define MAX_ENCODING_NAME_ALLOWED 63
         char   buffer[MAX_ENCODING_NAME_ALLOWED + 1];
         size_t l = p - enc_char;
-        charset  = NULL;
 
         if (l < MAX_ENCODING_NAME_ALLOWED) {
             memcpy(buffer, enc_char, l);
             buffer[l] = '\0';
-            encoding  = Parrot_find_encoding(interp, buffer);
         }
-        if (!encoding)
-            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-                "Can't make '%s' encoding strings", enc_char);
-
-        charset = Parrot_find_charset(interp, p + 1);
-        if (!charset)
-            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-                "Can't make '%s' charset strings", p + 1);
+        else {
+            buffer[0] = '\0';
+        }
 
-        result   = Parrot_str_new_init(interp, cstring, clength,
-                        encoding, charset, flags);
-        encoding = Parrot_fixed_8_encoding_ptr;
+        result   = string_make(interp, cstring, clength, buffer, flags);
+        encoding = Parrot_ascii_encoding_ptr;
     }
     else {
         result   = string_make(interp, cstring, clength, enc_char, flags);
@@ -2880,9 +2779,9 @@
 
     /* Force validating the string */
     if (encoding != result->encoding)
-        result->strlen = CHARSET_CODEPOINTS(interp, result);
+        result->strlen = STRING_scan(interp, result);
 
-    if (!CHARSET_VALIDATE(interp, result))
+    if (!STRING_validate(interp, result))
         Parrot_ex_throw_from_c_args(interp, NULL,
             EXCEPTION_INVALID_STRING_REPRESENTATION, "Malformed string");
 
@@ -2912,7 +2811,7 @@
         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNEXPECTED_NULL,
             "Can't upcase NULL string");
     else {
-        STRING * const res = CHARSET_UPCASE(interp, s);
+        STRING * const res = STRING_upcase(interp, s);
         res->hashval = 0;
         return res;
     }
@@ -2942,7 +2841,7 @@
         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNEXPECTED_NULL,
             "Can't downcase NULL string");
     else {
-        STRING * const res = CHARSET_DOWNCASE(interp, s);
+        STRING * const res = STRING_downcase(interp, s);
         res->hashval = 0;
         return res;
     }
@@ -2972,7 +2871,7 @@
         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNEXPECTED_NULL,
             "Can't titlecase NULL string");
     else {
-        STRING * const res = CHARSET_TITLECASE(interp, s);
+        STRING * const res = STRING_titlecase(interp, s);
         res->hashval = 0;
         return res;
     }
@@ -3063,7 +2962,7 @@
     if (!Parrot_str_byte_length(interp, s))
         return 0;
 
-    return CHARSET_IS_CCLASS(interp, flags, s, offset);
+    return STRING_is_cclass(interp, flags, s, offset);
 }
 
 
@@ -3090,7 +2989,7 @@
     if (STRING_IS_NULL(s))
         return -1;
 
-    return CHARSET_FIND_CCLASS(interp, flags, s, offset, count);
+    return STRING_find_cclass(interp, flags, s, offset, count);
 }
 
 
@@ -3119,7 +3018,7 @@
     if (STRING_IS_NULL(s))
         return -1;
 
-    return CHARSET_FIND_NOT_CCLASS(interp, flags, s, offset, count);
+    return STRING_find_not_cclass(interp, flags, s, offset, count);
 }
 
 
@@ -3143,21 +3042,8 @@
         INTVAL charset_nr)
 {
     ASSERT_ARGS(Parrot_str_change_charset)
-    const CHARSET *new_charset;
-
-    if (STRING_IS_NULL(src))
-        return STRINGNULL;
-
-    new_charset = Parrot_get_charset(interp, charset_nr);
-
-    if (!new_charset)
-        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_CHARTYPE,
-                "charset #%d not found", (int) charset_nr);
 
-    if (new_charset == src->charset)
-        return src;
-
-    return new_charset->to_charset(interp, src);
+    return Parrot_str_change_encoding(interp, src, charset_nr);
 }
 
 
@@ -3166,8 +3052,7 @@
 =item C<STRING* Parrot_str_change_encoding(PARROT_INTERP, STRING *src, INTVAL
 encoding_nr)>
 
-Converts C<src> to the given charset or encoding and returns the result as a
-new string.
+Converts C<src> to the given encoding and returns the result as a new string.
 
 =cut
 
@@ -3181,7 +3066,7 @@
         INTVAL encoding_nr)
 {
     ASSERT_ARGS(Parrot_str_change_encoding)
-    const ENCODING *new_encoding;
+    const STR_VTABLE *new_encoding;
 
     if (STRING_IS_NULL(src))
         return STRINGNULL;
@@ -3223,7 +3108,7 @@
     if (!src->strlen)
         return Parrot_str_new_noinit(interp, enum_stringrep_one, 0);
 
-    return CHARSET_COMPOSE(interp, src);
+    return STRING_compose(interp, src);
 }
 
 
@@ -3325,7 +3210,7 @@
         do {
             const String_iter old_iter = iter;
 
-            STRING_ITER_SKIP(interp, str, &iter, 1);
+            STRING_iter_skip(interp, str, &iter, 1);
             tstr = Parrot_str_iter_substr(interp, str, &old_iter, &iter);
             VTABLE_set_string_keyed_int(interp, res, old_iter.charpos, tstr);
         } while (iter.charpos < slen);

Deleted: trunk/src/string/charset.c
==============================================================================
--- trunk/src/string/charset.c	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,640 +0,0 @@
-/*
-Copyright (C) 2004-2009, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset.c - global charset functions
-
-=head1 DESCRIPTION
-
-These are Parrot's generic charset handling functions
-
-=over 4
-
-=cut
-
-*/
-
-#define PARROT_NO_EXTERN_CHARSET_PTRS
-#include "parrot/parrot.h"
-
-#include "encoding/fixed_8.h"
-#include "encoding/utf8.h"
-#include "encoding/utf16.h"
-#include "encoding/ucs2.h"
-#include "encoding/ucs4.h"
-
-#include "charset/ascii.h"
-#include "charset/binary.h"
-#include "charset/iso-8859-1.h"
-#include "charset/unicode.h"
-
-const CHARSET *Parrot_iso_8859_1_charset_ptr;
-const CHARSET *Parrot_binary_charset_ptr;
-const CHARSET *Parrot_default_charset_ptr;
-const CHARSET *Parrot_unicode_charset_ptr;
-const CHARSET *Parrot_ascii_charset_ptr;
-
-/* all registered charsets are collected in one global structure */
-
-typedef struct To_converter {
-    NOTNULL(const CHARSET *to);
-    NOTNULL(charset_converter_t func);
-} To_converter;
-
-typedef struct One_charset {
-    NOTNULL(CHARSET *charset);
-    STRING          *name;
-    To_converter    *to_converters;
-    int              n_converters;
-} One_charset;
-
-typedef struct All_charsets {
-    One_charset *set;
-    int          n_charsets;
-} All_charsets;
-
-static All_charsets *all_charsets;
-
-/* HEADERIZER HFILE: include/parrot/charset.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-static void Parrot_str_internal_register_charset_names(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-static INTVAL register_charset(PARROT_INTERP,
-    ARGIN(const char *charsetname),
-    ARGIN(CHARSET *charset))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-static void register_static_converters(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_Parrot_str_internal_register_charset_names \
-     __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_register_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(charsetname) \
-    , PARROT_ASSERT_ARG(charset))
-#define ASSERT_ARGS_register_static_converters __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-/*
-
-=item C<CHARSET * Parrot_new_charset(PARROT_INTERP)>
-
-Allocates a new C<CHARSET> structure from the system.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_CANNOT_RETURN_NULL
-PARROT_MALLOC
-CHARSET *
-Parrot_new_charset(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_new_charset)
-    return mem_gc_allocate_zeroed_typed(interp, CHARSET);
-}
-
-/*
-
-=item C<void Parrot_charsets_encodings_deinit(PARROT_INTERP)>
-
-Deinitializes (unloads) the charset system. Frees all charsets and the array
-that holds the charsets back to the system.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-void
-Parrot_charsets_encodings_deinit(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_charsets_encodings_deinit)
-    int i;
-    const int n = all_charsets->n_charsets;
-
-    for (i = 0; i < n; ++i) {
-        if (all_charsets->set[i].n_converters)
-            mem_gc_free(interp, all_charsets->set[i].to_converters);
-        mem_gc_free(interp, all_charsets->set[i].charset);
-    }
-    mem_gc_free(interp, all_charsets->set);
-    mem_gc_free(interp, all_charsets);
-    all_charsets = NULL;
-    parrot_deinit_encodings(interp);
-}
-
-/*
-
-=item C<const CHARSET * Parrot_find_charset(PARROT_INTERP, const char
-*charsetname)>
-
-Searches through the list of charsets for the charset given by C<charsetname>.
-Returns the charset if it is found, NULL otherwise.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_CAN_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-const CHARSET *
-Parrot_find_charset(SHIM_INTERP, ARGIN(const char *charsetname))
-{
-    ASSERT_ARGS(Parrot_find_charset)
-    int i;
-    const int n = all_charsets->n_charsets;
-
-    for (i = 0; i < n; ++i) {
-        if (STREQ(all_charsets->set[i].charset->name, charsetname))
-            return all_charsets->set[i].charset;
-    }
-
-    return NULL;
-}
-
-/*
-
-=item C<const CHARSET * Parrot_load_charset(PARROT_INTERP, const char
-*charsetname)>
-
-Throws an exception (Can't load charsets dynamically yet. https://trac.parrot.org/parrot/wiki/StringsTasklist).
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_CAN_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-const CHARSET *
-Parrot_load_charset(PARROT_INTERP, ARGIN(const char *charsetname))
-{
-    ASSERT_ARGS(Parrot_load_charset)
-    UNUSED(charsetname);
-
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-        "Can't load charsets yet");
-}
-
-/*
-
-=item C<INTVAL Parrot_charset_number(PARROT_INTERP, const STRING *charsetname)>
-
-Return the number of the charset or -1 if not found.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-Parrot_charset_number(PARROT_INTERP, ARGIN(const STRING *charsetname))
-{
-    ASSERT_ARGS(Parrot_charset_number)
-    int i;
-    const int n = all_charsets->n_charsets;
-
-    for (i = 0; i < n; ++i) {
-        if (Parrot_str_equal(interp, all_charsets->set[i].name, charsetname))
-            return i;
-    }
-    return -1;
-}
-
-/*
-
-=item C<INTVAL Parrot_charset_number_of_str(PARROT_INTERP, const STRING *src)>
-
-Return the number of the charset of the given string or -1 if not found.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-Parrot_charset_number_of_str(SHIM_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(Parrot_charset_number_of_str)
-    int i;
-    const int n = all_charsets->n_charsets;
-
-    for (i = 0; i < n; ++i) {
-        if (src->charset == all_charsets->set[i].charset)
-            return i;
-    }
-    return -1;
-}
-
-/*
-
-=item C<STRING * Parrot_charset_name(PARROT_INTERP, INTVAL number_of_charset)>
-
-Returns the name of the charset given by the INTVAL index
-C<number_of_charset>.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_CAN_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-STRING *
-Parrot_charset_name(SHIM_INTERP, INTVAL number_of_charset)
-{
-    ASSERT_ARGS(Parrot_charset_name)
-    if (number_of_charset < 0 || number_of_charset >= all_charsets->n_charsets)
-        return STRINGNULL;
-    return all_charsets->set[number_of_charset].name;
-}
-
-/*
-
-=item C<const CHARSET * Parrot_get_charset(PARROT_INTERP, INTVAL
-number_of_charset)>
-
-Returns the charset given by the INTVAL index C<number_of_charset>.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_CAN_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-const CHARSET *
-Parrot_get_charset(SHIM_INTERP, INTVAL number_of_charset)
-{
-    ASSERT_ARGS(Parrot_get_charset)
-    if (number_of_charset < 0 || number_of_charset >= all_charsets->n_charsets)
-        return NULL;
-    return all_charsets->set[number_of_charset].charset;
-}
-
-/*
-
-=item C<const char * Parrot_charset_c_name(PARROT_INTERP, INTVAL
-number_of_charset)>
-
-Returns a NULL-terminated C string with the name of the charset given by
-INTVAL index C<number_of_charset>.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_CAN_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-const char *
-Parrot_charset_c_name(SHIM_INTERP, INTVAL number_of_charset)
-{
-    ASSERT_ARGS(Parrot_charset_c_name)
-    if (number_of_charset < 0 || number_of_charset >= all_charsets->n_charsets)
-        return NULL;
-    return all_charsets->set[number_of_charset].charset->name;
-}
-
-/*
-
-=item C<static INTVAL register_charset(PARROT_INTERP, const char *charsetname,
-CHARSET *charset)>
-
-Adds a new charset C<charset> with name <charsetname> to the list of
-all charsets. Returns 0 and does nothing if a charset with that name
-already exists. Returns 1 otherwise.
-
-=cut
-
-*/
-
-static INTVAL
-register_charset(PARROT_INTERP, ARGIN(const char *charsetname),
-        ARGIN(CHARSET *charset))
-{
-    ASSERT_ARGS(register_charset)
-    int i;
-    const int n = all_charsets->n_charsets;
-
-    for (i = 0; i < n; ++i) {
-        if (STREQ(all_charsets->set[i].charset->name, charsetname))
-            return 0;
-    }
-    /*
-     * TODO
-     * this needs either a LOCK or we just forbid dynamic
-     * loading of charsets from inside threads
-     */
-    if (!n)
-        all_charsets->set = mem_gc_allocate_zeroed_typed(interp, One_charset);
-    else
-        all_charsets->set = mem_gc_realloc_n_typed_zeroed(interp,
-                all_charsets->set, n + 1, n, One_charset);
-
-    ++all_charsets->n_charsets;
-    all_charsets->set[n].charset      = charset;
-    all_charsets->set[n].n_converters = 0;
-
-    return 1;
-}
-
-/*
-
-=item C<static void Parrot_str_internal_register_charset_names(PARROT_INTERP)>
-
-Helper function for initializing characterset names. We can't create the
-STRING names until the default encodings and charsets are already initted,
-so the name generation is split into a second init stage.
-
-=cut
-
-*/
-
-static void
-Parrot_str_internal_register_charset_names(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_str_internal_register_charset_names)
-    int n;
-    for (n = 0; n < all_charsets->n_charsets; ++n)
-        all_charsets->set[n].name =
-            Parrot_str_new_constant(interp, all_charsets->set[n].charset->name);
-}
-
-/*
-
-=item C<static void register_static_converters(PARROT_INTERP)>
-
-Registers several standard converters between common charsets, including:
-
-    ISO 8859_1 -> ascii
-    ISO 8859_1 -> bin
-    ascii -> bin
-    ascii -> ISO 8859_1
-
-=cut
-
-*/
-
-static void
-register_static_converters(PARROT_INTERP)
-{
-    ASSERT_ARGS(register_static_converters)
-    Parrot_register_charset_converter(interp,
-            Parrot_iso_8859_1_charset_ptr, Parrot_ascii_charset_ptr,
-            charset_cvt_iso_8859_1_to_ascii);
-    Parrot_register_charset_converter(interp,
-            Parrot_iso_8859_1_charset_ptr, Parrot_binary_charset_ptr,
-            charset_cvt_ascii_to_binary);
-
-    Parrot_register_charset_converter(interp,
-            Parrot_ascii_charset_ptr, Parrot_binary_charset_ptr,
-            charset_cvt_ascii_to_binary);
-    Parrot_register_charset_converter(interp,
-            Parrot_ascii_charset_ptr, Parrot_iso_8859_1_charset_ptr,
-            charset_cvt_ascii_to_iso_8859_1);
-}
-
-/*
-
-=item C<INTVAL Parrot_register_charset(PARROT_INTERP, const char *charsetname,
-CHARSET *charset)>
-
-Register a new charset C<charset> with name C<charsetname>. Charset may only
-be one of the 4 following names:
-
-    binary
-    iso-8859-1
-    unicode
-    ascii
-
-Attempts to register other charsets are ignored. Returns 0 if the registration
-failed, for any reason.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-INTVAL
-Parrot_register_charset(PARROT_INTERP, ARGIN(const char *charsetname),
-        ARGIN(CHARSET *charset))
-{
-    ASSERT_ARGS(Parrot_register_charset)
-    if (!all_charsets) {
-        all_charsets             = mem_gc_allocate_zeroed_typed(interp, All_charsets);
-        all_charsets->set        = NULL;
-        all_charsets->n_charsets = 0;
-    }
-
-    if (STREQ("binary", charsetname)) {
-        Parrot_binary_charset_ptr = charset;
-        return register_charset(interp, charsetname, charset);
-    }
-
-    if (STREQ("iso-8859-1", charsetname)) {
-        Parrot_iso_8859_1_charset_ptr = charset;
-        return register_charset(interp, charsetname, charset);
-    }
-
-    if (STREQ("unicode", charsetname)) {
-        Parrot_unicode_charset_ptr = charset;
-        return register_charset(interp, charsetname, charset);
-    }
-
-    if (STREQ("ascii", charsetname)) {
-        if (!Parrot_default_charset_ptr)
-            Parrot_default_charset_ptr = charset;
-
-        Parrot_ascii_charset_ptr = charset;
-        return register_charset(interp, charsetname, charset);
-    }
-
-    return 0;
-}
-
-/*
-
-=item C<void Parrot_charsets_encodings_init(PARROT_INTERP)>
-
-Creates the initial charsets and encodings, and registers the initial
-charset converters.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-void
-Parrot_charsets_encodings_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_charsets_encodings_init)
-    /* the order is crucial here:
-     * 1) encodings, default = fixed_8
-     * 2) charsets   default = ascii */
-    Parrot_encoding_fixed_8_init(interp);
-    Parrot_encoding_utf8_init(interp);
-    Parrot_encoding_ucs2_init(interp);
-    Parrot_encoding_utf16_init(interp);
-    Parrot_encoding_ucs4_init(interp);
-
-    Parrot_charset_ascii_init(interp);
-    Parrot_charset_iso_8859_1_init(interp);
-    Parrot_charset_binary_init(interp);
-    Parrot_charset_unicode_init(interp);
-
-    /* Now that the plugins are registered, we can create STRING
-     * names for them.  */
-    Parrot_str_internal_register_encoding_names(interp);
-    Parrot_str_internal_register_charset_names(interp);
-
-    /* now install charset converters */
-    register_static_converters(interp);
-}
-
-/*
-
-=item C<INTVAL Parrot_make_default_charset(PARROT_INTERP, const char
-*charsetname, const CHARSET *charset)>
-
-Sets the current default charset to C<charset> with name C<charsetname>.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-INTVAL
-Parrot_make_default_charset(SHIM_INTERP, SHIM(const char *charsetname),
-        ARGIN(const CHARSET *charset))
-{
-    ASSERT_ARGS(Parrot_make_default_charset)
-    Parrot_default_charset_ptr = charset;
-    return 1;
-}
-
-/*
-
-=item C<const CHARSET * Parrot_default_charset(PARROT_INTERP)>
-
-Returns the default charset.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_WARN_UNUSED_RESULT
-PARROT_CAN_RETURN_NULL
-const CHARSET *
-Parrot_default_charset(SHIM_INTERP)
-{
-    ASSERT_ARGS(Parrot_default_charset)
-    return Parrot_default_charset_ptr;
-}
-
-/*
-
-=item C<charset_converter_t Parrot_find_charset_converter(PARROT_INTERP, const
-CHARSET *lhs, const CHARSET *rhs)>
-
-Finds a converter from charset C<lhs> to charset C<rhs>.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_PURE_FUNCTION
-PARROT_WARN_UNUSED_RESULT
-PARROT_CAN_RETURN_NULL
-charset_converter_t
-Parrot_find_charset_converter(SHIM_INTERP,
-        ARGIN(const CHARSET *lhs), ARGIN(const CHARSET *rhs))
-{
-    ASSERT_ARGS(Parrot_find_charset_converter)
-    int i;
-    const int n = all_charsets->n_charsets;
-
-    for (i = 0; i < n; ++i) {
-        if (lhs == all_charsets->set[i].charset) {
-            const One_charset * const left = all_charsets->set + i;
-            const int nc = left->n_converters;
-            int j;
-
-            for (j = 0; j < nc; ++j) {
-                if (left->to_converters[j].to == rhs)
-                    return left->to_converters[j].func;
-            }
-        }
-    }
-    return NULL;
-}
-
-/*
-
-=item C<void Parrot_register_charset_converter(PARROT_INTERP, const CHARSET
-*lhs, const CHARSET *rhs, charset_converter_t func)>
-
-Registers a converter C<func> from charset C<lhs> to C<rhs>.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-void
-Parrot_register_charset_converter(PARROT_INTERP,
-        ARGIN(const CHARSET *lhs), ARGIN(const CHARSET *rhs),
-        ARGIN(charset_converter_t func))
-{
-    ASSERT_ARGS(Parrot_register_charset_converter)
-    int i;
-    const int n = all_charsets->n_charsets;
-
-    for (i = 0; i < n; ++i) {
-        if (lhs == all_charsets->set[i].charset) {
-            One_charset * const left = all_charsets->set + i;
-            const int nc = left->n_converters++;
-
-            if (nc) {
-                left->to_converters = mem_gc_realloc_n_typed_zeroed(interp,
-                        left->to_converters, nc + 1, nc, To_converter);
-            }
-            else
-                left->to_converters = mem_gc_allocate_zeroed_typed(interp, To_converter);
-            left->to_converters[nc].to = rhs;
-            left->to_converters[nc].func = func;
-        }
-    }
-}
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: trunk/src/string/charset/ascii.c
==============================================================================
--- trunk/src/string/charset/ascii.c	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,876 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/ascii.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for ascii data and common
-charset functionality for similar charsets like iso-8859-1.
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "ascii.h"
-
-/*
- * TODO check interpreter error and warnings setting
- */
-
-#include "tables.h"
-
-/* HEADERIZER HFILE: src/string/charset/ascii.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* compose(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL find_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL find_not_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL is_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_ascii(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL validate(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_ascii __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-/*
-
-=item C<STRING * ascii_get_graphemes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Retrieves the graphemes for the STRING C<src>, starting at
-C<offset> and ending at C<offset + count>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-STRING *
-ascii_get_graphemes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(ascii_get_graphemes)
-    return ENCODING_GET_BYTES(interp, src, offset, count);
-}
-
-/*
-
-=item C<static STRING * to_ascii(PARROT_INTERP, const STRING *src)>
-
-Attempts to convert STRING C<src> to ASCII in STRING C<dest>. Throws
-an exception if unconvertable UNICODE characters are involved.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_ascii(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_ascii)
-    String_iter iter;
-    unsigned char *p;
-    const UINTVAL len = src->strlen;
-
-    /* the string can't grow. Just clone it */
-    STRING * const dest = Parrot_str_clone(interp, src);
-
-    p = (unsigned char *)dest->strstart;
-    STRING_ITER_INIT(interp, &iter);
-    while (iter.charpos < len) {
-        const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (c >= 128)
-            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
-                    "can't convert unicode string to ascii");
-        *p++ = (unsigned char)c;
-    }
-    dest->bufused = len;
-    dest->strlen = len;
-    dest->charset = Parrot_ascii_charset_ptr;
-    dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest);
-    return dest;
-}
-
-/*
-
-=item C<static STRING * to_charset(PARROT_INTERP, const STRING *src)>
-
-Converts STRING C<src> to ASCII charset STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_charset)
-    const charset_converter_t conversion_func =
-        Parrot_find_charset_converter(interp, src->charset, Parrot_ascii_charset_ptr);
-
-    if (conversion_func) {
-         return conversion_func(interp, src);
-    }
-    else {
-        return to_ascii(interp, src);
-    }
-}
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-Can't compose ASCII strings, so performs a string copy on it and
-returns the new string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-compose(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(compose)
-
-    STRING * const dest = Parrot_str_copy(interp, src);
-
-    return dest;
-}
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-Can't decompose ASCII, so we perform a string copy instead and return
-a pointer to the new string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(decompose)
-
-    STRING * const dest = Parrot_str_copy(interp, src);
-
-    return dest;
-}
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to all uppercase.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(upcase)
-    STRING * const result = Parrot_str_clone(interp, src);
-    const UINTVAL n = src->strlen;
-
-    if (n) {
-        char * const buffer = result->strstart;
-        UINTVAL offset;
-
-        for (offset = 0; offset < n; ++offset) {
-            buffer[offset] = (char)toupper((unsigned char)buffer[offset]);
-        }
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to all lower-case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(downcase)
-    STRING       *result = Parrot_str_clone(interp, src);
-    const UINTVAL n      = src->strlen;
-
-    if (n) {
-        char * const buffer = result->strstart;
-        UINTVAL offset;
-
-        for (offset = 0; offset < n; ++offset) {
-            buffer[offset] = (char)tolower((unsigned char)buffer[offset]);
-        }
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING given by C<src> to title case, where
-the first character is upper case and all the rest of the characters
-are lower-case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(titlecase)
-    STRING       *result = Parrot_str_clone(interp, src);
-    const UINTVAL n      = src->strlen;
-
-    if (n) {
-        char * const buffer = result->strstart;
-        UINTVAL offset;
-
-        buffer[0] = (char)toupper((unsigned char)buffer[0]);
-        for (offset = 1; offset < n; ++offset) {
-            buffer[offset] = (char)tolower((unsigned char)buffer[offset]);
-        }
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Sets the first character in the STRING C<src> to upper case,
-but doesn't modify the rest of the string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(upcase_first)
-    STRING * const result = Parrot_str_clone(interp, src);
-
-    if (result->strlen > 0) {
-        char * const buffer = result->strstart;
-        buffer[0] = (char)toupper((unsigned char)buffer[0]);
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Sets the first character of the STRING C<src> to lowercase,
-but doesn't modify the rest of the characters.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(downcase_first)
-    STRING * const result = Parrot_str_clone(interp, src);
-
-    if (result->strlen > 0) {
-        char * const buffer = result->strstart;
-        buffer[0] = (char)tolower((unsigned char)buffer[0]);
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first letter of STRING C<src> to upper case,
-but doesn't modify the rest of the string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(titlecase_first)
-    STRING * const result = Parrot_str_clone(interp, src);
-
-    if (result->strlen > 0) {
-        char * const buffer = result->strstart;
-        buffer[0] = (char)toupper((unsigned char)buffer[0]);
-    }
-
-    return result;
-}
-
-/*
-
-=item C<INTVAL ascii_compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
-
-Compares two strings as ASCII strings. If STRING C<lhs> > C<rhs>, returns
-1. If C<lhs> == C<rhs> returns 0. If STRING C<lhs> < C<rhs>, returns  -1.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-ascii_compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
-{
-    ASSERT_ARGS(ascii_compare)
-    const UINTVAL l_len = lhs->strlen;
-    const UINTVAL r_len = rhs->strlen;
-    const UINTVAL min_len = l_len > r_len ? r_len : l_len;
-    String_iter iter;
-
-    if (lhs->encoding == rhs->encoding) {
-        const int ret_val = memcmp(lhs->strstart, rhs->strstart, min_len);
-        if (ret_val)
-            return ret_val < 0 ? -1 : 1;
-    }
-    else {
-        STRING_ITER_INIT(interp, &iter);
-        while (iter.charpos < min_len) {
-            const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, iter.charpos);
-            const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &iter);
-            if (cl != cr)
-                return cl < cr ? -1 : 1;
-        }
-    }
-    if (l_len < r_len) {
-        return -1;
-    }
-    if (l_len > r_len) {
-        return 1;
-    }
-    return 0;
-}
-
-/*
-
-=item C<INTVAL mixed_cs_index(PARROT_INTERP, const STRING *src, const STRING
-*search, UINTVAL offs)>
-
-Searches for the first instance of STRING C<search> in STRING C<src>.
-returns the position where the substring is found if it is indeed found.
-Returns -1 otherwise. Operates on different types of strings, not just
-ASCII.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-mixed_cs_index(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search),
-    UINTVAL offs)
-{
-    ASSERT_ARGS(mixed_cs_index)
-    String_iter start, end;
-
-    STRING_ITER_INIT(interp, &start);
-    STRING_ITER_SET_POSITION(interp, src, &start, offs);
-
-    return Parrot_str_iter_index(interp, src, &start, &end, search);
-}
-
-/*
-
-=item C<INTVAL ascii_cs_index(PARROT_INTERP, const STRING *src, const STRING
-*search_string, UINTVAL offset)>
-
-Searches for the first instance of STRING C<search> in STRING C<src>.
-returns the position where the substring is found if it is indeed found.
-Returns -1 otherwise.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-ascii_cs_index(PARROT_INTERP, ARGIN(const STRING *src),
-        ARGIN(const STRING *search_string), UINTVAL offset)
-{
-    ASSERT_ARGS(ascii_cs_index)
-    INTVAL retval;
-    if (src->charset != search_string->charset) {
-        return mixed_cs_index(interp, src, search_string, offset);
-    }
-
-    PARROT_ASSERT(src->encoding == Parrot_fixed_8_encoding_ptr);
-    retval = Parrot_byte_index(interp, src,
-            search_string, offset);
-    return retval;
-}
-
-/*
-
-=item C<INTVAL ascii_cs_rindex(PARROT_INTERP, const STRING *src, const STRING
-*search_string, UINTVAL offset)>
-
-Searches for the last instance of STRING C<search_string> in STRING
-C<src>. Starts searching at C<offset>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-ascii_cs_rindex(PARROT_INTERP, ARGIN(const STRING *src),
-        ARGIN(const STRING *search_string), UINTVAL offset)
-{
-    ASSERT_ARGS(ascii_cs_rindex)
-    INTVAL retval;
-
-    if (src->charset != search_string->charset)
-        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-            "Cross-charset index not supported");
-
-    PARROT_ASSERT(src->encoding == Parrot_fixed_8_encoding_ptr);
-    retval = Parrot_byte_rindex(interp, src,
-            search_string, offset);
-    return retval;
-}
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Verifies that the given string is valid ASCII. Returns 1 if it is ASCII,
-returns 0 otherwise.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-validate(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(validate)
-    String_iter iter;
-    const INTVAL length = Parrot_str_length(interp, src);
-
-    STRING_ITER_INIT(interp, &iter);
-    while (iter.charpos < length) {
-        const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (codepoint >= 0x80)
-            return 0;
-    }
-    return 1;
-}
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Creates a new STRING object from a single codepoint C<codepoint>. Returns
-the new STRING.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
-    ASSERT_ARGS(string_from_codepoint)
-    char real_codepoint = (char)codepoint;
-    STRING * const return_string = string_make(interp, &real_codepoint, 1, "ascii", 0);
-    return return_string;
-}
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Returns 1 if the character at C<offset> in C<src> is in any class in C<flags>, 0 otherwise.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL
-is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(is_cclass)
-    UINTVAL codepoint;
-
-    if (offset >= src->strlen)
-        return 0;
-    codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
-
-    if (codepoint >= sizeof (Parrot_ascii_typetable) / sizeof (Parrot_ascii_typetable[0])) {
-        return 0;
-    }
-    return (Parrot_ascii_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class.  Delegates to the find_cclass
-method of the encoding plugin.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL
-find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_cclass)
-    UINTVAL pos = offset;
-    UINTVAL end = offset + count;
-
-    end = src->strlen < end ? src->strlen : end;
-    return ENCODING_FIND_CCLASS(interp, src, Parrot_ascii_typetable,
-            flags, pos, end);
-}
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Returns the position of the first character not in C<flags>, or the end of the range if none.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(PARROT_INTERP,
-                INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_not_cclass)
-    UINTVAL pos = offset;
-    UINTVAL end = offset + count;
-
-    end = src->strlen < end ? src->strlen : end;
-    for (; pos < end; ++pos) {
-        const UINTVAL codepoint = ENCODING_GET_CODEPOINT(interp, src, pos);
-        if ((Parrot_ascii_typetable[codepoint] & flags) == 0) {
-            return pos;
-        }
-    }
-    return end;
-}
-
-/*
-
-=item C<size_t ascii_compute_hash(PARROT_INTERP, const STRING *src, size_t
-seed)>
-
-Computes the hash of STRING C<src> starting with seed value
-C<seed>.
-
-=cut
-
-*/
-
-PARROT_PURE_FUNCTION
-size_t
-ascii_compute_hash(SHIM_INTERP, ARGIN(const STRING *src), size_t seed)
-{
-    ASSERT_ARGS(ascii_compute_hash)
-    size_t hashval = seed;
-    const char *buffptr = (const char *)src->strstart;
-    UINTVAL len = src->strlen;
-
-    PARROT_ASSERT(src->encoding == Parrot_fixed_8_encoding_ptr);
-    while (len--) {
-        hashval += hashval << 5;
-        hashval += *buffptr++;
-    }
-    return hashval;
-}
-
-/*
-
-=item C<void Parrot_charset_ascii_init(PARROT_INTERP)>
-
-Initialize the ASCII charset by registering all the necessary
-function pointers and settings.
-
-=cut
-
-*/
-
-void
-Parrot_charset_ascii_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_charset_ascii_init)
-    CHARSET * const return_set = Parrot_new_charset(interp);
-    static const CHARSET base_set = {
-        "ascii",
-        ascii_get_graphemes,
-        to_charset,
-        compose,
-        decompose,
-        upcase,
-        downcase,
-        titlecase,
-        upcase_first,
-        downcase_first,
-        titlecase_first,
-        ascii_compare,
-        ascii_cs_index,
-        ascii_cs_rindex,
-        validate,
-        is_cclass,
-        find_cclass,
-        find_not_cclass,
-        string_from_codepoint,
-        ascii_compute_hash,
-        NULL
-    };
-
-    STRUCT_COPY_FROM_STRUCT(return_set, base_set);
-    return_set->preferred_encoding = Parrot_fixed_8_encoding_ptr;
-    Parrot_register_charset(interp, "ascii", return_set);
-
-    return;
-}
-
-/*
-
-=item C<STRING * charset_cvt_ascii_to_binary(PARROT_INTERP, const STRING *src)>
-
-Converts an ASCII STRING C<src> to a binary STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-STRING *
-charset_cvt_ascii_to_binary(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(charset_cvt_ascii_to_binary)
-    STRING * const dest = Parrot_str_clone(interp, src);
-    UINTVAL offs;
-
-    for (offs = 0; offs < src->strlen; ++offs) {
-        const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);
-        ENCODING_SET_BYTE(interp, dest, offs, c);
-    }
-
-    dest->charset = Parrot_binary_charset_ptr;
-    return dest;
-}
-
-/*
-
-=item C<STRING * charset_cvt_ascii_to_iso_8859_1(PARROT_INTERP, const STRING
-*src)>
-
-Converts ASCII STRING C<src> to ISO8859-1 STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-STRING *
-charset_cvt_ascii_to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(charset_cvt_ascii_to_iso_8859_1)
-    STRING * const dest = Parrot_str_clone(interp, src);
-    UINTVAL offs;
-
-    for (offs = 0; offs < src->strlen; ++offs) {
-        const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);
-        ENCODING_SET_BYTE(interp, dest, offs, c);
-    }
-
-    dest->charset = Parrot_iso_8859_1_charset_ptr;
-    return dest;
-}
-
-/*
-
-=back
-
-=cut
-
-*/
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: trunk/src/string/charset/ascii.h
==============================================================================
--- trunk/src/string/charset/ascii.h	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,128 +0,0 @@
-/* ascii.h
- *  Copyright (C) 2004-2007, Parrot Foundation.
- *  SVN Info
- *     $Id$
- *  Overview:
- *     This is the header for the ascii charset functions
- *  Data Structure and Algorithms:
- *  History:
- *  Notes:
- *  References:
- */
-
-#ifndef PARROT_CHARSET_ASCII_H_GUARD
-#define PARROT_CHARSET_ASCII_H_GUARD
-
-/*
- * common functions for ascii-ish charsets
- */
-
-/* HEADERIZER BEGIN: src/string/charset/ascii.c */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL ascii_compare(PARROT_INTERP,
-    ARGIN(const STRING *lhs),
-    ARGIN(const STRING *rhs))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_PURE_FUNCTION
-size_t ascii_compute_hash(SHIM_INTERP,
-    ARGIN(const STRING *src),
-    size_t seed)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL ascii_cs_index(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    ARGIN(const STRING *search_string),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL ascii_cs_rindex(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    ARGIN(const STRING *search_string),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-STRING * ascii_get_graphemes(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-STRING * charset_cvt_ascii_to_binary(PARROT_INTERP,
-    ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-STRING * charset_cvt_ascii_to_iso_8859_1(PARROT_INTERP,
-    ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL mixed_cs_index(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    ARGIN(const STRING *search),
-    UINTVAL offs)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-void Parrot_charset_ascii_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_ascii_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(lhs) \
-    , PARROT_ASSERT_ARG(rhs))
-#define ASSERT_ARGS_ascii_compute_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ascii_cs_index __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src) \
-    , PARROT_ASSERT_ARG(search_string))
-#define ASSERT_ARGS_ascii_cs_rindex __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src) \
-    , PARROT_ASSERT_ARG(search_string))
-#define ASSERT_ARGS_ascii_get_graphemes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_charset_cvt_ascii_to_binary __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_charset_cvt_ascii_to_iso_8859_1 \
-     __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_mixed_cs_index __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src) \
-    , PARROT_ASSERT_ARG(search))
-#define ASSERT_ARGS_Parrot_charset_ascii_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: src/string/charset/ascii.c */
-
-#endif /* PARROT_CHARSET_ASCII_H_GUARD */
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: trunk/src/string/charset/binary.c
==============================================================================
--- trunk/src/string/charset/binary.c	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,494 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/binary.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for binary data
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-
-/* In local src/string/charset/ directory */
-#include "ascii.h"
-#include "binary.h"
-
-/* HEADERIZER HFILE: src/string/charset/binary.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-static INTVAL compare(SHIM_INTERP,
-    ARGIN(const STRING *lhs),
-    ARGIN(const STRING *rhs))
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* compose(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-static INTVAL find_cclass(SHIM_INTERP,
-    SHIM(INTVAL flags),
-    SHIM(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count);
-
-static INTVAL find_not_cclass(SHIM_INTERP,
-    SHIM(INTVAL flags),
-    SHIM(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count);
-
-static INTVAL is_cclass(SHIM_INTERP,
-    SHIM(INTVAL flags),
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset));
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-static UINTVAL validate(SHIM_INTERP, SHIM(const STRING *src));
-#define ASSERT_ARGS_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(lhs) \
-    , PARROT_ASSERT_ARG(rhs))
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#ifdef EXCEPTION
-#  undef EXCEPTION
-#endif
-
-#define EXCEPTION(err, str) \
-    Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
-
-
-/*
-
-=item C<static STRING* to_charset(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to STRING C<dest> in binary mode. Throws
-an exception if a suitable conversion function is not found.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_charset)
-    charset_converter_t conversion_func =
-        Parrot_find_charset_converter(interp, src->charset, Parrot_binary_charset_ptr);
-
-    if (conversion_func)
-         return conversion_func(interp, src);
-
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-        "to_charset for binary not implemented");
-}
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot compose a binary string.
-
-=cut
-
-*/
-
-/* A err. can't compose binary */
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-compose(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(compose)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't compose binary data");
-}
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot decompose a binary string.
-
-=cut
-
-*/
-
-/* A err. can't decompose binary */
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(decompose)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't decompose binary data");
-}
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot convert a binary string to
-upper case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(upcase)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't upcase binary data");
-}
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot convert a binary string to
-lower-case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(downcase)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't downcase binary data");
-}
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot convert a binary string to
-title case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(titlecase)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't titlecase binary data");
-}
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot set the first "character" of the
-binary string to uppercase.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(upcase_first)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't upcase binary data");
-}
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot set the first "character"
-of the binary string to lowercase.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(downcase_first)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't downcase binary data");
-}
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we can't convert the first "character"
-of binary data to title case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(titlecase_first)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't titlecase binary data");
-}
-
-/*
-
-=item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
-
-Compare the two buffers, first by size, then with memcmp.
-
-=cut
-
-*/
-
-static INTVAL
-compare(SHIM_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
-{
-    ASSERT_ARGS(compare)
-    const UINTVAL l_len = lhs->strlen;
-    const UINTVAL r_len = rhs->strlen;
-    if (l_len != r_len)
-        return l_len - r_len;
-
-    return memcmp(lhs->strstart, rhs->strstart, l_len);
-}
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Returns 1. All sequential data is valid binary data.
-
-=cut
-
-*/
-
-/* Binary's always valid */
-static UINTVAL
-validate(SHIM_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(validate)
-    return 1;
-}
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Always returns 0 for binary data.
-
-=cut
-
-*/
-
-static INTVAL
-is_cclass(SHIM_INTERP, SHIM(INTVAL flags), SHIM(const STRING *src), SHIM(UINTVAL offset))
-{
-    ASSERT_ARGS(is_cclass)
-    return 0;
-}
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class.
-
-=cut
-
-*/
-
-static INTVAL
-find_cclass(SHIM_INTERP, SHIM(INTVAL flags),
-            SHIM(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_cclass)
-    return offset + count;
-}
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Always returns C<offset + count>.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(SHIM_INTERP, SHIM(INTVAL flags),
-               SHIM(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_not_cclass)
-    return offset + count;
-}
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Creates a new STRING object from a single codepoint C<codepoint>. Returns
-the new STRING.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
-    ASSERT_ARGS(string_from_codepoint)
-    STRING *return_string;
-    char real_codepoint = (char)codepoint;
-    return_string = string_make(interp, &real_codepoint, 1, "binary", 0);
-    return return_string;
-}
-
-
-/*
-
-=item C<void Parrot_charset_binary_init(PARROT_INTERP)>
-
-Initialize the binary charset, including function pointers and
-settings.
-
-=cut
-
-*/
-
-void
-Parrot_charset_binary_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_charset_binary_init)
-    CHARSET * const return_set = Parrot_new_charset(interp);
-    static const CHARSET base_set = {
-        "binary",
-        ascii_get_graphemes,
-        to_charset,
-        compose,
-        decompose,
-        upcase,
-        downcase,
-        titlecase,
-        upcase_first,
-        downcase_first,
-        titlecase_first,
-        compare,
-        ascii_cs_index,
-        ascii_cs_rindex,
-        validate,
-        is_cclass,
-        find_cclass,
-        find_not_cclass,
-        string_from_codepoint,
-        ascii_compute_hash,
-        NULL
-    };
-
-    STRUCT_COPY_FROM_STRUCT(return_set, base_set);
-    return_set->preferred_encoding = Parrot_fixed_8_encoding_ptr;
-    Parrot_register_charset(interp, "binary", return_set);
-
-    return;
-
-}
-
-/*
-
-=back
-
-=cut
-
-*/
-
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: trunk/src/string/charset/binary.h
==============================================================================
--- trunk/src/string/charset/binary.h	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,34 +0,0 @@
-/* binary.h
- *  Copyright (C) 2004-2007, Parrot Foundation.
- *  SVN Info
- *     $Id$
- *  Overview:
- *     This is the header for the binary charset functions
- *  Data Structure and Algorithms:
- *  History:
- *  Notes:
- *  References:
- */
-
-#ifndef PARROT_CHARSET_BINARY_H_GUARD
-#define PARROT_CHARSET_BINARY_H_GUARD
-
-/* HEADERIZER BEGIN: src/string/charset/binary.c */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-void Parrot_charset_binary_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_Parrot_charset_binary_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: src/string/charset/binary.c */
-
-#endif /* PARROT_CHARSET_BINARY_H_GUARD */
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: trunk/src/string/charset/iso-8859-1.c
==============================================================================
--- trunk/src/string/charset/iso-8859-1.c	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,733 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/iso-8859-1.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for iso-8859-1 data
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "iso-8859-1.h"
-#include "ascii.h"
-
-/* HEADERIZER HFILE: src/string/charset/iso-8859-1.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING* compose(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static INTVAL find_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL find_not_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL is_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING * to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_unicode(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static UINTVAL validate(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_iso_8859_1 __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_unicode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#include "tables.h"
-
-/*
-
-=item C<static STRING * to_iso_8859_1(PARROT_INTERP, const STRING *src)>
-
-Converts STRING C<src> to iso-8859-1 in STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_iso_8859_1)
-    UINTVAL src_len;
-    String_iter iter;
-    /* iso-8859-1 is never bigger than source */
-    STRING * dest = Parrot_str_clone(interp, src);
-
-    STRING_ITER_INIT(interp, &iter);
-    src_len = src->strlen;
-    dest->bufused = src_len;
-    while (iter.charpos < src_len) {
-        const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (c >= 0x100)
-            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
-                "lossy conversion to iso-8559-1");
-
-        Parrot_fixed_8_encoding_ptr->set_byte(interp, dest, iter.charpos - 1, c);
-    }
-    dest->charset = Parrot_iso_8859_1_charset_ptr;
-    dest->encoding = Parrot_fixed_8_encoding_ptr;
-    return dest;
-}
-
-/*
-
-=item C<static STRING * to_unicode(PARROT_INTERP, const STRING *src)>
-
-Converts STRING C<src> to a unicode STRING. Each byte of C<src> is read in
-turn and written as one codepoint in the preferred encoding of the unicode
-charset. Returns the new STRING.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_unicode(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_unicode)
-    STRING * dest = Parrot_str_clone(interp, src);
-    String_iter iter;
-
-    dest->charset = Parrot_unicode_charset_ptr;
-    dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest);
-    Parrot_gc_reallocate_string_storage(interp, dest, src->strlen);
-    STRING_ITER_INIT(interp, &iter);
-    while (iter.charpos < src->strlen) {
-        const UINTVAL c = ENCODING_GET_BYTE(interp, src, iter.charpos);
-
-        /* Grow the buffer once fewer than 4 spare bytes remain. The test is
-         * written as an addition so the unsigned arithmetic cannot wrap
-         * around when the buffer is shorter than 4 bytes. */
-        if (iter.bytepos + 4 >= Buffer_buflen(dest)) {
-            UINTVAL need = (UINTVAL)((src->strlen - iter.charpos) * 1.5);
-            if (need < 16)
-                need = 16;
-            Parrot_gc_reallocate_string_storage(interp, dest,
-                    Buffer_buflen(dest) + need);
-        }
-        STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, c);
-    }
-    /* the iterator's final position gives the byte and character counts */
-    dest->bufused = iter.bytepos;
-    dest->strlen  = iter.charpos;
-    return dest;
-}
-
-/*
-
-=item C<static STRING * to_charset(PARROT_INTERP, const STRING *src)>
-
-Returns STRING C<src> converted to ISO-8859-1. A converter registered for the
-source charset is used when one exists; otherwise the conversion is done by
-C<to_iso_8859_1>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING *
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_charset)
-    const charset_converter_t converter =
-        Parrot_find_charset_converter(interp, src->charset, Parrot_iso_8859_1_charset_ptr);
-
-    /* no registered converter: fall back to the generic conversion */
-    if (!converter)
-        return to_iso_8859_1(interp, src);
-
-    return converter(interp, src);
-}
-
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-ISO-8859-1 does not support composing, so the result is simply a copy of
-STRING C<src>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING*
-compose(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(compose)
-
-    return Parrot_str_copy(interp, src);
-}
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-ISO-8859-1 does not support decomposing, so this always throws
-C<EXCEPTION_UNIMPLEMENTED>. The C<src> argument is ignored.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(decompose)
-    Parrot_ex_throw_from_c_args(interp,
-            NULL,
-            EXCEPTION_UNIMPLEMENTED,
-            "decompose for iso-8859-1 not implemented");
-}
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Returns a clone of STRING C<src> in which every byte that has an upper-case
-form in ISO-8859-1 is replaced by it. Bytes without one (including 0xFF,
-whose upper-case form lies outside ISO-8859-1) are left unchanged.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(upcase)
-    unsigned char *buffer;
-    UINTVAL        offset;
-    STRING        *result = Parrot_str_clone(interp, src);
-
-    if (!result->strlen)
-        return result;
-
-    buffer = (unsigned char *)result->strstart;
-    for (offset = 0; offset < result->strlen; ++offset) {
-        unsigned int c = buffer[offset]; /* XXX use encoding ? */
-        /* 0xe0-0xfe, except the division sign 0xf7, are lower-case letters
-         * whose upper-case form is 0x20 below them. 0xff is excluded:
-         * clearing bit 5 would turn it into 0xdf (sharp s), which is not
-         * its upper-case form. */
-        if (c >= 0xe0 && c != 0xf7 && c != 0xff)
-            c &= ~0x20;
-        else
-            c = toupper((unsigned char)c);
-        buffer[offset] = (unsigned char)c;
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Returns a clone of STRING C<src> with every byte replaced by its lower-case
-form, for those bytes that have one.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(downcase)
-    STRING * const result = Parrot_str_clone(interp, src);
-    unsigned char *bytes;
-    UINTVAL        i;
-
-    /* nothing to convert in an empty string */
-    if (result->strlen == 0)
-        return result;
-
-    bytes = (unsigned char *)result->strstart;
-    i     = 0;
-    while (i < result->strlen) {
-        const unsigned int ch = bytes[i];
-
-        /* 0xc0-0xde, except the multiplication sign 0xd7, are upper-case
-         * letters whose lower-case form is 0x20 above them */
-        if (ch < 0xc0 || ch > 0xde || ch == 0xd7)
-            bytes[i] = (unsigned char)tolower((unsigned char)ch);
-        else
-            bytes[i] = (unsigned char)(ch | 0x20);
-
-        ++i;
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Returns a clone of STRING C<src> converted to title case: the first byte is
-upper-cased and every following byte is lower-cased, for those bytes that
-have the corresponding form in ISO-8859-1.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(titlecase)
-    unsigned char *buffer;
-    unsigned int   c;
-    UINTVAL        offset;
-    STRING        *result = Parrot_str_clone(interp, src);
-
-    if (!result->strlen)
-        return result;
-
-    buffer = (unsigned char *)result->strstart;
-    c = buffer[0];
-    /* 0xe0-0xfe, except the division sign 0xf7, upper-case by clearing
-     * bit 5. 0xff is excluded: clearing bit 5 would turn it into 0xdf
-     * (sharp s), which is not its upper-case form. */
-    if (c >= 0xe0 && c != 0xf7 && c != 0xff)
-        c &= ~0x20;
-    else
-        c = toupper((unsigned char)c);
-    buffer[0] = (unsigned char)c;
-
-    for (offset = 1; offset < result->strlen; ++offset) {
-        c = buffer[offset];
-        /* 0xc0-0xde, except the multiplication sign 0xd7, lower-case by
-         * setting bit 5 */
-        if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
-            c |= 0x20;
-        else
-            c = tolower((unsigned char)c);
-        buffer[offset] = (unsigned char)c;
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Returns a clone of STRING C<src> whose first byte is converted to upper case,
-if it has an upper-case form in ISO-8859-1. The rest of the string is
-unchanged.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(upcase_first)
-    unsigned char *buffer;
-    unsigned int   c;
-    STRING        *result = Parrot_str_clone(interp, src);
-
-    if (!result->strlen)
-        return result;
-
-    buffer = (unsigned char *)result->strstart;
-    c = buffer[0];
-    /* 0xe0-0xfe, except the division sign 0xf7, upper-case by clearing
-     * bit 5. 0xff is excluded: clearing bit 5 would turn it into 0xdf
-     * (sharp s), which is not its upper-case form. */
-    if (c >= 0xe0 && c != 0xf7 && c != 0xff)
-        c &= ~0x20;
-    else
-        c = toupper((unsigned char)c);
-    buffer[0] = (unsigned char)c;
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Returns a clone of STRING C<src> whose first byte is converted to lower case,
-if it has a lower-case form. The rest of the string is unchanged.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(downcase_first)
-    unsigned char *buffer;
-    unsigned int   c;
-    STRING        *result = Parrot_str_clone(interp, src);
-
-    if (!result->strlen)
-        return result;
-
-    buffer = (unsigned char *)result->strstart;
-    c = buffer[0];
-    /* 0xc0-0xde, except the multiplication sign 0xd7, are upper-case
-     * letters; their lower-case form is reached by SETTING bit 5, as
-     * downcase() does. Clearing the bit is a no-op in this range. */
-    if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
-        c |= 0x20;
-    else
-        c = tolower((unsigned char)c);
-    buffer[0] = (unsigned char)c;
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Returns a clone of STRING C<src> with its first byte in title case. This is
-implemented by delegating to C<upcase_first>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(titlecase_first)
-    STRING * const result = upcase_first(interp, src);
-
-    return result;
-}
-
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Checks every codepoint of STRING C<src>. Returns 0 as soon as one is 0x100 or
-greater, and 1 if none is.
-
-=cut
-
-*/
-
-static UINTVAL
-validate(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(validate)
-    const INTVAL char_count = Parrot_str_length(interp, src);
-    INTVAL       idx        = 0;
-
-    while (idx < char_count) {
-        const UINTVAL cp = ENCODING_GET_CODEPOINT(interp, src, idx);
-
-        /* anything past 0xff cannot be stored in iso-8859-1 */
-        if (cp >= 0x100)
-            return 0;
-
-        ++idx;
-    }
-
-    return 1;
-}
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Returns 1 if the codepoint at C<offset> in STRING C<src> belongs to any of
-the character classes in C<flags>, according to the ISO-8859-1 type table.
-Returns 0 otherwise, including when C<offset> is past the end of the string
-or the codepoint lies outside the table.
-
-=cut
-
-*/
-
-static INTVAL
-is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(is_cclass)
-    UINTVAL codepoint;
-
-    if (offset >= src->strlen)
-        return 0;
-
-    codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
-
-    /* bound the lookup by the table that is actually indexed below */
-    if (codepoint >= sizeof (Parrot_iso_8859_1_typetable) /
-                     sizeof (Parrot_iso_8859_1_typetable[0])) {
-        return 0;
-    }
-    return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Searches STRING C<src> for a character in the character class C<flags>,
-starting at C<offset> and covering at most C<count> characters (clipped to
-the end of the string). The search itself is delegated to the find_cclass
-method of the encoding plugin, using the ISO-8859-1 type table.
-
-=cut
-
-*/
-
-static INTVAL
-find_cclass(PARROT_INTERP, INTVAL flags,
-                ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_cclass)
-    const UINTVAL requested_end = offset + count;
-    /* never search past the end of the string */
-    const UINTVAL end = requested_end > src->strlen ? src->strlen : requested_end;
-
-    return ENCODING_FIND_CCLASS(interp, src,
-            Parrot_iso_8859_1_typetable, flags, offset, end);
-}
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Searches STRING C<src> from C<offset>, over at most C<count> characters
-(clipped to the end of the string), for the first character that is NOT in
-the character class C<flags>. Returns its position, or the end of the
-searched range if every character is in the class. A codepoint outside the
-ISO-8859-1 type table counts as not being in any class, matching
-C<is_cclass>.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(PARROT_INTERP, INTVAL flags,
-                ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_not_cclass)
-    UINTVAL pos = offset;
-    UINTVAL end = offset + count;
-
-    end = src->strlen < end ? src->strlen : end;
-    for (; pos < end; ++pos) {
-        const UINTVAL codepoint = ENCODING_GET_CODEPOINT(interp, src, pos);
-
-        /* never index past the end of the type table */
-        if (codepoint >= sizeof (Parrot_iso_8859_1_typetable) /
-                         sizeof (Parrot_iso_8859_1_typetable[0])) {
-            return pos;
-        }
-        if ((Parrot_iso_8859_1_typetable[codepoint] & flags) == 0) {
-            return pos;
-        }
-    }
-    return end;
-}
-
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Returns a new one-character "iso-8859-1" STRING holding C<codepoint>, which
-is narrowed to a single C<char> first.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
-    ASSERT_ARGS(string_from_codepoint)
-    char single_byte = (char)codepoint;
-
-    return string_make(interp, &single_byte, 1, "iso-8859-1", 0);
-}
-
-/*
-
-=item C<void Parrot_charset_iso_8859_1_init(PARROT_INTERP)>
-
-Initializes the ISO-8859-1 charset: allocates a new CHARSET, fills it from a
-static template of function pointers, sets its preferred encoding to fixed-8
-and registers it under the name "iso-8859-1".
-
-=cut
-
-*/
-
-void
-Parrot_charset_iso_8859_1_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_charset_iso_8859_1_init)
-    CHARSET * const return_set = Parrot_new_charset(interp);
-    /* Positional initializer: the entries must stay in the order of the
-     * CHARSET struct members (declared elsewhere, not visible here).
-     * Operations with an ascii_ prefix are reused from the ascii charset;
-     * the others are the static functions of this file. */
-    static const CHARSET base_set = {
-        "iso-8859-1",
-        ascii_get_graphemes,
-        to_charset,
-        compose,
-        decompose,
-        upcase,
-        downcase,
-        titlecase,
-        upcase_first,
-        downcase_first,
-        titlecase_first,
-        ascii_compare,
-        ascii_cs_index,
-        ascii_cs_rindex,
-        validate,
-        is_cclass,
-        find_cclass,
-        find_not_cclass,
-        string_from_codepoint,
-        ascii_compute_hash,
-        /* presumably the preferred_encoding slot, which is assigned right
-         * after the copy below -- confirm against the CHARSET declaration */
-        NULL
-    };
-
-    STRUCT_COPY_FROM_STRUCT(return_set, base_set);
-    return_set->preferred_encoding = Parrot_fixed_8_encoding_ptr;
-    Parrot_register_charset(interp, "iso-8859-1", return_set);
-
-    return;
-}
-
-/*
-
-=item C<STRING * charset_cvt_iso_8859_1_to_ascii(PARROT_INTERP, const STRING
-*src)>
-
-Returns a clone of the ISO-8859-1 STRING C<src> for use as ASCII. Throws
-C<EXCEPTION_LOSSY_CONVERSION> if any byte of C<src> is 0x80 or greater.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-STRING *
-charset_cvt_iso_8859_1_to_ascii(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(charset_cvt_iso_8859_1_to_ascii)
-    STRING * const dest = Parrot_str_clone(interp, src);
-    UINTVAL        idx  = 0;
-
-    /* NOTE(review): dest keeps whatever charset the clone carries; nothing
-     * here retags it as ascii -- confirm whether the caller does that. */
-    while (idx < src->strlen) {
-        const UINTVAL byte_val = ENCODING_GET_BYTE(interp, src, idx);
-
-        /* bytes with the high bit set have no ascii equivalent */
-        if (byte_val >= 0x80)
-            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
-                "lossy conversion to ascii");
-
-        ENCODING_SET_BYTE(interp, dest, idx, byte_val);
-        ++idx;
-    }
-
-    return dest;
-}
-
-/*
-
-=back
-
-=cut
-
-*/
-
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: trunk/src/string/charset/iso-8859-1.h
==============================================================================
--- trunk/src/string/charset/iso-8859-1.h	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,46 +0,0 @@
-/* iso_8859_1.h
- *  Copyright (C) 2004-2007, Parrot Foundation.
- *  SVN Info
- *     $Id$
- *  Overview:
- *     This is the header for the iso_8859-1 charset functions
- *  Data Structure and Algorithms:
- *  History:
- *  Notes:
- *  References:
- */
-
-#ifndef PARROT_CHARSET_ISO_8859_1_H_GUARD
-#define PARROT_CHARSET_ISO_8859_1_H_GUARD
-
-/* HEADERIZER BEGIN: src/string/charset/iso-8859-1.c */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-STRING * charset_cvt_iso_8859_1_to_ascii(PARROT_INTERP,
-    ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-void Parrot_charset_iso_8859_1_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_charset_cvt_iso_8859_1_to_ascii \
-     __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_Parrot_charset_iso_8859_1_init \
-     __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: src/string/charset/iso-8859-1.c */
-
-#endif /* PARROT_CHARSET_ISO_8859_1_H_GUARD */
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: trunk/src/string/charset/tables.c
==============================================================================
--- trunk/src/string/charset/tables.c	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,93 +0,0 @@
-/* $Id$
- * Copyright (C) 2005-2007, Parrot Foundation.
- *
- * DO NOT EDIT THIS FILE DIRECTLY!
- * please update the tools/dev/gen_charset_tables.pl script instead.
- *
- * Created by gen_charset_tables.pl 19534 2007-07-02 02:12:08Z petdance
- *  Overview:
- *     This file contains various charset tables.
- *  Data Structure and Algorithms:
- *  History:
- *  Notes:
- *  References:
- */
-
-/* HEADERIZER HFILE: none */
-
-
-#include "tables.h"
-const INTVAL Parrot_iso_8859_1_typetable[256] = {
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 0-7 */
-0x0200, 0x0320, 0x1220, 0x0220, 0x1220, 0x1220, 0x0200, 0x0200, /* 8-15 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 16-23 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 24-31 */
-0x0160, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 32-39 */
-0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 40-47 */
-0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, /* 48-55 */
-0x28d8, 0x28d8, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 56-63 */
-0x04c0, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28c5, /* 64-71 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 72-79 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 80-87 */
-0x28c5, 0x28c5, 0x28c5, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x24c0, /* 88-95 */
-0x04c0, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28c6, /* 96-103 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 104-111 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 112-119 */
-0x28c6, 0x28c6, 0x28c6, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x0200, /* 120-127 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x1220, 0x0200, 0x0200, /* 128-135 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 136-143 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 144-151 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 152-159 */
-0x04e0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 160-167 */
-0x04c0, 0x04c0, 0x28c4, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 168-175 */
-0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x28c6, 0x04c0, 0x04c0, /* 176-183 */
-0x04c0, 0x04c0, 0x28c4, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 184-191 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 192-199 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 200-207 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x04c0, /* 208-215 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c6, /* 216-223 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 224-231 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 232-239 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x04c0, /* 240-247 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 248-255 */
-};
-const INTVAL Parrot_ascii_typetable[256] = {
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 0-7 */
-0x0200, 0x0320, 0x1220, 0x0220, 0x1220, 0x1220, 0x0200, 0x0200, /* 8-15 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 16-23 */
-0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, /* 24-31 */
-0x0160, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 32-39 */
-0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 40-47 */
-0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, 0x28d8, /* 48-55 */
-0x28d8, 0x28d8, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x04c0, /* 56-63 */
-0x04c0, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28d5, 0x28c5, /* 64-71 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 72-79 */
-0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, 0x28c5, /* 80-87 */
-0x28c5, 0x28c5, 0x28c5, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x24c0, /* 88-95 */
-0x04c0, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28d6, 0x28c6, /* 96-103 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 104-111 */
-0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, 0x28c6, /* 112-119 */
-0x28c6, 0x28c6, 0x28c6, 0x04c0, 0x04c0, 0x04c0, 0x04c0, 0x0200, /* 120-127 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1020, 0x0000, 0x0000, /* 128-135 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 136-143 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 144-151 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 152-159 */
-0x0020, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 160-167 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 168-175 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 176-183 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 184-191 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 192-199 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 200-207 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 208-215 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 216-223 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 224-231 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 232-239 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 240-247 */
-0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* 248-255 */
-};
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: trunk/src/string/charset/tables.h
==============================================================================
--- trunk/src/string/charset/tables.h	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,36 +0,0 @@
-/* $Id$
- * Copyright (C) 2005-2007, Parrot Foundation.
- *
- * DO NOT EDIT THIS FILE DIRECTLY!
- * please update the tools/dev/gen_charset_tables.pl script instead.
- *
- * Created by gen_charset_tables.pl 19534 2007-07-02 02:12:08Z petdance
- *  Overview:
- *     This file contains various charset tables.
- *  Data Structure and Algorithms:
- *  History:
- *  Notes:
- *  References:
- */
-
-/* HEADERIZER HFILE: none */
-
-
-#ifndef PARROT_CHARSET_TABLES_H_GUARD
-#define PARROT_CHARSET_TABLES_H_GUARD
-#include "parrot/cclass.h"
-#include "parrot/parrot.h"
-#define WHITESPACE  enum_cclass_whitespace
-#define WORDCHAR    enum_cclass_word
-#define PUNCTUATION enum_cclass_punctuation
-#define DIGIT       enum_cclass_numeric
-extern const INTVAL Parrot_iso_8859_1_typetable[256];
-extern const INTVAL Parrot_ascii_typetable[256];
-#endif /* PARROT_CHARSET_TABLES_H_GUARD */
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */
-

Deleted: trunk/src/string/charset/unicode.c
==============================================================================
--- trunk/src/string/charset/unicode.c	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,1075 +0,0 @@
-/*
-Copyright (C) 2005-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/unicode.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for unicode data
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "unicode.h"
-#include "ascii.h"
-#include "tables.h"
-
-/* HEADERIZER HFILE: src/string/charset/unicode.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-static INTVAL compare(PARROT_INTERP,
-    ARGIN(const STRING *lhs),
-    ARGIN(const STRING *rhs))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* compose(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static size_t compute_hash(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    size_t seed)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static INTVAL cs_rindex(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(const STRING *search_string),
-    SHIM(UINTVAL offset))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-static INTVAL find_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL find_not_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_graphemes(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static INTVAL is_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static int u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-static UINTVAL validate(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-#define ASSERT_ARGS_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(lhs) \
-    , PARROT_ASSERT_ARG(rhs))
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_compute_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_cs_rindex __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_graphemes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_u_iscclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#ifdef EXCEPTION
-#  undef EXCEPTION
-#endif
-
-#if PARROT_HAS_ICU
-#  include <unicode/ucnv.h>
-#  include <unicode/utypes.h>
-#  include <unicode/uchar.h>
-#  include <unicode/ustring.h>
-#  include <unicode/unorm.h>
-#endif
-#define EXCEPTION(err, str) \
-    Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
-
-#define UNIMPL EXCEPTION(EXCEPTION_UNIMPLEMENTED, "unimplemented unicode")
-
-
-/*
-
-=item C<static STRING * get_graphemes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns C<count> graphemes of STRING C<src>, starting at C<offset>. The work
-is delegated to the get_codepoints operation of the string's encoding.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_graphemes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(get_graphemes)
-    STRING * const graphemes = ENCODING_GET_CODEPOINTS(interp, src, offset, count);
-
-    return graphemes;
-}
-
-
-/*
-
-=item C<static STRING* to_charset(PARROT_INTERP, const STRING *src)>
-
-Returns STRING C<src> converted to unicode. A converter registered for the
-source charset is used when one exists; otherwise the string is converted
-with the to_encoding operation of the utf8 encoding.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_charset)
-    const charset_converter_t converter =
-            Parrot_find_charset_converter(interp, src->charset,
-                    Parrot_unicode_charset_ptr);
-
-    /* no registered converter: fall back to a plain utf8 re-encoding */
-    if (!converter)
-        return Parrot_utf8_encoding_ptr->to_encoding(interp, src);
-
-    return converter(interp, src);
-}
-
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-If Parrot is built with ICU, returns a new STRING holding C<src> normalized
-with ICU's default normalization mode (NFC). If the first attempt fails, the
-result buffer is resized to the length ICU reported and the normalization is
-run once more.
-
-If Parrot does not have ICU included, throws C<EXCEPTION_LIBRARY_ERROR>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-compose(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(compose)
-#if PARROT_HAS_ICU
-    STRING *dest;
-    int src_len, dest_len;
-    UErrorCode err;
-    /*
-       U_STABLE int32_t U_EXPORT2
-       unorm_normalize(const UChar *source, int32_t sourceLength,
-       UNormalizationMode mode, int32_t options,
-       UChar *result, int32_t resultLength,
-       UErrorCode *status);
-       */
-    /* NOTE(review): src->strstart is cast to UChar* and src->strlen is used
-     * as the UChar count below, which presumably assumes src is already
-     * stored as 16-bit units -- confirm against callers. */
-    dest_len = src_len = src->strlen;
-    dest     = Parrot_str_new_init(interp, NULL, src_len * sizeof (UChar),
-            src->encoding, src->charset, 0);
-
-    /* first attempt: result capacity equals the source length */
-    err      = U_ZERO_ERROR;
-    dest_len = unorm_normalize((UChar *)src->strstart, src_len,
-            UNORM_DEFAULT,      /* default is NFC */
-            0,                  /* options 0 default - no specific icu
-                                 * version */
-            (UChar *)dest->strstart, dest_len, &err);
-
-    dest->bufused = dest_len * sizeof (UChar);
-
-    if (!U_SUCCESS(err)) {
-        /* retry: dest_len now holds the length returned by the first call,
-         * so resize the buffer to that many UChars and normalize again */
-        err = U_ZERO_ERROR;
-        Parrot_gc_reallocate_string_storage(interp, dest, dest->bufused);
-        dest_len = unorm_normalize((UChar *)src->strstart, src_len,
-                UNORM_DEFAULT,      /* default is NFC */
-                0,                  /* options 0 default - no specific
-                                     * icu version */
-                (UChar *)dest->strstart, dest_len, &err);
-        /* a second failure is only caught by this assertion */
-        PARROT_ASSERT(U_SUCCESS(err));
-        dest->bufused = dest_len * sizeof (UChar);
-    }
-    dest->strlen = dest_len;
-    return dest;
-#else
-    UNUSED(src);
-    /* no return statement follows the throw in this branch */
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-Decompose function for the unicode charset. Not yet implemented: it always
-throws C<EXCEPTION_UNIMPLEMENTED> and ignores C<src>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(decompose)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    /* UNIMPL expands to a throw of EXCEPTION_UNIMPLEMENTED
-     * ("unimplemented unicode") */
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Returns a new STRING holding C<src> converted to upper case, for those
-characters which have upper-case versions.
-
-A utf8 string whose byte count equals its character count is handed to the
-upcase function of the ascii charset. Anything else is converted to utf16
-and upper-cased with ICU's C<u_strToUpper>.
-
-Throws an exception if ICU is not installed and the ascii shortcut does not
-apply.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(upcase)
-#if PARROT_HAS_ICU
-    UErrorCode err;
-    int dest_len, src_len, needed;
-    STRING *res;
-#endif
-
-    /* shortcut: one byte per character in utf8 -- hand the string to the
-     * ascii charset's upcase */
-    if (src->bufused  == src->strlen
-            && src->encoding == Parrot_utf8_encoding_ptr) {
-        return Parrot_ascii_charset_ptr->upcase(interp, src);
-    }
-
-#if PARROT_HAS_ICU
-    /* to_encoding will allocate new string */
-    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
-    /*
-       U_CAPI int32_t U_EXPORT2
-       u_strToUpper(UChar *dest, int32_t destCapacity,
-       const UChar *src, int32_t srcLength,
-       const char *locale,
-       UErrorCode *pErrorCode);
-       */
-    err = U_ZERO_ERROR;
-
-    /* use all available space - see below XXX */
-    /* TODO downcase, titlecase too */
-    /* both lengths are counted in UChar units, not bytes */
-    dest_len = Buffer_buflen(res) / sizeof (UChar);
-    src_len  = res->bufused       / sizeof (UChar);
-
-    /*
-     * XXX troubles:
-     *   t/op/string_cs_45  upcase unicode:"\u01f0"
-     *   this creates \u004a \u030c J+NON-SPACING HACEK
-     *   the string needs resizing, *if* the src buffer is
-     *   too short. *But* with icu 3.2/3.4 the src string is
-     *   overwritten with partial result, despite the icu docs sayeth:
-     *
-     *      The source string and the destination buffer
-     *      are allowed to overlap.
-     *
-     *  Workaround:  'preflighting' returns needed length
-     *  Alternative: forget about inplace operation - create new result
-     *
-     *  TODO downcase, titlecase
-     */
-    /* preflight: NULL destination with zero capacity only reports the
-     * required length; err is reset before the real call below */
-    needed = u_strToUpper(NULL, 0,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* locale = default */
-            &err);
-
-    if (needed > dest_len) {
-        Parrot_gc_reallocate_string_storage(interp, res, needed * sizeof (UChar));
-        dest_len = needed;
-    }
-
-    /* real conversion, in place: res is both source and destination */
-    err      = U_ZERO_ERROR;
-    dest_len = u_strToUpper((UChar *)res->strstart, dest_len,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* locale = default */
-            &err);
-    PARROT_ASSERT(U_SUCCESS(err));
-    res->bufused = dest_len * sizeof (UChar);
-
-    /* downgrade if possible */
-    if (dest_len == (int)src->strlen)
-        res->encoding = Parrot_ucs2_encoding_ptr;
-    else {
-        /* string is likely still ucs2 if it was earlier
-         * but strlen changed due to combining char
-         */
-        res->strlen = dest_len;
-    }
-
-    return res;
-
-#else
-    UNUSED(src);
-    /* no return statement follows the throw in this branch */
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Converts all graphemes to lower-case, for those graphemes which have cases.
-
-Throws an exception if ICU is not installed.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(downcase)
-#if PARROT_HAS_ICU
-    UErrorCode err;
-    int dest_len, src_len;
-    STRING *res;
-#endif
-
-    if (src->bufused  == src->strlen
-            && src->encoding == Parrot_utf8_encoding_ptr) {
-        return Parrot_ascii_charset_ptr->downcase(interp, src);
-    }
-
-#if PARROT_HAS_ICU
-    /* to_encoding will allocate new string */
-    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
-    /*
-U_CAPI int32_t U_EXPORT2
-u_strToLower(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode);
-     */
-    err      = U_ZERO_ERROR;
-    src_len  = res->bufused / sizeof (UChar);
-    dest_len = u_strToLower((UChar *)res->strstart, src_len,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* locale = default */
-            &err);
-    res->bufused = dest_len * sizeof (UChar);
-
-    if (!U_SUCCESS(err)) {
-        err = U_ZERO_ERROR;
-        Parrot_gc_reallocate_string_storage(interp, res, res->bufused);
-        dest_len = u_strToLower((UChar *)res->strstart, dest_len,
-                (UChar *)res->strstart, src_len,
-                NULL,       /* locale = default */
-                &err);
-        PARROT_ASSERT(U_SUCCESS(err));
-    }
-
-    /* downgrade if possible */
-    if (dest_len == (int)res->strlen)
-        res->encoding = Parrot_ucs2_encoding_ptr;
-
-    return res;
-
-#else
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Converts the string to title case, for those characters which support cases.
-
-Throws an exception if ICU is not installed.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(titlecase)
-#if PARROT_HAS_ICU
-
-    UErrorCode err;
-    int dest_len, src_len;
-    STRING *res;
-
-    if (src->bufused  == src->strlen
-    &&  src->encoding == Parrot_utf8_encoding_ptr) {
-        return Parrot_ascii_charset_ptr->titlecase(interp, src);
-    }
-
-    /* to_encoding will allocate new string */
-    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
-
-    /*
-U_CAPI int32_t U_EXPORT2
-u_strToTitle(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             UBreakIterator *titleIter,
-             const char *locale,
-             UErrorCode *pErrorCode);
-     */
-
-    err      = U_ZERO_ERROR;
-    src_len  = res->bufused / sizeof (UChar);
-    dest_len = u_strToTitle((UChar *)res->strstart, src_len,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* default titleiter */
-            NULL,       /* locale = default */
-            &err);
-    res->bufused = dest_len * sizeof (UChar);
-
-    if (!U_SUCCESS(err)) {
-        err = U_ZERO_ERROR;
-        Parrot_gc_reallocate_string_storage(interp, res, res->bufused);
-        dest_len = u_strToTitle((UChar *)res->strstart, dest_len,
-                (UChar *)res->strstart, src_len,
-                NULL, NULL,
-                &err);
-        PARROT_ASSERT(U_SUCCESS(err));
-    }
-
-    /* downgrade if possible */
-    if (dest_len == (int)res->strlen)
-        res->encoding = Parrot_ucs2_encoding_ptr;
-
-    return res;
-
-#else
-    UNUSED(src);
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in the STRING C<src> to uppercase, if the
-grapheme supports it. Not implemented.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(upcase_first)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in the STRING C<src> to lower-case, if
-the grapheme supports it. Not implemented
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(downcase_first)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in STRING C<src> to title case, if the
-string supports it. Not implemented.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(titlecase_first)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
-
-Compares two STRINGs, C<lhs> and C<rhs>. Returns -1 if C<lhs> < C<rhs>. Returns
-0 if C<lhs> = C<rhs>. Returns 1 if C<lhs> > C<rhs>.
-
-=cut
-
-*/
-
-static INTVAL
-compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
-{
-    ASSERT_ARGS(compare)
-    String_iter l_iter, r_iter;
-    UINTVAL min_len, l_len, r_len;
-
-    /* TODO make optimized equal - strings are equal length then already */
-    STRING_ITER_INIT(interp, &l_iter);
-    STRING_ITER_INIT(interp, &r_iter);
-
-    l_len = lhs->strlen;
-    r_len = rhs->strlen;
-
-    min_len = l_len > r_len ? r_len : l_len;
-
-    while (l_iter.charpos < min_len) {
-        const UINTVAL cl = STRING_ITER_GET_AND_ADVANCE(interp, lhs, &l_iter);
-        const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &r_iter);
-
-        if (cl != cr)
-            return cl < cr ? -1 : 1;
-    }
-
-    if (l_len < r_len)
-        return -1;
-
-    if (l_len > r_len)
-        return 1;
-
-    return 0;
-}
-
-
-/*
-
-=item C<static INTVAL cs_rindex(PARROT_INTERP, const STRING *src, const STRING
-*search_string, UINTVAL offset)>
-
-Finds the last index of substring C<search_string> in STRING C<src>,
-starting from C<offset>. Not implemented.
-
-=cut
-
-*/
-
-static INTVAL
-cs_rindex(PARROT_INTERP, SHIM(const STRING *src),
-        SHIM(const STRING *search_string), SHIM(UINTVAL offset))
-{
-    ASSERT_ARGS(cs_rindex)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Returns 1 if the STRING C<src> is a valid unicode string, returns 0 otherwise.
-
-=cut
-
-*/
-
-static UINTVAL
-validate(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(validate)
-    String_iter iter;
-    const INTVAL length = Parrot_str_length(interp, src);
-
-    STRING_ITER_INIT(interp, &iter);
-    while (iter.charpos < length) {
-        const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        /* Check for Unicode non-characters */
-        if (codepoint >= 0xfdd0
-        && (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe)
-        &&  codepoint <= 0x10ffff)
-            return 0;
-    }
-
-    return 1;
-}
-
-
-/*
-
-=item C<static int u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-static int
-u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
-{
-    ASSERT_ARGS(u_iscclass)
-#if PARROT_HAS_ICU
-    UNUSED(interp);
-            /* XXX which one
-               return u_charDigitValue(codepoint);
-               */
-    if ((flags & enum_cclass_uppercase)    && u_isupper(codepoint))  return 1;
-    if ((flags & enum_cclass_lowercase)    && u_islower(codepoint))  return 1;
-    if ((flags & enum_cclass_alphabetic)   && u_isalpha(codepoint))  return 1;
-    if ((flags & enum_cclass_numeric)      && u_isdigit(codepoint))  return 1;
-    if ((flags & enum_cclass_hexadecimal)  && u_isxdigit(codepoint)) return 1;
-    if ((flags & enum_cclass_whitespace)   && u_isspace(codepoint))  return 1;
-    if ((flags & enum_cclass_printing)     && u_isprint(codepoint))  return 1;
-    if ((flags & enum_cclass_graphical)    && u_isgraph(codepoint))  return 1;
-    if ((flags & enum_cclass_blank)        && u_isblank(codepoint))  return 1;
-    if ((flags & enum_cclass_control)      && u_iscntrl(codepoint))  return 1;
-    if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint))  return 1;
-    if ((flags & enum_cclass_word)         &&
-        (u_isalnum(codepoint) || codepoint == '_'))                  return 1;
-
-    return 0;
-#else
-    if (codepoint < 256)
-        return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-
-    if (flags == enum_cclass_any)
-        return 1;
-
-    /* All codepoints from u+0100 to u+02af are alphabetic, so we
-     * cheat on the WORD and ALPHABETIC properties to include these
-     * (and incorrectly exclude all others).  This is a stopgap until
-     * ICU is everywhere, or we have better non-ICU unicode support. */
-    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
-        return (codepoint < 0x2b0);
-
-    if (flags & enum_cclass_whitespace) {
-        /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
-        switch (codepoint) {
-          case 0x1680: case 0x180e: case 0x2000: case 0x2001:
-          case 0x2002: case 0x2003: case 0x2004: case 0x2005:
-          case 0x2006: case 0x2007: case 0x2008: case 0x2009:
-          case 0x200a: case 0x2028: case 0x2029: case 0x202f:
-          case 0x205f: case 0x3000:
-            return 1;
-          default:
-            break;
-        }
-    }
-
-    if (flags & enum_cclass_numeric) {
-        /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
-        if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
-        if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
-        if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
-        if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
-        if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
-        if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
-        if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
-        if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
-        if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
-        if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
-        if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
-        if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
-        if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
-        if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
-        if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
-        if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
-        if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
-        if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
-        if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
-        if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
-        if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
-        if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
-    }
-
-    if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline))
-        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-            "no ICU lib loaded");
-
-    return 0;
-#endif
-}
-
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-static INTVAL
-is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(is_cclass)
-    UINTVAL codepoint;
-
-    if (offset >= src->strlen)
-        return 0;
-
-    codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
-
-    if (codepoint >= 256)
-        return u_iscclass(interp, codepoint, flags) != 0;
-
-    return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class.
-
-=cut
-
-*/
-
-static INTVAL
-find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_cclass)
-    String_iter iter;
-    UINTVAL     codepoint;
-    UINTVAL     end = offset + count;
-
-    STRING_ITER_INIT(interp, &iter);
-    STRING_ITER_SET_POSITION(interp, src, &iter, offset);
-
-    end = src->strlen < end ? src->strlen : end;
-
-    while (iter.charpos < end) {
-        codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (codepoint >= 256) {
-            if (u_iscclass(interp, codepoint, flags))
-                    return iter.charpos - 1;
-        }
-        else {
-            if (Parrot_iso_8859_1_typetable[codepoint] & flags)
-                return iter.charpos - 1;
-        }
-    }
-
-    return end;
-}
-
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Returns C<INTVAL>.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
-        UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_not_cclass)
-    String_iter iter;
-    UINTVAL     codepoint;
-    UINTVAL     end = offset + count;
-    int         bit;
-
-    if (offset > src->strlen) {
-        /* XXX: Throw in this case? */
-        return offset + count;
-    }
-
-    STRING_ITER_INIT(interp, &iter);
-
-    if (offset)
-        STRING_ITER_SET_POSITION(interp, src, &iter, offset);
-
-    end = src->strlen < end ? src->strlen : end;
-
-    if (flags == enum_cclass_any)
-        return end;
-
-    while (iter.charpos < end) {
-        codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (codepoint >= 256) {
-            for (bit = enum_cclass_uppercase;
-                    bit <= enum_cclass_word ; bit <<= 1) {
-                if ((bit & flags) && !u_iscclass(interp, codepoint, bit))
-                    return iter.charpos - 1;
-            }
-        }
-        else {
-            if (!(Parrot_iso_8859_1_typetable[codepoint] & flags))
-                return iter.charpos - 1;
-        }
-    }
-
-    return end;
-}
-
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Returns a one-codepoint string for the given codepoint.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
-    ASSERT_ARGS(string_from_codepoint)
-    String_iter    iter;
-    STRING * const dest = string_make(interp, "", 1, "unicode", 0);
-
-    dest->strlen = 1;
-
-    STRING_ITER_INIT(interp, &iter);
-    STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, codepoint);
-    dest->bufused = iter.bytepos;
-
-    return dest;
-}
-
-
-/*
-
-=item C<static size_t compute_hash(PARROT_INTERP, const STRING *src, size_t
-seed)>
-
-Computes the hash of the given STRING C<src> with starting seed value C<seed>.
-
-=cut
-
-*/
-
-static size_t
-compute_hash(PARROT_INTERP, ARGIN(const STRING *src), size_t seed)
-{
-    ASSERT_ARGS(compute_hash)
-    String_iter iter;
-    size_t      hashval = seed;
-
-    STRING_ITER_INIT(interp, &iter);
-
-    while (iter.charpos < src->strlen) {
-        const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        hashval += hashval << 5;
-        hashval += c;
-    }
-
-    return hashval;
-}
-
-
-/*
-
-=item C<void Parrot_charset_unicode_init(PARROT_INTERP)>
-
-Initializes the Unicode charset by installing all the necessary function
-pointers.
-
-=cut
-
-*/
-
-void
-Parrot_charset_unicode_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_charset_unicode_init)
-    CHARSET * const      return_set = Parrot_new_charset(interp);
-    static const CHARSET base_set   = {
-        "unicode",
-        get_graphemes,
-        to_charset,
-        compose,
-        decompose,
-        upcase,
-        downcase,
-        titlecase,
-        upcase_first,
-        downcase_first,
-        titlecase_first,
-        compare,
-        mixed_cs_index,
-        cs_rindex,
-        validate,
-        is_cclass,
-        find_cclass,
-        find_not_cclass,
-        string_from_codepoint,
-        compute_hash,
-        NULL
-    };
-
-    STRUCT_COPY_FROM_STRUCT(return_set, base_set);
-
-    /*
-     * for now use utf8
-     * TODO replace it with a fixed uint_16 or uint_32 encoding
-     *      XXX if this is changed, modify string_make so it
-     *          still takes "utf8" when fed "unicode" as charset!
-     */
-    return_set->preferred_encoding = Parrot_utf8_encoding_ptr;
-    Parrot_register_charset(interp, "unicode", return_set);
-
-    return;
-}
-
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: trunk/src/string/charset/unicode.h
==============================================================================
--- trunk/src/string/charset/unicode.h	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,40 +0,0 @@
-/* unicode.h
- *  Copyright (C) 2005-2007, Parrot Foundation.
- *  SVN Info
- *     $Id$
- *  Overview:
- *     This is the header for the unicode charset functions
- *  Data Structure and Algorithms:
- *  History:
- *  Notes:
- *  References:
- */
-
-#ifndef PARROT_CHARSET_UNICODE_H_GUARD
-#define PARROT_CHARSET_UNICODE_H_GUARD
-
-/*
- * init function
- */
-
-
-/* HEADERIZER BEGIN: src/string/charset/unicode.c */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-void Parrot_charset_unicode_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_Parrot_charset_unicode_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: src/string/charset/unicode.c */
-
-
-#endif /* PARROT_CHARSET_UNICODE_H_GUARD */
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Modified: trunk/src/string/encoding.c
==============================================================================
--- trunk/src/string/encoding.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/encoding.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -16,50 +16,28 @@
 
 */
 
-#define PARROT_NO_EXTERN_ENCODING_PTRS
-#include "parrot/parrot.h"
+#include "parrot/encoding.h"
+
+STR_VTABLE *Parrot_default_encoding_ptr = NULL;
+
+static STR_VTABLE **encodings;
+static int          n_encodings;
+/* for backwards compatibility */
+static STRING      *unicode_str;
+static STRING      *fixed_8_str;
 
 /* HEADERIZER HFILE: include/parrot/encoding.h */
 
 /* HEADERIZER BEGIN: static */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-static INTVAL register_encoding(PARROT_INTERP,
-    ARGIN(const char *encodingname),
-    ARGIN(ENCODING *encoding))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-#define ASSERT_ARGS_register_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(encodingname) \
-    , PARROT_ASSERT_ARG(encoding))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: static */
 
-ENCODING *Parrot_default_encoding_ptr = NULL;
-ENCODING *Parrot_fixed_8_encoding_ptr = NULL;
-ENCODING *Parrot_utf8_encoding_ptr    = NULL;
-ENCODING *Parrot_ucs2_encoding_ptr    = NULL;
-ENCODING *Parrot_utf16_encoding_ptr   = NULL;
-ENCODING *Parrot_ucs4_encoding_ptr    = NULL;
-
-typedef struct One_encoding {
-    NOTNULL(ENCODING *encoding);
-    STRING  *name;
-} One_encoding;
-
-typedef struct All_encodings {
-    int n_encodings;
-    One_encoding *enc;
-} All_encodings;
-
-static All_encodings *all_encodings;
 
 /*
 
-=item C<void parrot_deinit_encodings(PARROT_INTERP)>
+=item C<void Parrot_deinit_encodings(PARROT_INTERP)>
 
 Deinitialize encodings and free all memory used by them.
 
@@ -68,25 +46,20 @@
 */
 
 void
-parrot_deinit_encodings(PARROT_INTERP)
+Parrot_deinit_encodings(PARROT_INTERP)
 {
-    ASSERT_ARGS(parrot_deinit_encodings)
-    const int n = all_encodings->n_encodings;
-    int i;
+    ASSERT_ARGS(Parrot_deinit_encodings)
 
-    for (i = 0; i < n; ++i) {
-        mem_gc_free(interp, all_encodings->enc[i].encoding);
-    }
-    mem_gc_free(interp, all_encodings->enc);
-    mem_gc_free(interp, all_encodings);
-    all_encodings = NULL;
+    mem_gc_free(interp, encodings);
+    encodings   = NULL;
+    n_encodings = 0;
 }
 
 /*
 
-=item C<ENCODING * Parrot_new_encoding(PARROT_INTERP)>
+=item C<STR_VTABLE * Parrot_new_encoding(PARROT_INTERP)>
 
-Allocates the memory for a new C<ENCODING> from the system.
+Allocates the memory for a new string vtable from the system.
 
 =cut
 
@@ -95,16 +68,16 @@
 PARROT_EXPORT
 PARROT_MALLOC
 PARROT_CANNOT_RETURN_NULL
-ENCODING *
+STR_VTABLE *
 Parrot_new_encoding(PARROT_INTERP)
 {
     ASSERT_ARGS(Parrot_new_encoding)
-    return mem_gc_allocate_typed(interp, ENCODING);
+    return mem_gc_allocate_typed(interp, STR_VTABLE);
 }
 
 /*
 
-=item C<const ENCODING * Parrot_find_encoding(PARROT_INTERP, const char
+=item C<const STR_VTABLE * Parrot_find_encoding(PARROT_INTERP, const char
 *encodingname)>
 
 Finds an encoding with the name C<encodingname>. Returns the encoding
@@ -118,22 +91,27 @@
 PARROT_PURE_FUNCTION
 PARROT_WARN_UNUSED_RESULT
 PARROT_CAN_RETURN_NULL
-const ENCODING *
+const STR_VTABLE *
 Parrot_find_encoding(SHIM_INTERP, ARGIN(const char *encodingname))
 {
     ASSERT_ARGS(Parrot_find_encoding)
-    const int n = all_encodings->n_encodings;
+    const int n = n_encodings;
     int i;
 
     for (i = 0; i < n; ++i)
-        if (STREQ(all_encodings->enc[i].encoding->name, encodingname))
-            return all_encodings->enc[i].encoding;
+        if (STREQ(encodings[i]->name, encodingname))
+            return encodings[i];
+
+    /* backwards compatibility */
+    if (strcmp(encodingname, "unicode") == 0)
+        return Parrot_utf8_encoding_ptr;
+
     return NULL;
 }
 
 /*
 
-=item C<const ENCODING * Parrot_load_encoding(PARROT_INTERP, const char
+=item C<const STR_VTABLE * Parrot_load_encoding(PARROT_INTERP, const char
 *encodingname)>
 
 Loads an encoding. Currently throws an exception because we cannot load
@@ -154,7 +132,7 @@
 PARROT_EXPORT
 PARROT_DOES_NOT_RETURN
 PARROT_CANNOT_RETURN_NULL
-const ENCODING *
+const STR_VTABLE *
 Parrot_load_encoding(PARROT_INTERP, ARGIN(const char *encodingname))
 {
     ASSERT_ARGS(Parrot_load_encoding)
@@ -181,13 +159,28 @@
 Parrot_encoding_number(PARROT_INTERP, ARGIN(const STRING *encodingname))
 {
     ASSERT_ARGS(Parrot_encoding_number)
-    const int n = all_encodings->n_encodings;
+    const int n = n_encodings;
     int i;
 
     for (i = 0; i < n; ++i) {
-        if (Parrot_str_equal(interp, all_encodings->enc[i].name, encodingname))
+        if (Parrot_str_equal(interp, encodings[i]->name_str, encodingname))
             return i;
     }
+
+    /* backwards compatibility */
+    if (Parrot_str_equal(interp, encodingname, unicode_str)) {
+        for (i = 0; i < n; ++i) {
+            if (STREQ(encodings[i]->name, "utf8"))
+                return i;
+        }
+    }
+    else if (STRING_equal(interp, encodingname, fixed_8_str)) {
+        for (i = 0; i < n; ++i) {
+            if (STREQ(encodings[i]->name, "ascii"))
+                return i;
+        }
+    }
+
     return -1;
 }
 
@@ -197,6 +190,8 @@
 
 Return the number of the encoding of the given string or -1 if not found.
 
+This could be converted to a macro.
+
 =cut
 
 */
@@ -208,14 +203,8 @@
 Parrot_encoding_number_of_str(SHIM_INTERP, ARGIN(const STRING *src))
 {
     ASSERT_ARGS(Parrot_encoding_number_of_str)
-    const int n = all_encodings->n_encodings;
-    int i;
 
-    for (i = 0; i < n; ++i) {
-        if (src->encoding == all_encodings->enc[i].encoding)
-            return i;
-    }
-    return -1;
+    return src->encoding->num;
 }
 
 /*
@@ -225,6 +214,8 @@
 Returns the name of a character encoding based on the INTVAL index
 C<number_of_encoding> to the C<encodings> array.
 
+This could be converted to a macro.
+
 =cut
 
 */
@@ -237,15 +228,15 @@
 Parrot_encoding_name(SHIM_INTERP, INTVAL number_of_encoding)
 {
     ASSERT_ARGS(Parrot_encoding_name)
-    if (number_of_encoding >= all_encodings->n_encodings ||
+    if (number_of_encoding >= n_encodings ||
         number_of_encoding < 0)
         return NULL;
-    return all_encodings->enc[number_of_encoding].name;
+    return encodings[number_of_encoding]->name_str;
 }
 
 /*
 
-=item C<const ENCODING* Parrot_get_encoding(PARROT_INTERP, INTVAL
+=item C<const STR_VTABLE* Parrot_get_encoding(PARROT_INTERP, INTVAL
 number_of_encoding)>
 
 Returns the encoding given by the INTVAL index C<number_of_encoding>.
@@ -258,14 +249,14 @@
 PARROT_PURE_FUNCTION
 PARROT_WARN_UNUSED_RESULT
 PARROT_CAN_RETURN_NULL
-const ENCODING*
+const STR_VTABLE*
 Parrot_get_encoding(SHIM_INTERP, INTVAL number_of_encoding)
 {
     ASSERT_ARGS(Parrot_get_encoding)
-    if (number_of_encoding >= all_encodings->n_encodings ||
+    if (number_of_encoding >= n_encodings ||
         number_of_encoding < 0)
         return NULL;
-    return all_encodings->enc[number_of_encoding].encoding;
+    return encodings[number_of_encoding];
 }
 
 /*
@@ -288,50 +279,10 @@
 Parrot_encoding_c_name(SHIM_INTERP, INTVAL number_of_encoding)
 {
     ASSERT_ARGS(Parrot_encoding_c_name)
-    if (number_of_encoding >= all_encodings->n_encodings ||
+    if (number_of_encoding >= n_encodings ||
         number_of_encoding < 0)
         return NULL;
-    return all_encodings->enc[number_of_encoding].encoding->name;
-}
-
-/*
-
-=item C<static INTVAL register_encoding(PARROT_INTERP, const char *encodingname,
-ENCODING *encoding)>
-
-Registers a new character encoding C<encoding> with the given name
-C<encodingname>. Returns 1 if successful, returns 0 otherwise.
-
-=cut
-
-*/
-
-static INTVAL
-register_encoding(PARROT_INTERP, ARGIN(const char *encodingname),
-        ARGIN(ENCODING *encoding))
-{
-    ASSERT_ARGS(register_encoding)
-    const int n = all_encodings->n_encodings;
-    int i;
-
-    for (i = 0; i < n; ++i) {
-        if (STREQ(all_encodings->enc[i].encoding->name, encodingname))
-            return 0;
-    }
-    /*
-     * TODO
-     * this needs either a LOCK or we just forbid dynamic
-     * loading of encodings from inside threads
-     */
-    if (!n)
-        all_encodings->enc = mem_gc_allocate_zeroed_typed(interp, One_encoding);
-    else
-        all_encodings->enc = mem_gc_realloc_n_typed_zeroed(interp,
-                all_encodings->enc, n + 1, n, One_encoding);
-    ++all_encodings->n_encodings;
-    all_encodings->enc[n].encoding = encoding;
-
-    return 1;
+    return encodings[number_of_encoding]->name;
 }
 
 /*
@@ -352,15 +303,16 @@
 {
     ASSERT_ARGS(Parrot_str_internal_register_encoding_names)
     int n;
-    for (n = 0; n < all_encodings->n_encodings; ++n)
-        all_encodings->enc[n].name =
-            Parrot_str_new_constant(interp, all_encodings->enc[n].encoding->name);
+    for (n = 0; n < n_encodings; ++n)
+        encodings[n]->name_str =
+            Parrot_str_new_constant(interp, encodings[n]->name);
+    unicode_str = Parrot_str_new_constant(interp, "unicode");
+    fixed_8_str = Parrot_str_new_constant(interp, "fixed_8");
 }
 
 /*
 
-=item C<INTVAL Parrot_register_encoding(PARROT_INTERP, const char *encodingname,
-ENCODING *encoding)>
+=item C<INTVAL Parrot_register_encoding(PARROT_INTERP, STR_VTABLE *encoding)>
 
 Registers the string vtable C<encoding> under its own name. Returns 1 on
 success, or 0 if an encoding with the same name is already registered.
@@ -371,46 +323,66 @@
 
 PARROT_EXPORT
 INTVAL
-Parrot_register_encoding(PARROT_INTERP, ARGIN(const char *encodingname),
-        ARGIN(ENCODING *encoding))
+Parrot_register_encoding(PARROT_INTERP, ARGIN(STR_VTABLE *encoding))
 {
     ASSERT_ARGS(Parrot_register_encoding)
-    if (!all_encodings) {
-        all_encodings = mem_gc_allocate_zeroed_typed(interp, All_encodings);
-        all_encodings->n_encodings = 0;
-        all_encodings->enc = NULL;
-    }
-    if (STREQ("fixed_8", encodingname)) {
-        Parrot_fixed_8_encoding_ptr = encoding;
-        if (!Parrot_default_encoding_ptr) {
-            Parrot_default_encoding_ptr = encoding;
+    int i;
+    int n = n_encodings;
 
-        }
-        return register_encoding(interp, encodingname, encoding);
-    }
-    if (STREQ("utf8", encodingname)) {
-        Parrot_utf8_encoding_ptr = encoding;
-        return register_encoding(interp, encodingname, encoding);
-    }
-    if (STREQ("utf16", encodingname)) {
-        Parrot_utf16_encoding_ptr = encoding;
-        return register_encoding(interp, encodingname, encoding);
-    }
-    if (STREQ("ucs2", encodingname)) {
-        Parrot_ucs2_encoding_ptr = encoding;
-        return register_encoding(interp, encodingname, encoding);
-    }
-    if (STREQ("ucs4", encodingname)) {
-        Parrot_ucs4_encoding_ptr = encoding;
-        return register_encoding(interp, encodingname, encoding);
+    for (i = 0; i < n_encodings; ++i) {
+        if (STREQ(encodings[i]->name, encoding->name))
+            return 0;
     }
-    return 0;
+
+    if (!n)
+        encodings = mem_gc_allocate_zeroed_typed(interp, STR_VTABLE *);
+    else
+        encodings = mem_gc_realloc_n_typed_zeroed(interp,
+                encodings, n + 1, n, STR_VTABLE *);
+
+    encoding->num = n;
+    encodings[n]  = encoding;
+    ++n_encodings;
+
+    return 1;
+}
+
+/*
+
+=item C<void Parrot_encodings_init(PARROT_INTERP)>
+
+Registers the built-in encodings, makes the ASCII encoding the default,
+and creates the STRING names of all registered encodings.
+
+=cut
+
+*/
+
+PARROT_EXPORT
+void
+Parrot_encodings_init(PARROT_INTERP)
+{
+    ASSERT_ARGS(Parrot_encodings_init)
+
+    Parrot_register_encoding(interp, Parrot_ascii_encoding_ptr);
+    Parrot_register_encoding(interp, Parrot_latin1_encoding_ptr);
+    Parrot_register_encoding(interp, Parrot_binary_encoding_ptr);
+    Parrot_register_encoding(interp, Parrot_utf8_encoding_ptr);
+    Parrot_register_encoding(interp, Parrot_utf16_encoding_ptr);
+    Parrot_register_encoding(interp, Parrot_ucs2_encoding_ptr);
+    Parrot_register_encoding(interp, Parrot_ucs4_encoding_ptr);
+
+    Parrot_default_encoding_ptr = Parrot_ascii_encoding_ptr;
+
+    /* Now that the plugins are registered, we can create STRING
+     * names for them.  */
+    Parrot_str_internal_register_encoding_names(interp);
 }
 
 /*
 
 =item C<INTVAL Parrot_make_default_encoding(PARROT_INTERP, const char
-*encodingname, ENCODING *encoding)>
+*encodingname, STR_VTABLE *encoding)>
 
 Sets the default encoding to C<encoding> with name C<encodingname>.
 
@@ -421,7 +393,7 @@
 PARROT_EXPORT
 INTVAL
 Parrot_make_default_encoding(SHIM_INTERP, SHIM(const char *encodingname),
-        ARGIN(ENCODING *encoding))
+        ARGIN(STR_VTABLE *encoding))
 {
     ASSERT_ARGS(Parrot_make_default_encoding)
     Parrot_default_encoding_ptr = encoding;
@@ -430,7 +402,7 @@
 
 /*
 
-=item C<const ENCODING * Parrot_default_encoding(PARROT_INTERP)>
+=item C<const STR_VTABLE * Parrot_default_encoding(PARROT_INTERP)>
 
 Gets the default encoding.
 
@@ -442,39 +414,13 @@
 PARROT_PURE_FUNCTION
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
-const ENCODING *
+const STR_VTABLE *
 Parrot_default_encoding(SHIM_INTERP)
 {
     ASSERT_ARGS(Parrot_default_encoding)
     return Parrot_default_encoding_ptr;
 }
 
-/*
-
-=item C<encoding_converter_t Parrot_find_encoding_converter(PARROT_INTERP,
-ENCODING *lhs, ENCODING *rhs)>
-
-Finds a converter from encoding C<rhs> to C<lhs>. Not yet implemented, so
-throws an exception.
-
-=cut
-
-*/
-
-PARROT_EXPORT
-PARROT_DOES_NOT_RETURN
-encoding_converter_t
-Parrot_find_encoding_converter(PARROT_INTERP, ARGIN(ENCODING *lhs), ARGIN(ENCODING *rhs))
-{
-    ASSERT_ARGS(Parrot_find_encoding_converter)
-    UNUSED(lhs);
-    UNUSED(rhs);
-
-    /* XXX Apparently unwritten https://trac.parrot.org/parrot/wiki/StringsTasklist */
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-        "Can't find encoding converters yet.");
-}
-
 
 /*
  * Local variables:

Added: trunk/src/string/encoding/ascii.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ trunk/src/string/encoding/ascii.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -0,0 +1,554 @@
+/*
+Copyright (C) 2004-2010, Parrot Foundation.
+$Id$
+
+=head1 NAME
+
+src/string/encoding/ascii.c
+
+=head1 DESCRIPTION
+
+This file implements encoding functions for ASCII strings.
+
+=over 4
+
+=cut
+
+*/
+
+#include "parrot/parrot.h"
+#include "ascii.h"
+#include "shared.h"
+#include "tables.h"
+
+/* HEADERIZER HFILE: src/string/encoding/ascii.h */
+
+/* HEADERIZER BEGIN: static */
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+
+PARROT_CANNOT_RETURN_NULL
+PARROT_WARN_UNUSED_RESULT
+static STRING * ascii_chr(PARROT_INTERP, UINTVAL codepoint)
+        __attribute__nonnull__(1);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* ascii_downcase(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* ascii_downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+static INTVAL ascii_find_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+static INTVAL ascii_find_not_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
+static INTVAL ascii_is_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* ascii_titlecase(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* ascii_titlecase_first(PARROT_INTERP,
+    ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING * ascii_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* ascii_upcase(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* ascii_upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL ascii_validate(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+#define ASSERT_ARGS_ascii_chr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_ascii_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ascii_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ascii_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ascii_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ascii_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ascii_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ascii_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ascii_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ascii_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ascii_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ascii_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+/* HEADERIZER END: static */
+
+
+/*
+
+=item C<static STRING * ascii_to_encoding(PARROT_INTERP, const STRING *src)>
+
+Converts STRING C<src> to a new ASCII-encoded STRING and returns it. C<src>
+itself is not modified.
+
+If C<src> uses at most one byte per codepoint, its bytes are checked in
+place and the result is a clone of C<src> retagged as ascii. Otherwise the
+codepoints are read through a string iterator and copied one by one into a
+newly created ascii string.
+
+Throws C<EXCEPTION_LOSSY_CONVERSION> if any codepoint is 0x80 or above.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING *
+ascii_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(ascii_to_encoding)
+    STRING        *dest;
+
+    if (STRING_max_bytes_per_codepoint(src) == 1) {
+        unsigned char * const src_buf  = (unsigned char *)src->strstart;
+        UINTVAL offs;
+
+        for (offs = 0; offs < src->strlen; ++offs) {
+            UINTVAL c = src_buf[offs];
+            if (c >= 0x80)
+                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
+                    "lossy conversion to ascii");
+        }
+
+        dest           = Parrot_str_clone(interp, src);
+        dest->encoding = Parrot_ascii_encoding_ptr;
+    }
+    else {
+        String_iter iter;
+        unsigned char *p;
+        const UINTVAL len = src->strlen;
+
+        dest = Parrot_str_new_init(interp, NULL, len,
+                Parrot_ascii_encoding_ptr, 0);
+        p    = (unsigned char *)dest->strstart;
+        STRING_ITER_INIT(interp, &iter);
+
+        while (iter.charpos < len) {
+            const UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
+            if (c >= 0x80)
+                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
+                        "can't convert unicode string to ascii");
+            *p++ = c;
+        }
+
+        dest->bufused = len;
+        dest->strlen  = len;
+    }
+
+    return dest;
+}
+
+/*
+
+=item C<static STRING * ascii_chr(PARROT_INTERP, UINTVAL codepoint)>
+
+Creates a new one-character ascii STRING from the single codepoint
+C<codepoint> and returns it.
+
+The codepoint is narrowed to a C<char> with a plain cast; no check is made
+that it is a valid ASCII value (below 0x80).
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+PARROT_WARN_UNUSED_RESULT
+static STRING *
+ascii_chr(PARROT_INTERP, UINTVAL codepoint)
+{
+    ASSERT_ARGS(ascii_chr)
+    char real_codepoint = (char)codepoint;
+    STRING * const return_string = string_make(interp, &real_codepoint, 1, "ascii", 0);
+    return return_string;
+}
+
+/*
+
+=item C<static UINTVAL ascii_validate(PARROT_INTERP, const STRING *src)>
+
+Verifies that the given string is valid ASCII. Iterates over the codepoints
+of C<src> and returns 0 as soon as one is 0x80 or above. Returns 1 if every
+codepoint is below 0x80, which includes the case of an empty string.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL
+ascii_validate(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(ascii_validate)
+    String_iter iter;
+    const UINTVAL length = Parrot_str_length(interp, src);
+
+    STRING_ITER_INIT(interp, &iter);
+    while (iter.charpos < length) {
+        const UINTVAL codepoint = STRING_iter_get_and_advance(interp, src, &iter);
+        if (codepoint >= 0x80)
+            return 0;
+    }
+    return 1;
+}
+
+/*
+
+=item C<static INTVAL ascii_is_cclass(PARROT_INTERP, INTVAL flags, const STRING
+*src, UINTVAL offset)>
+
+Returns 1 if the codepoint at position C<offset> in C<src> belongs to any of
+the character classes given in C<flags>, according to
+C<Parrot_ascii_typetable>. Returns 0 otherwise, including when C<offset> is
+at or past the end of the string or the codepoint lies outside the table.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+static INTVAL
+ascii_is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
+{
+    ASSERT_ARGS(ascii_is_cclass)
+    UINTVAL codepoint;
+
+    if (offset >= src->strlen)
+        return 0;
+    codepoint = STRING_ord(interp, src, offset);
+
+    if (codepoint >= sizeof (Parrot_ascii_typetable) / sizeof (Parrot_ascii_typetable[0])) {
+        return 0;
+    }
+    return (Parrot_ascii_typetable[codepoint] & flags) ? 1 : 0;
+}
+
+/*
+
+=item C<static INTVAL ascii_find_cclass(PARROT_INTERP, INTVAL flags, const
+STRING *src, UINTVAL offset, UINTVAL count)>
+
+Searches C<src> for the first character that belongs to any of the character
+classes given in C<flags>, looking at up to C<count> characters starting at
+C<offset>. Characters are classified with C<Parrot_ascii_typetable>.
+
+Returns the position of the first match. If there is none, returns the end
+of the searched range, that is C<offset + count> clamped to the length of
+the string.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+static INTVAL
+ascii_find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(ascii_find_cclass)
+    const unsigned char *contents = (const unsigned char *)src->strstart;
+    UINTVAL pos = offset;
+    UINTVAL end = offset + count;
+
+    end = src->strlen < end ? src->strlen : end;
+    for (; pos < end; ++pos) {
+        if ((Parrot_ascii_typetable[contents[pos]] & flags) != 0) {
+            return pos;
+        }
+    }
+    return end;
+}
+
+/*
+
+=item C<static INTVAL ascii_find_not_cclass(PARROT_INTERP, INTVAL flags, const
+STRING *src, UINTVAL offset, UINTVAL count)>
+
+Searches C<src> for the first character that belongs to none of the
+character classes given in C<flags>, looking at up to C<count> characters
+starting at C<offset>. Characters are classified with
+C<Parrot_ascii_typetable>.
+
+Returns the position of the first such character. If there is none, returns
+the end of the searched range, that is C<offset + count> clamped to the
+length of the string.
+
+=cut
+
+*/
+
+static INTVAL
+ascii_find_not_cclass(PARROT_INTERP,
+                INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(ascii_find_not_cclass)
+    const unsigned char *contents = (const unsigned char *)src->strstart;
+    UINTVAL pos = offset;
+    UINTVAL end = offset + count;
+
+    end = src->strlen < end ? src->strlen : end;
+    for (; pos < end; ++pos) {
+        if ((Parrot_ascii_typetable[contents[pos]] & flags) == 0) {
+            return pos;
+        }
+    }
+    return end;
+}
+
+/*
+
+=item C<static STRING* ascii_upcase(PARROT_INTERP, const STRING *src)>
+
+Returns a clone of the STRING C<src> in which every character has been
+converted with C<toupper()>. C<src> itself is left unchanged.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+ascii_upcase(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(ascii_upcase)
+    STRING * const result = Parrot_str_clone(interp, src);
+    const UINTVAL n = src->strlen;
+
+    if (n) {
+        char * const buffer = result->strstart;
+        UINTVAL offset;
+
+        for (offset = 0; offset < n; ++offset) {
+            buffer[offset] = (char)toupper((unsigned char)buffer[offset]);
+        }
+    }
+
+    return result;
+}
+
+/*
+
+=item C<static STRING* ascii_downcase(PARROT_INTERP, const STRING *src)>
+
+Returns a clone of the STRING C<src> in which every character has been
+converted with C<tolower()>. C<src> itself is left unchanged.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+ascii_downcase(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(ascii_downcase)
+    STRING       *result = Parrot_str_clone(interp, src);
+    const UINTVAL n      = src->strlen;
+
+    if (n) {
+        char * const buffer = result->strstart;
+        UINTVAL offset;
+
+        for (offset = 0; offset < n; ++offset) {
+            buffer[offset] = (char)tolower((unsigned char)buffer[offset]);
+        }
+    }
+
+    return result;
+}
+
+/*
+
+=item C<static STRING* ascii_titlecase(PARROT_INTERP, const STRING *src)>
+
+Returns a clone of the STRING given by C<src> converted to title case, where
+the first character is upper case and all the rest of the characters
+are lower-case. C<src> itself is left unchanged; an empty string is
+returned as an unmodified clone.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+ascii_titlecase(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(ascii_titlecase)
+    STRING       *result = Parrot_str_clone(interp, src);
+    const UINTVAL n      = src->strlen;
+
+    if (n) {
+        char * const buffer = result->strstart;
+        UINTVAL offset;
+
+        buffer[0] = (char)toupper((unsigned char)buffer[0]);
+        for (offset = 1; offset < n; ++offset) {
+            buffer[offset] = (char)tolower((unsigned char)buffer[offset]);
+        }
+    }
+
+    return result;
+}
+
+/*
+
+=item C<static STRING* ascii_upcase_first(PARROT_INTERP, const STRING *src)>
+
+Returns a clone of the STRING C<src> whose first character has been
+converted to upper case; the rest of the string is copied unchanged.
+C<src> itself is not modified.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+ascii_upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(ascii_upcase_first)
+    STRING * const result = Parrot_str_clone(interp, src);
+
+    if (result->strlen > 0) {
+        char * const buffer = result->strstart;
+        buffer[0] = (char)toupper((unsigned char)buffer[0]);
+    }
+
+    return result;
+}
+
+/*
+
+=item C<static STRING* ascii_downcase_first(PARROT_INTERP, const STRING *src)>
+
+Returns a clone of the STRING C<src> whose first character has been
+converted to lower case; the rest of the characters are copied unchanged.
+C<src> itself is not modified.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+ascii_downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(ascii_downcase_first)
+    STRING * const result = Parrot_str_clone(interp, src);
+
+    if (result->strlen > 0) {
+        char * const buffer = result->strstart;
+        buffer[0] = (char)tolower((unsigned char)buffer[0]);
+    }
+
+    return result;
+}
+
+/*
+
+=item C<static STRING* ascii_titlecase_first(PARROT_INTERP, const STRING *src)>
+
+Returns a clone of the STRING C<src> whose first letter has been converted
+to upper case; the rest of the string is copied unchanged. The code does
+the same thing as C<ascii_upcase_first>. C<src> itself is not modified.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+ascii_titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(ascii_titlecase_first)
+    STRING * const result = Parrot_str_clone(interp, src);
+
+    if (result->strlen > 0) {
+        char * const buffer = result->strstart;
+        buffer[0] = (char)toupper((unsigned char)buffer[0]);
+    }
+
+    return result;
+}
+
+static STR_VTABLE Parrot_ascii_encoding = {
+    0, /* presumably the encoding number, assigned at registration -- TODO confirm */
+    "ascii", /* encoding name */
+    NULL, /* NOTE(review): field meaning not visible here -- confirm against STR_VTABLE */
+    1, /* Max bytes per codepoint */
+
+    ascii_to_encoding,
+    ascii_chr,
+
+    fixed8_equal,
+    fixed8_compare,
+    fixed8_index,
+    fixed8_rindex,
+    fixed8_hash,
+    ascii_validate,
+
+    fixed8_scan,
+    fixed8_ord,
+    fixed8_substr,
+
+    ascii_is_cclass,
+    ascii_find_cclass,
+    ascii_find_not_cclass,
+
+    encoding_get_graphemes,
+    fixed8_compose,
+    encoding_decompose,
+
+    ascii_upcase,
+    ascii_downcase,
+    ascii_titlecase,
+    ascii_upcase_first,
+    ascii_downcase_first,
+    ascii_titlecase_first,
+
+    fixed8_iter_get,
+    fixed8_iter_skip,
+    fixed8_iter_get_and_advance,
+    fixed8_iter_set_and_advance,
+    fixed8_iter_set_position
+};
+
+STR_VTABLE *Parrot_ascii_encoding_ptr = &Parrot_ascii_encoding;
+
+
+/*
+ * Local variables:
+ *   c-file-style: "parrot"
+ * End:
+ * vim: expandtab shiftwidth=4:
+ */
+

Added: trunk/src/string/encoding/ascii.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ trunk/src/string/encoding/ascii.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -0,0 +1,30 @@
+/* ascii.h
+ *  Copyright (C) 2004-2010, Parrot Foundation.
+ *  SVN Info
+ *     $Id$
+ *  Overview:
+ *     This is the header for the ASCII encoding
+ *  Data Structure and Algorithms:
+ *  History:
+ *  Notes:
+ *  References:
+ */
+
+#ifndef PARROT_ENCODING_ASCII_H_GUARD
+#define PARROT_ENCODING_ASCII_H_GUARD
+
+/* HEADERIZER BEGIN: src/string/encoding/ascii.c */
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+
+
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+/* HEADERIZER END: src/string/encoding/ascii.c */
+
+#endif /* PARROT_ENCODING_ASCII_H_GUARD */
+
+/*
+ * Local variables:
+ *   c-file-style: "parrot"
+ * End:
+ * vim: expandtab shiftwidth=4:
+ */

Added: trunk/src/string/encoding/binary.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ trunk/src/string/encoding/binary.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -0,0 +1,275 @@
+/*
+Copyright (C) 2004-2010, Parrot Foundation.
+$Id$
+
+=head1 NAME
+
+src/string/encoding/binary.c
+
+=head1 DESCRIPTION
+
+This file implements encoding functions for binary strings.
+
+=over 4
+
+=cut
+
+*/
+
+#include "parrot/parrot.h"
+#include "binary.h"
+#include "shared.h"
+
+/* HEADERIZER HFILE: src/string/encoding/binary.h */
+
+/* HEADERIZER BEGIN: static */
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* binary_change_case(PARROT_INTERP, SHIM(const STRING *src))
+        __attribute__nonnull__(1);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING * binary_chr(PARROT_INTERP, UINTVAL codepoint)
+        __attribute__nonnull__(1);
+
+static INTVAL binary_find_cclass(SHIM_INTERP,
+    SHIM(INTVAL flags),
+    SHIM(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count);
+
+static INTVAL binary_find_not_cclass(SHIM_INTERP,
+    SHIM(INTVAL flags),
+    SHIM(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count);
+
+static INTVAL binary_is_cclass(SHIM_INTERP,
+    SHIM(INTVAL flags),
+    SHIM(const STRING *src),
+    SHIM(UINTVAL offset));
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* binary_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+static UINTVAL binary_validate(SHIM_INTERP, SHIM(const STRING *src));
+#define ASSERT_ARGS_binary_change_case __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_binary_chr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_binary_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
+#define ASSERT_ARGS_binary_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
+#define ASSERT_ARGS_binary_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
+#define ASSERT_ARGS_binary_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_binary_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+/* HEADERIZER END: static */
+
+#ifdef EXCEPTION
+#  undef EXCEPTION
+#endif
+
+#define EXCEPTION(err, str) \
+    Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
+
+
+/*
+
+=item C<static STRING* binary_to_encoding(PARROT_INTERP, const STRING *src)>
+
+Returns a copy of the STRING C<src> tagged with the binary encoding. No
+bytes are checked or converted: the copy's length in characters is set to
+its length in bytes (C<bufused>) and its C<hashval> is reset to 0.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+binary_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(binary_to_encoding)
+    STRING      *dest;
+
+    dest           = Parrot_str_copy(interp, src);
+    dest->encoding = Parrot_binary_encoding_ptr;
+    dest->strlen   = dest->bufused;
+    dest->hashval  = 0;
+
+    return dest;
+}
+
+
+/*
+
+=item C<static STRING * binary_chr(PARROT_INTERP, UINTVAL codepoint)>
+
+Creates a new one-byte binary STRING from the single codepoint C<codepoint>
+and returns it.
+
+The codepoint is narrowed to a C<char> with a plain cast; no range check is
+made.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING *
+binary_chr(PARROT_INTERP, UINTVAL codepoint)
+{
+    ASSERT_ARGS(binary_chr)
+    char real_codepoint = (char)codepoint;
+    return string_make(interp, &real_codepoint, 1, "binary", 0);
+}
+
+
+/*
+
+=item C<static UINTVAL binary_validate(PARROT_INTERP, const STRING *src)>
+
+Always returns 1 without looking at C<src>. All sequential data is valid
+binary data.
+
+=cut
+
+*/
+
+/* Binary's always valid */
+static UINTVAL
+binary_validate(SHIM_INTERP, SHIM(const STRING *src))
+{
+    ASSERT_ARGS(binary_validate)
+    return 1;
+}
+
+
+/*
+
+=item C<static INTVAL binary_is_cclass(PARROT_INTERP, INTVAL flags, const STRING
+*src, UINTVAL offset)>
+
+Always returns 0: no position in a binary string is treated as belonging to
+a character class. All arguments are ignored.
+
+=cut
+
+*/
+
+static INTVAL
+binary_is_cclass(SHIM_INTERP, SHIM(INTVAL flags), SHIM(const STRING *src), SHIM(UINTVAL offset))
+{
+    ASSERT_ARGS(binary_is_cclass)
+    return 0;
+}
+
+
+/*
+
+=item C<static INTVAL binary_find_cclass(PARROT_INTERP, INTVAL flags, const
+STRING *src, UINTVAL offset, UINTVAL count)>
+
+Never finds a character in the given character class: always returns
+C<offset + count>, the end of the requested range. C<src> is not examined,
+so the result is not clamped to the length of the string.
+
+=cut
+
+*/
+
+static INTVAL
+binary_find_cclass(SHIM_INTERP, SHIM(INTVAL flags),
+            SHIM(const STRING *src), UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(binary_find_cclass)
+    return offset + count;
+}
+
+
+/*
+
+=item C<static INTVAL binary_find_not_cclass(PARROT_INTERP, INTVAL flags, const
+STRING *src, UINTVAL offset, UINTVAL count)>
+
+Always returns C<offset>, i.e. reports the very first position as not being
+in the given character class. C<src> and C<count> are not examined.
+
+=cut
+
+*/
+
+static INTVAL
+binary_find_not_cclass(SHIM_INTERP, SHIM(INTVAL flags),
+               SHIM(const STRING *src), UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(binary_find_not_cclass)
+    return offset;
+}
+
+
+/*
+
+=item C<static STRING* binary_change_case(PARROT_INTERP, const STRING *src)>
+
+Throws an C<EXCEPTION_INVALID_CHARTYPE> exception because we cannot change
+case of a binary string. This one function fills all six case-changing
+entries of the binary encoding table.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+binary_change_case(PARROT_INTERP, SHIM(const STRING *src))
+{
+    ASSERT_ARGS(binary_change_case)
+    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't change case of binary data");
+}
+
+
+static STR_VTABLE Parrot_binary_encoding = {
+    0, /* presumably the encoding number, assigned at registration -- TODO confirm */
+    "binary", /* encoding name */
+    NULL, /* NOTE(review): field meaning not visible here -- confirm against STR_VTABLE */
+    1, /* Max bytes per codepoint */
+
+    binary_to_encoding,
+    binary_chr,
+
+    fixed8_equal,
+    fixed8_compare,
+    fixed8_index,
+    fixed8_rindex,
+    fixed8_hash,
+    binary_validate,
+
+    fixed8_scan,
+    fixed8_ord,
+    fixed8_substr,
+
+    binary_is_cclass,
+    binary_find_cclass,
+    binary_find_not_cclass,
+
+    encoding_get_graphemes,
+    fixed8_compose,
+    encoding_decompose,
+
+    binary_change_case, /* every case-changing entry throws for binary data */
+    binary_change_case,
+    binary_change_case,
+    binary_change_case,
+    binary_change_case,
+    binary_change_case,
+
+    fixed8_iter_get,
+    fixed8_iter_skip,
+    fixed8_iter_get_and_advance,
+    fixed8_iter_set_and_advance,
+    fixed8_iter_set_position
+};
+
+STR_VTABLE *Parrot_binary_encoding_ptr = &Parrot_binary_encoding;
+
+
+/*
+ * Local variables:
+ *   c-file-style: "parrot"
+ * End:
+ * vim: expandtab shiftwidth=4:
+ */

Copied and modified: trunk/src/string/encoding/binary.h (from r48832, trunk/src/string/charset/binary.h)
==============================================================================
--- trunk/src/string/charset/binary.h	Tue Sep  7 22:20:33 2010	(r48832, copy source)
+++ trunk/src/string/encoding/binary.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -10,21 +10,17 @@
  *  References:
  */
 
-#ifndef PARROT_CHARSET_BINARY_H_GUARD
-#define PARROT_CHARSET_BINARY_H_GUARD
+#ifndef PARROT_ENCODING_BINARY_H_GUARD
+#define PARROT_ENCODING_BINARY_H_GUARD
 
-/* HEADERIZER BEGIN: src/string/charset/binary.c */
+/* HEADERIZER BEGIN: src/string/encoding/binary.c */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-void Parrot_charset_binary_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
 
-#define ASSERT_ARGS_Parrot_charset_binary_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: src/string/charset/binary.c */
+/* HEADERIZER END: src/string/encoding/binary.c */
 
-#endif /* PARROT_CHARSET_BINARY_H_GUARD */
+#endif /* PARROT_ENCODING_BINARY_H_GUARD */
 
 /*
  * Local variables:

Deleted: trunk/src/string/encoding/fixed_8.c
==============================================================================
--- trunk/src/string/encoding/fixed_8.c	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,578 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/encoding/fixed_8.c
-
-=head1 DESCRIPTION
-
-This file implements the encoding functions for fixed-width 8-bit codepoints
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "fixed_8.h"
-
-/* HEADERIZER HFILE: src/string/encoding/fixed_8.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-static UINTVAL bytes(SHIM_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(2);
-
-static UINTVAL codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL find_cclass(SHIM_INTERP,
-    ARGIN(const STRING *s),
-    ARGIN(const INTVAL *typetable),
-    INTVAL flags,
-    UINTVAL pos,
-    UINTVAL end)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-static UINTVAL fixed8_iter_get(PARROT_INTERP,
-    ARGIN(const STRING *str),
-    ARGIN(const String_iter *iter),
-    INTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP,
-    ARGIN(const STRING *str),
-    ARGMOD(String_iter *iter))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        FUNC_MODIFIES(*iter);
-
-static void fixed8_iter_set_and_advance(PARROT_INTERP,
-    ARGMOD(STRING *str),
-    ARGMOD(String_iter *iter),
-    UINTVAL c)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        FUNC_MODIFIES(*str)
-        FUNC_MODIFIES(*iter);
-
-static void fixed8_iter_set_position(SHIM_INTERP,
-    ARGIN(const STRING *str),
-    ARGMOD(String_iter *iter),
-    UINTVAL pos)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        FUNC_MODIFIES(*iter);
-
-static void fixed8_iter_skip(SHIM_INTERP,
-    ARGIN(const STRING *str),
-    ARGMOD(String_iter *iter),
-    INTVAL skip)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        FUNC_MODIFIES(*iter);
-
-static size_t fixed_8_hash(SHIM_INTERP,
-    ARGIN(const STRING *s),
-    size_t hashval)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL get_byte(SHIM_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_bytes(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL get_codepoint(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_codepoints(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static void set_byte(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL byte)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_DOES_NOT_RETURN
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_encoding(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(s) \
-    , PARROT_ASSERT_ARG(typetable))
-#define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed_8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(s))
-#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#define UNIMPL Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED, \
-    "unimpl fixed_8")
-
-/*
-
-=item C<static STRING * to_encoding(PARROT_INTERP, const STRING *src)>
-
-Converts the string C<src> to this particular encoding.  If C<dest> is
-provided, it will contain the result.  Otherwise this function operates in
-place.
-
-
-=cut
-
-*/
-
-PARROT_DOES_NOT_RETURN
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_encoding(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(to_encoding)
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static UINTVAL get_codepoint(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
-
-codepoints are bytes, so delegate
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-get_codepoint(PARROT_INTERP, ARGIN(const STRING *src),
-        UINTVAL offset)
-{
-    ASSERT_ARGS(get_codepoint)
-    return get_byte(interp, src, offset);
-}
-
-
-/*
-
-=item C<static UINTVAL find_cclass(PARROT_INTERP, const STRING *s, const INTVAL
-*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
-
-codepoints are bytes, so delegate
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-find_cclass(SHIM_INTERP, ARGIN(const STRING *s), ARGIN(const INTVAL *typetable),
-INTVAL flags, UINTVAL pos, UINTVAL end)
-{
-    ASSERT_ARGS(find_cclass)
-    const unsigned char *contents = (const unsigned char *)s->strstart;
-    for (; pos < end; ++pos) {
-        if ((typetable[contents[pos]] & flags) != 0) {
-            return pos;
-        }
-    }
-    return end;
-}
-
-/*
-
-=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
-
-Returns the byte in string C<src> at position C<offset>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-get_byte(SHIM_INTERP, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(get_byte)
-    const unsigned char *contents = (const unsigned char *)src->strstart;
-
-    if (offset >= src->bufused) {
-/*        Parrot_ex_throw_from_c_args(interp, NULL, 0,
-                "get_byte past the end of the buffer (%i of %i)",
-                offset, src->bufused); */
-        return 0;
-    }
-
-    return contents[offset];
-}
-
-/*
-
-=item C<static void set_byte(PARROT_INTERP, const STRING *src, UINTVAL offset,
-UINTVAL byte)>
-
-Sets, in string C<src> at position C<offset>, the byte C<byte>.
-
-=cut
-
-*/
-
-static void
-set_byte(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL byte)
-{
-    ASSERT_ARGS(set_byte)
-    unsigned char *contents;
-
-    if (offset >= src->bufused)
-        Parrot_ex_throw_from_c_args(interp, NULL, 0,
-            "set_byte past the end of the buffer");
-
-    contents = (unsigned char *)src->strstart;
-    contents[offset] = (unsigned char)byte;
-}
-
-/*
-
-=item C<static STRING * get_codepoints(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the codepoints in string C<src> at position C<offset> and length
-C<count>.  (Delegates to C<get_bytes>.)
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_codepoints(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(get_codepoints)
-    STRING * const return_string = get_bytes(interp, src, offset, count);
-    return_string->charset = src->charset;
-    return return_string;
-}
-
-/*
-
-=item C<static STRING * get_bytes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the bytes in string C<src> at position C<offset> and length C<count>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_bytes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(get_bytes)
-    STRING * const return_string = Parrot_str_copy(interp, src);
-
-    return_string->encoding = src->encoding;
-    return_string->charset = src->charset;
-
-    return_string->strstart = (char *)return_string->strstart + offset ;
-    return_string->bufused = count;
-
-    return_string->strlen = count;
-    return_string->hashval = 0;
-
-    return return_string;
-}
-
-
-/*
-
-=item C<static UINTVAL codepoints(PARROT_INTERP, const STRING *src)>
-
-Returns the number of codepoints in string C<src>.
-
-=cut
-
-*/
-
-static UINTVAL
-codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(codepoints)
-    return bytes(interp, src);
-}
-
-/*
-
-=item C<static UINTVAL bytes(PARROT_INTERP, const STRING *src)>
-
-Returns the number of bytes in string C<src>.
-
-=cut
-
-*/
-
-static UINTVAL
-bytes(SHIM_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(bytes)
-    return src->bufused;
-}
-
-/*
- * iterator functions
- */
-
-/*
-
-=item C<static UINTVAL fixed8_iter_get(PARROT_INTERP, const STRING *str, const
-String_iter *iter, INTVAL offset)>
-
-Get the character at C<iter> plus C<offset>.
-
-=cut
-
-*/
-
-static UINTVAL
-fixed8_iter_get(PARROT_INTERP,
-    ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset)
-{
-    ASSERT_ARGS(fixed8_iter_get)
-    return get_byte(interp, str, iter->charpos + offset);
-}
-
-/*
-
-=item C<static void fixed8_iter_skip(PARROT_INTERP, const STRING *str,
-String_iter *iter, INTVAL skip)>
-
-Moves the string iterator C<i> by C<skip> characters.
-
-=cut
-
-*/
-
-static void
-fixed8_iter_skip(SHIM_INTERP,
-    ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip)
-{
-    ASSERT_ARGS(fixed8_iter_skip)
-    iter->bytepos += skip;
-    iter->charpos += skip;
-    PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str));
-}
-
-/*
-
-=item C<static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, const STRING
-*str, String_iter *iter)>
-
-Moves the string iterator C<i> to the next codepoint.
-
-=cut
-
-*/
-
-static UINTVAL
-fixed8_iter_get_and_advance(PARROT_INTERP,
-    ARGIN(const STRING *str), ARGMOD(String_iter *iter))
-{
-    ASSERT_ARGS(fixed8_iter_get_and_advance)
-    const UINTVAL c = get_byte(interp, str, iter->charpos++);
-    iter->bytepos++;
-    return c;
-}
-
-/*
-
-=item C<static void fixed8_iter_set_and_advance(PARROT_INTERP, STRING *str,
-String_iter *iter, UINTVAL c)>
-
-With the string iterator C<i>, appends the codepoint C<c> and advances to the
-next position in the string.
-
-=cut
-
-*/
-
-static void
-fixed8_iter_set_and_advance(PARROT_INTERP,
-    ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c)
-{
-    ASSERT_ARGS(fixed8_iter_set_and_advance)
-    set_byte(interp, str, iter->charpos++, c);
-    iter->bytepos++;
-}
-
-/*
-
-=item C<static void fixed8_iter_set_position(PARROT_INTERP, const STRING *str,
-String_iter *iter, UINTVAL pos)>
-
-Moves the string iterator C<i> to the position C<n> in the string.
-
-=cut
-
-*/
-
-static void
-fixed8_iter_set_position(SHIM_INTERP,
-    ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos)
-{
-    ASSERT_ARGS(fixed8_iter_set_position)
-    iter->bytepos = iter->charpos = pos;
-    PARROT_ASSERT(pos <= Buffer_buflen(str));
-}
-
-/*
-
-=item C<static size_t fixed_8_hash(PARROT_INTERP, const STRING *s, size_t
-hashval)>
-
-Returns the hashed value of the string, given a seed in hashval.
-
-=cut
-
-*/
-
-static size_t
-fixed_8_hash(SHIM_INTERP, ARGIN(const STRING *s), size_t hashval)
-{
-    ASSERT_ARGS(fixed_8_hash)
-    const unsigned char *pos = (const unsigned char *)s->strstart;
-    UINTVAL        len = s->strlen;
-
-    while (len--) {
-        hashval += hashval << 5;
-        hashval += *(pos++);
-    }
-
-    return hashval;
-}
-
-
-/*
-
-=item C<void Parrot_encoding_fixed_8_init(PARROT_INTERP)>
-
-Initializes the fixed-8 encoding.
-
-=cut
-
-*/
-
-void
-Parrot_encoding_fixed_8_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_encoding_fixed_8_init)
-    ENCODING * const return_encoding = Parrot_new_encoding(interp);
-
-    ENCODING base_encoding = {
-        "fixed_8",
-        1, /* Max bytes per codepoint */
-        to_encoding,
-        get_codepoint,
-        get_byte,
-        set_byte,
-        get_codepoints,
-        get_bytes,
-        codepoints,
-        bytes,
-        find_cclass,
-        fixed_8_hash,
-        fixed8_iter_get,
-        fixed8_iter_skip,
-        fixed8_iter_get_and_advance,
-        fixed8_iter_set_and_advance,
-        fixed8_iter_set_position
-    };
-
-    STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
-    Parrot_register_encoding(interp, "fixed_8", return_encoding);
-
-    return;
-}
-
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */
-

Deleted: trunk/src/string/encoding/fixed_8.h
==============================================================================
--- trunk/src/string/encoding/fixed_8.h	Tue Sep  7 22:58:38 2010	(r48832)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,34 +0,0 @@
-/* fixed_8.h
- *  Copyright (C) 2004-2007, Parrot Foundation.
- *  SVN Info
- *     $Id$
- *  Overview:
- *     This is the header for the 8-bit fixed-width encoding
- *  Data Structure and Algorithms:
- *  History:
- *  Notes:
- *  References:
- */
-
-#ifndef PARROT_ENCODING_FIXED_8_H_GUARD
-#define PARROT_ENCODING_FIXED_8_H_GUARD
-
-/* HEADERIZER BEGIN: src/string/encoding/fixed_8.c */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-void Parrot_encoding_fixed_8_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_Parrot_encoding_fixed_8_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: src/string/encoding/fixed_8.c */
-
-#endif /* PARROT_ENCODING_FIXED_8_H_GUARD */
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Added: trunk/src/string/encoding/latin1.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ trunk/src/string/encoding/latin1.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -0,0 +1,582 @@
+/*
+Copyright (C) 2004-2010, Parrot Foundation.
+$Id$
+
+=head1 NAME
+
+src/string/encoding/latin1.c
+
+=head1 DESCRIPTION
+
+This file implements encoding functions for ISO-8859-1 strings.
+
+=over 4
+
+=cut
+
+*/
+
+#include "parrot/parrot.h"
+#include "latin1.h"
+#include "shared.h"
+#include "tables.h"
+
+/* HEADERIZER HFILE: src/string/encoding/latin1.h */
+
+/* HEADERIZER BEGIN: static */
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+
+PARROT_CANNOT_RETURN_NULL
+static STRING * latin1_chr(PARROT_INTERP, UINTVAL codepoint)
+        __attribute__nonnull__(1);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* latin1_downcase(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* latin1_downcase_first(PARROT_INTERP,
+    ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+static INTVAL latin1_find_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+static INTVAL latin1_find_not_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+static INTVAL latin1_is_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* latin1_titlecase(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* latin1_titlecase_first(PARROT_INTERP,
+    ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+PARROT_WARN_UNUSED_RESULT
+static STRING * latin1_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* latin1_upcase(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+static STRING* latin1_upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+static UINTVAL latin1_validate(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+#define ASSERT_ARGS_latin1_chr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_latin1_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_latin1_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_latin1_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_latin1_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_latin1_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_latin1_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_latin1_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_latin1_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_latin1_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_latin1_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_latin1_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+/* HEADERIZER END: static */
+
+
+/*
+
+=item C<static STRING * latin1_to_encoding(PARROT_INTERP, const STRING *src)>
+
+Returns a new ISO-8859-1 copy of C<src>; throws EXCEPTION_LOSSY_CONVERSION on a codepoint >= 0x100.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+PARROT_WARN_UNUSED_RESULT
+static STRING *
+latin1_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(latin1_to_encoding)
+    STRING      *dest;
+
+    if (STRING_max_bytes_per_codepoint(src) == 1) {
+        /* Single-byte source: clone it and relabel the clone as latin1. */
+        dest           = Parrot_str_clone(interp, src);
+        dest->encoding = Parrot_latin1_encoding_ptr;
+    }
+    else {
+        String_iter  iter;
+        unsigned char *p;
+        const UINTVAL len = src->strlen;
+
+        dest = Parrot_str_new_init(interp, NULL, len, Parrot_latin1_encoding_ptr, 0);
+        p    = (unsigned char *)dest->strstart;
+        STRING_ITER_INIT(interp, &iter);
+
+        while (iter.charpos < len) {
+            const UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
+            if (c >= 0x100)
+                Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
+                    "lossy conversion to iso-8859-1");
+            *p++ = (unsigned char)c;
+        }
+
+        dest->bufused = len;
+        dest->strlen  = len;
+    }
+
+    return dest;
+}
+
+
+/*
+
+=item C<static STRING * latin1_chr(PARROT_INTERP, UINTVAL codepoint)>
+
+Creates a new one-character "iso-8859-1" STRING from C<codepoint>, which must be < 0x100.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING *
+latin1_chr(PARROT_INTERP, UINTVAL codepoint)
+{
+    ASSERT_ARGS(latin1_chr)
+    char real_codepoint = (char)codepoint;
+
+    /* Check the precondition before a string is allocated from the truncated value. */
+    PARROT_ASSERT(codepoint < 0x100);
+    return string_make(interp, &real_codepoint, 1, "iso-8859-1", 0);
+}
+
+
+/*
+
+=item C<static UINTVAL latin1_validate(PARROT_INTERP, const STRING *src)>
+
+Returns 0 as soon as a codepoint of C<src> is found to be >= 0x100, and 1 if none is.
+
+=cut
+
+*/
+
+static UINTVAL
+latin1_validate(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(latin1_validate)
+    const INTVAL total = Parrot_str_length(interp, src);
+    INTVAL       idx   = 0;
+
+    while (idx < total) {
+        const UINTVAL ord = STRING_ord(interp, src, idx);
+        if (ord > 0xff) return 0;
+        ++idx;
+    }
+    return 1;
+}
+
+
+/*
+
+=item C<static INTVAL latin1_is_cclass(PARROT_INTERP, INTVAL flags, const STRING
+*src, UINTVAL offset)>
+
+Returns 1 if the character at C<offset> of C<src> matches C<flags>; 0 otherwise or if out of range.
+
+=cut
+
+*/
+
+static INTVAL
+latin1_is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
+{
+    ASSERT_ARGS(latin1_is_cclass)
+    UINTVAL codepoint;
+
+    if (offset >= src->strlen) return 0;
+    codepoint = STRING_ord(interp, src, offset);
+
+    /* Bound the index by the table that is actually read below. */
+    if (codepoint >= sizeof (Parrot_iso_8859_1_typetable) /
+                     sizeof (Parrot_iso_8859_1_typetable[0]))
+        return 0;
+    return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
+}
+
+
+/*
+
+=item C<static INTVAL latin1_find_cclass(PARROT_INTERP, INTVAL flags, const
+STRING *src, UINTVAL offset, UINTVAL count)>
+
+Returns the index of the first byte of C<src>, starting at C<offset> and within C<count>
+bytes, whose type-table entry matches C<flags>; else min(C<offset> + C<count>, strlen).
+
+=cut
+
+*/
+
+static INTVAL
+latin1_find_cclass(PARROT_INTERP, INTVAL flags,
+                ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(latin1_find_cclass)
+    const unsigned char *contents = (const unsigned char *)src->strstart;
+    UINTVAL pos = offset;
+    UINTVAL end = offset + count;
+
+    end = src->strlen < end ? src->strlen : end;
+    for (; pos < end; ++pos) {
+        if ((Parrot_iso_8859_1_typetable[contents[pos]] & flags) != 0) {
+            return pos;
+        }
+    }
+    return end;
+}
+
+
+/*
+
+=item C<static INTVAL latin1_find_not_cclass(PARROT_INTERP, INTVAL flags, const
+STRING *src, UINTVAL offset, UINTVAL count)>
+
+Returns the first index from C<offset>, within C<count> bytes, whose byte matches none of C<flags>; else the range end.
+
+=cut
+
+*/
+
+static INTVAL
+latin1_find_not_cclass(PARROT_INTERP, INTVAL flags,
+                ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(latin1_find_not_cclass)
+    const unsigned char *contents = (const unsigned char *)src->strstart;
+    UINTVAL pos = offset;
+    UINTVAL end = offset + count;
+
+    end = src->strlen < end ? src->strlen : end;
+    for (; pos < end; ++pos) {
+        if ((Parrot_iso_8859_1_typetable[contents[pos]] & flags) == 0) {
+            return pos;
+        }
+    }
+    return end;
+}
+
+
+/*
+
+=item C<static STRING* latin1_upcase(PARROT_INTERP, const STRING *src)>
+
+Returns an upper-cased clone of C<src>: 0xe0-0xfe except 0xf7 map to 0xc0-0xde, the rest
+go through toupper(). 0xff is not bit-flipped; its capital (U+0178) is outside ISO-8859-1.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+latin1_upcase(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(latin1_upcase)
+    unsigned char *buffer;
+    UINTVAL        offset;
+    STRING        *result = Parrot_str_clone(interp, src);
+
+    if (!result->strlen)
+        return result;
+
+    buffer = (unsigned char *)result->strstart;
+    for (offset = 0; offset < result->strlen; ++offset) {
+        unsigned int c = buffer[offset]; /* XXX use encoding ? */
+        if (c >= 0xe0 && c != 0xf7 && c != 0xff)
+            c &= ~0x20;
+        else
+            c = toupper((unsigned char)c);
+        buffer[offset] = (unsigned char)c;
+    }
+
+    return result;
+}
+
+
+/*
+
+=item C<static STRING* latin1_downcase(PARROT_INTERP, const STRING *src)>
+
+Returns a lower-cased clone of C<src>: 0xc0-0xde except 0xd7 get bit 0x20 set, all
+other bytes go through tolower().
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+latin1_downcase(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(latin1_downcase)
+    STRING * const result = Parrot_str_clone(interp, src);
+    unsigned char *cur;
+    unsigned char *stop;
+
+    if (result->strlen == 0)
+        return result;
+
+    cur  = (unsigned char *)result->strstart;
+    stop = cur + result->strlen;
+    for (; cur < stop; ++cur) {
+        const unsigned int ch = *cur;
+        if (ch < 0xc0 || ch > 0xde || ch == 0xd7)
+            *cur = (unsigned char)tolower((unsigned char)ch);
+        else
+            *cur = (unsigned char)(ch | 0x20);
+    }
+
+    return result;
+}
+
+
+/*
+
+=item C<static STRING* latin1_titlecase(PARROT_INTERP, const STRING *src)>
+
+Returns a clone of C<src> with its first byte upper-cased and all later bytes lower-cased.
+A leading 0xff is not bit-flipped to 0xdf; its capital (U+0178) is outside ISO-8859-1.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+latin1_titlecase(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(latin1_titlecase)
+    unsigned char *buffer;
+    unsigned int   c;
+    UINTVAL        offset;
+    STRING        *result = Parrot_str_clone(interp, src);
+
+    if (!result->strlen)
+        return result;
+
+    buffer = (unsigned char *)result->strstart;
+    c = buffer[0];
+    if (c >= 0xe0 && c != 0xf7 && c != 0xff)
+        c &= ~0x20;
+    else
+        c = toupper((unsigned char)c);
+    buffer[0] = (unsigned char)c;
+
+    for (offset = 1; offset < result->strlen; ++offset) {
+        c = buffer[offset];
+        if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
+            c |= 0x20;
+        else
+            c = tolower((unsigned char)c);
+        buffer[offset] = (unsigned char)c;
+    }
+
+    return result;
+}
+
+
+/*
+
+=item C<static STRING* latin1_upcase_first(PARROT_INTERP, const STRING *src)>
+
+Returns a clone of C<src> with only its first byte upper-cased. A leading 0xff is not
+bit-flipped to 0xdf; its capital (U+0178) is outside ISO-8859-1.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+latin1_upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(latin1_upcase_first)
+    unsigned char *buffer;
+    unsigned int   c;
+    STRING        *result = Parrot_str_clone(interp, src);
+
+    if (!result->strlen)
+        return result;
+
+    buffer = (unsigned char *)result->strstart;
+    c = buffer[0];
+    if (c >= 0xe0 && c != 0xf7 && c != 0xff)
+        c &= ~0x20;
+    else
+        c = toupper((unsigned char)c);
+    buffer[0] = (unsigned char)c;
+
+    return result;
+}
+
+
+/*
+
+=item C<static STRING* latin1_downcase_first(PARROT_INTERP, const STRING *src)>
+
+Returns a clone of C<src> with only its first byte lower-cased: 0xc0-0xde except 0xd7
+get bit 0x20 set (as in latin1_downcase), any other byte goes through tolower().
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+latin1_downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(latin1_downcase_first)
+    unsigned char *buffer;
+    unsigned int   c;
+    STRING        *result = Parrot_str_clone(interp, src);
+
+    if (!result->strlen)
+        return result;
+
+    buffer = (unsigned char *)result->strstart;
+    c = buffer[0];
+    if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
+        c |= 0x20;
+    else
+        c = tolower((unsigned char)c);
+    buffer[0] = (unsigned char)c;
+
+    return result;
+}
+
+
+/*
+
+=item C<static STRING* latin1_titlecase_first(PARROT_INTERP, const STRING *src)>
+
+Returns the result of latin1_upcase_first() on C<src>; title-casing the first
+character is implemented here as upper-casing it.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+static STRING*
+latin1_titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(latin1_titlecase_first)
+    return latin1_upcase_first(interp, src);
+}
+
+
+static STR_VTABLE Parrot_latin1_encoding = { /* function table for ISO-8859-1 strings */
+    0,            /* NOTE(review): slot meaning not visible in this file; see STR_VTABLE */
+    "iso-8859-1", /* presumably the encoding name -- confirm against STR_VTABLE */
+    NULL,         /* NOTE(review): slot meaning not visible in this file; see STR_VTABLE */
+    1, /* Max bytes per codepoint */
+
+    latin1_to_encoding, /* latin1_* entries are the static functions of this file */
+    latin1_chr,
+
+    fixed8_equal,       /* fixed8_* and encoding_* entries are not defined in this file */
+    fixed8_compare,
+    fixed8_index,
+    fixed8_rindex,
+    fixed8_hash,
+    latin1_validate,
+
+    fixed8_scan,
+    fixed8_ord,
+    fixed8_substr,
+
+    latin1_is_cclass,   /* character-class queries */
+    latin1_find_cclass,
+    latin1_find_not_cclass,
+
+    encoding_get_graphemes,
+    fixed8_compose,
+    encoding_decompose,
+
+    latin1_upcase,      /* case conversion */
+    latin1_downcase,
+    latin1_titlecase,
+    latin1_upcase_first,
+    latin1_downcase_first,
+    latin1_titlecase_first,
+
+    fixed8_iter_get,    /* string iterator operations */
+    fixed8_iter_skip,
+    fixed8_iter_get_and_advance,
+    fixed8_iter_set_and_advance,
+    fixed8_iter_set_position
+};
+
+STR_VTABLE *Parrot_latin1_encoding_ptr = &Parrot_latin1_encoding; /* assigned to dest->encoding in latin1_to_encoding */
+
+
+/*
+ * Local variables:
+ *   c-file-style: "parrot"
+ * End:
+ * vim: expandtab shiftwidth=4:
+ */

Added: trunk/src/string/encoding/latin1.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ trunk/src/string/encoding/latin1.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -0,0 +1,30 @@
+/* latin1.h
+ *  Copyright (C) 2004-2007, Parrot Foundation.
+ *  SVN Info
+ *     $Id$
+ *  Overview:
+ *     This is the header for the ISO-8859-1 (latin1) encoding functions
+ *  Data Structure and Algorithms:
+ *  History:
+ *  Notes:
+ *  References:
+ */
+
+#ifndef PARROT_ENCODING_LATIN1_H_GUARD
+#define PARROT_ENCODING_LATIN1_H_GUARD
+
+/* HEADERIZER BEGIN: src/string/encoding/latin1.c */
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+
+
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+/* HEADERIZER END: src/string/encoding/latin1.c */
+
+#endif /* PARROT_ENCODING_LATIN1_H_GUARD */
+
+/*
+ * Local variables:
+ *   c-file-style: "parrot"
+ * End:
+ * vim: expandtab shiftwidth=4:
+ */

Copied and modified: trunk/src/string/encoding/shared.c (from r48832, trunk/src/string/charset/unicode.c)
==============================================================================
--- trunk/src/string/charset/unicode.c	Tue Sep  7 22:20:33 2010	(r48832, copy source)
+++ trunk/src/string/encoding/shared.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -1,14 +1,18 @@
 /*
-Copyright (C) 2005-2010, Parrot Foundation.
+Copyright (C) 2004-2010, Parrot Foundation.
 $Id$
 
 =head1 NAME
 
-src/string/charset/unicode.c
+src/string/encoding/shared.c
 
 =head1 DESCRIPTION
 
-This file implements the charset functions for unicode data
+This file implements general encoding functions for strings.
+
+Functions starting with encoding_ work with any type of string.
+Functions starting with fixed8_ work with fixed8 strings.
+Functions starting with unicode_ work with unicode strings.
 
 =over 4
 
@@ -17,313 +21,171 @@
 */
 
 #include "parrot/parrot.h"
-#include "unicode.h"
-#include "ascii.h"
 #include "tables.h"
+#include "shared.h"
+
+#if PARROT_HAS_ICU
+#  include <unicode/ucnv.h>
+#  include <unicode/utypes.h>
+#  include <unicode/uchar.h>
+#  include <unicode/ustring.h>
+#  include <unicode/unorm.h>
+#endif
 
-/* HEADERIZER HFILE: src/string/charset/unicode.h */
+/* HEADERIZER HFILE: src/string/encoding/shared.h */
 
 /* HEADERIZER BEGIN: static */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-static INTVAL compare(PARROT_INTERP,
-    ARGIN(const STRING *lhs),
-    ARGIN(const STRING *rhs))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* compose(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static size_t compute_hash(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    size_t seed)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static INTVAL cs_rindex(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(const STRING *search_string),
-    SHIM(UINTVAL offset))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, SHIM(const STRING *src))
+static int u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
         __attribute__nonnull__(1);
 
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
+#define ASSERT_ARGS_u_iscclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+/* HEADERIZER END: static */
 
-static INTVAL find_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL find_not_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
+#define UNIMPL Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED, \
+    "unimpl fixed_8")
 
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_graphemes(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static INTVAL is_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
 
-PARROT_CANNOT_RETURN_NULL
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-        __attribute__nonnull__(1);
+/*
 
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
+=item C<INTVAL encoding_equal(PARROT_INTERP, const STRING *lhs, const STRING
+*rhs)>
 
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
+Compares two STRINGs, C<lhs> and C<rhs>. If STRING C<lhs> == C<rhs>,
+returns 1. If C<lhs> != C<rhs> returns 0.
 
-PARROT_CANNOT_RETURN_NULL
-static STRING* to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
+=cut
 
-static int u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
-        __attribute__nonnull__(1);
+*/
 
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+encoding_equal(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
+{
+    ASSERT_ARGS(encoding_equal)
+    String_iter l_iter, r_iter;
+    const UINTVAL len = STRING_length(lhs);
 
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
+    if (len != STRING_length(rhs))
+        return 0;
+    if (len == 0)
+        return 1;
+    if (lhs == rhs)
+        return 1;
+    if (lhs->hashval && rhs->hashval && lhs->hashval != rhs->hashval)
+        return 0;
 
-static UINTVAL validate(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-#define ASSERT_ARGS_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(lhs) \
-    , PARROT_ASSERT_ARG(rhs))
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_compute_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_cs_rindex __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_graphemes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_u_iscclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
+    STRING_ITER_INIT(interp, &l_iter);
+    STRING_ITER_INIT(interp, &r_iter);
 
-#ifdef EXCEPTION
-#  undef EXCEPTION
-#endif
+    while (l_iter.charpos < len) {
+        const UINTVAL cl = STRING_iter_get_and_advance(interp, lhs, &l_iter);
+        const UINTVAL cr = STRING_iter_get_and_advance(interp, rhs, &r_iter);
 
-#if PARROT_HAS_ICU
-#  include <unicode/ucnv.h>
-#  include <unicode/utypes.h>
-#  include <unicode/uchar.h>
-#  include <unicode/ustring.h>
-#  include <unicode/unorm.h>
-#endif
-#define EXCEPTION(err, str) \
-    Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
+        if (cl != cr)
+            return 0;
+    }
 
-#define UNIMPL EXCEPTION(EXCEPTION_UNIMPLEMENTED, "unimplemented unicode")
+    return 1;
+}
 
 
 /*
 
-=item C<static STRING * get_graphemes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
+=item C<INTVAL encoding_compare(PARROT_INTERP, const STRING *lhs, const STRING
+*rhs)>
 
-Gets the graphemes from STRING C<src> starting at C<offset>. Gets
-C<count> graphemes total.
+Compares two STRINGs, C<lhs> and C<rhs>. Returns -1 if C<lhs> < C<rhs>. Returns
+0 if C<lhs> = C<rhs>. Returns 1 if C<lhs> > C<rhs>.
 
 =cut
 
 */
 
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_graphemes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+encoding_compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
 {
-    ASSERT_ARGS(get_graphemes)
-    return ENCODING_GET_CODEPOINTS(interp, src, offset, count);
-}
-
+    ASSERT_ARGS(encoding_compare)
+    String_iter l_iter, r_iter;
+    UINTVAL min_len, l_len, r_len;
 
-/*
+    STRING_ITER_INIT(interp, &l_iter);
+    STRING_ITER_INIT(interp, &r_iter);
 
-=item C<static STRING* to_charset(PARROT_INTERP, const STRING *src)>
+    l_len = lhs->strlen;
+    r_len = rhs->strlen;
 
-Converts input STRING C<src> to unicode STRING C<dest>.
+    min_len = l_len > r_len ? r_len : l_len;
 
-=cut
+    while (l_iter.charpos < min_len) {
+        const UINTVAL cl = STRING_iter_get_and_advance(interp, lhs, &l_iter);
+        const UINTVAL cr = STRING_iter_get_and_advance(interp, rhs, &r_iter);
 
-*/
+        if (cl != cr)
+            return cl < cr ? -1 : 1;
+    }
 
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_charset)
-    const charset_converter_t conversion_func =
-            Parrot_find_charset_converter(interp, src->charset,
-                    Parrot_unicode_charset_ptr);
+    if (l_len < r_len)
+        return -1;
 
-    if (conversion_func)
-         return conversion_func(interp, src);
+    if (l_len > r_len)
+        return 1;
 
-    return Parrot_utf8_encoding_ptr->to_encoding(interp, src);
+    return 0;
 }
 
 
 /*
 
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-If Parrot is built with ICU, composes the STRING C<src>. Attempts to
-denormalize the STRING into the ICU default, NFC.
+=item C<INTVAL encoding_index(PARROT_INTERP, const STRING *src, const STRING
+*search, UINTVAL offs)>
 
-If Parrot does not have ICU included, throws an exception.
+Searches for the first instance of STRING C<search> in STRING C<src>.
+Returns the position where the substring is found if it is indeed found.
+Returns -1 otherwise. Operates on different types of strings, not just
+ASCII.
 
 =cut
 
 */
 
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-compose(PARROT_INTERP, ARGIN(const STRING *src))
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+encoding_index(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search),
+    UINTVAL offs)
 {
-    ASSERT_ARGS(compose)
-#if PARROT_HAS_ICU
-    STRING *dest;
-    int src_len, dest_len;
-    UErrorCode err;
-    /*
-       U_STABLE int32_t U_EXPORT2
-       unorm_normalize(const UChar *source, int32_t sourceLength,
-       UNormalizationMode mode, int32_t options,
-       UChar *result, int32_t resultLength,
-       UErrorCode *status);
-       */
-    dest_len = src_len = src->strlen;
-    dest     = Parrot_str_new_init(interp, NULL, src_len * sizeof (UChar),
-            src->encoding, src->charset, 0);
-
-    err      = U_ZERO_ERROR;
-    dest_len = unorm_normalize((UChar *)src->strstart, src_len,
-            UNORM_DEFAULT,      /* default is NFC */
-            0,                  /* options 0 default - no specific icu
-                                 * version */
-            (UChar *)dest->strstart, dest_len, &err);
+    ASSERT_ARGS(encoding_index)
+    String_iter start, end;
 
-    dest->bufused = dest_len * sizeof (UChar);
+    STRING_ITER_INIT(interp, &start);
+    STRING_iter_set_position(interp, src, &start, offs);
 
-    if (!U_SUCCESS(err)) {
-        err = U_ZERO_ERROR;
-        Parrot_gc_reallocate_string_storage(interp, dest, dest->bufused);
-        dest_len = unorm_normalize((UChar *)src->strstart, src_len,
-                UNORM_DEFAULT,      /* default is NFC */
-                0,                  /* options 0 default - no specific
-                                     * icu version */
-                (UChar *)dest->strstart, dest_len, &err);
-        PARROT_ASSERT(U_SUCCESS(err));
-        dest->bufused = dest_len * sizeof (UChar);
-    }
-    dest->strlen = dest_len;
-    return dest;
-#else
-    UNUSED(src);
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
+    return Parrot_str_iter_index(interp, src, &start, &end, search);
 }
 
 
 /*
 
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
+=item C<INTVAL encoding_rindex(PARROT_INTERP, const STRING *src, const STRING
+*search_string, UINTVAL offset)>
 
-Decompose function for unicode charset. This function is not yet implemented.
+Finds the last index of substring C<search_string> in STRING C<src>,
+starting from C<offset>. Not implemented.
 
 =cut
 
 */
 
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, SHIM(const STRING *src))
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+encoding_rindex(PARROT_INTERP, SHIM(const STRING *src),
+        SHIM(const STRING *search_string), SHIM(UINTVAL offset))
 {
-    ASSERT_ARGS(decompose)
+    ASSERT_ARGS(encoding_rindex)
     /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
     UNIMPL;
 }
@@ -331,380 +193,796 @@
 
 /*
 
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to all upper-case graphemes, for those characters
-which support upper-case versions.
+=item C<size_t encoding_hash(PARROT_INTERP, const STRING *src, size_t seed)>
 
-Throws an exception if ICU is not installed.
+Computes the hash of the given STRING C<src> with starting seed value C<seed>.
 
 =cut
 
 */
 
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, ARGIN(const STRING *src))
+PARROT_WARN_UNUSED_RESULT
+size_t
+encoding_hash(PARROT_INTERP, ARGIN(const STRING *src), size_t seed)
 {
-    ASSERT_ARGS(upcase)
-#if PARROT_HAS_ICU
-    UErrorCode err;
-    int dest_len, src_len, needed;
-    STRING *res;
-#endif
-
-    if (src->bufused  == src->strlen
-            && src->encoding == Parrot_utf8_encoding_ptr) {
-        return Parrot_ascii_charset_ptr->upcase(interp, src);
-    }
-
-#if PARROT_HAS_ICU
-    /* to_encoding will allocate new string */
-    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
-    /*
-       U_CAPI int32_t U_EXPORT2
-       u_strToUpper(UChar *dest, int32_t destCapacity,
-       const UChar *src, int32_t srcLength,
-       const char *locale,
-       UErrorCode *pErrorCode);
-       */
-    err = U_ZERO_ERROR;
-
-    /* use all available space - see below XXX */
-    /* TODO downcase, titlecase too */
-    dest_len = Buffer_buflen(res) / sizeof (UChar);
-    src_len  = res->bufused       / sizeof (UChar);
-
-    /*
-     * XXX troubles:
-     *   t/op/string_cs_45  upcase unicode:"\u01f0"
-     *   this creates \u004a \u030c J+NON-SPACING HACEK
-     *   the string needs resizing, *if* the src buffer is
-     *   too short. *But* with icu 3.2/3.4 the src string is
-     *   overwritten with partial result, despite the icu docs sayeth:
-     *
-     *      The source string and the destination buffer
-     *      are allowed to overlap.
-     *
-     *  Workaround:  'preflighting' returns needed length
-     *  Alternative: forget about inplace operation - create new result
-     *
-     *  TODO downcase, titlecase
-     */
-    needed = u_strToUpper(NULL, 0,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* locale = default */
-            &err);
-
-    if (needed > dest_len) {
-        Parrot_gc_reallocate_string_storage(interp, res, needed * sizeof (UChar));
-        dest_len = needed;
-    }
+    ASSERT_ARGS(encoding_hash)
+    String_iter iter;
+    size_t      hashval = seed;
 
-    err      = U_ZERO_ERROR;
-    dest_len = u_strToUpper((UChar *)res->strstart, dest_len,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* locale = default */
-            &err);
-    PARROT_ASSERT(U_SUCCESS(err));
-    res->bufused = dest_len * sizeof (UChar);
+    STRING_ITER_INIT(interp, &iter);
 
-    /* downgrade if possible */
-    if (dest_len == (int)src->strlen)
-        res->encoding = Parrot_ucs2_encoding_ptr;
-    else {
-        /* string is likely still ucs2 if it was earlier
-         * but strlen changed due to combining char
-         */
-        res->strlen = dest_len;
+    while (iter.charpos < src->strlen) {
+        const UINTVAL c = STRING_iter_get_and_advance(interp, src, &iter);
+        hashval += hashval << 5;
+        hashval += c;
     }
 
-    return res;
-
-#else
-    UNUSED(src);
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
+    return hashval;
 }
 
 
 /*
 
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Converts all graphemes to lower-case, for those graphemes which have cases.
+=item C<static int u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)>
 
-Throws an exception if ICU is not installed.
+Returns Boolean.
 
 =cut
 
 */
 
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, ARGIN(const STRING *src))
+static int
+u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
 {
-    ASSERT_ARGS(downcase)
-#if PARROT_HAS_ICU
-    UErrorCode err;
-    int dest_len, src_len;
-    STRING *res;
-#endif
-
-    if (src->bufused  == src->strlen
-            && src->encoding == Parrot_utf8_encoding_ptr) {
-        return Parrot_ascii_charset_ptr->downcase(interp, src);
-    }
-
+    ASSERT_ARGS(u_iscclass)
 #if PARROT_HAS_ICU
-    /* to_encoding will allocate new string */
-    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
-    /*
-U_CAPI int32_t U_EXPORT2
-u_strToLower(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode);
+    UNUSED(interp);
+            /* XXX which one
+               return u_charDigitValue(codepoint);
+               */
+    if ((flags & enum_cclass_uppercase)    && u_isupper(codepoint))  return 1;
+    if ((flags & enum_cclass_lowercase)    && u_islower(codepoint))  return 1;
+    if ((flags & enum_cclass_alphabetic)   && u_isalpha(codepoint))  return 1;
+    if ((flags & enum_cclass_numeric)      && u_isdigit(codepoint))  return 1;
+    if ((flags & enum_cclass_hexadecimal)  && u_isxdigit(codepoint)) return 1;
+    if ((flags & enum_cclass_whitespace)   && u_isspace(codepoint))  return 1;
+    if ((flags & enum_cclass_printing)     && u_isprint(codepoint))  return 1;
+    if ((flags & enum_cclass_graphical)    && u_isgraph(codepoint))  return 1;
+    if ((flags & enum_cclass_blank)        && u_isblank(codepoint))  return 1;
+    if ((flags & enum_cclass_control)      && u_iscntrl(codepoint))  return 1;
+    if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint))  return 1;
+    if ((flags & enum_cclass_word)         &&
+        (u_isalnum(codepoint) || codepoint == '_'))                  return 1;
+
+    return 0;
+#else
+    if (codepoint < 256)
+        return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
+
+    if (flags == enum_cclass_any)
+        return 1;
+
+    /* All codepoints from u+0100 to u+02af are alphabetic, so we
+     * cheat on the WORD and ALPHABETIC properties to include these
+     * (and incorrectly exclude all others).  This is a stopgap until
+     * ICU is everywhere, or we have better non-ICU unicode support. */
+    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
+        return (codepoint < 0x2b0);
+
+    if (flags & enum_cclass_whitespace) {
+        /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
+        switch (codepoint) {
+          case 0x1680: case 0x180e: case 0x2000: case 0x2001:
+          case 0x2002: case 0x2003: case 0x2004: case 0x2005:
+          case 0x2006: case 0x2007: case 0x2008: case 0x2009:
+          case 0x200a: case 0x2028: case 0x2029: case 0x202f:
+          case 0x205f: case 0x3000:
+            return 1;
+          default:
+            break;
+        }
+    }
+
+    if (flags & enum_cclass_numeric) {
+        /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
+        if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
+        if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
+        if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
+        if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
+        if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
+        if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
+        if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
+        if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
+        if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
+        if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
+        if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
+        if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
+        if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
+        if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
+        if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
+        if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
+        if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
+        if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
+        if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
+        if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
+        if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
+        if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
+    }
+
+    if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline))
+        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+            "no ICU lib loaded");
+
+    return 0;
+#endif
+}
+
+
+/*
+
+=item C<UINTVAL encoding_scan(PARROT_INTERP, const STRING *src)>
+
+Returns the number of codepoints in string C<src>.
+
+=cut
+
+*/
+
+UINTVAL
+encoding_scan(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(encoding_scan)
+    String_iter iter;
+    /*
+     * this is used to initially calculate src->strlen,
+     * therefore we must scan the whole string
      */
-    err      = U_ZERO_ERROR;
-    src_len  = res->bufused / sizeof (UChar);
-    dest_len = u_strToLower((UChar *)res->strstart, src_len,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* locale = default */
-            &err);
-    res->bufused = dest_len * sizeof (UChar);
+    STRING_ITER_INIT(interp, &iter);
+    while (iter.bytepos < src->bufused)
+        STRING_iter_get_and_advance(interp, src, &iter);
+    return iter.charpos;
+}
 
-    if (!U_SUCCESS(err)) {
-        err = U_ZERO_ERROR;
-        Parrot_gc_reallocate_string_storage(interp, res, res->bufused);
-        dest_len = u_strToLower((UChar *)res->strstart, dest_len,
-                (UChar *)res->strstart, src_len,
-                NULL,       /* locale = default */
-                &err);
-        PARROT_ASSERT(U_SUCCESS(err));
+
+/*
+
+=item C<STRING * encoding_substr(PARROT_INTERP, const STRING *src, UINTVAL
+offset, UINTVAL count)>
+
+Returns a STRING covering C<count> codepoints of C<src>, beginning at
+codepoint position C<offset>. The result starts out as the copy made by
+C<Parrot_str_copy>; its start pointer, byte length and codepoint length are
+then adjusted, and its cached hash value is reset to 0.
+
+This function itself does not check C<offset> or C<count> against the length
+of C<src>.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+STRING *
+encoding_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(encoding_substr)
+
+    STRING * const result = Parrot_str_copy(interp, src);
+    String_iter    pos;
+    UINTVAL        first_byte;
+
+    STRING_ITER_INIT(interp, &pos);
+
+    /* Find the byte offset at which the substring begins. A zero offset
+     * needs no seek because a freshly initialised iterator is used as-is. */
+    if (offset != 0)
+        STRING_iter_set_position(interp, src, &pos, offset);
+
+    first_byte = pos.bytepos;
+
+    /* Find the byte offset just past the last requested codepoint. With a
+     * zero count the iterator stays put, giving a byte length of 0. */
+    if (count != 0)
+        STRING_iter_set_position(interp, src, &pos, offset + count);
+
+    /* Narrow the copy down to the selected byte range. */
+    result->strstart = (char *)result->strstart + first_byte;
+    result->bufused  = pos.bytepos - first_byte;
+    result->strlen   = count;
+
+    /* Presumably 0 means "no hash cached yet", so the hash carried over
+     * from src is discarded here -- TODO confirm. */
+    result->hashval  = 0;
+
+    return result;
+}
+
+
+/*
+
+=item C<INTVAL encoding_is_cclass(PARROT_INTERP, INTVAL flags, const STRING
+*src, UINTVAL offset)>
+
+Tests the codepoint at position C<offset> of C<src> against the character
+class bits in C<flags>. Returns 1 on a match and 0 otherwise; an C<offset>
+at or beyond the end of the string also yields 0.
+
+Codepoints below 256 are looked up in C<Parrot_iso_8859_1_typetable>, all
+others are passed to C<u_iscclass>.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+encoding_is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
+{
+    ASSERT_ARGS(encoding_is_cclass)
+    INTVAL in_class = 0;
+
+    /* Positions past the end of the string never match. */
+    if (offset < src->strlen) {
+        const UINTVAL cp = STRING_ord(interp, src, offset);
+
+        if (cp < 256)
+            in_class = (Parrot_iso_8859_1_typetable[cp] & flags) != 0;
+        else
+            in_class = u_iscclass(interp, cp, flags) != 0;
+    }
+
+    return in_class;
+}
+
+
+/*
+
+=item C<INTVAL encoding_find_cclass(PARROT_INTERP, INTVAL flags, const STRING
+*src, UINTVAL offset, UINTVAL count)>
+
+Find a character in the given character class.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+encoding_find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
+        UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(encoding_find_cclass)
+    String_iter iter;
+    UINTVAL     codepoint;
+    UINTVAL     end = offset + count;
+
+    STRING_ITER_INIT(interp, &iter);
+    STRING_iter_set_position(interp, src, &iter, offset);
+
+    end = src->strlen < end ? src->strlen : end;
+
+    while (iter.charpos < end) {
+        codepoint = STRING_iter_get_and_advance(interp, src, &iter);
+        if (codepoint >= 256) {
+            if (u_iscclass(interp, codepoint, flags))
+                    return iter.charpos - 1;
+        }
+        else {
+            if (Parrot_iso_8859_1_typetable[codepoint] & flags)
+                return iter.charpos - 1;
+        }
     }
 
-    /* downgrade if possible */
-    if (dest_len == (int)res->strlen)
-        res->encoding = Parrot_ucs2_encoding_ptr;
+    return end;
+}
 
-    return res;
 
-#else
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
+/*
+
+=item C<INTVAL encoding_find_not_cclass(PARROT_INTERP, INTVAL flags, const
+STRING *src, UINTVAL offset, UINTVAL count)>
+
+Searches C<src> for the first codepoint that is not in the character class
+C<flags>, starting at codepoint position C<offset> and looking at no more
+than C<count> codepoints. Returns the position of that codepoint.
+
+If every examined codepoint is in the class, or if C<flags> is
+C<enum_cclass_any>, returns the end of the searched range, that is
+C<offset + count> clipped to the length of C<src>. If C<offset> lies beyond
+the end of C<src>, returns C<offset + count> unclipped.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+encoding_find_not_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
+        UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(encoding_find_not_cclass)
+    String_iter iter;
+    UINTVAL     limit = offset + count;
+
+    if (offset > src->strlen) {
+        /* XXX: Throw in this case? */
+        return offset + count;
+    }
+
+    STRING_ITER_INIT(interp, &iter);
+
+    if (offset)
+        STRING_iter_set_position(interp, src, &iter, offset);
+
+    /* Never look past the end of the string. */
+    if (limit > src->strlen)
+        limit = src->strlen;
+
+    /* Every codepoint is in the "any" class, so nothing can be found. */
+    if (flags == enum_cclass_any)
+        return limit;
+
+    while (iter.charpos < limit) {
+        const UINTVAL cp = STRING_iter_get_and_advance(interp, src, &iter);
+
+        /* The iterator has already moved on, hence the "- 1" below. */
+        if (cp < 256) {
+            if ((Parrot_iso_8859_1_typetable[cp] & flags) == 0)
+                return iter.charpos - 1;
+        }
+        else {
+            int bit;
+
+            /* Each selected class bit is handed to u_iscclass on its own.
+             * NOTE(review): this reports the codepoint as soon as it fails
+             * any one selected class, whereas the table lookup above
+             * reports it only when it matches none of them. The two agree
+             * for single-bit flags -- confirm the intent for combined
+             * flags. */
+            for (bit = enum_cclass_uppercase; bit <= enum_cclass_word; bit <<= 1) {
+                if (!(bit & flags))
+                    continue;
+
+                if (!u_iscclass(interp, cp, bit))
+                    return iter.charpos - 1;
+            }
+        }
+    }
+
+    return limit;
+}
+
+
+/*
+
+=item C<STRING * encoding_get_graphemes(PARROT_INTERP, const STRING *src,
+UINTVAL offset, UINTVAL count)>
+
+Retrieves C<count> graphemes of the STRING C<src>, starting at C<offset>.
+For now this simply forwards to C<STRING_substr>, so the result is made of
+codepoints rather than graphemes.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+PARROT_WARN_UNUSED_RESULT
+STRING *
+encoding_get_graphemes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+{
+    ASSERT_ARGS(encoding_get_graphemes)
+    STRING * const graphemes = STRING_substr(interp, src, offset, count);
+
+    return graphemes;
+}
+
+
+/*
+
+=item C<STRING* encoding_decompose(PARROT_INTERP, const STRING *src)>
+
+Decompose function. This function is not yet implemented: C<src> is ignored
+and the body only invokes the C<UNIMPL> macro, which throws an
+C<EXCEPTION_UNIMPLEMENTED> exception.
+
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+STRING*
+encoding_decompose(PARROT_INTERP, SHIM(const STRING *src))
+{
+    ASSERT_ARGS(encoding_decompose)
+    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
+    /* There is no return statement; presumably the throw never returns --
+     * TODO confirm. */
+    UNIMPL;
+}
+
+
+/*
+
+=item C<INTVAL fixed8_equal(PARROT_INTERP, const STRING *lhs, const STRING
+*rhs)>
+
+Tests a fixed8 string C<lhs> for equality with another string C<rhs>, which
+may use any encoding. Returns 1 if both hold the same sequence of codepoints
+and 0 otherwise.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+fixed8_equal(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
+{
+    ASSERT_ARGS(fixed8_equal)
+    const UINTVAL n = STRING_length(lhs);
+
+    /* Cheap answers first: differing lengths, empty strings, the very same
+     * string, or two cached hash values that disagree. */
+    if (STRING_length(rhs) != n)
+        return 0;
+    if (n == 0 || lhs == rhs)
+        return 1;
+    if (lhs->hashval && rhs->hashval && lhs->hashval != rhs->hashval)
+        return 0;
+
+    if (STRING_max_bytes_per_codepoint(rhs) != 1) {
+        /* rhs may need several bytes per codepoint, so it is decoded with
+         * an iterator while lhs is read directly, one byte per codepoint. */
+        const unsigned char * const l_bytes = (unsigned char *)lhs->strstart;
+        String_iter r_iter;
+
+        STRING_ITER_INIT(interp, &r_iter);
+
+        while (r_iter.charpos < n) {
+            /* Read the lhs byte before the iterator advances. */
+            const UINTVAL cl = l_bytes[r_iter.charpos];
+            const UINTVAL cr = STRING_iter_get_and_advance(interp, rhs, &r_iter);
+
+            if (cl != cr)
+                return 0;
+        }
+
+        return 1;
+    }
+
+    /* Both sides use one byte per codepoint: compare the raw buffers. */
+    return memcmp(lhs->strstart, rhs->strstart, n) == 0;
+}
+
+
+/*
+
+=item C<INTVAL fixed8_compare(PARROT_INTERP, const STRING *lhs, const STRING
+*rhs)>
+
+Compares a fixed8 string C<lhs> with another string C<rhs>, codepoint by
+codepoint. Returns -1 if C<lhs> < C<rhs>, 0 if C<lhs> == C<rhs> and 1 if
+C<lhs> > C<rhs>. If one string is a prefix of the other, the shorter one
+is the smaller.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+fixed8_compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
+{
+    ASSERT_ARGS(fixed8_compare)
+    const UINTVAL l_len  = lhs->strlen;
+    const UINTVAL r_len  = rhs->strlen;
+    const UINTVAL common = l_len < r_len ? l_len : r_len;
+
+    if (STRING_max_bytes_per_codepoint(rhs) != 1) {
+        /* rhs may need several bytes per codepoint, so it is decoded with
+         * an iterator while lhs is read directly, one byte per codepoint. */
+        const unsigned char * const l_bytes = (unsigned char *)lhs->strstart;
+        String_iter r_iter;
+
+        STRING_ITER_INIT(interp, &r_iter);
+
+        while (r_iter.charpos < common) {
+            /* Read the lhs byte before the iterator advances. */
+            const UINTVAL cl = l_bytes[r_iter.charpos];
+            const UINTVAL cr = STRING_iter_get_and_advance(interp, rhs, &r_iter);
+
+            if (cl != cr)
+                return cl < cr ? -1 : 1;
+        }
+    }
+    else {
+        /* Both sides use one byte per codepoint: compare the raw buffers
+         * and normalise the result of memcmp to -1 or 1. */
+        const int diff = memcmp(lhs->strstart, rhs->strstart, common);
+
+        if (diff != 0)
+            return diff < 0 ? -1 : 1;
+    }
+
+    /* The common prefix is identical, so the lengths decide. */
+    if (l_len == r_len)
+        return 0;
+
+    return l_len < r_len ? -1 : 1;
+}
+
+
+/*
+
+=item C<INTVAL fixed8_index(PARROT_INTERP, const STRING *src, const STRING
+*search_string, UINTVAL offset)>
+
+Searches for STRING C<search_string> in the fixed8 STRING C<src>, starting
+at C<offset>, and returns the result of the search routine that is used.
+
+If C<search_string> uses one byte per codepoint as well, the search is done
+by C<Parrot_byte_index>; otherwise it is handed to C<encoding_index>.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+fixed8_index(PARROT_INTERP, ARGIN(const STRING *src),
+        ARGIN(const STRING *search_string), UINTVAL offset)
+{
+    ASSERT_ARGS(fixed8_index)
+
+    if (STRING_max_bytes_per_codepoint(search_string) == 1) {
+        /* Byte-wise search, valid only when src is single-byte too. */
+        PARROT_ASSERT(STRING_max_bytes_per_codepoint(src) == 1);
+        return Parrot_byte_index(interp, src, search_string, offset);
+    }
+
+    /* Mixed encodings: use the generic, iterator based search. */
+    return encoding_index(interp, src, search_string, offset);
+}
+
+
+/*
+
+=item C<INTVAL fixed8_rindex(PARROT_INTERP, const STRING *src, const STRING
+*search_string, UINTVAL offset)>
+
+Searches for the last instance of STRING C<search_string> in STRING
+C<src>. Starts searching at C<offset>.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL
+fixed8_rindex(PARROT_INTERP, ARGIN(const STRING *src),
+        ARGIN(const STRING *search_string), UINTVAL offset)
+{
+    ASSERT_ARGS(fixed8_rindex)
+    INTVAL retval;
+
+    if (STRING_max_bytes_per_codepoint(search_string) != 1) /* multi-byte needle: rejected */
+        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
+            "Cross-charset rindex not supported");
+
+    PARROT_ASSERT(STRING_max_bytes_per_codepoint(src) == 1);
+    retval = Parrot_byte_rindex(interp, src, search_string, offset); /* byte-wise search */
+    return retval;
+}
+
+
+/*
+
+=item C<size_t fixed8_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
+
+Returns the hashed value of the string, given a seed in hashval.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+size_t
+fixed8_hash(SHIM_INTERP, ARGIN(const STRING *s), size_t hashval)
+{
+    ASSERT_ARGS(fixed8_hash)
+    const unsigned char *pos = (const unsigned char *)s->strstart;
+    UINTVAL        len = s->strlen; /* number of bytes left to fold in */
+
+    while (len--) {
+        hashval += hashval << 5; /* hashval = hashval * 33 */
+        hashval += *(pos++); /* then add the next byte */
+    }
+
+    return hashval;
+}
+
+
+/*
+
+=item C<UINTVAL fixed8_scan(PARROT_INTERP, const STRING *src)>
+
+Returns the number of codepoints in string C<src>. No scanning needed
+for fixed encodings.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+UINTVAL
+fixed8_scan(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(fixed8_scan)
+    return src->bufused; /* bytes used; taken as the codepoint count for fixed8 */
+}
+
+
+/*
+
+=item C<UINTVAL fixed8_ord(PARROT_INTERP, const STRING *src, UINTVAL offset)>
+
+Codepoints are bytes: returns the byte at C<offset>, or 0 if out of range.
+
+=cut
+
+*/
+
+PARROT_WARN_UNUSED_RESULT
+UINTVAL
+fixed8_ord(PARROT_INTERP, ARGIN(const STRING *src),
+        UINTVAL offset)
+{
+    ASSERT_ARGS(fixed8_ord)
+    const unsigned char * const buf = (unsigned char *)src->strstart;
+
+    if (offset >= src->bufused) { /* offset lies past the used part of the buffer */
+/*        Parrot_ex_throw_from_c_args(interp, NULL, 0,
+                "fixed8_ord past the end of the buffer (%i of %i)",
+                offset, src->bufused); */
+        return 0; /* the throw above is disabled, so out-of-range reads yield 0 */
+    }
+
+    return buf[offset];
 }
 
 
 /*
 
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
+=item C<STRING * fixed8_substr(PARROT_INTERP, const STRING *src, UINTVAL offset,
+UINTVAL count)>
 
-Converts the string to title case, for those characters which support cases.
-
-Throws an exception if ICU is not installed.
+Returns the codepoints in string C<src> at position C<offset> and length
+C<count>.
 
 =cut
 
 */
 
+PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, ARGIN(const STRING *src))
+STRING *
+fixed8_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
 {
-    ASSERT_ARGS(titlecase)
-#if PARROT_HAS_ICU
+    ASSERT_ARGS(fixed8_substr)
+    STRING * const return_string = Parrot_str_copy(interp, src);
 
-    UErrorCode err;
-    int dest_len, src_len;
-    STRING *res;
+    return_string->encoding = src->encoding;
 
-    if (src->bufused  == src->strlen
-    &&  src->encoding == Parrot_utf8_encoding_ptr) {
-        return Parrot_ascii_charset_ptr->titlecase(interp, src);
-    }
+    return_string->strstart = (char *)return_string->strstart + offset ;
+    return_string->bufused = count;
 
-    /* to_encoding will allocate new string */
-    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
+    return_string->strlen = count;
+    return_string->hashval = 0;
 
-    /*
-U_CAPI int32_t U_EXPORT2
-u_strToTitle(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             UBreakIterator *titleIter,
-             const char *locale,
-             UErrorCode *pErrorCode);
-     */
+    return return_string;
+}
 
-    err      = U_ZERO_ERROR;
-    src_len  = res->bufused / sizeof (UChar);
-    dest_len = u_strToTitle((UChar *)res->strstart, src_len,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* default titleiter */
-            NULL,       /* locale = default */
-            &err);
-    res->bufused = dest_len * sizeof (UChar);
 
-    if (!U_SUCCESS(err)) {
-        err = U_ZERO_ERROR;
-        Parrot_gc_reallocate_string_storage(interp, res, res->bufused);
-        dest_len = u_strToTitle((UChar *)res->strstart, dest_len,
-                (UChar *)res->strstart, src_len,
-                NULL, NULL,
-                &err);
-        PARROT_ASSERT(U_SUCCESS(err));
-    }
+/*
 
-    /* downgrade if possible */
-    if (dest_len == (int)res->strlen)
-        res->encoding = Parrot_ucs2_encoding_ptr;
+=item C<STRING* fixed8_compose(PARROT_INTERP, const STRING *src)>
 
-    return res;
+Can't compose ASCII strings, so performs a string copy on it and
+returns the new string.
 
-#else
-    UNUSED(src);
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
+=cut
+
+*/
+
+PARROT_CANNOT_RETURN_NULL
+STRING*
+fixed8_compose(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(fixed8_compose)
+
+    return Parrot_str_copy(interp, src); /* no composition step: just hand back a copy */
 }
 
 
 /*
 
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
+=item C<UINTVAL fixed8_iter_get(PARROT_INTERP, const STRING *str, const
+String_iter *iter, INTVAL offset)>
 
-Converts the first grapheme in the STRING C<src> to uppercase, if the
-grapheme supports it. Not implemented.
+Get the character at C<iter> plus C<offset>.
 
 =cut
 
 */
 
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, SHIM(const STRING *src))
+UINTVAL
+fixed8_iter_get(PARROT_INTERP,
+    ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset)
 {
-    ASSERT_ARGS(upcase_first)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
+    ASSERT_ARGS(fixed8_iter_get)
+    return fixed8_ord(interp, str, iter->charpos + offset);
 }
 
 
 /*
 
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
+=item C<void fixed8_iter_skip(PARROT_INTERP, const STRING *str, String_iter
+*iter, INTVAL skip)>
 
-Converts the first grapheme in the STRING C<src> to lower-case, if
-the grapheme supports it. Not implemented
+Moves the string iterator C<iter> by C<skip> characters.
 
 =cut
 
 */
 
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, SHIM(const STRING *src))
+void
+fixed8_iter_skip(SHIM_INTERP,
+    ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip)
 {
-    ASSERT_ARGS(downcase_first)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
+    ASSERT_ARGS(fixed8_iter_skip)
+    iter->bytepos += skip;
+    iter->charpos += skip;
+    PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str));
 }
 
 
 /*
 
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
+=item C<UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, const STRING *str,
+String_iter *iter)>
 
-Converts the first grapheme in STRING C<src> to title case, if the
-string supports it. Not implemented.
+Returns the codepoint at string iterator C<iter> and moves to the next one.
 
 =cut
 
 */
 
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
+UINTVAL
+fixed8_iter_get_and_advance(PARROT_INTERP,
+    ARGIN(const STRING *str), ARGMOD(String_iter *iter))
 {
-    ASSERT_ARGS(titlecase_first)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
+    ASSERT_ARGS(fixed8_iter_get_and_advance)
+    const UINTVAL c = fixed8_ord(interp, str, iter->charpos++);
+    iter->bytepos++;
+    return c;
 }
 
 
 /*
 
-=item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
+=item C<void fixed8_iter_set_and_advance(PARROT_INTERP, STRING *str, String_iter
+*iter, UINTVAL c)>
 
-Compares two STRINGs, C<lhs> and C<rhs>. Returns -1 if C<lhs> < C<rhs>. Returns
-0 if C<lhs> = C<rhs>. Returns 1 if C<lhs> > C<rhs>.
+With the string iterator C<iter>, stores the codepoint C<c> at the current
+position and advances to the next position in the string.
 
 =cut
 
 */
 
-static INTVAL
-compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
+void
+fixed8_iter_set_and_advance(PARROT_INTERP,
+    ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c)
 {
-    ASSERT_ARGS(compare)
-    String_iter l_iter, r_iter;
-    UINTVAL min_len, l_len, r_len;
-
-    /* TODO make optimized equal - strings are equal length then already */
-    STRING_ITER_INIT(interp, &l_iter);
-    STRING_ITER_INIT(interp, &r_iter);
+    ASSERT_ARGS(fixed8_iter_set_and_advance)
+    unsigned char *buf = (unsigned char *)str->strstart;
+    buf[iter->charpos++] = c;
+    iter->bytepos++;
+}
 
-    l_len = lhs->strlen;
-    r_len = rhs->strlen;
 
-    min_len = l_len > r_len ? r_len : l_len;
+/*
 
-    while (l_iter.charpos < min_len) {
-        const UINTVAL cl = STRING_ITER_GET_AND_ADVANCE(interp, lhs, &l_iter);
-        const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &r_iter);
+=item C<void fixed8_iter_set_position(PARROT_INTERP, const STRING *str,
+String_iter *iter, UINTVAL pos)>
 
-        if (cl != cr)
-            return cl < cr ? -1 : 1;
-    }
+Moves the string iterator C<iter> to the position C<pos> in the string.
 
-    if (l_len < r_len)
-        return -1;
+=cut
 
-    if (l_len > r_len)
-        return 1;
+*/
 
-    return 0;
+void
+fixed8_iter_set_position(SHIM_INTERP,
+    ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos)
+{
+    ASSERT_ARGS(fixed8_iter_set_position)
+    iter->bytepos = iter->charpos = pos;
+    PARROT_ASSERT(pos <= Buffer_buflen(str));
 }
 
 
 /*
 
-=item C<static INTVAL cs_rindex(PARROT_INTERP, const STRING *src, const STRING
-*search_string, UINTVAL offset)>
+=item C<STRING * unicode_chr(PARROT_INTERP, UINTVAL codepoint)>
 
-Finds the last index of substring C<search_string> in STRING C<src>,
-starting from C<offset>. Not implemented.
+Returns a one-codepoint string for the given codepoint.
 
 =cut
 
 */
 
-static INTVAL
-cs_rindex(PARROT_INTERP, SHIM(const STRING *src),
-        SHIM(const STRING *search_string), SHIM(UINTVAL offset))
+PARROT_CANNOT_RETURN_NULL
+STRING *
+unicode_chr(PARROT_INTERP, UINTVAL codepoint)
 {
-    ASSERT_ARGS(cs_rindex)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
+    ASSERT_ARGS(unicode_chr)
+    String_iter    iter;
+    STRING * const dest = string_make(interp, "", 1, "unicode", 0);
+
+    dest->strlen = 1;
+
+    STRING_ITER_INIT(interp, &iter);
+    STRING_iter_set_and_advance(interp, dest, &iter, codepoint);
+    dest->bufused = iter.bytepos;
+
+    return dest;
 }
 
 
 /*
 
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
+=item C<UINTVAL unicode_validate(PARROT_INTERP, const STRING *src)>
 
 Returns 1 if the STRING C<src> is a valid unicode string, returns 0 otherwise.
 
@@ -712,16 +990,16 @@
 
 */
 
-static UINTVAL
-validate(PARROT_INTERP, ARGIN(const STRING *src))
+UINTVAL
+unicode_validate(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(validate)
+    ASSERT_ARGS(unicode_validate)
     String_iter iter;
-    const INTVAL length = Parrot_str_length(interp, src);
+    const UINTVAL length = Parrot_str_length(interp, src);
 
     STRING_ITER_INIT(interp, &iter);
     while (iter.charpos < length) {
-        const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
+        const UINTVAL codepoint = STRING_iter_get_and_advance(interp, src, &iter);
         /* Check for Unicode non-characters */
         if (codepoint >= 0xfdd0
         && (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe)
@@ -735,335 +1013,369 @@
 
 /*
 
-=item C<static int u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)>
+=item C<STRING* unicode_compose(PARROT_INTERP, const STRING *src)>
 
-Returns Boolean.
+If Parrot is built with ICU, composes the STRING C<src>. Attempts to
+normalize the STRING into the ICU default, NFC.
+
+If Parrot does not have ICU included, throws an exception.
 
 =cut
 
 */
 
-static int
-u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
+PARROT_CANNOT_RETURN_NULL
+STRING*
+unicode_compose(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(u_iscclass)
+    ASSERT_ARGS(unicode_compose)
 #if PARROT_HAS_ICU
-    UNUSED(interp);
-            /* XXX which one
-               return u_charDigitValue(codepoint);
-               */
-    if ((flags & enum_cclass_uppercase)    && u_isupper(codepoint))  return 1;
-    if ((flags & enum_cclass_lowercase)    && u_islower(codepoint))  return 1;
-    if ((flags & enum_cclass_alphabetic)   && u_isalpha(codepoint))  return 1;
-    if ((flags & enum_cclass_numeric)      && u_isdigit(codepoint))  return 1;
-    if ((flags & enum_cclass_hexadecimal)  && u_isxdigit(codepoint)) return 1;
-    if ((flags & enum_cclass_whitespace)   && u_isspace(codepoint))  return 1;
-    if ((flags & enum_cclass_printing)     && u_isprint(codepoint))  return 1;
-    if ((flags & enum_cclass_graphical)    && u_isgraph(codepoint))  return 1;
-    if ((flags & enum_cclass_blank)        && u_isblank(codepoint))  return 1;
-    if ((flags & enum_cclass_control)      && u_iscntrl(codepoint))  return 1;
-    if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint))  return 1;
-    if ((flags & enum_cclass_word)         &&
-        (u_isalnum(codepoint) || codepoint == '_'))                  return 1;
+    STRING *dest;
+    int src_len, dest_len;
+    UErrorCode err;
+    /*
+       U_STABLE int32_t U_EXPORT2
+       unorm_normalize(const UChar *source, int32_t sourceLength,
+       UNormalizationMode mode, int32_t options,
+       UChar *result, int32_t resultLength,
+       UErrorCode *status);
+       */
+    dest_len = src_len = src->strlen;
+    dest     = Parrot_str_new_init(interp, NULL, src_len * sizeof (UChar),
+            src->encoding, 0);
 
-    return 0;
+    err      = U_ZERO_ERROR;
+    dest_len = unorm_normalize((UChar *)src->strstart, src_len,
+            UNORM_DEFAULT,      /* default is NFC */
+            0,                  /* options 0 default - no specific icu
+                                 * version */
+            (UChar *)dest->strstart, dest_len, &err);
+
+    dest->bufused = dest_len * sizeof (UChar);
+
+    if (!U_SUCCESS(err)) {
+        err = U_ZERO_ERROR;
+        Parrot_gc_reallocate_string_storage(interp, dest, dest->bufused);
+        dest_len = unorm_normalize((UChar *)src->strstart, src_len,
+                UNORM_DEFAULT,      /* default is NFC */
+                0,                  /* options 0 default - no specific
+                                     * icu version */
+                (UChar *)dest->strstart, dest_len, &err);
+        PARROT_ASSERT(U_SUCCESS(err));
+        dest->bufused = dest_len * sizeof (UChar);
+    }
+    dest->strlen = dest_len;
+    return dest;
 #else
-    if (codepoint < 256)
-        return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
+    UNUSED(src);
+    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+        "no ICU lib loaded");
+#endif
+}
 
-    if (flags == enum_cclass_any)
-        return 1;
 
-    /* All codepoints from u+0100 to u+02af are alphabetic, so we
-     * cheat on the WORD and ALPHABETIC properties to include these
-     * (and incorrectly exclude all others).  This is a stopgap until
-     * ICU is everywhere, or we have better non-ICU unicode support. */
-    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
-        return (codepoint < 0x2b0);
+/*
 
-    if (flags & enum_cclass_whitespace) {
-        /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
-        switch (codepoint) {
-          case 0x1680: case 0x180e: case 0x2000: case 0x2001:
-          case 0x2002: case 0x2003: case 0x2004: case 0x2005:
-          case 0x2006: case 0x2007: case 0x2008: case 0x2009:
-          case 0x200a: case 0x2028: case 0x2029: case 0x202f:
-          case 0x205f: case 0x3000:
-            return 1;
-          default:
-            break;
-        }
-    }
+=item C<STRING* unicode_upcase(PARROT_INTERP, const STRING *src)>
 
-    if (flags & enum_cclass_numeric) {
-        /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
-        if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
-        if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
-        if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
-        if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
-        if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
-        if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
-        if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
-        if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
-        if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
-        if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
-        if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
-        if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
-        if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
-        if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
-        if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
-        if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
-        if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
-        if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
-        if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
-        if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
-        if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
-        if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
-    }
+Converts the STRING C<src> to all upper-case graphemes, for those characters
+which support upper-case versions.
 
-    if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline))
-        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-            "no ICU lib loaded");
+Throws an exception if ICU is not installed.
 
-    return 0;
-#endif
-}
+=cut
 
+*/
 
-/*
+PARROT_CANNOT_RETURN_NULL
+STRING*
+unicode_upcase(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(unicode_upcase)
+#if PARROT_HAS_ICU
+    UErrorCode err;
+    int dest_len, src_len, needed;
+    STRING *res;
+#endif
 
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
+    if (src->bufused  == src->strlen
+            && src->encoding == Parrot_utf8_encoding_ptr) {
+        return Parrot_ascii_encoding_ptr->upcase(interp, src);
+    }
 
-Returns Boolean.
+#if PARROT_HAS_ICU
+    /* to_encoding will allocate new string */
+    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
+    /*
+       U_CAPI int32_t U_EXPORT2
+       u_strToUpper(UChar *dest, int32_t destCapacity,
+       const UChar *src, int32_t srcLength,
+       const char *locale,
+       UErrorCode *pErrorCode);
+       */
+    err = U_ZERO_ERROR;
 
-=cut
+    /* use all available space - see below XXX */
+    /* TODO downcase, titlecase too */
+    dest_len = Buffer_buflen(res) / sizeof (UChar);
+    src_len  = res->bufused       / sizeof (UChar);
 
-*/
+    /*
+     * XXX troubles:
+     *   t/op/string_cs_45  upcase unicode:"\u01f0"
+     *   this creates \u004a \u030c J+NON-SPACING HACEK
+     *   the string needs resizing, *if* the src buffer is
+     *   too short. *But* with icu 3.2/3.4 the src string is
+     *   overwritten with partial result, despite the icu docs sayeth:
+     *
+     *      The source string and the destination buffer
+     *      are allowed to overlap.
+     *
+     *  Workaround:  'preflighting' returns needed length
+     *  Alternative: forget about inplace operation - create new result
+     *
+     *  TODO downcase, titlecase
+     */
+    needed = u_strToUpper(NULL, 0,
+            (UChar *)res->strstart, src_len,
+            NULL,       /* locale = default */
+            &err);
 
-static INTVAL
-is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(is_cclass)
-    UINTVAL codepoint;
+    if (needed > dest_len) {
+        Parrot_gc_reallocate_string_storage(interp, res, needed * sizeof (UChar));
+        dest_len = needed;
+    }
 
-    if (offset >= src->strlen)
-        return 0;
+    err      = U_ZERO_ERROR;
+    dest_len = u_strToUpper((UChar *)res->strstart, dest_len,
+            (UChar *)res->strstart, src_len,
+            NULL,       /* locale = default */
+            &err);
+    PARROT_ASSERT(U_SUCCESS(err));
+    res->bufused = dest_len * sizeof (UChar);
 
-    codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
+    /* downgrade if possible */
+    if (dest_len == (int)src->strlen)
+        res->encoding = Parrot_ucs2_encoding_ptr;
+    else {
+        /* string is likely still ucs2 if it was earlier
+         * but strlen changed due to combining char
+         */
+        res->strlen = dest_len;
+    }
 
-    if (codepoint >= 256)
-        return u_iscclass(interp, codepoint, flags) != 0;
+    return res;
 
-    return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
+#else
+    UNUSED(src);
+    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+        "no ICU lib loaded");
+#endif
 }
 
 
 /*
 
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
+=item C<STRING* unicode_downcase(PARROT_INTERP, const STRING *src)>
 
-Find a character in the given character class.
+Converts all graphemes to lower-case, for those graphemes which have cases.
+
+Throws an exception if ICU is not installed.
 
 =cut
 
 */
 
-static INTVAL
-find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+PARROT_CANNOT_RETURN_NULL
+STRING*
+unicode_downcase(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(find_cclass)
-    String_iter iter;
-    UINTVAL     codepoint;
-    UINTVAL     end = offset + count;
+    ASSERT_ARGS(unicode_downcase)
+#if PARROT_HAS_ICU
+    UErrorCode err;
+    int dest_len, src_len;
+    STRING *res;
+#endif
 
-    STRING_ITER_INIT(interp, &iter);
-    STRING_ITER_SET_POSITION(interp, src, &iter, offset);
+    if (src->bufused  == src->strlen
+            && src->encoding == Parrot_utf8_encoding_ptr) {
+        return Parrot_ascii_encoding_ptr->downcase(interp, src);
+    }
 
-    end = src->strlen < end ? src->strlen : end;
+#if PARROT_HAS_ICU
+    /* to_encoding will allocate new string */
+    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
+    /*
+U_CAPI int32_t U_EXPORT2
+u_strToLower(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             const char *locale,
+             UErrorCode *pErrorCode);
+     */
+    err      = U_ZERO_ERROR;
+    src_len  = res->bufused / sizeof (UChar);
+    dest_len = u_strToLower((UChar *)res->strstart, src_len,
+            (UChar *)res->strstart, src_len,
+            NULL,       /* locale = default */
+            &err);
+    res->bufused = dest_len * sizeof (UChar);
 
-    while (iter.charpos < end) {
-        codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (codepoint >= 256) {
-            if (u_iscclass(interp, codepoint, flags))
-                    return iter.charpos - 1;
-        }
-        else {
-            if (Parrot_iso_8859_1_typetable[codepoint] & flags)
-                return iter.charpos - 1;
-        }
+    if (!U_SUCCESS(err)) {
+        err = U_ZERO_ERROR;
+        Parrot_gc_reallocate_string_storage(interp, res, res->bufused);
+        dest_len = u_strToLower((UChar *)res->strstart, dest_len,
+                (UChar *)res->strstart, src_len,
+                NULL,       /* locale = default */
+                &err);
+        PARROT_ASSERT(U_SUCCESS(err));
     }
 
-    return end;
+    /* downgrade if possible */
+    if (dest_len == (int)res->strlen)
+        res->encoding = Parrot_ucs2_encoding_ptr;
+
+    return res;
+
+#else
+    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+        "no ICU lib loaded");
+#endif
 }
 
 
 /*
 
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
+=item C<STRING* unicode_titlecase(PARROT_INTERP, const STRING *src)>
 
-Returns C<INTVAL>.
+Converts the string to title case, for those characters which support cases.
+
+Throws an exception if ICU is not installed.
 
 =cut
 
 */
 
-static INTVAL
-find_not_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
-        UINTVAL offset, UINTVAL count)
+PARROT_CANNOT_RETURN_NULL
+STRING*
+unicode_titlecase(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(find_not_cclass)
-    String_iter iter;
-    UINTVAL     codepoint;
-    UINTVAL     end = offset + count;
-    int         bit;
+    ASSERT_ARGS(unicode_titlecase)
+#if PARROT_HAS_ICU
 
-    if (offset > src->strlen) {
-        /* XXX: Throw in this case? */
-        return offset + count;
-    }
+    UErrorCode err;
+    int dest_len, src_len;
+    STRING *res;
 
-    STRING_ITER_INIT(interp, &iter);
+    if (src->bufused  == src->strlen
+    &&  src->encoding == Parrot_utf8_encoding_ptr) {
+        return Parrot_ascii_encoding_ptr->titlecase(interp, src);
+    }
 
-    if (offset)
-        STRING_ITER_SET_POSITION(interp, src, &iter, offset);
+    /* to_encoding will allocate new string */
+    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
 
-    end = src->strlen < end ? src->strlen : end;
+    /*
+U_CAPI int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             UBreakIterator *titleIter,
+             const char *locale,
+             UErrorCode *pErrorCode);
+     */
 
-    if (flags == enum_cclass_any)
-        return end;
+    err      = U_ZERO_ERROR;
+    src_len  = res->bufused / sizeof (UChar);
+    dest_len = u_strToTitle((UChar *)res->strstart, src_len,
+            (UChar *)res->strstart, src_len,
+            NULL,       /* default titleiter */
+            NULL,       /* locale = default */
+            &err);
+    res->bufused = dest_len * sizeof (UChar);
 
-    while (iter.charpos < end) {
-        codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (codepoint >= 256) {
-            for (bit = enum_cclass_uppercase;
-                    bit <= enum_cclass_word ; bit <<= 1) {
-                if ((bit & flags) && !u_iscclass(interp, codepoint, bit))
-                    return iter.charpos - 1;
-            }
-        }
-        else {
-            if (!(Parrot_iso_8859_1_typetable[codepoint] & flags))
-                return iter.charpos - 1;
-        }
+    if (!U_SUCCESS(err)) {
+        err = U_ZERO_ERROR;
+        Parrot_gc_reallocate_string_storage(interp, res, res->bufused);
+        dest_len = u_strToTitle((UChar *)res->strstart, dest_len,
+                (UChar *)res->strstart, src_len,
+                NULL, NULL,
+                &err);
+        PARROT_ASSERT(U_SUCCESS(err));
     }
 
-    return end;
+    /* downgrade if possible */
+    if (dest_len == (int)res->strlen)
+        res->encoding = Parrot_ucs2_encoding_ptr;
+
+    return res;
+
+#else
+    UNUSED(src);
+    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+        "no ICU lib loaded");
+#endif
 }
 
 
 /*
 
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
+=item C<STRING* unicode_upcase_first(PARROT_INTERP, const STRING *src)>
 
-Returns a one-codepoint string for the given codepoint.
+Converts the first grapheme in the STRING C<src> to uppercase, if the
+grapheme supports it. Not implemented.
 
 =cut
 
 */
 
 PARROT_CANNOT_RETURN_NULL
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
+STRING*
+unicode_upcase_first(PARROT_INTERP, SHIM(const STRING *src))
 {
-    ASSERT_ARGS(string_from_codepoint)
-    String_iter    iter;
-    STRING * const dest = string_make(interp, "", 1, "unicode", 0);
-
-    dest->strlen = 1;
-
-    STRING_ITER_INIT(interp, &iter);
-    STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, codepoint);
-    dest->bufused = iter.bytepos;
-
-    return dest;
+    ASSERT_ARGS(unicode_upcase_first)
+    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
+    UNIMPL;
 }
 
 
 /*
 
-=item C<static size_t compute_hash(PARROT_INTERP, const STRING *src, size_t
-seed)>
+=item C<STRING* unicode_downcase_first(PARROT_INTERP, const STRING *src)>
 
-Computes the hash of the given STRING C<src> with starting seed value C<seed>.
+Converts the first grapheme in the STRING C<src> to lower-case, if
+the grapheme supports it. Not implemented
 
 =cut
 
 */
 
-static size_t
-compute_hash(PARROT_INTERP, ARGIN(const STRING *src), size_t seed)
+PARROT_CANNOT_RETURN_NULL
+STRING*
+unicode_downcase_first(PARROT_INTERP, SHIM(const STRING *src))
 {
-    ASSERT_ARGS(compute_hash)
-    String_iter iter;
-    size_t      hashval = seed;
-
-    STRING_ITER_INIT(interp, &iter);
-
-    while (iter.charpos < src->strlen) {
-        const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        hashval += hashval << 5;
-        hashval += c;
-    }
-
-    return hashval;
+    ASSERT_ARGS(unicode_downcase_first)
+    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
+    UNIMPL;
 }
 
 
 /*
 
-=item C<void Parrot_charset_unicode_init(PARROT_INTERP)>
+=item C<STRING* unicode_titlecase_first(PARROT_INTERP, const STRING *src)>
 
-Initializes the Unicode charset by installing all the necessary function
-pointers.
+Converts the first grapheme in STRING C<src> to title case, if the
+string supports it. Not implemented.
 
 =cut
 
 */
 
-void
-Parrot_charset_unicode_init(PARROT_INTERP)
+PARROT_CANNOT_RETURN_NULL
+STRING*
+unicode_titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
 {
-    ASSERT_ARGS(Parrot_charset_unicode_init)
-    CHARSET * const      return_set = Parrot_new_charset(interp);
-    static const CHARSET base_set   = {
-        "unicode",
-        get_graphemes,
-        to_charset,
-        compose,
-        decompose,
-        upcase,
-        downcase,
-        titlecase,
-        upcase_first,
-        downcase_first,
-        titlecase_first,
-        compare,
-        mixed_cs_index,
-        cs_rindex,
-        validate,
-        is_cclass,
-        find_cclass,
-        find_not_cclass,
-        string_from_codepoint,
-        compute_hash,
-        NULL
-    };
-
-    STRUCT_COPY_FROM_STRUCT(return_set, base_set);
-
-    /*
-     * for now use utf8
-     * TODO replace it with a fixed uint_16 or uint_32 encoding
-     *      XXX if this is changed, modify string_make so it
-     *          still takes "utf8" when fed "unicode" as charset!
-     */
-    return_set->preferred_encoding = Parrot_utf8_encoding_ptr;
-    Parrot_register_charset(interp, "unicode", return_set);
-
-    return;
+    ASSERT_ARGS(unicode_titlecase_first)
+    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
+    UNIMPL;
 }
 
 
@@ -1073,3 +1385,4 @@
  * End:
  * vim: expandtab shiftwidth=4:
  */
+

Added: trunk/src/string/encoding/shared.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ trunk/src/string/encoding/shared.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -0,0 +1,369 @@
+/* shared.h
+ *  Copyright (C) 2004-2007, Parrot Foundation.
+ *  SVN Info
+ *     $Id$
+ *  Overview:
+ *     This is the header for the functions shared by the string encodings
+ *  Data Structure and Algorithms:
+ *  History:
+ *  Notes:
+ *  References:
+ */
+
+#ifndef PARROT_ENCODING_SHARED_H_GUARD
+#define PARROT_ENCODING_SHARED_H_GUARD
+
+/* HEADERIZER BEGIN: src/string/encoding/shared.c */
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL encoding_compare(PARROT_INTERP,
+    ARGIN(const STRING *lhs),
+    ARGIN(const STRING *rhs))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3);
+
+PARROT_CANNOT_RETURN_NULL
+STRING* encoding_decompose(PARROT_INTERP, SHIM(const STRING *src))
+        __attribute__nonnull__(1);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL encoding_equal(PARROT_INTERP,
+    ARGIN(const STRING *lhs),
+    ARGIN(const STRING *rhs))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL encoding_find_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL encoding_find_not_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+PARROT_CANNOT_RETURN_NULL
+PARROT_WARN_UNUSED_RESULT
+STRING * encoding_get_graphemes(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+size_t encoding_hash(PARROT_INTERP, ARGIN(const STRING *src), size_t seed)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL encoding_index(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    ARGIN(const STRING *search),
+    UINTVAL offs)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL encoding_is_cclass(PARROT_INTERP,
+    INTVAL flags,
+    ARGIN(const STRING *src),
+    UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL encoding_rindex(PARROT_INTERP,
+    SHIM(const STRING *src),
+    SHIM(const STRING *search_string),
+    NULLOK(UINTVAL offset))
+        __attribute__nonnull__(1);
+
+UINTVAL encoding_scan(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+STRING * encoding_substr(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL fixed8_compare(PARROT_INTERP,
+    ARGIN(const STRING *lhs),
+    ARGIN(const STRING *rhs))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3);
+
+PARROT_CANNOT_RETURN_NULL
+STRING* fixed8_compose(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL fixed8_equal(PARROT_INTERP,
+    ARGIN(const STRING *lhs),
+    ARGIN(const STRING *rhs))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
+size_t fixed8_hash(SHIM_INTERP, ARGIN(const STRING *s), size_t hashval)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL fixed8_index(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    ARGIN(const STRING *search_string),
+    UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3);
+
+UINTVAL fixed8_iter_get(PARROT_INTERP,
+    ARGIN(const STRING *str),
+    ARGIN(const String_iter *iter),
+    INTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3);
+
+UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP,
+    ARGIN(const STRING *str),
+    ARGMOD(String_iter *iter))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3)
+        FUNC_MODIFIES(*iter);
+
+void fixed8_iter_set_and_advance(PARROT_INTERP,
+    ARGMOD(STRING *str),
+    ARGMOD(String_iter *iter),
+    UINTVAL c)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3)
+        FUNC_MODIFIES(*str)
+        FUNC_MODIFIES(*iter);
+
+void fixed8_iter_set_position(SHIM_INTERP,
+    ARGIN(const STRING *str),
+    ARGMOD(String_iter *iter),
+    UINTVAL pos)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3)
+        FUNC_MODIFIES(*iter);
+
+void fixed8_iter_skip(SHIM_INTERP,
+    ARGIN(const STRING *str),
+    ARGMOD(String_iter *iter),
+    INTVAL skip)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3)
+        FUNC_MODIFIES(*iter);
+
+PARROT_WARN_UNUSED_RESULT
+UINTVAL fixed8_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+INTVAL fixed8_rindex(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    ARGIN(const STRING *search_string),
+    UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3);
+
+PARROT_WARN_UNUSED_RESULT
+UINTVAL fixed8_scan(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+PARROT_CANNOT_RETURN_NULL
+STRING * fixed8_substr(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+STRING * unicode_chr(PARROT_INTERP, UINTVAL codepoint)
+        __attribute__nonnull__(1);
+
+PARROT_CANNOT_RETURN_NULL
+STRING* unicode_compose(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+STRING* unicode_downcase(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+STRING* unicode_downcase_first(PARROT_INTERP, SHIM(const STRING *src))
+        __attribute__nonnull__(1);
+
+PARROT_CANNOT_RETURN_NULL
+STRING* unicode_titlecase(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+STRING* unicode_titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
+        __attribute__nonnull__(1);
+
+PARROT_CANNOT_RETURN_NULL
+STRING* unicode_upcase(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_CANNOT_RETURN_NULL
+STRING* unicode_upcase_first(PARROT_INTERP, SHIM(const STRING *src))
+        __attribute__nonnull__(1);
+
+UINTVAL unicode_validate(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+#define ASSERT_ARGS_encoding_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(lhs) \
+    , PARROT_ASSERT_ARG(rhs))
+#define ASSERT_ARGS_encoding_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_encoding_equal __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(lhs) \
+    , PARROT_ASSERT_ARG(rhs))
+#define ASSERT_ARGS_encoding_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_encoding_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_encoding_get_graphemes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_encoding_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_encoding_index __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src) \
+    , PARROT_ASSERT_ARG(search))
+#define ASSERT_ARGS_encoding_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_encoding_rindex __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_encoding_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_encoding_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_fixed8_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(lhs) \
+    , PARROT_ASSERT_ARG(rhs))
+#define ASSERT_ARGS_fixed8_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_fixed8_equal __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(lhs) \
+    , PARROT_ASSERT_ARG(rhs))
+#define ASSERT_ARGS_fixed8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(s))
+#define ASSERT_ARGS_fixed8_index __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src) \
+    , PARROT_ASSERT_ARG(search_string))
+#define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(str) \
+    , PARROT_ASSERT_ARG(iter))
+#define ASSERT_ARGS_fixed8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(str) \
+    , PARROT_ASSERT_ARG(iter))
+#define ASSERT_ARGS_fixed8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(str) \
+    , PARROT_ASSERT_ARG(iter))
+#define ASSERT_ARGS_fixed8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(str) \
+    , PARROT_ASSERT_ARG(iter))
+#define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(str) \
+    , PARROT_ASSERT_ARG(iter))
+#define ASSERT_ARGS_fixed8_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_fixed8_rindex __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src) \
+    , PARROT_ASSERT_ARG(search_string))
+#define ASSERT_ARGS_fixed8_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_fixed8_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_unicode_chr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_unicode_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_unicode_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_unicode_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_unicode_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_unicode_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_unicode_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_unicode_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
+#define ASSERT_ARGS_unicode_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+/* HEADERIZER END: src/string/encoding/shared.c */
+
+#endif /* PARROT_ENCODING_SHARED_H_GUARD */
+
+/*
+ * Local variables:
+ *   c-file-style: "parrot"
+ * End:
+ * vim: expandtab shiftwidth=4:
+ */

Copied and modified: trunk/src/string/encoding/tables.c (from r48832, trunk/src/string/charset/tables.c)
==============================================================================

Copied and modified: trunk/src/string/encoding/tables.h (from r48832, trunk/src/string/charset/tables.h)
==============================================================================

Modified: trunk/src/string/encoding/ucs2.c
==============================================================================
--- trunk/src/string/encoding/ucs2.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/encoding/ucs2.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -20,6 +20,7 @@
 
 #include "parrot/parrot.h"
 #include "../unicode.h"
+#include "shared.h"
 
 #if !PARROT_HAS_ICU
 PARROT_DOES_NOT_RETURN
@@ -36,66 +37,6 @@
 /* HEADERIZER BEGIN: static */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL bytes(SHIM_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL find_cclass(PARROT_INTERP,
-    ARGIN(const STRING *s),
-    ARGIN(const INTVAL *typetable),
-    INTVAL flags,
-    UINTVAL pos,
-    UINTVAL end)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-static UINTVAL get_byte(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset))
-        __attribute__nonnull__(1);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_bytes(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset),
-    SHIM(UINTVAL count))
-        __attribute__nonnull__(1);
-
-static UINTVAL get_codepoint(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_codepoints(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static void set_byte(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset),
-    SHIM(UINTVAL byte))
-        __attribute__nonnull__(1);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
 static size_t ucs2_hash(PARROT_INTERP,
     ARGIN(const STRING *s),
     size_t hashval)
@@ -146,30 +87,32 @@
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
-#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(s) \
-    , PARROT_ASSERT_ARG(typetable))
-#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_get_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
+static UINTVAL ucs2_ord(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL ucs2_scan(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+PARROT_CANNOT_RETURN_NULL
+static STRING * ucs2_substr(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+PARROT_CANNOT_RETURN_NULL
+static STRING * ucs2_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
 #define ASSERT_ARGS_ucs2_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(s))
@@ -193,6 +136,18 @@
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(str) \
     , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_ucs2_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ucs2_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ucs2_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ucs2_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: static */
 
@@ -207,7 +162,7 @@
 
 /*
 
-=item C<static STRING * to_encoding(PARROT_INTERP, const STRING *src)>
+=item C<static STRING * ucs2_to_encoding(PARROT_INTERP, const STRING *src)>
 
 Converts the string C<src> to this particular encoding.  If C<dest> is
 provided, it will contain the result.  Otherwise this function operates in
@@ -220,9 +175,9 @@
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
 static STRING *
-to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+ucs2_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(to_encoding)
+    ASSERT_ARGS(ucs2_to_encoding)
     STRING * const result =
         Parrot_utf16_encoding_ptr->to_encoding(interp, src);
 
@@ -236,98 +191,57 @@
 
 /*
 
-=item C<static UINTVAL get_codepoint(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
+=item C<static UINTVAL ucs2_scan(PARROT_INTERP, const STRING *src)>
 
-Returns the codepoint in string C<src> at position C<offset>.
+Returns the number of codepoints in string C<src>.
 
 =cut
 
 */
 
+PARROT_WARN_UNUSED_RESULT
 static UINTVAL
-get_codepoint(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
+ucs2_scan(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(get_codepoint)
+    ASSERT_ARGS(ucs2_scan)
 #if PARROT_HAS_ICU
-    const UChar * const s = (const UChar*) src->strstart;
     UNUSED(interp);
-    return s[offset];
+    return src->bufused / sizeof (UChar);
 #else
-    UNUSED(offset);
     UNUSED(src);
     no_ICU_lib(interp);
 #endif
 }
 
-
-/*
-
-=item C<static UINTVAL find_cclass(PARROT_INTERP, const STRING *s, const INTVAL
-*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
-
-Stub, the charset level handles this for unicode strings.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-find_cclass(PARROT_INTERP, ARGIN(const STRING *s), ARGIN(const INTVAL *typetable),
-INTVAL flags, UINTVAL pos, UINTVAL end)
-{
-    UNUSED(s);
-    UNUSED(typetable);
-    UNUSED(flags);
-    UNUSED(pos);
-    UNUSED(end);
-
-    Parrot_ex_throw_from_c_args(interp, NULL,
-        EXCEPTION_UNIMPLEMENTED,
-        "No find_cclass support in unicode encoding plugins");
-}
-
 /*
 
-=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *src, UINTVAL
+=item C<static UINTVAL ucs2_ord(PARROT_INTERP, const STRING *src, UINTVAL
 offset)>
 
-Returns the byte in string C<src> at position C<offset>.
+Returns the codepoint in string C<src> at position C<offset>.
 
 =cut
 
 */
 
 static UINTVAL
-get_byte(PARROT_INTERP, SHIM(const STRING *src), SHIM(UINTVAL offset))
-{
-    ASSERT_ARGS(get_byte)
-    UNIMPL;
-}
-
-/*
-
-=item C<static void set_byte(PARROT_INTERP, const STRING *src, UINTVAL offset,
-UINTVAL byte)>
-
-Sets, in string C<src> at position C<offset>, the byte C<byte>.
-
-=cut
-
-*/
-
-static void
-set_byte(PARROT_INTERP, SHIM(const STRING *src), SHIM(UINTVAL offset),
-        SHIM(UINTVAL byte))
+ucs2_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
 {
-    ASSERT_ARGS(set_byte)
-    UNIMPL;
+    ASSERT_ARGS(ucs2_ord)
+#if PARROT_HAS_ICU
+    const UChar * const s = (const UChar*) src->strstart;
+    UNUSED(interp);
+    return s[offset];
+#else
+    UNUSED(offset);
+    UNUSED(src);
+    no_ICU_lib(interp);
+#endif
 }
 
 /*
 
-=item C<static STRING * get_codepoints(PARROT_INTERP, const STRING *src, UINTVAL
+=item C<static STRING * ucs2_substr(PARROT_INTERP, const STRING *src, UINTVAL
 offset, UINTVAL count)>
 
 Returns the codepoints in string C<src> at position C<offset> and length
@@ -340,98 +254,27 @@
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
 static STRING *
-get_codepoints(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+ucs2_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
 {
-    ASSERT_ARGS(get_codepoints)
+    ASSERT_ARGS(ucs2_substr)
     STRING * const return_string = Parrot_str_copy(interp, src);
 
 #if PARROT_HAS_ICU
     return_string->strstart = (char*)src->strstart + offset * sizeof (UChar);
-    return_string->bufused = count * sizeof (UChar);
-#else
-    {
-        String_iter iter;
-        UINTVAL start;
-
-        STRING_ITER_INIT(interp, &iter);
-        ucs2_iter_set_position(interp, src, &iter, offset);
-        start = iter.bytepos;
-        return_string->strstart = (char *)return_string->strstart + start;
-        ucs2_iter_set_position(interp, src, &iter, offset + count);
-        return_string->bufused = iter.bytepos - start;
-    }
-#endif
-    return_string->strlen = count;
-    return_string->hashval = 0;
+    return_string->bufused  = count * sizeof (UChar);
+    return_string->strlen   = count;
+    return_string->hashval  = 0;
     return return_string;
-}
-
-/*
-
-=item C<static STRING * get_bytes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the bytes in string C<src> at position C<offset> and length C<count>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_bytes(PARROT_INTERP, SHIM(const STRING *src), SHIM(UINTVAL offset),
-        SHIM(UINTVAL count))
-{
-    ASSERT_ARGS(get_bytes)
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static UINTVAL codepoints(PARROT_INTERP, const STRING *src)>
-
-Returns the number of codepoints in string C<src>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(codepoints)
-#if PARROT_HAS_ICU
-    UNUSED(interp);
-    return src->bufused / sizeof (UChar);
 #else
     UNUSED(src);
+    UNUSED(offset);
+    UNUSED(count);
     no_ICU_lib(interp);
 #endif
 }
 
 /*
 
-=item C<static UINTVAL bytes(PARROT_INTERP, const STRING *src)>
-
-Returns the number of bytes in string C<src>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-bytes(SHIM_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(bytes)
-    return src->bufused;
-}
-
-/*
-
 =item C<static UINTVAL ucs2_iter_get(PARROT_INTERP, const STRING *str, const
 String_iter *i, INTVAL offset)>
 
@@ -446,7 +289,7 @@
     ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
 {
     ASSERT_ARGS(ucs2_iter_get)
-    return get_codepoint(interp, str, i->charpos + offset);
+    return ucs2_ord(interp, str, i->charpos + offset);
 }
 
 /*
@@ -607,46 +450,50 @@
 #endif
 }
 
-/*
-
-=item C<void Parrot_encoding_ucs2_init(PARROT_INTERP)>
+static STR_VTABLE Parrot_ucs2_encoding = {
+    0,
+    "ucs2",
+    NULL,
+    2, /* Max bytes per codepoint */
+
+    ucs2_to_encoding,
+    unicode_chr,
+
+    encoding_equal,
+    encoding_compare,
+    encoding_index,
+    encoding_rindex,
+    encoding_hash,
+    unicode_validate,
+
+    ucs2_scan,
+    ucs2_ord,
+    ucs2_substr,
+
+    encoding_is_cclass,
+    encoding_find_cclass,
+    encoding_find_not_cclass,
+
+    encoding_get_graphemes,
+    unicode_compose,
+    encoding_decompose,
+
+    unicode_upcase,
+    unicode_downcase,
+    unicode_titlecase,
+    unicode_upcase_first,
+    unicode_downcase_first,
+    unicode_titlecase_first,
+
+    ucs2_iter_get,
+    ucs2_iter_skip,
+    ucs2_iter_get_and_advance,
+    ucs2_iter_set_and_advance,
+    ucs2_iter_set_position
+};
 
-Initializes the UCS-2 encoding.
+STR_VTABLE *Parrot_ucs2_encoding_ptr = &Parrot_ucs2_encoding;
 
-=cut
-
-*/
-
-void
-Parrot_encoding_ucs2_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_encoding_ucs2_init)
-    ENCODING * const return_encoding = Parrot_new_encoding(interp);
-
-    static const ENCODING base_encoding = {
-        "ucs2",
-        2, /* Max bytes per codepoint 0 .. 0x10ffff */
-        to_encoding,
-        get_codepoint,
-        get_byte,
-        set_byte,
-        get_codepoints,
-        get_bytes,
-        codepoints,
-        bytes,
-        find_cclass,
-        ucs2_hash,
-        ucs2_iter_get,
-        ucs2_iter_skip,
-        ucs2_iter_get_and_advance,
-        ucs2_iter_set_and_advance,
-        ucs2_iter_set_position
-    };
-    STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
-    Parrot_register_encoding(interp, "ucs2", return_encoding);
-
-    return;
-}
 
 /*
 

Modified: trunk/src/string/encoding/ucs2.h
==============================================================================
--- trunk/src/string/encoding/ucs2.h	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/encoding/ucs2.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -16,11 +16,7 @@
 /* HEADERIZER BEGIN: src/string/encoding/ucs2.c */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-void Parrot_encoding_ucs2_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
 
-#define ASSERT_ARGS_Parrot_encoding_ucs2_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: src/string/encoding/ucs2.c */
 

Modified: trunk/src/string/encoding/ucs4.c
==============================================================================
--- trunk/src/string/encoding/ucs4.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/encoding/ucs4.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -20,6 +20,7 @@
 
 #include "parrot/parrot.h"
 #include "../unicode.h"
+#include "shared.h"
 
 #if !PARROT_HAS_ICU
 PARROT_DOES_NOT_RETURN
@@ -36,66 +37,6 @@
 /* HEADERIZER BEGIN: static */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL bytes(SHIM_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL find_cclass(PARROT_INTERP,
-    ARGIN(const STRING *s),
-    ARGIN(const INTVAL *typetable),
-    INTVAL flags,
-    UINTVAL pos,
-    UINTVAL end)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-static UINTVAL get_byte(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset))
-        __attribute__nonnull__(1);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_bytes(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset),
-    SHIM(UINTVAL count))
-        __attribute__nonnull__(1);
-
-static UINTVAL get_codepoint(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_codepoints(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static void set_byte(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset),
-    SHIM(UINTVAL byte))
-        __attribute__nonnull__(1);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
 static size_t ucs4_hash(PARROT_INTERP,
     ARGIN(const STRING *s),
     size_t hashval)
@@ -146,30 +87,32 @@
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
-#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(s) \
-    , PARROT_ASSERT_ARG(typetable))
-#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_get_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
+static UINTVAL ucs4_ord(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL ucs4_scan(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+PARROT_CANNOT_RETURN_NULL
+static STRING * ucs4_substr(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+PARROT_CANNOT_RETURN_NULL
+static STRING * ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
 #define ASSERT_ARGS_ucs4_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(s))
@@ -193,6 +136,18 @@
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(str) \
     , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_ucs4_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ucs4_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ucs4_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_ucs4_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: static */
 
@@ -202,9 +157,10 @@
 #  include <unicode/ustring.h>
 #endif
 
+
 /*
 
-=item C<static STRING * to_encoding(PARROT_INTERP, const STRING *src)>
+=item C<static STRING * ucs4_to_encoding(PARROT_INTERP, const STRING *src)>
 
 Converts the string C<src> to this particular encoding.
 
@@ -215,9 +171,9 @@
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
 static STRING *
-to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+ucs4_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(to_encoding)
+    ASSERT_ARGS(ucs4_to_encoding)
 #if PARROT_HAS_ICU
     if (src->encoding == Parrot_ucs4_encoding_ptr) {
         return Parrot_str_clone(interp, src);
@@ -225,11 +181,12 @@
     else {
         UINTVAL len = Parrot_str_length(interp, src);
         STRING *res = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
-                           Parrot_ucs4_encoding_ptr, Parrot_unicode_charset_ptr, 0);
+                           Parrot_ucs4_encoding_ptr, 0);
         UChar32 *buf = (UChar32 *) res->strstart;
         UINTVAL offs;
+        /* TODO: use an iterator */
         for (offs = 0; offs < len; offs++){
-            buf[offs] = src->encoding->get_codepoint(interp, src, offs);
+            buf[offs] = STRING_ord(interp, src, offs);
         };
         res->strlen  = len;
         res->bufused = len * sizeof (UChar32);
@@ -243,27 +200,26 @@
 
 }
 
+
 /*
 
-=item C<static UINTVAL get_codepoint(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
+=item C<static UINTVAL ucs4_scan(PARROT_INTERP, const STRING *src)>
 
-Returns the codepoint in string C<src> at position C<offset>.
+Returns the number of codepoints in string C<src>.
 
 =cut
 
 */
 
+PARROT_WARN_UNUSED_RESULT
 static UINTVAL
-get_codepoint(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
+ucs4_scan(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(get_codepoint)
+    ASSERT_ARGS(ucs4_scan)
 #if PARROT_HAS_ICU
-    const UChar32 * const s = (const UChar32*) src->strstart;
     UNUSED(interp);
-    return s[offset];
+    return src->bufused / sizeof (UChar32);
 #else
-    UNUSED(offset);
     UNUSED(src);
     no_ICU_lib(interp);
 #endif
@@ -272,77 +228,34 @@
 
 /*
 
-=item C<static UINTVAL find_cclass(PARROT_INTERP, const STRING *s, const INTVAL
-*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
-
-Stub, the charset level handles this for unicode strings.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-find_cclass(PARROT_INTERP, ARGIN(const STRING *s), ARGIN(const INTVAL *typetable),
-INTVAL flags, UINTVAL pos, UINTVAL end)
-{
-    ASSERT_ARGS(find_cclass)
-
-    UNUSED(s);
-    UNUSED(typetable);
-    UNUSED(flags);
-    UNUSED(pos);
-    UNUSED(end);
-
-    Parrot_ex_throw_from_c_args(interp, NULL,
-        EXCEPTION_UNIMPLEMENTED,
-        "No find_cclass support in unicode encoding plugins");
-}
-
-/*
-
-=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *src, UINTVAL
+=item C<static UINTVAL ucs4_ord(PARROT_INTERP, const STRING *src, UINTVAL
 offset)>
 
-Returns the byte in string C<src> at position C<offset>.
+Returns the codepoint in string C<src> at position C<offset>.
 
 =cut
 
 */
 
 static UINTVAL
-get_byte(PARROT_INTERP, SHIM(const STRING *src), SHIM(UINTVAL offset))
+ucs4_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
 {
-    ASSERT_ARGS(get_byte)
-    Parrot_ex_throw_from_c_args(interp, NULL,
-        EXCEPTION_UNIMPLEMENTED,
-        "No get_byte for UCS-4");
+    ASSERT_ARGS(ucs4_ord)
+#if PARROT_HAS_ICU
+    const UChar32 * const s = (const UChar32*) src->strstart;
+    UNUSED(interp);
+    return s[offset];
+#else
+    UNUSED(offset);
+    UNUSED(src);
+    no_ICU_lib(interp);
+#endif
 }
 
-/*
-
-=item C<static void set_byte(PARROT_INTERP, const STRING *src, UINTVAL offset,
-UINTVAL byte)>
-
-Sets, in string C<src> at position C<offset>, the byte C<byte>.
-
-=cut
-
-*/
-
-static void
-set_byte(PARROT_INTERP, SHIM(const STRING *src), SHIM(UINTVAL offset),
-        SHIM(UINTVAL byte))
-{
-    ASSERT_ARGS(set_byte)
-    Parrot_ex_throw_from_c_args(interp, NULL,
-        EXCEPTION_UNIMPLEMENTED,
-        "No set_byte for UCS-4");
-}
 
 /*
 
-=item C<static STRING * get_codepoints(PARROT_INTERP, const STRING *src, UINTVAL
+=item C<static STRING * ucs4_substr(PARROT_INTERP, const STRING *src, UINTVAL
 offset, UINTVAL count)>
 
 Returns the C<count> codepoints stored at position C<offset> in string
@@ -355,12 +268,12 @@
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
 static STRING *
-get_codepoints(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+ucs4_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
 {
-    ASSERT_ARGS(get_codepoints)
+    ASSERT_ARGS(ucs4_substr)
 #if PARROT_HAS_ICU
     return Parrot_str_new_init(interp, (char*)src->strstart + offset * sizeof (UChar32),
-                               count * sizeof (UChar32), src->encoding, src->charset, 0);
+                               count * sizeof (UChar32), src->encoding, 0);
 #else
     UNUSED(src);
     UNUSED(offset);
@@ -369,71 +282,6 @@
 #endif
 }
 
-/*
-
-=item C<static STRING * get_bytes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the bytes in string C<src> at position C<offset> and length C<count>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_bytes(PARROT_INTERP, SHIM(const STRING *src), SHIM(UINTVAL offset),
-        SHIM(UINTVAL count))
-{
-    ASSERT_ARGS(get_bytes)
-    Parrot_ex_throw_from_c_args(interp, NULL,
-        EXCEPTION_UNIMPLEMENTED,
-        "No get_bytes for UCS-4");
-}
-
-
-/*
-
-=item C<static UINTVAL codepoints(PARROT_INTERP, const STRING *src)>
-
-Returns the number of codepoints in string C<src>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(codepoints)
-#if PARROT_HAS_ICU
-    UNUSED(interp);
-    return src->bufused / sizeof (UChar32);
-#else
-    UNUSED(src);
-    no_ICU_lib(interp);
-#endif
-}
-
-/*
-
-=item C<static UINTVAL bytes(PARROT_INTERP, const STRING *src)>
-
-Returns the number of bytes in string C<src>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-bytes(SHIM_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(bytes)
-    return src->bufused;
-}
 
 /*
 
@@ -451,9 +299,10 @@
     ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
 {
     ASSERT_ARGS(ucs4_iter_get)
-    return get_codepoint(interp, str, i->charpos + offset);
+    return ucs4_ord(interp, str, i->charpos + offset);
 }
 
+
 /*
 
 =item C<static void ucs4_iter_skip(PARROT_INTERP, const STRING *str, String_iter
@@ -482,6 +331,7 @@
 #endif
 }
 
+
 /*
 
 =item C<static UINTVAL ucs4_iter_get_and_advance(PARROT_INTERP, const STRING
@@ -512,6 +362,7 @@
 #endif
 }
 
+
 /*
 
 =item C<static void ucs4_iter_set_and_advance(PARROT_INTERP, STRING *str,
@@ -542,6 +393,7 @@
 #endif
 }
 
+
 /*
 
 =item C<static void ucs4_iter_set_position(PARROT_INTERP, const STRING *str,
@@ -570,7 +422,7 @@
 #endif
 }
 
-#if PARROT_HAS_ICU
+
 /*
 
 =item C<static size_t ucs4_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
@@ -596,52 +448,52 @@
 
     return hashval;
 }
-#endif
 
-/*
-
-=item C<void Parrot_encoding_ucs4_init(PARROT_INTERP)>
-
-Initializes the UCS-4 encoding.
-
-=cut
-
-*/
 
-void
-Parrot_encoding_ucs4_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_encoding_ucs4_init)
-    ENCODING * const return_encoding = Parrot_new_encoding(interp);
+static STR_VTABLE Parrot_ucs4_encoding = {
+    0,
+    "ucs4",
+    NULL,
+    4, /* Max bytes per codepoint */
+
+    ucs4_to_encoding,
+    unicode_chr,
+
+    encoding_equal,
+    encoding_compare,
+    encoding_index,
+    encoding_rindex,
+    ucs4_hash,
+    unicode_validate,
+
+    ucs4_scan,
+    ucs4_ord,
+    ucs4_substr,
+
+    encoding_is_cclass,
+    encoding_find_cclass,
+    encoding_find_not_cclass,
+
+    encoding_get_graphemes,
+    unicode_compose,
+    encoding_decompose,
+
+    unicode_upcase,
+    unicode_downcase,
+    unicode_titlecase,
+    unicode_upcase_first,
+    unicode_downcase_first,
+    unicode_titlecase_first,
+
+    ucs4_iter_get,
+    ucs4_iter_skip,
+    ucs4_iter_get_and_advance,
+    ucs4_iter_set_and_advance,
+    ucs4_iter_set_position
+};
 
-    static const ENCODING base_encoding = {
-        "ucs4",
-        4, /* Max bytes per codepoint */
-        to_encoding,
-        get_codepoint,
-        get_byte,
-        set_byte,
-        get_codepoints,
-        get_bytes,
-        codepoints,
-        bytes,
-        find_cclass,
-#if PARROT_HAS_ICU
-        ucs4_hash,
-#else
-        NULL,
-#endif
-        ucs4_iter_get,
-        ucs4_iter_skip,
-        ucs4_iter_get_and_advance,
-        ucs4_iter_set_and_advance,
-        ucs4_iter_set_position
-    };
-    STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
-    Parrot_register_encoding(interp, "ucs4", return_encoding);
+STR_VTABLE *Parrot_ucs4_encoding_ptr = &Parrot_ucs4_encoding;
 
-    return;
-}
 
 /*
 

Modified: trunk/src/string/encoding/ucs4.h
==============================================================================
--- trunk/src/string/encoding/ucs4.h	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/encoding/ucs4.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -9,11 +9,7 @@
 /* HEADERIZER BEGIN: src/string/encoding/ucs4.c */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-void Parrot_encoding_ucs4_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
 
-#define ASSERT_ARGS_Parrot_encoding_ucs4_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: src/string/encoding/ucs4.c */
 

Modified: trunk/src/string/encoding/utf16.c
==============================================================================
--- trunk/src/string/encoding/utf16.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/encoding/utf16.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -20,74 +20,13 @@
 
 #include "parrot/parrot.h"
 #include "../unicode.h"
+#include "shared.h"
 
 /* HEADERIZER HFILE: src/string/encoding/utf16.h */
 
 /* HEADERIZER BEGIN: static */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL bytes(SHIM_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL find_cclass(PARROT_INTERP,
-    ARGIN(const STRING *s),
-    ARGIN(const INTVAL *typetable),
-    INTVAL flags,
-    UINTVAL pos,
-    UINTVAL end)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-static UINTVAL get_byte(SHIM_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_bytes(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static UINTVAL get_codepoint(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_codepoints(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static void set_byte(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL byte)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
 static UINTVAL utf16_iter_get(PARROT_INTERP,
     ARGIN(const STRING *str),
     ARGIN(const String_iter *i),
@@ -133,32 +72,32 @@
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
-#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(s) \
-    , PARROT_ASSERT_ARG(typetable))
-#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
+static UINTVAL utf16_ord(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+static UINTVAL utf16_scan(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+PARROT_CANNOT_RETURN_NULL
+static STRING * utf16_substr(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    UINTVAL offset,
+    UINTVAL count)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+PARROT_WARN_UNUSED_RESULT
+PARROT_CANNOT_RETURN_NULL
+static STRING * utf16_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
 #define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(str) \
@@ -179,6 +118,18 @@
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(str) \
     , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf16_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_utf16_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_utf16_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_utf16_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: static */
 
@@ -195,7 +146,7 @@
 
 /*
 
-=item C<static STRING * to_encoding(PARROT_INTERP, const STRING *src)>
+=item C<static STRING * utf16_to_encoding(PARROT_INTERP, const STRING *src)>
 
 Converts the string C<src> to this particular encoding.  If C<dest> is
 provided, it will contain the result.  Otherwise this function operates in
@@ -209,9 +160,9 @@
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
 static STRING *
-to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+utf16_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(to_encoding)
+    ASSERT_ARGS(utf16_to_encoding)
 #if PARROT_HAS_ICU
     UErrorCode err;
     int dest_len;
@@ -231,7 +182,6 @@
      */
     src_len = src->strlen;
     if (!src_len) {
-        result->charset  = Parrot_unicode_charset_ptr;
         result->encoding = Parrot_ucs2_encoding_ptr;
         result->strlen = result->bufused = 0;
         return result;
@@ -240,8 +190,8 @@
     Parrot_gc_allocate_string_storage(interp, result, sizeof (UChar) * src_len);
     p = (UChar *)result->strstart;
 
-    if (src->charset == Parrot_iso_8859_1_charset_ptr ||
-            src->charset == Parrot_ascii_charset_ptr) {
+    if (src->encoding == Parrot_latin1_encoding_ptr ||
+            src->encoding == Parrot_ascii_encoding_ptr) {
         for (dest_len = 0; dest_len < (int)src->strlen; ++dest_len) {
             p[dest_len] = (UChar)((unsigned char*)src->strstart)[dest_len];
         }
@@ -264,7 +214,6 @@
         }
     }
     result->bufused = dest_len * sizeof (UChar);
-    result->charset  = Parrot_unicode_charset_ptr;
     result->encoding = Parrot_utf16_encoding_ptr;
     result->strlen = src_len;
 
@@ -280,118 +229,76 @@
 
 /*
 
-=item C<static UINTVAL get_codepoint(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
+=item C<static UINTVAL utf16_scan(PARROT_INTERP, const STRING *src)>
 
-Returns the codepoint in string C<src> at position C<offset>.
+Returns the number of codepoints in string C<src> by scanning the whole
+string.
 
 =cut
 
 */
 
+PARROT_WARN_UNUSED_RESULT
 static UINTVAL
-get_codepoint(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
+utf16_scan(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(get_codepoint)
+    ASSERT_ARGS(utf16_scan)
 #if PARROT_HAS_ICU
     const UChar * const s = (UChar*) src->strstart;
-    UINTVAL c, pos;
-    UNUSED(interp);
-
-    pos = 0;
-    U16_FWD_N_UNSAFE(s, pos, offset);
-    U16_GET_UNSAFE(s, pos, c);
-    return c;
+    UINTVAL pos = 0, charpos = 0;
+    /*
+     * this is used to initially calculate src->strlen,
+     * therefore we must scan the whole string
+     */
+    while (pos * sizeof (UChar) < src->bufused) {
+        U16_FWD_1_UNSAFE(s, pos);
+        ++charpos;
+    }
+    return charpos;
 #else
     UNUSED(src);
-    UNUSED(offset);
 
     Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
         "no ICU lib loaded");
 #endif
 }
 
-
 /*
 
-=item C<static UINTVAL find_cclass(PARROT_INTERP, const STRING *s, const INTVAL
-*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
-
-Stub, the charset level handles this for unicode strings.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-find_cclass(PARROT_INTERP, ARGIN(const STRING *s), ARGIN(const INTVAL *typetable),
-INTVAL flags, UINTVAL pos, UINTVAL end)
-{
-    UNUSED(s);
-    UNUSED(typetable);
-    UNUSED(flags);
-    UNUSED(pos);
-    UNUSED(end);
-
-    Parrot_ex_throw_from_c_args(interp, NULL,
-        EXCEPTION_UNIMPLEMENTED,
-        "No find_cclass support in unicode encoding plugins");
-}
-
-/*
-
-=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *src, UINTVAL
+=item C<static UINTVAL utf16_ord(PARROT_INTERP, const STRING *src, UINTVAL
 offset)>
 
-Returns the byte in string C<src> at position C<offset>.
+Returns the codepoint in string C<src> at position C<offset>.
 
 =cut
 
 */
 
 static UINTVAL
-get_byte(SHIM_INTERP, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(get_byte)
-    const unsigned char * const contents = (unsigned char *)src->strstart;
-    if (offset >= src->bufused) {
-/*        Parrot_ex_throw_from_c_args(interp, NULL, 0,
-                "get_byte past the end of the buffer (%i of %i)",
-                offset, src->bufused); */
-        return 0;
-    }
-    return contents[offset];
-}
-
-/*
-
-=item C<static void set_byte(PARROT_INTERP, const STRING *src, UINTVAL offset,
-UINTVAL byte)>
-
-Sets, in string C<src> at position C<offset>, the byte C<byte>.
-
-=cut
-
-*/
-
-static void
-set_byte(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL byte)
+utf16_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
 {
-    ASSERT_ARGS(set_byte)
-    unsigned char *contents;
+    ASSERT_ARGS(utf16_ord)
+#if PARROT_HAS_ICU
+    const UChar * const s = (UChar*) src->strstart;
+    UINTVAL c, pos;
+    UNUSED(interp);
 
-    if (offset >= src->bufused)
-        Parrot_ex_throw_from_c_args(interp, NULL, 0,
-            "set_byte past the end of the buffer");
+    pos = 0;
+    U16_FWD_N_UNSAFE(s, pos, offset);
+    U16_GET_UNSAFE(s, pos, c);
+    return c;
+#else
+    UNUSED(src);
+    UNUSED(offset);
 
-    contents = (unsigned char *)src->strstart;
-    contents[offset] = (unsigned char)byte;
+    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+        "no ICU lib loaded");
+#endif
 }
 
 /*
 
-=item C<static STRING * get_codepoints(PARROT_INTERP, const STRING *src, UINTVAL
+=item C<static STRING * utf16_substr(PARROT_INTERP, const STRING *src, UINTVAL
 offset, UINTVAL count)>
 
 Returns the codepoints in string C<src> at position C<offset> and length
@@ -404,9 +311,9 @@
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
 static STRING *
-get_codepoints(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
+utf16_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
 {
-    ASSERT_ARGS(get_codepoints)
+    ASSERT_ARGS(utf16_substr)
 #if PARROT_HAS_ICU
     UINTVAL pos = 0, start;
     const UChar * const s = (UChar*) src->strstart;
@@ -430,84 +337,6 @@
 #endif
 }
 
-
-/*
-
-=item C<static STRING * get_bytes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the bytes in string C<src> at position C<offset> and length C<count>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_bytes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(get_bytes)
-    UNUSED(interp);
-    UNUSED(src);
-    UNUSED(offset)
-    UNUSED(count);
-    UNIMPL;
-}
-
-/*
-
-=item C<static UINTVAL codepoints(PARROT_INTERP, const STRING *src)>
-
-Returns the number of codepoints in string C<src>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(codepoints)
-#if PARROT_HAS_ICU
-    const UChar * const s = (UChar*) src->strstart;
-    UINTVAL pos = 0, charpos = 0;
-    /*
-     * this is used to initially calculate src->strlen,
-     * therefore we must scan the whole string
-     */
-    while (pos * sizeof (UChar) < src->bufused) {
-        U16_FWD_1_UNSAFE(s, pos);
-        ++charpos;
-    }
-    return charpos;
-#else
-    UNUSED(src);
-
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
-}
-
-/*
-
-=item C<static UINTVAL bytes(PARROT_INTERP, const STRING *src)>
-
-Returns the number of bytes in string C<src>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-bytes(SHIM_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(bytes)
-    return src->bufused;
-}
-
 /*
 
 =item C<static UINTVAL utf16_iter_get(PARROT_INTERP, const STRING *str, const
@@ -691,46 +520,51 @@
 #endif
 }
 
-/*
-
-=item C<void Parrot_encoding_utf16_init(PARROT_INTERP)>
 
-Initializes the UTF-16 encoding.
+static STR_VTABLE Parrot_utf16_encoding = {
+    0,
+    "utf16",
+    NULL,
+    4, /* Max bytes per codepoint */
+
+    utf16_to_encoding,
+    unicode_chr,
+
+    encoding_equal,
+    encoding_compare,
+    encoding_index,
+    encoding_rindex,
+    encoding_hash,
+    unicode_validate,
+
+    utf16_scan,
+    utf16_ord,
+    utf16_substr,
+
+    encoding_is_cclass,
+    encoding_find_cclass,
+    encoding_find_not_cclass,
+
+    encoding_get_graphemes,
+    unicode_compose,
+    encoding_decompose,
+
+    unicode_upcase,
+    unicode_downcase,
+    unicode_titlecase,
+    unicode_upcase_first,
+    unicode_downcase_first,
+    unicode_titlecase_first,
+
+    utf16_iter_get,
+    utf16_iter_skip,
+    utf16_iter_get_and_advance,
+    utf16_iter_set_and_advance,
+    utf16_iter_set_position
+};
 
-=cut
-
-*/
+STR_VTABLE *Parrot_utf16_encoding_ptr = &Parrot_utf16_encoding;
 
-void
-Parrot_encoding_utf16_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_encoding_utf16_init)
-    ENCODING * const return_encoding = Parrot_new_encoding(interp);
-
-    static const ENCODING base_encoding = {
-        "utf16",
-        4, /* Max bytes per codepoint 0 .. 0x10ffff */
-        to_encoding,
-        get_codepoint,
-        get_byte,
-        set_byte,
-        get_codepoints,
-        get_bytes,
-        codepoints,
-        bytes,
-        find_cclass,
-        NULL,
-        utf16_iter_get,
-        utf16_iter_skip,
-        utf16_iter_get_and_advance,
-        utf16_iter_set_and_advance,
-        utf16_iter_set_position
-    };
-    STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
-    Parrot_register_encoding(interp, "utf16", return_encoding);
-
-    return;
-}
 
 /*
 

Modified: trunk/src/string/encoding/utf16.h
==============================================================================
--- trunk/src/string/encoding/utf16.h	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/encoding/utf16.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -16,11 +16,7 @@
 /* HEADERIZER BEGIN: src/string/encoding/utf16.c */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-void Parrot_encoding_utf16_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
 
-#define ASSERT_ARGS_Parrot_encoding_utf16_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: src/string/encoding/utf16.c */
 

Modified: trunk/src/string/encoding/utf8.c
==============================================================================
--- trunk/src/string/encoding/utf8.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/encoding/utf8.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -21,74 +21,13 @@
 #include "parrot/parrot.h"
 #include "../unicode.h"
 #include "utf8.h"
+#include "shared.h"
 
 /* HEADERIZER HFILE: src/string/encoding/utf8.h */
 
 /* HEADERIZER BEGIN: static */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-PARROT_PURE_FUNCTION
-static UINTVAL bytes(SHIM_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(2);
-
-static UINTVAL codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL find_cclass(PARROT_INTERP,
-    SHIM(const STRING *s),
-    SHIM(const INTVAL *typetable),
-    SHIM(INTVAL flags),
-    SHIM(UINTVAL pos),
-    SHIM(UINTVAL end))
-        __attribute__nonnull__(1);
-
-static UINTVAL get_byte(SHIM_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_bytes(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static UINTVAL get_codepoint(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_codepoints(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static void set_byte(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL byte)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CAN_RETURN_NULL
-static STRING * to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static UINTVAL utf8_characters(PARROT_INTERP,
-    ARGIN(const utf8_t *ptr),
-    UINTVAL byte_len)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
 static UINTVAL utf8_decode(PARROT_INTERP, ARGIN(const utf8_t *ptr))
         __attribute__nonnull__(1)
         __attribute__nonnull__(2);
@@ -140,6 +79,20 @@
         __attribute__nonnull__(3)
         FUNC_MODIFIES(*i);
 
+static UINTVAL utf8_ord(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    UINTVAL offset)
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+static UINTVAL utf8_scan(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
+static UINTVAL utf8_scan2(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
 PARROT_WARN_UNUSED_RESULT
 PARROT_CANNOT_RETURN_NULL
 static const void * utf8_skip_backward(ARGIN(const void *ptr), UINTVAL n)
@@ -149,33 +102,11 @@
 static const void * utf8_skip_forward(ARGIN(const void *ptr), UINTVAL n)
         __attribute__nonnull__(1);
 
-#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_utf8_characters __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(ptr))
+PARROT_CAN_RETURN_NULL
+static STRING * utf8_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2);
+
 #define ASSERT_ARGS_utf8_decode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(ptr))
@@ -200,10 +131,22 @@
 #define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(str) \
     , PARROT_ASSERT_ARG(i))
+#define ASSERT_ARGS_utf8_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_utf8_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
+#define ASSERT_ARGS_utf8_scan2 __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
 #define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(ptr))
 #define ASSERT_ARGS_utf8_skip_forward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(ptr))
+#define ASSERT_ARGS_utf8_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: static */
 
@@ -229,25 +172,96 @@
     4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6      /* cjk etc. */
 };
 
+
 /*
 
-=item C<static UINTVAL utf8_characters(PARROT_INTERP, const utf8_t *ptr, UINTVAL
-byte_len)>
+=item C<static STRING * utf8_to_encoding(PARROT_INTERP, const STRING *src)>
 
-Returns the number of characters in the C<byte_len> bytes from C<*ptr>.
+Converts the string C<src> to the UTF-8 encoding and returns the result as a
+new string.  There is no C<dest> parameter and C<src> itself is never
+modified; a string that is already UTF-8 is simply cloned.
 
-XXX This function is unused.
+=cut
+
+*/
+
+PARROT_CAN_RETURN_NULL
+static STRING *
+utf8_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(utf8_to_encoding)
+    STRING *result;
+    const STR_VTABLE *src_encoding = src->encoding;
+    UINTVAL dest_len, dest_pos, src_len;
+    unsigned char *p;
+
+    if (src_encoding == Parrot_utf8_encoding_ptr)
+        return Parrot_str_clone(interp, src);
+
+    src_len          = src->strlen;
+    result           = Parrot_gc_new_string_header(interp, 0);
+    result->encoding = Parrot_utf8_encoding_ptr;
+    result->strlen   = src_len;
+
+    if (!src_len)
+        return result;
+
+    Parrot_gc_allocate_string_storage(interp, result, src_len);
+    p = (unsigned char *)result->strstart;
+
+    if (src_encoding == Parrot_ascii_encoding_ptr) {
+        for (dest_len = 0; dest_len < src_len; ++dest_len) {
+            p[dest_len] = ((unsigned char*)src->strstart)[dest_len];
+        }
+        result->bufused = dest_len;
+    }
+    else {
+        String_iter src_iter;
+        STRING_ITER_INIT(interp, &src_iter);
+        dest_len = src_len;
+        dest_pos = 0;
+        while (src_iter.charpos < src_len) {
+            const UINTVAL c = src_encoding->iter_get_and_advance(interp, src, &src_iter);
+            unsigned char *new_pos;
+            unsigned char *pos;
+
+            if (dest_len - dest_pos < 6) {
+                UINTVAL need = (UINTVAL)((src->strlen - src_iter.charpos + 1) * 1.5);
+                if (need < 16)
+                    need = 16;
+                dest_len += need;
+                result->bufused = dest_pos;
+                Parrot_gc_reallocate_string_storage(interp, result, dest_len);
+                p = (unsigned char *)result->strstart;
+            }
+
+            pos = p + dest_pos;
+            new_pos = (unsigned char *)utf8_encode(interp, pos, c);
+            dest_pos += (new_pos - pos);
+        }
+        result->bufused = dest_pos;
+    }
+
+    return result;
+}
+
+
+/*
+
+=item C<static UINTVAL utf8_scan(PARROT_INTERP, const STRING *src)>
+
+Returns the number of characters in string C<src> by scanning the string.
 
 =cut
 
 */
 
 static UINTVAL
-utf8_characters(PARROT_INTERP, ARGIN(const utf8_t *ptr), UINTVAL byte_len)
+utf8_scan(PARROT_INTERP, ARGIN(const STRING *src))
 {
-    ASSERT_ARGS(utf8_characters)
-    const utf8_t *u8ptr = ptr;
-    const utf8_t *u8end = u8ptr + byte_len;
+    ASSERT_ARGS(utf8_scan)
+    const utf8_t *u8ptr = (const utf8_t *)src->strstart;
+    const utf8_t *u8end = (const utf8_t *)(src->strstart + src->bufused);
     UINTVAL characters = 0;
 
     while (u8ptr < u8end) {
@@ -262,6 +276,53 @@
     return characters;
 }
 
+
+/*
+
+=item C<static UINTVAL utf8_scan2(PARROT_INTERP, const STRING *src)>
+
+Returns the number of codepoints in string C<src>.
+
+=cut
+
+*/
+
+static UINTVAL
+utf8_scan2(PARROT_INTERP, ARGIN(const STRING *src))
+{
+    ASSERT_ARGS(utf8_scan2)
+    String_iter iter;
+    /*
+     * this is used to initially calculate src->strlen,
+     * therefore we must scan the whole string
+     */
+    STRING_ITER_INIT(interp, &iter);
+    while (iter.bytepos < src->bufused)
+        utf8_iter_get_and_advance(interp, src, &iter);
+    return iter.charpos;
+}
+
+
+/*
+
+=item C<static UINTVAL utf8_ord(PARROT_INTERP, const STRING *src, UINTVAL
+offset)>
+
+Returns the codepoint in string C<src> at position C<offset>.
+
+=cut
+
+*/
+
+static UINTVAL
+utf8_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
+{
+    ASSERT_ARGS(utf8_ord)
+    const utf8_t * const start = (const utf8_t *)utf8_skip_forward(src->strstart, offset);
+    return utf8_decode(interp, start);
+}
+
+
 /*
 
 =item C<static UINTVAL utf8_decode(PARROT_INTERP, const utf8_t *ptr)>
@@ -306,6 +367,7 @@
     return c;
 }
 
+
 /*
 
 =item C<static void * utf8_encode(PARROT_INTERP, void *ptr, UINTVAL c)>
@@ -343,6 +405,7 @@
     return (utf8_t *)ptr + len;
 }
 
+
 /*
 
 =item C<static const void * utf8_skip_forward(const void *ptr, UINTVAL n)>
@@ -367,6 +430,7 @@
     return u8ptr;
 }
 
+
 /*
 
 =item C<static const void * utf8_skip_backward(const void *ptr, UINTVAL n)>
@@ -396,17 +460,6 @@
     return u8ptr;
 }
 
-/*
-
-=back
-
-=head2 Iterator Functions
-
-=over 4
-
-=cut
-
-*/
 
 /*
 
@@ -436,6 +489,7 @@
     return utf8_decode(interp, u8ptr);
 }
 
+
 /*
 
 =item C<static void utf8_iter_skip(PARROT_INTERP, const STRING *str, String_iter
@@ -465,6 +519,7 @@
     i->bytepos = (const char *)u8ptr - (const char *)str->strstart;
 }
 
+
 /*
 
 =item C<static UINTVAL utf8_iter_get_and_advance(PARROT_INTERP, const STRING
@@ -515,6 +570,7 @@
     return c;
 }
 
+
 /*
 
 =item C<static void utf8_iter_set_and_advance(PARROT_INTERP, STRING *str,
@@ -541,6 +597,7 @@
     i->charpos++;
 }
 
+
 /*
 
 =item C<static void utf8_iter_set_position(PARROT_INTERP, const STRING *str,
@@ -597,326 +654,50 @@
 }
 
 
-/*
-
-=item C<static STRING * to_encoding(PARROT_INTERP, const STRING *src)>
-
-Converts the string C<src> to this particular encoding.  If C<dest> is
-provided, it will contain the result.  Otherwise this function operates in
-place.
-
-=cut
-
-*/
-
-PARROT_CAN_RETURN_NULL
-static STRING *
-to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_encoding)
-    STRING *result;
-    const ENCODING *src_encoding;
-    UINTVAL dest_len, dest_pos, src_len;
-    unsigned char *p;
-
-    if (src->encoding == Parrot_utf8_encoding_ptr)
-        return Parrot_str_clone(interp, src);
-
-    result = Parrot_gc_new_string_header(interp, 0);
-    src_len = src->strlen;
-
-    /* save source encoding before possibly changing it */
-    src_encoding = src->encoding;
-    result->charset  = Parrot_unicode_charset_ptr;
-    result->encoding = Parrot_utf8_encoding_ptr;
-    result->strlen   = src_len;
-
-    if (!src->strlen)
-        return result;
-
-    Parrot_gc_allocate_string_storage(interp, result, src_len);
-    p = (unsigned char *)result->strstart;
-
-    if (src->charset == Parrot_ascii_charset_ptr) {
-        for (dest_len = 0; dest_len < src_len; ++dest_len) {
-            p[dest_len] = ((unsigned char*)src->strstart)[dest_len];
-        }
-        result->bufused = dest_len;
-    }
-    else {
-        String_iter src_iter;
-        STRING_ITER_INIT(interp, &src_iter);
-        dest_len = src_len;
-        dest_pos = 0;
-        while (src_iter.charpos < src_len) {
-            const UINTVAL c = src_encoding->iter_get_and_advance(interp, src, &src_iter);
-            unsigned char *new_pos;
-            unsigned char *pos;
-
-            if (dest_len - dest_pos < 6) {
-                UINTVAL need = (UINTVAL)((src->strlen - src_iter.charpos + 1) * 1.5);
-                if (need < 16)
-                    need = 16;
-                dest_len += need;
-                result->bufused = dest_pos;
-                Parrot_gc_reallocate_string_storage(interp, result, dest_len);
-                p = (unsigned char *)result->strstart;
-            }
-
-            pos = p + dest_pos;
-            new_pos = (unsigned char *)utf8_encode(interp, pos, c);
-            dest_pos += (new_pos - pos);
-        }
-        result->bufused = dest_pos;
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static UINTVAL get_codepoint(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
-
-Returns the codepoint in string C<src> at position C<offset>.
-
-=cut
-
-*/
-
-static UINTVAL
-get_codepoint(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(get_codepoint)
-    const utf8_t * const start = (const utf8_t *)utf8_skip_forward(src->strstart, offset);
-    return utf8_decode(interp, start);
-}
-
-
-/*
-
-=item C<static UINTVAL find_cclass(PARROT_INTERP, const STRING *s, const INTVAL
-*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
-
-Stub, the charset level handles this for unicode strings.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-find_cclass(PARROT_INTERP, SHIM(const STRING *s), SHIM(const INTVAL *typetable),
-SHIM(INTVAL flags), SHIM(UINTVAL pos), SHIM(UINTVAL end))
-{
-    Parrot_ex_throw_from_c_args(interp, NULL,
-        EXCEPTION_UNIMPLEMENTED,
-        "No find_cclass support in unicode encoding plugins");
-}
-
-/*
-
-=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
-
-Returns the byte in string C<src> at position C<offset>.
-
-=cut
-
-*/
-
-static UINTVAL
-get_byte(SHIM_INTERP, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(get_byte)
-    unsigned char *contents = (unsigned char *)src->strstart;
-    if (offset >= src->bufused) {
-/*        Parrot_ex_throw_from_c_args(interp, NULL, 0,
-                "get_byte past the end of the buffer (%i of %i)",
-                offset, src->bufused); */
-        return 0;
-    }
-    return contents[offset];
-}
-
-/*
-
-=item C<static void set_byte(PARROT_INTERP, const STRING *src, UINTVAL offset,
-UINTVAL byte)>
-
-Sets, in string C<src> at position C<offset>, the byte C<byte>.
-
-=cut
-
-*/
-
-static void
-set_byte(PARROT_INTERP, ARGIN(const STRING *src),
-        UINTVAL offset, UINTVAL byte)
-{
-    ASSERT_ARGS(set_byte)
-    unsigned char *contents;
-
-    if (offset >= src->bufused)
-        Parrot_ex_throw_from_c_args(interp, NULL, 0,
-            "set_byte past the end of the buffer");
-
-    contents = (unsigned char *)src->strstart;
-    contents[offset] = (unsigned char)byte;
-}
-
-/*
-
-=item C<static STRING * get_codepoints(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the codepoints in string C<src> at position C<offset> and length
-C<count>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_codepoints(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(get_codepoints)
-
-    STRING * const return_string = Parrot_str_copy(interp, src);
-    String_iter    iter;
-    UINTVAL        start;
-
-    STRING_ITER_INIT(interp, &iter);
-
-    if (offset)
-        utf8_iter_set_position(interp, src, &iter, offset);
-
-    start                   = iter.bytepos;
-    return_string->strstart = (char *)return_string->strstart + start;
-
-    if (count)
-        utf8_iter_set_position(interp, src, &iter, offset + count);
-
-    return_string->bufused  = iter.bytepos - start;
-    return_string->strlen   = count;
-    return_string->hashval  = 0;
-
-    return return_string;
-}
-
-/*
-
-=item C<static STRING * get_bytes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the bytes in string C<src> at position C<offset> and length C<count>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_bytes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(get_bytes)
-    STRING * const return_string = Parrot_str_copy(interp, src);
-
-    return_string->strstart = (char *)return_string->strstart + offset ;
-    return_string->bufused = count;
-
-    return_string->strlen = count;
-    return_string->hashval = 0;
-
-    return return_string;
-}
-
-
-
-/*
-
-=item C<static UINTVAL codepoints(PARROT_INTERP, const STRING *src)>
-
-Returns the number of codepoints in string C<src>.
-
-=cut
-
-*/
-
-static UINTVAL
-codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(codepoints)
-    String_iter iter;
-    /*
-     * this is used to initially calculate src->strlen,
-     * therefore we must scan the whole string
-     */
-    STRING_ITER_INIT(interp, &iter);
-    while (iter.bytepos < src->bufused)
-        utf8_iter_get_and_advance(interp, src, &iter);
-    return iter.charpos;
-}
-
-/*
-
-=item C<static UINTVAL bytes(PARROT_INTERP, const STRING *src)>
-
-Returns the number of bytes in string C<src>.
-
-=cut
-
-*/
-
-PARROT_PURE_FUNCTION
-static UINTVAL
-bytes(SHIM_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(bytes)
-    return src->bufused;
-}
-
-/*
-
-=item C<void Parrot_encoding_utf8_init(PARROT_INTERP)>
-
-Initializes the UTF-8 encoding.
-
-=cut
-
-*/
-
-void
-Parrot_encoding_utf8_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_encoding_utf8_init)
-    ENCODING * const return_encoding = Parrot_new_encoding(interp);
+static STR_VTABLE Parrot_utf8_encoding = {
+    0,
+    "utf8",
+    NULL,
+    4, /* Max bytes per codepoint */
+
+    utf8_to_encoding,
+    unicode_chr,
+
+    encoding_equal,
+    encoding_compare,
+    encoding_index,
+    encoding_rindex,
+    encoding_hash,
+    unicode_validate,
+
+    utf8_scan2,
+    utf8_ord,
+    encoding_substr,
+
+    encoding_is_cclass,
+    encoding_find_cclass,
+    encoding_find_not_cclass,
+
+    encoding_get_graphemes,
+    unicode_compose,
+    encoding_decompose,
+
+    unicode_upcase,
+    unicode_downcase,
+    unicode_titlecase,
+    unicode_upcase_first,
+    unicode_downcase_first,
+    unicode_titlecase_first,
+
+    utf8_iter_get,
+    utf8_iter_skip,
+    utf8_iter_get_and_advance,
+    utf8_iter_set_and_advance,
+    utf8_iter_set_position
+};
 
-    static const ENCODING base_encoding = {
-        "utf8",
-        4, /* Max bytes per codepoint 0 .. 0x10ffff */
-        to_encoding,
-        get_codepoint,
-        get_byte,
-        set_byte,
-        get_codepoints,
-        get_bytes,
-        codepoints,
-        bytes,
-        find_cclass,
-        NULL,
-        utf8_iter_get,
-        utf8_iter_skip,
-        utf8_iter_get_and_advance,
-        utf8_iter_set_and_advance,
-        utf8_iter_set_position
-    };
-    STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
-    Parrot_register_encoding(interp, "utf8", return_encoding);
+STR_VTABLE *Parrot_utf8_encoding_ptr = &Parrot_utf8_encoding;
 
-    return;
-}
 
 /*
 

Modified: trunk/src/string/encoding/utf8.h
==============================================================================
--- trunk/src/string/encoding/utf8.h	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/encoding/utf8.h	Tue Sep  7 22:58:38 2010	(r48833)
@@ -16,11 +16,7 @@
 /* HEADERIZER BEGIN: src/string/encoding/utf8.c */
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 
-void Parrot_encoding_utf8_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
 
-#define ASSERT_ARGS_Parrot_encoding_utf8_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: src/string/encoding/utf8.c */
 

Modified: trunk/src/string/primitives.c
==============================================================================
--- trunk/src/string/primitives.c	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/src/string/primitives.c	Tue Sep  7 22:58:38 2010	(r48833)
@@ -89,14 +89,15 @@
     UINTVAL workchar  = 0;
     UINTVAL charcount = 0;
     const UINTVAL len = Parrot_str_byte_length(interp, string);
+    const unsigned char * const buf = (unsigned char *)string->strstart;
 
     /* Well, not right now */
-    UINTVAL codepoint = CHARSET_GET_BYTE(interp, string, *offset);
+    UINTVAL codepoint = buf[*offset];
     ++*offset;
 
     switch (codepoint) {
       case 'x':
-        codepoint = CHARSET_GET_BYTE(interp, string, *offset);
+        codepoint = buf[*offset];
         if (codepoint >= '0' && codepoint <= '9') {
             workchar = codepoint - '0';
         }
@@ -111,7 +112,7 @@
             ++*offset;
             workchar = 0;
             for (i = 0; i < 8 && *offset < len; ++i, ++*offset) {
-                codepoint = CHARSET_GET_BYTE(interp, string, *offset);
+                codepoint = buf[*offset];
                 if (codepoint == '}') {
                     ++*offset;
                     return workchar;
@@ -145,7 +146,7 @@
         ++*offset;
         if (*offset < len) {
             workchar *= 16;
-            codepoint = CHARSET_GET_BYTE(interp, string, *offset);
+            codepoint = buf[*offset];
             if (codepoint >= '0' && codepoint <= '9') {
                 workchar += codepoint - '0';
             }
@@ -165,7 +166,7 @@
         ++*offset;
         return workchar;
       case 'c':
-        codepoint = CHARSET_GET_BYTE(interp, string, *offset);
+        codepoint = buf[*offset];
         if (codepoint >= 'A' && codepoint <= 'Z') {
             workchar = codepoint - 'A' + 1;
         }
@@ -181,7 +182,7 @@
         for (charcount = 0; charcount < 4; charcount++) {
             if (*offset < len) {
                 workchar *= 16;
-                codepoint = CHARSET_GET_BYTE(interp, string, *offset);
+                codepoint = buf[*offset];
                 if (codepoint >= '0' && codepoint <= '9') {
                     workchar += codepoint - '0';
                 }
@@ -211,7 +212,7 @@
         for (charcount = 0; charcount < 8; charcount++) {
             if (*offset < len) {
                 workchar *= 16;
-                codepoint = CHARSET_GET_BYTE(interp, string, *offset);
+                codepoint = buf[*offset];
                 if (codepoint >= '0' && codepoint <= '9') {
                     workchar += codepoint - '0';
                 }
@@ -247,7 +248,7 @@
         workchar = codepoint - '0';
         if (*offset < len) {
             workchar *= 8;
-            codepoint = CHARSET_GET_BYTE(interp, string, *offset);
+            codepoint = buf[*offset];
             if (codepoint >= '0' && codepoint <= '7') {
                 workchar += codepoint - '0';
             }
@@ -261,7 +262,7 @@
         ++*offset;
         if (*offset < len) {
             workchar *= 8;
-            codepoint = CHARSET_GET_BYTE(interp, string, *offset);
+            codepoint = buf[*offset];
             if (codepoint >= '0' && codepoint <= '7') {
                 workchar += codepoint - '0';
             }

Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/t/op/string_cs.t	Tue Sep  7 22:58:38 2010	(r48833)
@@ -337,7 +337,7 @@
     end
 CODE
 abc_\xc3\xa4_
-unicode
+utf8
 6
 OUTPUT
 

Modified: trunk/t/pmc/bytebuffer.t
==============================================================================
--- trunk/t/pmc/bytebuffer.t	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/t/pmc/bytebuffer.t	Tue Sep  7 22:58:38 2010	(r48833)
@@ -135,7 +135,7 @@
 
     bb = new ['ByteBuffer']
     bb = binary:"abcd"
-    s = bb.'get_string'('ascii', 'fixed_8')
+    s = bb.'get_string'('ascii')
     n = length s
     is(n, 4, "getting ascii from buffer gives correct length")
     is(s, "abcd", "getting ascii from buffer gives correct content")
@@ -161,7 +161,7 @@
     bb[0] = 0x00
     bb[1] = 0xD1
 doit:
-    s = bb.'get_string'('unicode', 'utf16')
+    s = bb.'get_string'('utf16')
     n = length s
     is(n, 1, "getting utf16 from buffer gives correct length")
     n = ord s
@@ -297,7 +297,7 @@
     if i < 8192 goto loopset
 
     .local string s
-    s = bb.'get_string'('unicode', 'utf16')
+    s = bb.'get_string'('utf16')
 
     # Check string size
     i = length s
@@ -350,7 +350,7 @@
     bb = new ['ByteBuffer']
     bb = 'something'
     push_eh catch_charset
-    s = bb.'get_string'('***INVALID cHARsET%%%%', 'fixed_8')
+    s = bb.'get_string'('***INVALID cHARsET%%%%')
     pop_eh
     ok(0, "get_string with invalid charset should throw")
     goto check_encoding

Modified: trunk/t/pmc/filehandle.t
==============================================================================
--- trunk/t/pmc/filehandle.t	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/t/pmc/filehandle.t	Tue Sep  7 22:58:38 2010	(r48833)
@@ -541,7 +541,7 @@
 
     \$I1 = charset line
     \$S2 = charsetname \$I1
-    if \$S2 == 'unicode' goto ok_3
+    if \$S2 == 'utf8' goto ok_3
     print \$S2
     print 'not '
   ok_3:

Modified: trunk/t/pmc/io.t
==============================================================================
--- trunk/t/pmc/io.t	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/t/pmc/io.t	Tue Sep  7 22:58:38 2010	(r48833)
@@ -658,7 +658,7 @@
     print $S1
 .end
 CODE
-unicode
+utf8
 utf8
 T\xf6tsch
 OUTPUT

Modified: trunk/tools/dev/gen_charset_tables.pl
==============================================================================
--- trunk/tools/dev/gen_charset_tables.pl	Tue Sep  7 22:20:33 2010	(r48832)
+++ trunk/tools/dev/gen_charset_tables.pl	Tue Sep  7 22:58:38 2010	(r48833)
@@ -23,7 +23,7 @@
     '$Id$' =~
     /^\$[iI][dD]:\s(.*) \$$/;
 my $fileid      = '$' . 'Id $';
-my $charset_dir = File::Spec->catdir(qw/ src charset /);
+my $charset_dir = File::Spec->catdir(qw/ src string encoding /);
 
 my $coda = <<'EOF';
 /*
@@ -110,7 +110,7 @@
 }
 
 #
-# create 'src/charset/tables.c'
+# create 'src/encoding/tables.c'
 #
 ###########################################################################
 my $c_file = File::Spec->catfile( $charset_dir, 'tables.c' );
@@ -129,7 +129,7 @@
 close STDOUT;
 
 #
-# create 'src/charset/tables.h'
+# create 'src/string/encoding/tables.h'
 #
 ###########################################################################
 my $h_file = File::Spec->catfile( $charset_dir, 'tables.h' );


More information about the parrot-commits mailing list