[svn:parrot] r49296 - in branches/html_cleanup: . compilers/data_json compilers/imcc compilers/pct compilers/pge compilers/tge config/auto/sizes config/auto/zlib docs/book/draft docs/book/pct docs/dev docs/pdds examples/embed examples/languages/abc examples/languages/squaak examples/pge examples/tools ext/nqp-rx include/parrot lib/Parrot lib/Parrot/Configure/Step ports/cygwin ports/debian runtime/parrot/languages runtime/parrot/library runtime/parrot/library/Math src/call src/gc src/interp src/pmc src/runcore src/string/charset src/string/encoding t/codingstd t/compilers/tge t/examples t/oo t/pmc t/src t/steps/init/hints tools/build tools/dev tools/release

mikehh at svn.parrot.org mikehh at svn.parrot.org
Fri Sep 24 13:04:06 UTC 2010


Author: mikehh
Date: Fri Sep 24 13:04:05 2010
New Revision: 49296
URL: https://trac.parrot.org/parrot/changeset/49296

Log:
html_cleanup - some files did not get deleted properly in merge (and property changes)

Deleted:
   branches/html_cleanup/src/pmc/imageio.pmc
   branches/html_cleanup/src/string/charset/ascii.c
   branches/html_cleanup/src/string/charset/binary.c
   branches/html_cleanup/src/string/charset/iso-8859-1.c
   branches/html_cleanup/src/string/charset/unicode.c
   branches/html_cleanup/src/string/encoding/fixed_8.c
Modified:
   branches/html_cleanup/   (props changed)
   branches/html_cleanup/compilers/data_json/Rules.mak   (props changed)
   branches/html_cleanup/compilers/imcc/Rules.in   (props changed)
   branches/html_cleanup/compilers/pct/Rules.mak   (props changed)
   branches/html_cleanup/compilers/pge/Rules.mak   (props changed)
   branches/html_cleanup/compilers/tge/Rules.mak   (props changed)
   branches/html_cleanup/config/auto/sizes/intval_maxmin_c.in   (props changed)
   branches/html_cleanup/config/auto/zlib/   (props changed)
   branches/html_cleanup/docs/book/draft/README   (props changed)
   branches/html_cleanup/docs/book/draft/appa_glossary.pod   (props changed)
   branches/html_cleanup/docs/book/draft/appb_patch_submission.pod   (props changed)
   branches/html_cleanup/docs/book/draft/appc_command_line_options.pod   (props changed)
   branches/html_cleanup/docs/book/draft/appd_build_options.pod   (props changed)
   branches/html_cleanup/docs/book/draft/appe_source_code.pod   (props changed)
   branches/html_cleanup/docs/book/draft/ch01_introduction.pod   (props changed)
   branches/html_cleanup/docs/book/draft/ch02_getting_started.pod   (props changed)
   branches/html_cleanup/docs/book/draft/ch07_dynpmcs.pod   (props changed)
   branches/html_cleanup/docs/book/draft/ch08_dynops.pod   (props changed)
   branches/html_cleanup/docs/book/draft/ch10_opcode_reference.pod   (props changed)
   branches/html_cleanup/docs/book/draft/ch11_directive_reference.pod   (props changed)
   branches/html_cleanup/docs/book/draft/ch12_operator_reference.pod   (props changed)
   branches/html_cleanup/docs/book/draft/chXX_hlls.pod   (props changed)
   branches/html_cleanup/docs/book/draft/chXX_library.pod   (props changed)
   branches/html_cleanup/docs/book/draft/chXX_testing_and_debugging.pod   (props changed)
   branches/html_cleanup/docs/book/pct/ch01_introduction.pod   (props changed)
   branches/html_cleanup/docs/book/pct/ch02_getting_started.pod   (props changed)
   branches/html_cleanup/docs/book/pct/ch03_compiler_tools.pod   (props changed)
   branches/html_cleanup/docs/book/pct/ch04_pge.pod   (props changed)
   branches/html_cleanup/docs/book/pct/ch05_nqp.pod   (props changed)
   branches/html_cleanup/docs/dev/c_functions.pod   (props changed)
   branches/html_cleanup/docs/pdds/pdd30_install.pod   (props changed)
   branches/html_cleanup/examples/embed/cotorra.c   (props changed)
   branches/html_cleanup/examples/languages/abc/   (props changed)
   branches/html_cleanup/examples/languages/squaak/   (props changed)
   branches/html_cleanup/examples/pge/demo.pir   (props changed)
   branches/html_cleanup/examples/tools/pgegrep   (props changed)
   branches/html_cleanup/ext/nqp-rx/Rules.mak   (props changed)
   branches/html_cleanup/include/parrot/call.h   (props changed)
   branches/html_cleanup/include/parrot/gc_api.h   (props changed)
   branches/html_cleanup/include/parrot/runcore_api.h   (props changed)
   branches/html_cleanup/include/parrot/runcore_profiling.h   (props changed)
   branches/html_cleanup/include/parrot/runcore_trace.h   (props changed)
   branches/html_cleanup/lib/Parrot/Configure/Step/Test.pm   (props changed)
   branches/html_cleanup/lib/Parrot/H2inc.pm   (props changed)
   branches/html_cleanup/ports/cygwin/parrot-1.0.0-1.cygport   (props changed)
   branches/html_cleanup/ports/debian/libparrot-dev.install.in   (props changed)
   branches/html_cleanup/ports/debian/libparrot.install.in   (props changed)
   branches/html_cleanup/ports/debian/parrot-doc.install.in   (props changed)
   branches/html_cleanup/ports/debian/parrot.install.in   (props changed)
   branches/html_cleanup/runtime/parrot/languages/   (props changed)
   branches/html_cleanup/runtime/parrot/library/Math/Rand.pir   (props changed)
   branches/html_cleanup/runtime/parrot/library/Rules.mak   (props changed)
   branches/html_cleanup/src/call/ops.c   (props changed)
   branches/html_cleanup/src/call/pcc.c   (props changed)
   branches/html_cleanup/src/gc/alloc_memory.c   (props changed)
   branches/html_cleanup/src/gc/alloc_resources.c   (props changed)
   branches/html_cleanup/src/gc/api.c   (props changed)
   branches/html_cleanup/src/gc/malloc.c   (props changed)
   branches/html_cleanup/src/gc/malloc_trace.c   (props changed)
   branches/html_cleanup/src/gc/mark_sweep.c   (props changed)
   branches/html_cleanup/src/gc/string_gc.c   (props changed)
   branches/html_cleanup/src/gc/system.c   (props changed)
   branches/html_cleanup/src/interp/inter_cb.c   (props changed)
   branches/html_cleanup/src/interp/inter_create.c   (props changed)
   branches/html_cleanup/src/interp/inter_misc.c   (props changed)
   branches/html_cleanup/src/runcore/cores.c   (props changed)
   branches/html_cleanup/src/runcore/main.c   (props changed)
   branches/html_cleanup/src/runcore/profiling.c   (props changed)
   branches/html_cleanup/src/runcore/trace.c   (props changed)
   branches/html_cleanup/t/codingstd/pmc_docs.t   (props changed)
   branches/html_cleanup/t/compilers/tge/NoneGrammar.tg   (props changed)
   branches/html_cleanup/t/examples/pgegrep.t   (props changed)
   branches/html_cleanup/t/oo/objects.t   (props changed)
   branches/html_cleanup/t/oo/root_new.t   (props changed)
   branches/html_cleanup/t/pmc/namespace-old.t   (props changed)
   branches/html_cleanup/t/src/embed.t   (props changed)
   branches/html_cleanup/t/steps/init/hints/linux-01.t   (props changed)
   branches/html_cleanup/tools/build/README   (props changed)
   branches/html_cleanup/tools/build/h2inc.pl   (props changed)
   branches/html_cleanup/tools/dev/README   (props changed)
   branches/html_cleanup/tools/dev/addopstags.pl   (props changed)
   branches/html_cleanup/tools/dev/dump_pbc.pl   (props changed)
   branches/html_cleanup/tools/dev/fetch_languages.pl   (props changed)
   branches/html_cleanup/tools/dev/headerizer.pl   (props changed)
   branches/html_cleanup/tools/dev/mk_gitignore.pl   (props changed)
   branches/html_cleanup/tools/dev/ncidef2pasm.pl   (props changed)
   branches/html_cleanup/tools/dev/parrot-config.pir   (props changed)
   branches/html_cleanup/tools/dev/perlcritic-cage.conf   (props changed)
   branches/html_cleanup/tools/dev/perlcritic.conf   (props changed)
   branches/html_cleanup/tools/dev/perltidy.conf   (props changed)
   branches/html_cleanup/tools/dev/update_copyright.pl   (props changed)
   branches/html_cleanup/tools/release/crow.pir   (props changed)
   branches/html_cleanup/tools/release/gen_release_info.pl   (props changed)
   branches/html_cleanup/tools/release/inc_ver.pir   (props changed)
   branches/html_cleanup/tools/release/release.json   (props changed)
   branches/html_cleanup/tools/release/templates.json   (props changed)

Deleted: branches/html_cleanup/src/pmc/imageio.pmc
==============================================================================
--- branches/html_cleanup/src/pmc/imageio.pmc	Fri Sep 24 13:04:05 2010	(r49295)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,793 +0,0 @@
-/*
-Copyright (C) 2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/pmc/imageio.pmc - ImageIO PMC
-
-=head1 DESCRIPTION
-
-Freezes and thaws other PMCs.
-
-=head1 FUNCTIONS
-
-=over 4
-
-=cut
-
-*/
-
-#define GROW_TO_16_BYTE_BOUNDARY(size) ((size) + ((size) % 16 ? 16 - (size) % 16 : 0))
-
-/* preallocate freeze image for aggregates with this estimation */
-#define FREEZE_BYTES_PER_ITEM 9
-
-/* macros/constants to handle packing/unpacking of PMC IDs and flags
- * the 2 LSBs are used for flags, all other bits are used for PMC ID
- */
-#define PackID_new(id, flags)       (((UINTVAL)(id) * 4) | ((UINTVAL)(flags) & 3))
-#define PackID_get_PMCID(id)        ((UINTVAL)(id) / 4)
-#define PackID_set_PMCID(lv, id)    (lv) = PackID_new((id), PackID_get_FLAGS(lv))
-#define PackID_get_FLAGS(id)        ((UINTVAL)(id) & 3)
-#define PackID_set_FLAGS(lv, flags) (lv) = PackID_new(PackID_get_PMCID(lv), (flags))
-
-enum {
-    enum_PackID_normal     = 0,
-    enum_PackID_seen       = 1,
-};
-
-/* HEADERIZER HFILE: none */
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-static void create_buffer(PARROT_INTERP,
-    ARGIN_NULLOK(PMC *pmc),
-    ARGMOD(PMC *info))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3)
-        FUNC_MODIFIES(*info);
-
-PARROT_INLINE
-static void ensure_buffer_size(PARROT_INTERP, ARGIN(PMC *io), size_t len)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_INLINE
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static opcode_t * GET_VISIT_CURSOR(ARGIN(const PMC *pmc))
-        __attribute__nonnull__(1);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CAN_RETURN_NULL
-PARROT_INLINE
-static PMC* id_list_get(PARROT_INTERP, ARGIN(const PMC *io), UINTVAL id)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_INLINE
-static void INC_VISIT_CURSOR(ARGMOD(PMC *pmc), UINTVAL inc)
-        __attribute__nonnull__(1)
-        FUNC_MODIFIES(*pmc);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_INLINE
-static INTVAL INFO_HAS_DATA(ARGIN(const PMC *io))
-        __attribute__nonnull__(1);
-
-PARROT_INLINE
-static void SET_VISIT_CURSOR(ARGMOD(PMC *pmc), ARGIN(const char *cursor))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        FUNC_MODIFIES(*pmc);
-
-#define ASSERT_ARGS_create_buffer __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(info))
-#define ASSERT_ARGS_ensure_buffer_size __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(io))
-#define ASSERT_ARGS_GET_VISIT_CURSOR __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(pmc))
-#define ASSERT_ARGS_id_list_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(io))
-#define ASSERT_ARGS_INC_VISIT_CURSOR __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(pmc))
-#define ASSERT_ARGS_INFO_HAS_DATA __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(io))
-#define ASSERT_ARGS_SET_VISIT_CURSOR __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(pmc) \
-    , PARROT_ASSERT_ARG(cursor))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-/*
-
-=item C<static opcode_t * GET_VISIT_CURSOR(const PMC *pmc)>
-
-=cut
-
-*/
-
-PARROT_INLINE
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static opcode_t *
-GET_VISIT_CURSOR(ARGIN(const PMC *pmc))
-{
-    ASSERT_ARGS(GET_VISIT_CURSOR)
-
-    char * const buf = (char *)Buffer_bufstart(PARROT_IMAGEIO(pmc)->buffer);
-    const size_t pos = PARROT_IMAGEIO(pmc)->pos;
-    return (opcode_t *)(buf + pos);
-}
-
-/*
-
-=item C<static void SET_VISIT_CURSOR(PMC *pmc, const char *cursor)>
-
-=cut
-
-*/
-
-
-PARROT_INLINE
-static void
-SET_VISIT_CURSOR(ARGMOD(PMC *pmc), ARGIN(const char *cursor))
-{
-    ASSERT_ARGS(SET_VISIT_CURSOR)
-
-    const char * const bufstart  = (const char *)Buffer_bufstart(PARROT_IMAGEIO(pmc)->buffer);
-    PARROT_IMAGEIO(pmc)->pos = (cursor - bufstart);
-}
-
-/*
-
-=item C<static void INC_VISIT_CURSOR(PMC *pmc, UINTVAL inc)>
-
-=cut
-
-*/
-
-
-PARROT_INLINE
-static void
-INC_VISIT_CURSOR(ARGMOD(PMC *pmc), UINTVAL inc)
-{
-    ASSERT_ARGS(INC_VISIT_CURSOR)
-
-    PARROT_IMAGEIO(pmc)->pos += inc;
-}
-
-
-#define BYTECODE_SHIFT_OK(pmc) PARROT_ASSERT( \
-    PARROT_IMAGEIO(pmc)->pos <= PARROT_IMAGEIO(pmc)->input_length)
-
-/*
-
-=item C<static void create_buffer(PARROT_INTERP, PMC *pmc, PMC *info)>
-
-=cut
-
-*/
-
-static void
-create_buffer(PARROT_INTERP, ARGIN_NULLOK(PMC *pmc), ARGMOD(PMC *info))
-{
-    ASSERT_ARGS(create_buffer)
-
-    INTVAL  len;
-
-    if (!PMC_IS_NULL(pmc)) {
-        STRING * const array = CONST_STRING(interp, "array");
-        STRING * const hash  = CONST_STRING(interp, "hash");
-        INTVAL         items = 1;
-
-        if (VTABLE_does(interp, pmc, array) || VTABLE_does(interp, pmc, hash))
-            items += VTABLE_elements(interp, pmc);
-
-        len = items * FREEZE_BYTES_PER_ITEM;
-    }
-    else
-        len = FREEZE_BYTES_PER_ITEM;
-
-    PARROT_IMAGEIO(info)->buffer =
-        Parrot_gc_new_bufferlike_header(interp, sizeof (Buffer));
-    Parrot_gc_allocate_buffer_storage_aligned(interp,
-        PARROT_IMAGEIO(info)->buffer, len);
-    SET_VISIT_CURSOR(info,
-        (const char *)Buffer_bufstart(PARROT_IMAGEIO(info)->buffer));
-}
-
-/*
-
-=item C<static void ensure_buffer_size(PARROT_INTERP, PMC *io, size_t len)>
-
-Checks the size of the buffer to see if it can accommodate C<len> more
-bytes. If not, expands the buffer.
-
-=cut
-
-*/
-
-PARROT_INLINE
-static void
-ensure_buffer_size(PARROT_INTERP, ARGIN(PMC *io), size_t len)
-{
-    ASSERT_ARGS(ensure_buffer_size)
-
-    Buffer * const buf  = PARROT_IMAGEIO(io)->buffer;
-    const size_t used   = PARROT_IMAGEIO(io)->pos;
-    const int need_free = Buffer_buflen(buf) - used - len;
-
-    /* grow by factor 1.5 or such */
-    if (need_free <= 16) {
-        size_t new_size = (size_t) (Buffer_buflen(buf) * 1.5);
-
-        if (new_size < Buffer_buflen(buf) - need_free + 512)
-            new_size = Buffer_buflen(buf) - need_free + 512;
-
-        Parrot_gc_reallocate_buffer_storage(interp, buf, new_size);
-        PARROT_ASSERT(Buffer_buflen(buf) - used - len >= 15);
-    }
-
-#ifndef DISABLE_GC_DEBUG
-    Parrot_gc_compact_memory_pool(interp);
-#endif
-}
-
-/*
-
-=item C<static INTVAL INFO_HAS_DATA(const PMC *io)>
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_INLINE
-static INTVAL
-INFO_HAS_DATA(ARGIN(const PMC *io))
-{
-    ASSERT_ARGS(INFO_HAS_DATA)
-
-    return PARROT_IMAGEIO(io)->pos < PARROT_IMAGEIO(io)->input_length;
-}
-
-/*
-
-=item C<static PMC* id_list_get(PARROT_INTERP, const PMC *io, UINTVAL id)>
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CAN_RETURN_NULL
-PARROT_INLINE
-static PMC*
-id_list_get(PARROT_INTERP, ARGIN(const PMC *io), UINTVAL id)
-{
-    ASSERT_ARGS(id_list_get)
-
-    return VTABLE_get_pmc_keyed_int(interp, PARROT_IMAGEIO(io)->todo, id - 1);
-}
-
-pmclass ImageIO auto_attrs {
-    ATTR Buffer              *buffer;      /* buffer to store the image */
-    ATTR size_t               pos;         /* current read/write buf position */
-    ATTR size_t               input_length;
-    ATTR INTVAL               what;
-    ATTR PMC                 *seen;        /* seen hash */
-    ATTR PMC                 *todo;        /* todo list */
-    ATTR UINTVAL              id;          /* freze ID of PMC */
-    ATTR struct PackFile     *pf;
-    ATTR PackFile_ConstTable *pf_ct;
-
-/*
-
-=back
-
-=head1 VTABLES
-
-=over 4
-
-=cut
-
-*/
-
-/*
-
-=item C<void init()>
-
-Initializes the PMC.
-
-=cut
-
-*/
-    VTABLE void init() {
-        PARROT_IMAGEIO(SELF)->seen = PMCNULL;
-        PARROT_IMAGEIO(SELF)->todo =
-            Parrot_pmc_new(INTERP, enum_class_ResizablePMCArray);
-
-        PObj_flag_CLEAR(private1, SELF);
-
-        PObj_custom_mark_SET(SELF);
-    }
-
-
-/*
-
-=item C<void destroy()>
-
-Destroys the PMC.
-
-=cut
-
-*/
-    VTABLE void destroy() {
-        PackFile_destroy(INTERP, PARROT_IMAGEIO(SELF)->pf);
-        PARROT_IMAGEIO(SELF)->pf = NULL;
-    }
-
-
-/*
-
-=item C<void mark()>
-
-Marks the PMC as alive.
-
-=cut
-
-*/
-    VTABLE void mark() {
-        PObj * const buffer = (PObj *)(PARROT_IMAGEIO(SELF)->buffer);
-        if (buffer)
-            Parrot_gc_mark_PObj_alive(INTERP, buffer);
-        Parrot_gc_mark_PMC_alive(INTERP, PARROT_IMAGEIO(SELF)->todo);
-        Parrot_gc_mark_PMC_alive(INTERP, PARROT_IMAGEIO(SELF)->seen);
-    }
-
-
-/*
-
-=item C<STRING *get_string()>
-
-Returns the content of the image as a string.
-
-=cut
-
-*/
-
-    VTABLE STRING *get_string() {
-        return Parrot_str_new_from_buffer(INTERP,
-                                          PARROT_IMAGEIO(SELF)->buffer,
-                                          PARROT_IMAGEIO(SELF)->pos);
-    }
-
-
-/*
-
-=item C<VTABLE PMC *get_pmc()>
-
-Gets the result PMC after a thaw.
-
-=cut
-
-*/
-
-    VTABLE PMC *get_pmc() {
-        return VTABLE_get_pmc_keyed_int(INTERP,
-            (PARROT_IMAGEIO(SELF))->todo, 0);
-    }
-
-
-/*
-
-=item C<VTABLE PMC *get_iter()>
-
-Get the C<todo> list for this freeze/thaw for iterating over.
-
-=cut
-
-*/
-
-    VTABLE PMC *get_iter() {
-        return PARROT_IMAGEIO(SELF)->todo;
-    }
-
-/*
-
-=item C<VTABLE INTVAL get_integer()>
-
-Returns the flags describing the visit action.
-
-=cut
-
-*/
-
-    VTABLE INTVAL get_integer() {
-        return PARROT_IMAGEIO(SELF)->what;
-    }
-
-
-/*
-
-=item C<VTABLE void push_integer(INTVAL v)>
-
-Pushes the integer C<v> onto the end of the image.
-
-=cut
-
-*/
-
-    VTABLE void push_integer(INTVAL v) {
-        const size_t len = PF_size_integer() * sizeof (opcode_t);
-        ensure_buffer_size(INTERP, SELF, len);
-        SET_VISIT_CURSOR(SELF,
-            (const char *)PF_store_integer(GET_VISIT_CURSOR(SELF), v));
-    }
-
-
-/*
-
-=item C<VTABLE void push_float(FLOATVAL v)>
-
-Pushes the float C<v> onto the end of the image.
-
-=cut
-
-*/
-
-    VTABLE void push_float(FLOATVAL v) {
-        const size_t len = PF_size_number() * sizeof (opcode_t);
-        ensure_buffer_size(INTERP, SELF, len);
-        SET_VISIT_CURSOR(SELF,
-            (const char *)PF_store_number(GET_VISIT_CURSOR(SELF), &v));
-    }
-
-
-/*
-
-=item C<VTABLE void push_string(STRING *v)>
-
-Pushes the string C<*v> onto the end of the image.
-
-=cut
-
-*/
-
-    VTABLE void push_string(STRING *v) {
-        if (PObj_flag_TEST(private1, SELF)) {
-            /* store a reference to constant table entry of string */
-            PMC                 * const v_pmc = key_new_string(interp, v);
-            PackFile_ConstTable * const table = PARROT_IMAGEIO(SELF)->pf_ct;
-            const int idx =
-                PackFile_ConstTable_rlookup(INTERP, table, v_pmc, PFC_STRING);
-
-            if (idx >= 0) {
-                STATICSELF.push_integer(idx);
-                return;
-            }
-
-            /* XXX handle cases where the PMC has changed after
-             * Parrot_freeze_strings was called eg: :immediate subs */
-            STATICSELF.push_integer(-1);
-
-            /* TODO
-             * should really be:
-             * PANIC(INTERP, "string not previously in constant table "
-             *               "when freezing to packfile"); */
-        }
-
-        {
-            const size_t len = PF_size_string(v) * sizeof (opcode_t);
-            ensure_buffer_size(INTERP, SELF, len);
-            SET_VISIT_CURSOR(SELF,
-                (const char *)PF_store_string(GET_VISIT_CURSOR(SELF), v));
-        }
-    }
-
-
-/*
-
-=item C<VTABLE void push_pmc(PMC *v)>
-
-Pushes a reference to pmc C<*v> onto the end of the image. If C<*v>
-hasn't been seen yet, it is also pushed onto the todo list.
-
-=cut
-
-*/
-
-    VTABLE void push_pmc(PMC *v) {
-        UINTVAL id;
-        int packid_type;
-
-        PARROT_ASSERT(PARROT_IMAGEIO(SELF)->what == VISIT_FREEZE_NORMAL);
-
-        if (PMC_IS_NULL(v)) {
-            id   = 0;
-            packid_type = enum_PackID_seen;
-        }
-        else {
-            Hash * const hash = (Hash *)VTABLE_get_pointer(INTERP,
-                    PARROT_IMAGEIO(SELF)->seen);
-            HashBucket * const b = parrot_hash_get_bucket(INTERP, hash, v);
-
-            if (b) {
-                id = (UINTVAL)b->value;
-                packid_type = enum_PackID_seen;
-            }
-            else {
-                ++PARROT_IMAGEIO(SELF)->id; /* next id to freeze */
-                id = PARROT_IMAGEIO(SELF)->id;
-                packid_type = enum_PackID_normal;
-            }
-        }
-
-        SELF.push_integer(PackID_new(id, packid_type));
-
-        if (packid_type == enum_PackID_normal) {
-            Hash * const hash = (Hash *)VTABLE_get_pointer(INTERP,
-                    PARROT_IMAGEIO(SELF)->seen);
-
-            PARROT_ASSERT(v);
-
-            /* workaround to keep ParrotInterpreter PBC hack working */
-            if (v->vtable->base_type == enum_class_ParrotInterpreter)
-                PObj_flag_CLEAR(private1, SELF);
-
-            SELF.push_integer(
-                    PObj_is_object_TEST(v)
-                    ? (INTVAL) enum_class_Object
-                    : v->vtable->base_type);
-
-            parrot_hash_put(INTERP, hash, v, (void *)id);
-            VTABLE_push_pmc(INTERP, PARROT_IMAGEIO(SELF)->todo, v);
-        }
-    }
-
-
-/*
-
-=item C<void set_pointer(void *value)>
-
-Sets the constant table of this ImageIO PMC.
-
-=cut
-
-*/
-
-    VTABLE void set_pointer(void *value) {
-        PObj_flag_SET(private1, SELF);
-        PARROT_IMAGEIO(SELF)->pf_ct = (PackFile_ConstTable *)value;
-    }
-
-
-/*
-
-=item C<VTABLE INTVAL shift_integer()>
-
-Removes and returns an integer from the start of the image.
-
-=cut
-
-*/
-
-    VTABLE INTVAL shift_integer() {
-        /* inlining PF_fetch_integer speeds up PBC thawing measurably */
-        const PackFile      *pf     = PARROT_IMAGEIO(SELF)->pf;
-        const opcode_t      *pos    = GET_VISIT_CURSOR(SELF);
-        const unsigned char *stream = (const unsigned char *)pos;
-        const INTVAL         i      = pf->fetch_iv(stream);
-
-        SET_VISIT_CURSOR(SELF, (const char *)pos + pf->header->wordsize);
-        BYTECODE_SHIFT_OK(SELF);
-        return i;
-    }
-
-
-/*
-
-=item C<VTABLE FLOATVAL shift_float()>
-
-Removes and returns an number from the start of the image.
-
-=cut
-
-*/
-
-    VTABLE FLOATVAL shift_float() {
-        const opcode_t *pos = GET_VISIT_CURSOR(SELF);
-        FLOATVAL        f   = PF_fetch_number(PARROT_IMAGEIO(SELF)->pf, &pos);
-        SET_VISIT_CURSOR(SELF, (const char *)pos);
-        BYTECODE_SHIFT_OK(SELF);
-        return f;
-    }
-
-
-/*
-
-=item C<VTABLE STRING* shift_string()>
-
-Removes and returns a string from the start of the image.
-
-=cut
-
-*/
-
-    VTABLE STRING *shift_string() {
-        if (PObj_flag_TEST(private1, SELF)) {
-            const INTVAL i = STATICSELF.shift_integer();
-
-            if (i >= 0) {
-                PackFile_ConstTable *table = PARROT_IMAGEIO(SELF)->pf_ct;
-
-                if (!table->constants[i].type)
-                    Parrot_ex_throw_from_c_args(interp, NULL,
-                            EXCEPTION_MALFORMED_PACKFILE,
-                            "Reference to constant not yet unpacked %d", i);
-                return table->constants[i].u.string;
-            }
-
-            /* XXX
-             * only got here because constant table doesn't contain the string
-             * fallback on inline strings
-             */
-        }
-
-        {
-            const opcode_t * pos     = GET_VISIT_CURSOR(SELF);
-            STRING         * const s = PF_fetch_string(INTERP,
-                                    PARROT_IMAGEIO(SELF)->pf, &pos);
-            SET_VISIT_CURSOR(SELF, (const char *)pos);
-            BYTECODE_SHIFT_OK(SELF);
-            return s;
-        }
-    }
-
-
-/*
-
-=item C<static PMC *shift_pmc()>
-
-Removes and returns a reference to a pmc from the start of the image.
-
-=cut
-
-*/
-
-    VTABLE PMC *shift_pmc() {
-        const UINTVAL  n            = SELF.shift_integer();
-        const INTVAL   id           = PackID_get_PMCID(n);
-        const int      packid_flags = PackID_get_FLAGS(n);
-        PMC           *pmc          = PMCNULL;
-
-        PARROT_ASSERT(PARROT_IMAGEIO(SELF)->what == VISIT_THAW_NORMAL);
-
-        switch (packid_flags) {
-            case enum_PackID_seen:
-                if (id) /* got a non-NULL PMC */
-                    pmc = id_list_get(INTERP, SELF, id);
-                break;
-            case enum_PackID_normal:
-                {
-                    PMC * const  todo = PARROT_IMAGEIO(SELF)->todo;
-                    const INTVAL type = VTABLE_shift_integer(INTERP, SELF);
-
-                    PARROT_ASSERT(id - 1
-                            == VTABLE_elements(INTERP, PARROT_IMAGEIO(SELF)->todo));
-
-                    if (type <= 0 || type > INTERP->n_vtable_max)
-                        Parrot_ex_throw_from_c_args(INTERP, NULL, 1,
-                                "Unknown PMC type to thaw %d", type);
-
-                    /* workaround to keep ParrotInterpreter PBC hack working */
-                    if (type == enum_class_ParrotInterpreter)
-                        PObj_flag_CLEAR(private1, SELF);
-
-                    pmc = Parrot_pmc_new_noinit(INTERP, type);
-
-                    VTABLE_set_pmc_keyed_int(INTERP, todo, id - 1, pmc);
-                }
-                break;
-            default:
-                Parrot_ex_throw_from_c_args(INTERP, NULL, 1,
-                        "Unknown PMC id args thaw %d", packid_flags);
-                break;
-        }
-
-        return pmc;
-    }
-
-    VTABLE void set_pmc(PMC *p)
-    {
-        PARROT_IMAGEIO(SELF)->what  = VISIT_FREEZE_NORMAL;
-
-        create_buffer(INTERP, p, SELF);
-        if (PObj_flag_TEST(private1, SELF)) {
-            PARROT_IMAGEIO(SELF)->pf = PARROT_IMAGEIO(SELF)->pf_ct->base.pf;
-        }
-        else {
-            const UINTVAL header_length =
-                GROW_TO_16_BYTE_BOUNDARY(PACKFILE_HEADER_BYTES);
-
-            PARROT_IMAGEIO(SELF)->pf = PackFile_new(INTERP, 0);
-            PObj_custom_destroy_SET(SELF);
-
-            ensure_buffer_size(INTERP, SELF, header_length);
-            mem_sys_memcopy(GET_VISIT_CURSOR(SELF),
-                PARROT_IMAGEIO(SELF)->pf->header, PACKFILE_HEADER_BYTES);
-            INC_VISIT_CURSOR(SELF, header_length);
-        }
-
-        PARROT_IMAGEIO(SELF)->seen = Parrot_pmc_new(INTERP, enum_class_Hash);
-        VTABLE_set_pointer(INTERP, PARROT_IMAGEIO(SELF)->seen,
-            parrot_new_intval_hash(INTERP));
-
-        STATICSELF.push_pmc(p);
-        Parrot_visit_loop_visit(INTERP, SELF);
-    }
-
-    VTABLE void set_string_native(STRING *image) {
-        PMC          *unused;
-        PARROT_IMAGEIO(SELF)->what   = VISIT_THAW_NORMAL;
-        PARROT_IMAGEIO(SELF)->buffer = (Buffer *)image;
-
-        PARROT_ASSERT(image->_bufstart == image->strstart);
-
-        SET_VISIT_CURSOR(SELF,
-            (const char *)Buffer_bufstart(PARROT_IMAGEIO(SELF)->buffer));
-        PARROT_IMAGEIO(SELF)->input_length = image->strlen;
-
-        if (PObj_flag_TEST(private1, SELF)) {
-            PARROT_IMAGEIO(SELF)->pf = PARROT_IMAGEIO(SELF)->pf_ct->base.pf;
-        }
-        else {
-            const UINTVAL header_length =
-                 GROW_TO_16_BYTE_BOUNDARY(PACKFILE_HEADER_BYTES);
-            int unpacked_length;
-
-            PARROT_IMAGEIO(SELF)->pf   = PackFile_new(INTERP, 0);
-            PObj_custom_destroy_SET(SELF);
-
-            PARROT_IMAGEIO(SELF)->pf->options |= PFOPT_PMC_FREEZE_ONLY;
-            unpacked_length = PackFile_unpack(INTERP, PARROT_IMAGEIO(SELF)->pf,
-                GET_VISIT_CURSOR(SELF), PARROT_IMAGEIO(SELF)->input_length);
-
-            if (unpacked_length)
-                INC_VISIT_CURSOR(SELF, header_length);
-            else
-                Parrot_ex_throw_from_c_args(INTERP, NULL,
-                        EXCEPTION_INVALID_STRING_REPRESENTATION,
-                        "PackFile header failed during unpack");
-        }
-
-        unused = STATICSELF.shift_pmc();
-        Parrot_visit_loop_visit(INTERP, SELF);
-
-        /* we're done reading the image */
-        PARROT_ASSERT(!INFO_HAS_DATA(SELF));
-        Parrot_visit_loop_thawfinish(INTERP, SELF);
-    }
-
-
-/*
-
-=back
-
-=cut
-
-*/
-
-}
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: branches/html_cleanup/src/string/charset/ascii.c
==============================================================================
--- branches/html_cleanup/src/string/charset/ascii.c	Fri Sep 24 13:04:05 2010	(r49295)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,876 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/ascii.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for ascii data and common
-charset functionality for similar charsets like iso-8859-1.
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "ascii.h"
-
-/*
- * TODO check interpreter error and warnings setting
- */
-
-#include "tables.h"
-
-/* HEADERIZER HFILE: src/string/charset/ascii.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* compose(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL find_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL find_not_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL is_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_ascii(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL validate(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_ascii __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-/*
-
-=item C<STRING * ascii_get_graphemes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Retrieves the graphemes for the STRING C<src>, starting at
-C<offset> and ending at C<offset + count>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-STRING *
-ascii_get_graphemes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(ascii_get_graphemes)
-    return ENCODING_GET_BYTES(interp, src, offset, count);
-}
-
-/*
-
-=item C<static STRING * to_ascii(PARROT_INTERP, const STRING *src)>
-
-Converts STRING C<src> to ASCII and returns the result as a new STRING.
-Throws an exception if C<src> contains a codepoint outside the ASCII range.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_ascii(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_ascii)
-    String_iter iter;
-    unsigned char *p;
-    const UINTVAL len = src->strlen;
-
-    /* the string can't grow. Just clone it */
-    STRING * const dest = Parrot_str_clone(interp, src);
-
-    p = (unsigned char *)dest->strstart;
-    STRING_ITER_INIT(interp, &iter);
-    while (iter.charpos < len) {
-        const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (c >= 128)
-            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
-                    "can't convert unicode string to ascii");
-        *p++ = (unsigned char)c;
-    }
-    dest->bufused = len;
-    dest->strlen = len;
-    dest->charset = Parrot_ascii_charset_ptr;
-    dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest);
-    return dest;
-}
-
-/*
-
-=item C<static STRING * to_charset(PARROT_INTERP, const STRING *src)>
-
-Converts STRING C<src> to the ASCII charset and returns the converted STRING.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_charset)
-    const charset_converter_t conversion_func =
-        Parrot_find_charset_converter(interp, src->charset, Parrot_ascii_charset_ptr);
-
-    if (conversion_func) {
-         return conversion_func(interp, src);
-    }
-    else {
-        return to_ascii(interp, src);
-    }
-}
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-Can't compose ASCII strings, so performs a string copy on it and
-returns the new string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-compose(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(compose)
-
-    STRING * const dest = Parrot_str_copy(interp, src);
-
-    return dest;
-}
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-Can't decompose ASCII, so we perform a string copy instead and return
-a pointer to the new string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(decompose)
-
-    STRING * const dest = Parrot_str_copy(interp, src);
-
-    return dest;
-}
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to all uppercase.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(upcase)
-    STRING * const result = Parrot_str_clone(interp, src);
-    const UINTVAL n = src->strlen;
-
-    if (n) {
-        char * const buffer = result->strstart;
-        UINTVAL offset;
-
-        for (offset = 0; offset < n; ++offset) {
-            buffer[offset] = (char)toupper((unsigned char)buffer[offset]);
-        }
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to all lower-case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(downcase)
-    STRING       *result = Parrot_str_clone(interp, src);
-    const UINTVAL n      = src->strlen;
-
-    if (n) {
-        char * const buffer = result->strstart;
-        UINTVAL offset;
-
-        for (offset = 0; offset < n; ++offset) {
-            buffer[offset] = (char)tolower((unsigned char)buffer[offset]);
-        }
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING given by C<src> to title case, where
-the first character is upper case and all the rest of the characters
-are lower-case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(titlecase)
-    STRING       *result = Parrot_str_clone(interp, src);
-    const UINTVAL n      = src->strlen;
-
-    if (n) {
-        char * const buffer = result->strstart;
-        UINTVAL offset;
-
-        buffer[0] = (char)toupper((unsigned char)buffer[0]);
-        for (offset = 1; offset < n; ++offset) {
-            buffer[offset] = (char)tolower((unsigned char)buffer[offset]);
-        }
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Sets the first character in the STRING C<src> to upper case,
-but doesn't modify the rest of the string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(upcase_first)
-    STRING * const result = Parrot_str_clone(interp, src);
-
-    if (result->strlen > 0) {
-        char * const buffer = result->strstart;
-        buffer[0] = (char)toupper((unsigned char)buffer[0]);
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Sets the first character of the STRING C<src> to lowercase,
-but doesn't modify the rest of the characters.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(downcase_first)
-    STRING * const result = Parrot_str_clone(interp, src);
-
-    if (result->strlen > 0) {
-        char * const buffer = result->strstart;
-        buffer[0] = (char)tolower((unsigned char)buffer[0]);
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first letter of STRING C<src> to upper case,
-but doesn't modify the rest of the string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(titlecase_first)
-    STRING * const result = Parrot_str_clone(interp, src);
-
-    if (result->strlen > 0) {
-        char * const buffer = result->strstart;
-        buffer[0] = (char)toupper((unsigned char)buffer[0]);
-    }
-
-    return result;
-}
-
-/*
-
-=item C<INTVAL ascii_compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
-
-Compares two strings as ASCII strings. Returns 1 if C<lhs> is greater than
-C<rhs>, 0 if the two are equal, and -1 if C<lhs> is less than C<rhs>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-ascii_compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
-{
-    ASSERT_ARGS(ascii_compare)
-    const UINTVAL l_len = lhs->strlen;
-    const UINTVAL r_len = rhs->strlen;
-    const UINTVAL min_len = l_len > r_len ? r_len : l_len;
-    String_iter iter;
-
-    if (lhs->encoding == rhs->encoding) {
-        const int ret_val = memcmp(lhs->strstart, rhs->strstart, min_len);
-        if (ret_val)
-            return ret_val < 0 ? -1 : 1;
-    }
-    else {
-        STRING_ITER_INIT(interp, &iter);
-        while (iter.charpos < min_len) {
-            const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, iter.charpos);
-            const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &iter);
-            if (cl != cr)
-                return cl < cr ? -1 : 1;
-        }
-    }
-    if (l_len < r_len) {
-        return -1;
-    }
-    if (l_len > r_len) {
-        return 1;
-    }
-    return 0;
-}
-
-/*
-
-=item C<INTVAL mixed_cs_index(PARROT_INTERP, const STRING *src, const STRING
-*search, UINTVAL offs)>
-
-Searches for the first instance of STRING C<search> in STRING C<src>,
-starting at C<offs>. Returns the position where the substring is found,
-or -1 if it is not found. Operates on different types of strings, not just
-ASCII.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-mixed_cs_index(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search),
-    UINTVAL offs)
-{
-    ASSERT_ARGS(mixed_cs_index)
-    String_iter start, end;
-
-    STRING_ITER_INIT(interp, &start);
-    STRING_ITER_SET_POSITION(interp, src, &start, offs);
-
-    return Parrot_str_iter_index(interp, src, &start, &end, search);
-}
-
-/*
-
-=item C<INTVAL ascii_cs_index(PARROT_INTERP, const STRING *src, const STRING
-*search_string, UINTVAL offset)>
-
-Searches for the first instance of STRING C<search_string> in STRING
-C<src>, starting at C<offset>. Returns the position where the substring
-is found, or -1 if it is not found.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-ascii_cs_index(PARROT_INTERP, ARGIN(const STRING *src),
-        ARGIN(const STRING *search_string), UINTVAL offset)
-{
-    ASSERT_ARGS(ascii_cs_index)
-    INTVAL retval;
-    if (src->charset != search_string->charset) {
-        return mixed_cs_index(interp, src, search_string, offset);
-    }
-
-    PARROT_ASSERT(src->encoding == Parrot_fixed_8_encoding_ptr);
-    retval = Parrot_byte_index(interp, src,
-            search_string, offset);
-    return retval;
-}
-
-/*
-
-=item C<INTVAL ascii_cs_rindex(PARROT_INTERP, const STRING *src, const STRING
-*search_string, UINTVAL offset)>
-
-Searches for the last instance of STRING C<search_string> in STRING
-C<src>. Starts searching at C<offset>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-ascii_cs_rindex(PARROT_INTERP, ARGIN(const STRING *src),
-        ARGIN(const STRING *search_string), UINTVAL offset)
-{
-    ASSERT_ARGS(ascii_cs_rindex)
-    INTVAL retval;
-
-    if (src->charset != search_string->charset)
-        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-            "Cross-charset index not supported");
-
-    PARROT_ASSERT(src->encoding == Parrot_fixed_8_encoding_ptr);
-    retval = Parrot_byte_rindex(interp, src,
-            search_string, offset);
-    return retval;
-}
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Verifies that the given string is valid ASCII. Returns 1 if it is ASCII,
-returns 0 otherwise.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-validate(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(validate)
-    String_iter iter;
-    const INTVAL length = Parrot_str_length(interp, src);
-
-    STRING_ITER_INIT(interp, &iter);
-    while (iter.charpos < length) {
-        const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (codepoint >= 0x80)
-            return 0;
-    }
-    return 1;
-}
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Creates a new STRING object from a single codepoint C<codepoint>. Returns
-the new STRING.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
-    ASSERT_ARGS(string_from_codepoint)
-    char real_codepoint = (char)codepoint;
-    STRING * const return_string = string_make(interp, &real_codepoint, 1, "ascii", 0);
-    return return_string;
-}
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Returns 1 if the character at C<offset> matches C<flags>, 0 otherwise.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL
-is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(is_cclass)
-    UINTVAL codepoint;
-
-    if (offset >= src->strlen)
-        return 0;
-    codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
-
-    if (codepoint >= sizeof (Parrot_ascii_typetable) / sizeof (Parrot_ascii_typetable[0])) {
-        return 0;
-    }
-    return (Parrot_ascii_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class.  Delegates to the find_cclass
-method of the encoding plugin.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL
-find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_cclass)
-    UINTVAL pos = offset;
-    UINTVAL end = offset + count;
-
-    end = src->strlen < end ? src->strlen : end;
-    return ENCODING_FIND_CCLASS(interp, src, Parrot_ascii_typetable,
-            flags, pos, end);
-}
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Returns the first position not matching C<flags>, or the end of the searched range.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(PARROT_INTERP,
-                INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_not_cclass)
-    UINTVAL pos = offset;
-    UINTVAL end = offset + count;
-
-    end = src->strlen < end ? src->strlen : end;
-    for (; pos < end; ++pos) {
-        const UINTVAL codepoint = ENCODING_GET_CODEPOINT(interp, src, pos);
-        if ((Parrot_ascii_typetable[codepoint] & flags) == 0) {
-            return pos;
-        }
-    }
-    return end;
-}
-
-/*
-
-=item C<size_t ascii_compute_hash(PARROT_INTERP, const STRING *src, size_t
-seed)>
-
-Computes the hash of STRING C<src> starting with seed value
-C<seed>.
-
-=cut
-
-*/
-
-PARROT_PURE_FUNCTION
-size_t
-ascii_compute_hash(SHIM_INTERP, ARGIN(const STRING *src), size_t seed)
-{
-    ASSERT_ARGS(ascii_compute_hash)
-    size_t hashval = seed;
-    const char *buffptr = (const char *)src->strstart;
-    UINTVAL len = src->strlen;
-
-    PARROT_ASSERT(src->encoding == Parrot_fixed_8_encoding_ptr);
-    while (len--) {
-        hashval += hashval << 5;
-        hashval += *buffptr++;
-    }
-    return hashval;
-}
-
-/*
-
-=item C<void Parrot_charset_ascii_init(PARROT_INTERP)>
-
-Initialize the ASCII charset by registering all the necessary
-function pointers and settings.
-
-=cut
-
-*/
-
-void
-Parrot_charset_ascii_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_charset_ascii_init)
-    CHARSET * const return_set = Parrot_new_charset(interp);
-    static const CHARSET base_set = {
-        "ascii",
-        ascii_get_graphemes,
-        to_charset,
-        compose,
-        decompose,
-        upcase,
-        downcase,
-        titlecase,
-        upcase_first,
-        downcase_first,
-        titlecase_first,
-        ascii_compare,
-        ascii_cs_index,
-        ascii_cs_rindex,
-        validate,
-        is_cclass,
-        find_cclass,
-        find_not_cclass,
-        string_from_codepoint,
-        ascii_compute_hash,
-        NULL
-    };
-
-    STRUCT_COPY_FROM_STRUCT(return_set, base_set);
-    return_set->preferred_encoding = Parrot_fixed_8_encoding_ptr;
-    Parrot_register_charset(interp, "ascii", return_set);
-
-    return;
-}
-
-/*
-
-=item C<STRING * charset_cvt_ascii_to_binary(PARROT_INTERP, const STRING *src)>
-
-Converts an ASCII STRING C<src> to a binary STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-STRING *
-charset_cvt_ascii_to_binary(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(charset_cvt_ascii_to_binary)
-    STRING * const dest = Parrot_str_clone(interp, src);
-    UINTVAL offs;
-
-    for (offs = 0; offs < src->strlen; ++offs) {
-        const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);
-        ENCODING_SET_BYTE(interp, dest, offs, c);
-    }
-
-    dest->charset = Parrot_binary_charset_ptr;
-    return dest;
-}
-
-/*
-
-=item C<STRING * charset_cvt_ascii_to_iso_8859_1(PARROT_INTERP, const STRING
-*src)>
-
-Converts ASCII STRING C<src> to ISO8859-1 STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-STRING *
-charset_cvt_ascii_to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(charset_cvt_ascii_to_iso_8859_1)
-    STRING * const dest = Parrot_str_clone(interp, src);
-    UINTVAL offs;
-
-    for (offs = 0; offs < src->strlen; ++offs) {
-        const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);
-        ENCODING_SET_BYTE(interp, dest, offs, c);
-    }
-
-    dest->charset = Parrot_iso_8859_1_charset_ptr;
-    return dest;
-}
-
-/*
-
-=back
-
-=cut
-
-*/
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: branches/html_cleanup/src/string/charset/binary.c
==============================================================================
--- branches/html_cleanup/src/string/charset/binary.c	Fri Sep 24 13:04:05 2010	(r49295)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,494 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/binary.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for binary data
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-
-/* In local src/string/charset/ directory */
-#include "ascii.h"
-#include "binary.h"
-
-/* HEADERIZER HFILE: src/string/charset/binary.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-static INTVAL compare(SHIM_INTERP,
-    ARGIN(const STRING *lhs),
-    ARGIN(const STRING *rhs))
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* compose(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-static INTVAL find_cclass(SHIM_INTERP,
-    SHIM(INTVAL flags),
-    SHIM(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count);
-
-static INTVAL find_not_cclass(SHIM_INTERP,
-    SHIM(INTVAL flags),
-    SHIM(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count);
-
-static INTVAL is_cclass(SHIM_INTERP,
-    SHIM(INTVAL flags),
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset));
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-static UINTVAL validate(SHIM_INTERP, SHIM(const STRING *src));
-#define ASSERT_ARGS_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(lhs) \
-    , PARROT_ASSERT_ARG(rhs))
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#ifdef EXCEPTION
-#  undef EXCEPTION
-#endif
-
-#define EXCEPTION(err, str) \
-    Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
-
-
-/*
-
-=item C<static STRING* to_charset(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to STRING C<dest> in binary mode. Throws
-an exception if a suitable conversion function is not found.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_charset)
-    charset_converter_t conversion_func =
-        Parrot_find_charset_converter(interp, src->charset, Parrot_binary_charset_ptr);
-
-    if (conversion_func)
-         return conversion_func(interp, src);
-
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-        "to_charset for binary not implemented");
-}
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot compose a binary string.
-
-=cut
-
-*/
-
-/* An error: binary data can't be composed */
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-compose(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(compose)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't compose binary data");
-}
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot decompose a binary string.
-
-=cut
-
-*/
-
-/* An error: binary data can't be decomposed */
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(decompose)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't decompose binary data");
-}
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot convert a binary string to
-upper case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(upcase)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't upcase binary data");
-}
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot convert a binary string to
-lower-case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(downcase)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't downcase binary data");
-}
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot convert a binary string to
-title case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(titlecase)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't titlecase binary data");
-}
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot set the first "character" of the
-binary string to uppercase.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(upcase_first)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't upcase binary data");
-}
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot set the first "character"
-of the binary string to lowercase.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(downcase_first)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't downcase binary data");
-}
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we can't convert the first "character"
-of binary data to title case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(titlecase_first)
-    EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't titlecase binary data");
-}
-
-/*
-
-=item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
-
-Compare the two buffers, first by size, then with memcmp.
-
-=cut
-
-*/
-
-static INTVAL
-compare(SHIM_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
-{
-    ASSERT_ARGS(compare)
-    const UINTVAL l_len = lhs->strlen;
-    const UINTVAL r_len = rhs->strlen;
-    if (l_len != r_len)
-        return l_len - r_len;
-
-    return memcmp(lhs->strstart, rhs->strstart, l_len);
-}
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Returns 1. All sequential data is valid binary data.
-
-=cut
-
-*/
-
-/* Binary's always valid */
-static UINTVAL
-validate(SHIM_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(validate)
-    return 1;
-}
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Always returns 0 (false) for binary data.
-
-=cut
-
-*/
-
-static INTVAL
-is_cclass(SHIM_INTERP, SHIM(INTVAL flags), SHIM(const STRING *src), SHIM(UINTVAL offset))
-{
-    ASSERT_ARGS(is_cclass)
-    return 0;
-}
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class. Always returns C<offset + count>.
-
-=cut
-
-*/
-
-static INTVAL
-find_cclass(SHIM_INTERP, SHIM(INTVAL flags),
-            SHIM(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_cclass)
-    return offset + count;
-}
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Always returns C<offset + count>.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(SHIM_INTERP, SHIM(INTVAL flags),
-               SHIM(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_not_cclass)
-    return offset + count;
-}
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Creates a new STRING object from a single codepoint C<codepoint>. Returns
-the new STRING.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
-    ASSERT_ARGS(string_from_codepoint)
-    STRING *return_string;
-    char real_codepoint = (char)codepoint;
-    return_string = string_make(interp, &real_codepoint, 1, "binary", 0);
-    return return_string;
-}
-
-
-/*
-
-=item C<void Parrot_charset_binary_init(PARROT_INTERP)>
-
-Initialize the binary charset, including function pointers and
-settings.
-
-=cut
-
-*/
-
-void
-Parrot_charset_binary_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_charset_binary_init)
-    CHARSET * const return_set = Parrot_new_charset(interp);
-    static const CHARSET base_set = {
-        "binary",
-        ascii_get_graphemes,
-        to_charset,
-        compose,
-        decompose,
-        upcase,
-        downcase,
-        titlecase,
-        upcase_first,
-        downcase_first,
-        titlecase_first,
-        compare,
-        ascii_cs_index,
-        ascii_cs_rindex,
-        validate,
-        is_cclass,
-        find_cclass,
-        find_not_cclass,
-        string_from_codepoint,
-        ascii_compute_hash,
-        NULL
-    };
-
-    STRUCT_COPY_FROM_STRUCT(return_set, base_set);
-    return_set->preferred_encoding = Parrot_fixed_8_encoding_ptr;
-    Parrot_register_charset(interp, "binary", return_set);
-
-    return;
-
-}
-
-/*
-
-=back
-
-=cut
-
-*/
-
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: branches/html_cleanup/src/string/charset/iso-8859-1.c
==============================================================================
--- branches/html_cleanup/src/string/charset/iso-8859-1.c	Fri Sep 24 13:04:05 2010	(r49295)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,733 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/iso-8859-1.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for iso-8859-1 data
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "iso-8859-1.h"
-#include "ascii.h"
-
-/* HEADERIZER HFILE: src/string/charset/iso-8859-1.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING* compose(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static INTVAL find_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL find_not_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL is_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING * to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_unicode(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static UINTVAL validate(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_iso_8859_1 __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_unicode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#include "tables.h"
-
-/*
-
-=item C<static STRING * to_iso_8859_1(PARROT_INTERP, const STRING *src)>
-
-Converts STRING C<src> to iso-8859-1 in STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_iso_8859_1)
-    UINTVAL src_len;
-    String_iter iter;
-    /* iso-8859-1 is never bigger than source */
-    STRING * dest = Parrot_str_clone(interp, src);
-
-    STRING_ITER_INIT(interp, &iter);
-    src_len = src->strlen;
-    /* One output byte per source character. */
-    dest->bufused = src_len;
-    while (iter.charpos < src_len) {
-        const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        /* Codepoints of 0x100 and above do not fit in a single byte.
-         * NOTE(review): the message below spells the charset "iso-8559-1",
-         * which looks like a typo for "iso-8859-1" -- confirm nothing
-         * matches on the text before changing it. */
-        if (c >= 0x100)
-            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
-                "lossy conversion to iso-8559-1");
-
-        /* charpos - 1: presumably the iterator has already stepped past the
-         * character just read, so this is that character's index. */
-        Parrot_fixed_8_encoding_ptr->set_byte(interp, dest, iter.charpos - 1, c);
-    }
-    /* Retag the result as iso-8859-1 stored in the fixed_8 encoding. */
-    dest->charset = Parrot_iso_8859_1_charset_ptr;
-    dest->encoding = Parrot_fixed_8_encoding_ptr;
-    return dest;
-}
-
-/*
-
-=item C<static STRING * to_unicode(PARROT_INTERP, const STRING *src)>
-
-Converts STRING C<src> to unicode STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_unicode(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_unicode)
-    STRING * dest = Parrot_str_clone(interp, src);
-    String_iter iter;
-
-    /* Retag the clone as unicode in that charset's preferred encoding and
-     * start with storage for one byte per source character. */
-    dest->charset = Parrot_unicode_charset_ptr;
-    dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest);
-    Parrot_gc_reallocate_string_storage(interp, dest, src->strlen);
-    STRING_ITER_INIT(interp, &iter);
-    while (iter.charpos < src->strlen) {
-        const UINTVAL c = ENCODING_GET_BYTE(interp, src, iter.charpos);
-
-        /* Grow the buffer once fewer than 4 spare bytes remain.  Written as
-         * an addition on the left-hand side so that a buffer shorter than
-         * 4 bytes cannot wrap the (assumed unsigned) subtraction and skip
-         * the reallocation. */
-        if (iter.bytepos + 4 >= Buffer_buflen(dest)) {
-            /* Estimate 1.5 bytes per remaining character, at least 16. */
-            UINTVAL need = (UINTVAL)((src->strlen - iter.charpos) * 1.5);
-            if (need < 16)
-                need = 16;
-            Parrot_gc_reallocate_string_storage(interp, dest,
-                    Buffer_buflen(dest) + need);
-        }
-        STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, c);
-    }
-    /* The iterator's final position gives the byte and character counts. */
-    dest->bufused = iter.bytepos;
-    dest->strlen  = iter.charpos;
-    return dest;
-}
-
-/*
-
-=item C<static STRING * to_charset(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to an ISO-8859-1 STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING *
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_charset)
-    /* Look for a converter registered for (source charset -> iso-8859-1). */
-    const charset_converter_t converter =
-        Parrot_find_charset_converter(interp, src->charset, Parrot_iso_8859_1_charset_ptr);
-
-    /* No registered converter: fall back to the generic conversion. */
-    if (!converter)
-        return to_iso_8859_1(interp, src);
-
-    return converter(interp, src);
-}
-
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-ISO-8859-1 does not support composing, so we just copy the STRING C<src> and return the
-copy.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING*
-compose(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(compose)
-
-    /* Nothing to compose: hand back a copy of the input. */
-    return Parrot_str_copy(interp, src);
-}
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-ISO-8859-1 does not support decomposing, so we throw an exception.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(decompose)
-    /* Always raises EXCEPTION_UNIMPLEMENTED; src is ignored.  There is no
-     * return statement, so the throw is presumably non-returning. */
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
-            "decompose for iso-8859-1 not implemented");
-}
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Convert all graphemes in the STRING C<src> to upper case, for those
-graphemes that support cases.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(upcase)
-    unsigned char *buffer;
-    UINTVAL        offset;
-    STRING        *result = Parrot_str_clone(interp, src);
-
-    if (!result->strlen)
-        return result;
-
-    /* Work in place on the clone, one byte per character. */
-    buffer = (unsigned char *)result->strstart;
-    for (offset = 0; offset < result->strlen; ++offset) {
-        unsigned int c = buffer[offset]; /* XXX use encoding ? */
-        /* 0xe0-0xfe are lowercase accented letters whose uppercase form
-         * sits 0x20 below.  Two exceptions are left alone: 0xf7 is the
-         * division sign, and 0xff (y with diaeresis) has no uppercase in
-         * ISO-8859-1 -- clearing bit 0x20 would turn it into 0xdf
-         * (sharp s). */
-        if (c >= 0xe0 && c != 0xf7 && c != 0xff)
-            c &= ~0x20;
-        else
-            c = toupper((unsigned char)c);
-        buffer[offset] = (unsigned char)c;
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Converts all graphemes in STRING C<src> to lower-case, for those graphemes
-that support cases.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(downcase)
-    STRING * const  result = Parrot_str_clone(interp, src);
-    unsigned char  *cursor;
-    unsigned char  *stop;
-
-    if (!result->strlen)
-        return result;
-
-    /* Walk the clone's bytes in place. */
-    cursor = (unsigned char *)result->strstart;
-    stop   = cursor + result->strlen;
-    for (; cursor < stop; ++cursor) {
-        unsigned int ch = *cursor;
-
-        /* 0xc0-0xde (minus 0xd7, the multiplication sign) are the uppercase
-         * accented letters; their lowercase form sits 0x20 above.  Every
-         * other byte goes through tolower(). */
-        if (ch < 0xc0 || ch > 0xde || ch == 0xd7)
-            ch = tolower((unsigned char)ch);
-        else
-            ch |= 0x20;
-        *cursor = (unsigned char)ch;
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Converts the graphemes in STRING C<src> to title case, for those graphemes
-that support cases.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(titlecase)
-    unsigned char *buffer;
-    unsigned int   c;
-    UINTVAL        offset;
-    STRING        *result = Parrot_str_clone(interp, src);
-
-    if (!result->strlen)
-        return result;
-
-    /* First character: upper-case it.  0xe0-0xfe map 0x20 downwards, except
-     * 0xf7 (division sign) and 0xff (y with diaeresis), which has no
-     * uppercase in ISO-8859-1 and would otherwise become 0xdf (sharp s). */
-    buffer = (unsigned char *)result->strstart;
-    c = buffer[0];
-    if (c >= 0xe0 && c != 0xf7 && c != 0xff)
-        c &= ~0x20;
-    else
-        c = toupper((unsigned char)c);
-    buffer[0] = (unsigned char)c;
-
-    /* Remaining characters: lower-case them.  0xc0-0xde map 0x20 upwards,
-     * except 0xd7 (multiplication sign). */
-    for (offset = 1; offset < result->strlen; ++offset) {
-        c = buffer[offset];
-        if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
-            c |= 0x20;
-        else
-            c = tolower((unsigned char)c);
-        buffer[offset] = (unsigned char)c;
-    }
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in STRING C<src> to upper case, if it
-supports cases.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(upcase_first)
-    unsigned char *buffer;
-    unsigned int   c;
-    STRING        *result = Parrot_str_clone(interp, src);
-
-    if (!result->strlen)
-        return result;
-
-    /* Only the first byte of the clone is touched.  0xe0-0xfe map 0x20
-     * downwards, except 0xf7 (division sign) and 0xff (y with diaeresis),
-     * which has no uppercase in ISO-8859-1 and would otherwise become
-     * 0xdf (sharp s). */
-    buffer = (unsigned char *)result->strstart;
-    c = buffer[0];
-    if (c >= 0xe0 && c != 0xf7 && c != 0xff)
-        c &= ~0x20;
-    else
-        c = toupper((unsigned char)c);
-    buffer[0] = (unsigned char)c;
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first character of the STRING C<src> to lower case, if the
-grapheme supports lower case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(downcase_first)
-    unsigned char *buffer;
-    unsigned int   c;
-    STRING        *result = Parrot_str_clone(interp, src);
-
-    if (!result->strlen)
-        return result;
-
-    /* Only the first byte of the clone is touched.  0xc0-0xde (minus 0xd7,
-     * the multiplication sign) are uppercase accented letters; setting bit
-     * 0x20 yields the lowercase form, matching downcase().  Clearing the
-     * bit instead would be a no-op in this range because it is already
-     * unset. */
-    buffer = (unsigned char *)result->strstart;
-    c = buffer[0];
-    if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
-        c |= 0x20;
-    else
-        c = tolower((unsigned char)c);
-    buffer[0] = (unsigned char)c;
-
-    return result;
-}
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in STRING C<src> to title case, if the grapheme
-supports case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(titlecase_first)
-    /* Title-casing the first character is treated as upper-casing it. */
-    return upcase_first(interp, src);
-}
-
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Returns 1 if the STRING C<src> is a valid ISO-8859-1 STRING. Returns 0 otherwise.
-
-=cut
-
-*/
-
-static UINTVAL
-validate(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(validate)
-    const INTVAL n_chars = Parrot_str_length(interp, src);
-    INTVAL       pos     = 0;
-
-    /* Any codepoint of 0x100 or above makes the string invalid. */
-    while (pos < n_chars) {
-        const UINTVAL cp = ENCODING_GET_CODEPOINT(interp, src, pos);
-
-        if (cp >= 0x100)
-            return 0;
-        ++pos;
-    }
-
-    return 1;
-}
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
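-Returns 1 if the character at C<offset> in C<src> belongs to any of the
-character classes in C<flags>, and 0 otherwise (including when C<offset> is
-past the end of the string).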
-
-=cut
-
-*/
-
-static INTVAL
-is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(is_cclass)
-    UINTVAL codepoint;
-
-    /* Positions past the end of the string belong to no class. */
-    if (offset >= src->strlen) return 0;
-    codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
-
-    /* Bounds-check against the table that is actually indexed below, not
-     * against the ASCII table. */
-    if (codepoint >= sizeof (Parrot_iso_8859_1_typetable) /
-                     sizeof (Parrot_iso_8859_1_typetable[0])) {
-        return 0;
-    }
-    return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class.  Delegates to the find_cclass
-method of the encoding plugin.
-
-=cut
-
-*/
-
-static INTVAL
-find_cclass(PARROT_INTERP, INTVAL flags,
-                ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_cclass)
-    UINTVAL limit = offset + count;
-
-    /* Never search past the end of the string. */
-    if (src->strlen < limit)
-        limit = src->strlen;
-
-    /* The encoding performs the scan using the iso-8859-1 type table. */
-    return ENCODING_FIND_CCLASS(interp, src,
-            Parrot_iso_8859_1_typetable, flags, offset, limit);
-}
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
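-Returns the position of the first character, starting at C<offset> and
-looking at no more than C<count> characters, that is not in any of the
-character classes in C<flags>. If every character examined is in the class,
-returns the position just past the searched range.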
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(PARROT_INTERP, INTVAL flags,
-                ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_not_cclass)
-    UINTVAL pos = offset;
-    UINTVAL end = offset + count;
-
-    /* Never search past the end of the string. */
-    end = src->strlen < end ? src->strlen : end;
-    for (; pos < end; ++pos) {
-        const UINTVAL codepoint = ENCODING_GET_CODEPOINT(interp, src, pos);
-
-        /* A codepoint outside the type table belongs to no class (the same
-         * rule is_cclass applies), so it ends the search without reading
-         * past the end of the table. */
-        if (codepoint >= sizeof (Parrot_iso_8859_1_typetable) /
-                         sizeof (Parrot_iso_8859_1_typetable[0])) {
-            return pos;
-        }
-        if ((Parrot_iso_8859_1_typetable[codepoint] & flags) == 0) {
-            return pos;
-        }
-    }
-    return end;
-}
-
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Creates a new STRING from the single codepoint C<codepoint>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
-    ASSERT_ARGS(string_from_codepoint)
-    /* The codepoint is narrowed to one char; that single byte is the
-     * entire content handed to string_make for the "iso-8859-1" charset. */
-    char byte = (char)codepoint;
-
-    return string_make(interp, &byte, 1, "iso-8859-1", 0);
-}
-
-/*
-
-=item C<void Parrot_charset_iso_8859_1_init(PARROT_INTERP)>
-
-Initializes the ISO-8859-1 charset by installing all the necessary function pointers.
-
-=cut
-
-*/
-
-void
-Parrot_charset_iso_8859_1_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_charset_iso_8859_1_init)
-    /* Positional initializer: the charset name followed by its function
-     * table.  The trailing NULL slot is presumably preferred_encoding,
-     * which is assigned after the copy below. */
-    static const CHARSET latin1_defaults = {
-        "iso-8859-1",
-        ascii_get_graphemes,
-        to_charset,
-        compose,
-        decompose,
-        upcase,
-        downcase,
-        titlecase,
-        upcase_first,
-        downcase_first,
-        titlecase_first,
-        ascii_compare,
-        ascii_cs_index,
-        ascii_cs_rindex,
-        validate,
-        is_cclass,
-        find_cclass,
-        find_not_cclass,
-        string_from_codepoint,
-        ascii_compute_hash,
-        NULL
-    };
-    CHARSET * const charset = Parrot_new_charset(interp);
-
-    /* Fill the fresh charset from the template, pick the fixed_8 encoding
-     * and register it under the name "iso-8859-1". */
-    STRUCT_COPY_FROM_STRUCT(charset, latin1_defaults);
-    charset->preferred_encoding = Parrot_fixed_8_encoding_ptr;
-    Parrot_register_charset(interp, "iso-8859-1", charset);
-}
-
-/*
-
-=item C<STRING * charset_cvt_iso_8859_1_to_ascii(PARROT_INTERP, const STRING
-*src)>
-
-Converts STRING C<src> in ISO-8859-1 to ASCII STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-STRING *
-charset_cvt_iso_8859_1_to_ascii(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(charset_cvt_iso_8859_1_to_ascii)
-    STRING *dest = Parrot_str_clone(interp, src);
-    UINTVAL idx  = 0;
-
-    /* Copy byte by byte; anything at or above 0x80 is rejected as lossy. */
-    while (idx < src->strlen) {
-        UINTVAL byte = ENCODING_GET_BYTE(interp, src, idx);
-
-        if (byte >= 0x80)
-            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
-                "lossy conversion to ascii");
-
-        ENCODING_SET_BYTE(interp, dest, idx, byte);
-        ++idx;
-    }
-
-    return dest;
-}
-
-/*
-
-=back
-
-=cut
-
-*/
-
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: branches/html_cleanup/src/string/charset/unicode.c
==============================================================================
--- branches/html_cleanup/src/string/charset/unicode.c	Fri Sep 24 13:04:05 2010	(r49295)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,1075 +0,0 @@
-/*
-Copyright (C) 2005-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/unicode.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for unicode data
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "unicode.h"
-#include "ascii.h"
-#include "tables.h"
-
-/* HEADERIZER HFILE: src/string/charset/unicode.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-static INTVAL compare(PARROT_INTERP,
-    ARGIN(const STRING *lhs),
-    ARGIN(const STRING *rhs))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* compose(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static size_t compute_hash(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    size_t seed)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static INTVAL cs_rindex(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(const STRING *search_string),
-    SHIM(UINTVAL offset))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-static INTVAL find_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-static INTVAL find_not_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_graphemes(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static INTVAL is_cclass(PARROT_INTERP,
-    INTVAL flags,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static int u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
-        __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-static UINTVAL validate(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-#define ASSERT_ARGS_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(lhs) \
-    , PARROT_ASSERT_ARG(rhs))
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_compute_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_cs_rindex __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_graphemes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_u_iscclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-/* Drop any earlier definition of EXCEPTION before the ICU headers are
- * included; the file-local version is defined after them. */
-#ifdef EXCEPTION
-#  undef EXCEPTION
-#endif
-
-#if PARROT_HAS_ICU
-#  include <unicode/ucnv.h>
-#  include <unicode/utypes.h>
-#  include <unicode/uchar.h>
-#  include <unicode/ustring.h>
-#  include <unicode/unorm.h>
-#endif
-/* Throw a Parrot exception of type err with message str; relies on a
- * variable named interp being in scope at the point of use. */
-#define EXCEPTION(err, str) \
-    Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
-
-/* Shorthand for charset operations that are not implemented. */
-#define UNIMPL EXCEPTION(EXCEPTION_UNIMPLEMENTED, "unimplemented unicode")
-
-
-/*
-
-=item C<static STRING * get_graphemes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Gets the graphemes from STRING C<src> starting at C<offset>. Gets
-C<count> graphemes total.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_graphemes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(get_graphemes)
-    /* Graphemes are fetched as codepoints via the string's encoding. */
-    return ENCODING_GET_CODEPOINTS(interp, src, offset, count);
-}
-
-
-/*
-
-=item C<static STRING* to_charset(PARROT_INTERP, const STRING *src)>
-
-Converts input STRING C<src> to unicode STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(to_charset)
-    /* Look for a converter registered for (source charset -> unicode). */
-    const charset_converter_t converter =
-            Parrot_find_charset_converter(interp, src->charset,
-                    Parrot_unicode_charset_ptr);
-
-    /* Without one, fall back to re-encoding the string as utf8. */
-    return converter
-        ? converter(interp, src)
-        : Parrot_utf8_encoding_ptr->to_encoding(interp, src);
-}
-
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-If Parrot is built with ICU, composes the STRING C<src> by normalizing it
-with ICU's default normalization mode, NFC.
-
-If Parrot does not have ICU included, throws an exception.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-compose(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(compose)
-#if PARROT_HAS_ICU
-    STRING *dest;
-    int src_len, dest_len;
-    UErrorCode err;
-    /*
-       U_STABLE int32_t U_EXPORT2
-       unorm_normalize(const UChar *source, int32_t sourceLength,
-       UNormalizationMode mode, int32_t options,
-       UChar *result, int32_t resultLength,
-       UErrorCode *status);
-       */
-    /* First attempt: give the result as many UChars as the source has
-     * characters.
-     * NOTE(review): src->strstart is passed to ICU as UChar data, so this
-     * presumably expects src to already hold UTF-16 -- confirm. */
-    dest_len = src_len = src->strlen;
-    dest     = Parrot_str_new_init(interp, NULL, src_len * sizeof (UChar),
-            src->encoding, src->charset, 0);
-
-    err      = U_ZERO_ERROR;
-    dest_len = unorm_normalize((UChar *)src->strstart, src_len,
-            UNORM_DEFAULT,      /* default is NFC */
-            0,                  /* options 0 default - no specific icu
-                                 * version */
-            (UChar *)dest->strstart, dest_len, &err);
-
-    dest->bufused = dest_len * sizeof (UChar);
-
-    /* On failure, grow the storage to the length reported by the first
-     * call and normalize once more; a second failure is only caught by
-     * the assertion. */
-    if (!U_SUCCESS(err)) {
-        err = U_ZERO_ERROR;
-        Parrot_gc_reallocate_string_storage(interp, dest, dest->bufused);
-        dest_len = unorm_normalize((UChar *)src->strstart, src_len,
-                UNORM_DEFAULT,      /* default is NFC */
-                0,                  /* options 0 default - no specific
-                                     * icu version */
-                (UChar *)dest->strstart, dest_len, &err);
-        PARROT_ASSERT(U_SUCCESS(err));
-        dest->bufused = dest_len * sizeof (UChar);
-    }
-    dest->strlen = dest_len;
-    return dest;
-#else
-    /* Built without ICU: composition is unavailable. */
-    UNUSED(src);
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-Decompose function for unicode charset. This function is not yet implemented.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(decompose)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    /* UNIMPL expands to a throw of EXCEPTION_UNIMPLEMENTED; src is ignored. */
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to all upper-case graphemes, for those characters
-which support upper-case versions.
-
-Throws an exception if ICU is not installed.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(upcase)
-#if PARROT_HAS_ICU
-    UErrorCode err;
-    int dest_len, src_len, needed;
-    STRING *res;
-#endif
-
-    /* Fast path, taken with or without ICU: a utf8 string whose byte count
-     * equals its character count (presumably pure ASCII content) is handed
-     * to the ascii charset's upcase. */
-    if (src->bufused  == src->strlen
-            && src->encoding == Parrot_utf8_encoding_ptr) {
-        return Parrot_ascii_charset_ptr->upcase(interp, src);
-    }
-
-#if PARROT_HAS_ICU
-    /* to_encoding will allocate new string */
-    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
-    /*
-       U_CAPI int32_t U_EXPORT2
-       u_strToUpper(UChar *dest, int32_t destCapacity,
-       const UChar *src, int32_t srcLength,
-       const char *locale,
-       UErrorCode *pErrorCode);
-       */
-    err = U_ZERO_ERROR;
-
-    /* use all available space - see below XXX */
-    /* TODO downcase, titlecase too */
-    /* Both lengths are counted in UChar units, not bytes. */
-    dest_len = Buffer_buflen(res) / sizeof (UChar);
-    src_len  = res->bufused       / sizeof (UChar);
-
-    /*
-     * XXX troubles:
-     *   t/op/string_cs_45  upcase unicode:"\u01f0"
-     *   this creates \u004a \u030c J+NON-SPACING HACEK
-     *   the string needs resizing, *if* the src buffer is
-     *   too short. *But* with icu 3.2/3.4 the src string is
-     *   overwritten with partial result, despite the icu docs sayeth:
-     *
-     *      The source string and the destination buffer
-     *      are allowed to overlap.
-     *
-     *  Workaround:  'preflighting' returns needed length
-     *  Alternative: forget about inplace operation - create new result
-     *
-     *  TODO downcase, titlecase
-     */
-    /* Preflight: a NULL destination with zero capacity only asks ICU for
-     * the required length; err is reset before the real call below. */
-    needed = u_strToUpper(NULL, 0,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* locale = default */
-            &err);
-
-    /* Grow the buffer first if the upper-cased text will not fit. */
-    if (needed > dest_len) {
-        Parrot_gc_reallocate_string_storage(interp, res, needed * sizeof (UChar));
-        dest_len = needed;
-    }
-
-    /* Real conversion, done in place: source and destination are the same
-     * buffer. */
-    err      = U_ZERO_ERROR;
-    dest_len = u_strToUpper((UChar *)res->strstart, dest_len,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* locale = default */
-            &err);
-    PARROT_ASSERT(U_SUCCESS(err));
-    res->bufused = dest_len * sizeof (UChar);
-
-    /* downgrade if possible */
-    if (dest_len == (int)src->strlen)
-        res->encoding = Parrot_ucs2_encoding_ptr;
-    else {
-        /* string is likely still ucs2 if it was earlier
-         * but strlen changed due to combining char
-         */
-        res->strlen = dest_len;
-    }
-
-    return res;
-
-#else
-    /* Built without ICU: only the fast path above is available.
-     * NOTE(review): src is marked UNUSED here although the fast-path test
-     * above reads it. */
-    UNUSED(src);
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Converts all graphemes to lower-case, for those graphemes which have cases.
-
-Throws an exception if ICU is not installed.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(downcase)
-#if PARROT_HAS_ICU
-    UErrorCode err;
-    int dest_len, src_len;
-    STRING *res;
-#endif
-
-    if (src->bufused  == src->strlen
-            && src->encoding == Parrot_utf8_encoding_ptr) {
-        return Parrot_ascii_charset_ptr->downcase(interp, src);
-    }
-
-#if PARROT_HAS_ICU
-    /* to_encoding will allocate new string */
-    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
-    /*
-U_CAPI int32_t U_EXPORT2
-u_strToLower(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode);
-     */
-    err      = U_ZERO_ERROR;
-    src_len  = res->bufused / sizeof (UChar);
-    dest_len = u_strToLower((UChar *)res->strstart, src_len,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* locale = default */
-            &err);
-    res->bufused = dest_len * sizeof (UChar);
-
-    if (!U_SUCCESS(err)) {
-        err = U_ZERO_ERROR;
-        Parrot_gc_reallocate_string_storage(interp, res, res->bufused);
-        dest_len = u_strToLower((UChar *)res->strstart, dest_len,
-                (UChar *)res->strstart, src_len,
-                NULL,       /* locale = default */
-                &err);
-        PARROT_ASSERT(U_SUCCESS(err));
-    }
-
-    /* downgrade if possible */
-    if (dest_len == (int)res->strlen)
-        res->encoding = Parrot_ucs2_encoding_ptr;
-
-    return res;
-
-#else
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Converts the string to title case, for those characters which support cases.
-
-Throws an exception if ICU is not installed.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(titlecase)
-#if PARROT_HAS_ICU
-
-    UErrorCode err;
-    int dest_len, src_len;
-    STRING *res;
-
-    if (src->bufused  == src->strlen
-    &&  src->encoding == Parrot_utf8_encoding_ptr) {
-        return Parrot_ascii_charset_ptr->titlecase(interp, src);
-    }
-
-    /* to_encoding will allocate new string */
-    res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
-
-    /*
-U_CAPI int32_t U_EXPORT2
-u_strToTitle(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             UBreakIterator *titleIter,
-             const char *locale,
-             UErrorCode *pErrorCode);
-     */
-
-    err      = U_ZERO_ERROR;
-    src_len  = res->bufused / sizeof (UChar);
-    dest_len = u_strToTitle((UChar *)res->strstart, src_len,
-            (UChar *)res->strstart, src_len,
-            NULL,       /* default titleiter */
-            NULL,       /* locale = default */
-            &err);
-    res->bufused = dest_len * sizeof (UChar);
-
-    if (!U_SUCCESS(err)) {
-        err = U_ZERO_ERROR;
-        Parrot_gc_reallocate_string_storage(interp, res, res->bufused);
-        dest_len = u_strToTitle((UChar *)res->strstart, dest_len,
-                (UChar *)res->strstart, src_len,
-                NULL, NULL,
-                &err);
-        PARROT_ASSERT(U_SUCCESS(err));
-    }
-
-    /* downgrade if possible */
-    if (dest_len == (int)res->strlen)
-        res->encoding = Parrot_ucs2_encoding_ptr;
-
-    return res;
-
-#else
-    UNUSED(src);
-    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-        "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in the STRING C<src> to uppercase, if the
-grapheme supports it. Not implemented.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(upcase_first)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in the STRING C<src> to lower-case, if
-the grapheme supports it. Not implemented
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(downcase_first)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in STRING C<src> to title case, if the
-string supports it. Not implemented.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(titlecase_first)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
-
-Compares two STRINGs, C<lhs> and C<rhs>. Returns -1 if C<lhs> < C<rhs>. Returns
-0 if C<lhs> = C<rhs>. Returns 1 if C<lhs> > C<rhs>.
-
-=cut
-
-*/
-
-static INTVAL
-compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
-{
-    ASSERT_ARGS(compare)
-    String_iter l_iter, r_iter;
-    UINTVAL min_len, l_len, r_len;
-
-    /* TODO make optimized equal - strings are equal length then already */
-    STRING_ITER_INIT(interp, &l_iter);
-    STRING_ITER_INIT(interp, &r_iter);
-
-    l_len = lhs->strlen;
-    r_len = rhs->strlen;
-
-    min_len = l_len > r_len ? r_len : l_len;
-
-    while (l_iter.charpos < min_len) {
-        const UINTVAL cl = STRING_ITER_GET_AND_ADVANCE(interp, lhs, &l_iter);
-        const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &r_iter);
-
-        if (cl != cr)
-            return cl < cr ? -1 : 1;
-    }
-
-    if (l_len < r_len)
-        return -1;
-
-    if (l_len > r_len)
-        return 1;
-
-    return 0;
-}
-
-
-/*
-
-=item C<static INTVAL cs_rindex(PARROT_INTERP, const STRING *src, const STRING
-*search_string, UINTVAL offset)>
-
-Finds the last index of substring C<search_string> in STRING C<src>,
-starting from C<offset>. Not implemented.
-
-=cut
-
-*/
-
-static INTVAL
-cs_rindex(PARROT_INTERP, SHIM(const STRING *src),
-        SHIM(const STRING *search_string), SHIM(UINTVAL offset))
-{
-    ASSERT_ARGS(cs_rindex)
-    /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Returns 1 if the STRING C<src> is a valid unicode string, returns 0 otherwise.
-
-=cut
-
-*/
-
-static UINTVAL
-validate(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(validate)
-    String_iter iter;
-    const INTVAL length = Parrot_str_length(interp, src);
-
-    STRING_ITER_INIT(interp, &iter);
-    while (iter.charpos < length) {
-        const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        /* Check for Unicode non-characters */
-        if (codepoint >= 0xfdd0
-        && (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe)
-        &&  codepoint <= 0x10ffff)
-            return 0;
-    }
-
-    return 1;
-}
-
-
-/*
-
-=item C<static int u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-static int
-u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
-{
-    ASSERT_ARGS(u_iscclass)
-#if PARROT_HAS_ICU
-    UNUSED(interp);
-            /* XXX which one
-               return u_charDigitValue(codepoint);
-               */
-    if ((flags & enum_cclass_uppercase)    && u_isupper(codepoint))  return 1;
-    if ((flags & enum_cclass_lowercase)    && u_islower(codepoint))  return 1;
-    if ((flags & enum_cclass_alphabetic)   && u_isalpha(codepoint))  return 1;
-    if ((flags & enum_cclass_numeric)      && u_isdigit(codepoint))  return 1;
-    if ((flags & enum_cclass_hexadecimal)  && u_isxdigit(codepoint)) return 1;
-    if ((flags & enum_cclass_whitespace)   && u_isspace(codepoint))  return 1;
-    if ((flags & enum_cclass_printing)     && u_isprint(codepoint))  return 1;
-    if ((flags & enum_cclass_graphical)    && u_isgraph(codepoint))  return 1;
-    if ((flags & enum_cclass_blank)        && u_isblank(codepoint))  return 1;
-    if ((flags & enum_cclass_control)      && u_iscntrl(codepoint))  return 1;
-    if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint))  return 1;
-    if ((flags & enum_cclass_word)         &&
-        (u_isalnum(codepoint) || codepoint == '_'))                  return 1;
-
-    return 0;
-#else
-    if (codepoint < 256)
-        return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-
-    if (flags == enum_cclass_any)
-        return 1;
-
-    /* All codepoints from u+0100 to u+02af are alphabetic, so we
-     * cheat on the WORD and ALPHABETIC properties to include these
-     * (and incorrectly exclude all others).  This is a stopgap until
-     * ICU is everywhere, or we have better non-ICU unicode support. */
-    if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
-        return (codepoint < 0x2b0);
-
-    if (flags & enum_cclass_whitespace) {
-        /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
-        switch (codepoint) {
-          case 0x1680: case 0x180e: case 0x2000: case 0x2001:
-          case 0x2002: case 0x2003: case 0x2004: case 0x2005:
-          case 0x2006: case 0x2007: case 0x2008: case 0x2009:
-          case 0x200a: case 0x2028: case 0x2029: case 0x202f:
-          case 0x205f: case 0x3000:
-            return 1;
-          default:
-            break;
-        }
-    }
-
-    if (flags & enum_cclass_numeric) {
-        /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
-        if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
-        if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
-        if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
-        if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
-        if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
-        if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
-        if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
-        if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
-        if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
-        if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
-        if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
-        if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
-        if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
-        if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
-        if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
-        if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
-        if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
-        if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
-        if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
-        if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
-        if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
-        if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
-    }
-
-    if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline))
-        Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
-            "no ICU lib loaded");
-
-    return 0;
-#endif
-}
-
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-static INTVAL
-is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(is_cclass)
-    UINTVAL codepoint;
-
-    if (offset >= src->strlen)
-        return 0;
-
-    codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
-
-    if (codepoint >= 256)
-        return u_iscclass(interp, codepoint, flags) != 0;
-
-    return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class.
-
-=cut
-
-*/
-
-static INTVAL
-find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_cclass)
-    String_iter iter;
-    UINTVAL     codepoint;
-    UINTVAL     end = offset + count;
-
-    STRING_ITER_INIT(interp, &iter);
-    STRING_ITER_SET_POSITION(interp, src, &iter, offset);
-
-    end = src->strlen < end ? src->strlen : end;
-
-    while (iter.charpos < end) {
-        codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (codepoint >= 256) {
-            if (u_iscclass(interp, codepoint, flags))
-                    return iter.charpos - 1;
-        }
-        else {
-            if (Parrot_iso_8859_1_typetable[codepoint] & flags)
-                return iter.charpos - 1;
-        }
-    }
-
-    return end;
-}
-
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Returns C<INTVAL>.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
-        UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(find_not_cclass)
-    String_iter iter;
-    UINTVAL     codepoint;
-    UINTVAL     end = offset + count;
-    int         bit;
-
-    if (offset > src->strlen) {
-        /* XXX: Throw in this case? */
-        return offset + count;
-    }
-
-    STRING_ITER_INIT(interp, &iter);
-
-    if (offset)
-        STRING_ITER_SET_POSITION(interp, src, &iter, offset);
-
-    end = src->strlen < end ? src->strlen : end;
-
-    if (flags == enum_cclass_any)
-        return end;
-
-    while (iter.charpos < end) {
-        codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        if (codepoint >= 256) {
-            for (bit = enum_cclass_uppercase;
-                    bit <= enum_cclass_word ; bit <<= 1) {
-                if ((bit & flags) && !u_iscclass(interp, codepoint, bit))
-                    return iter.charpos - 1;
-            }
-        }
-        else {
-            if (!(Parrot_iso_8859_1_typetable[codepoint] & flags))
-                return iter.charpos - 1;
-        }
-    }
-
-    return end;
-}
-
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Returns a one-codepoint string for the given codepoint.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
-    ASSERT_ARGS(string_from_codepoint)
-    String_iter    iter;
-    STRING * const dest = string_make(interp, "", 1, "unicode", 0);
-
-    dest->strlen = 1;
-
-    STRING_ITER_INIT(interp, &iter);
-    STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, codepoint);
-    dest->bufused = iter.bytepos;
-
-    return dest;
-}
-
-
-/*
-
-=item C<static size_t compute_hash(PARROT_INTERP, const STRING *src, size_t
-seed)>
-
-Computes the hash of the given STRING C<src> with starting seed value C<seed>.
-
-=cut
-
-*/
-
-static size_t
-compute_hash(PARROT_INTERP, ARGIN(const STRING *src), size_t seed)
-{
-    ASSERT_ARGS(compute_hash)
-    String_iter iter;
-    size_t      hashval = seed;
-
-    STRING_ITER_INIT(interp, &iter);
-
-    while (iter.charpos < src->strlen) {
-        const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
-        hashval += hashval << 5;
-        hashval += c;
-    }
-
-    return hashval;
-}
-
-
-/*
-
-=item C<void Parrot_charset_unicode_init(PARROT_INTERP)>
-
-Initializes the Unicode charset by installing all the necessary function
-pointers.
-
-=cut
-
-*/
-
-void
-Parrot_charset_unicode_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_charset_unicode_init)
-    CHARSET * const      return_set = Parrot_new_charset(interp);
-    static const CHARSET base_set   = {
-        "unicode",
-        get_graphemes,
-        to_charset,
-        compose,
-        decompose,
-        upcase,
-        downcase,
-        titlecase,
-        upcase_first,
-        downcase_first,
-        titlecase_first,
-        compare,
-        mixed_cs_index,
-        cs_rindex,
-        validate,
-        is_cclass,
-        find_cclass,
-        find_not_cclass,
-        string_from_codepoint,
-        compute_hash,
-        NULL
-    };
-
-    STRUCT_COPY_FROM_STRUCT(return_set, base_set);
-
-    /*
-     * for now use utf8
-     * TODO replace it with a fixed uint_16 or uint_32 encoding
-     *      XXX if this is changed, modify string_make so it
-     *          still takes "utf8" when fed "unicode" as charset!
-     */
-    return_set->preferred_encoding = Parrot_utf8_encoding_ptr;
-    Parrot_register_charset(interp, "unicode", return_set);
-
-    return;
-}
-
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */

Deleted: branches/html_cleanup/src/string/encoding/fixed_8.c
==============================================================================
--- branches/html_cleanup/src/string/encoding/fixed_8.c	Fri Sep 24 13:04:05 2010	(r49295)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,578 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/encoding/fixed_8.c
-
-=head1 DESCRIPTION
-
-This file implements the encoding functions for fixed-width 8-bit codepoints
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "fixed_8.h"
-
-/* HEADERIZER HFILE: src/string/encoding/fixed_8.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-static UINTVAL bytes(SHIM_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(2);
-
-static UINTVAL codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL find_cclass(SHIM_INTERP,
-    ARGIN(const STRING *s),
-    ARGIN(const INTVAL *typetable),
-    INTVAL flags,
-    UINTVAL pos,
-    UINTVAL end)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-static UINTVAL fixed8_iter_get(PARROT_INTERP,
-    ARGIN(const STRING *str),
-    ARGIN(const String_iter *iter),
-    INTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP,
-    ARGIN(const STRING *str),
-    ARGMOD(String_iter *iter))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        FUNC_MODIFIES(*iter);
-
-static void fixed8_iter_set_and_advance(PARROT_INTERP,
-    ARGMOD(STRING *str),
-    ARGMOD(String_iter *iter),
-    UINTVAL c)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        FUNC_MODIFIES(*str)
-        FUNC_MODIFIES(*iter);
-
-static void fixed8_iter_set_position(SHIM_INTERP,
-    ARGIN(const STRING *str),
-    ARGMOD(String_iter *iter),
-    UINTVAL pos)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        FUNC_MODIFIES(*iter);
-
-static void fixed8_iter_skip(SHIM_INTERP,
-    ARGIN(const STRING *str),
-    ARGMOD(String_iter *iter),
-    INTVAL skip)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        FUNC_MODIFIES(*iter);
-
-static size_t fixed_8_hash(SHIM_INTERP,
-    ARGIN(const STRING *s),
-    size_t hashval)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL get_byte(SHIM_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_bytes(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL get_codepoint(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_codepoints(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static void set_byte(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL byte)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_DOES_NOT_RETURN
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_encoding(PARROT_INTERP, SHIM(const STRING *src))
-        __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(s) \
-    , PARROT_ASSERT_ARG(typetable))
-#define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(str) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed_8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(s))
-#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#define UNIMPL Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED, \
-    "unimpl fixed_8")
-
-/*
-
-=item C<static STRING * to_encoding(PARROT_INTERP, const STRING *src)>
-
-Converts the string C<src> to this particular encoding.  If C<dest> is
-provided, it will contain the result.  Otherwise this function operates in
-place.
-
-
-=cut
-
-*/
-
-PARROT_DOES_NOT_RETURN
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_encoding(PARROT_INTERP, SHIM(const STRING *src))
-{
-    ASSERT_ARGS(to_encoding)
-    UNIMPL;
-}
-
-
-/*
-
-=item C<static UINTVAL get_codepoint(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
-
-codepoints are bytes, so delegate
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-get_codepoint(PARROT_INTERP, ARGIN(const STRING *src),
-        UINTVAL offset)
-{
-    ASSERT_ARGS(get_codepoint)
-    return get_byte(interp, src, offset);
-}
-
-
-/*
-
-=item C<static UINTVAL find_cclass(PARROT_INTERP, const STRING *s, const INTVAL
-*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
-
-codepoints are bytes, so delegate
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-find_cclass(SHIM_INTERP, ARGIN(const STRING *s), ARGIN(const INTVAL *typetable),
-INTVAL flags, UINTVAL pos, UINTVAL end)
-{
-    ASSERT_ARGS(find_cclass)
-    const unsigned char *contents = (const unsigned char *)s->strstart;
-    for (; pos < end; ++pos) {
-        if ((typetable[contents[pos]] & flags) != 0) {
-            return pos;
-        }
-    }
-    return end;
-}
-
-/*
-
-=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
-
-Returns the byte in string C<src> at position C<offset>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-get_byte(SHIM_INTERP, ARGIN(const STRING *src), UINTVAL offset)
-{
-    ASSERT_ARGS(get_byte)
-    const unsigned char *contents = (const unsigned char *)src->strstart;
-
-    if (offset >= src->bufused) {
-/*        Parrot_ex_throw_from_c_args(interp, NULL, 0,
-                "get_byte past the end of the buffer (%i of %i)",
-                offset, src->bufused); */
-        return 0;
-    }
-
-    return contents[offset];
-}
-
-/*
-
-=item C<static void set_byte(PARROT_INTERP, const STRING *src, UINTVAL offset,
-UINTVAL byte)>
-
-Sets, in string C<src> at position C<offset>, the byte C<byte>.
-
-=cut
-
-*/
-
-static void
-set_byte(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL byte)
-{
-    ASSERT_ARGS(set_byte)
-    unsigned char *contents;
-
-    if (offset >= src->bufused)
-        Parrot_ex_throw_from_c_args(interp, NULL, 0,
-            "set_byte past the end of the buffer");
-
-    contents = (unsigned char *)src->strstart;
-    contents[offset] = (unsigned char)byte;
-}
-
-/*
-
-=item C<static STRING * get_codepoints(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the codepoints in string C<src> at position C<offset> and length
-C<count>.  (Delegates to C<get_bytes>.)
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_codepoints(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(get_codepoints)
-    STRING * const return_string = get_bytes(interp, src, offset, count);
-    return_string->charset = src->charset;
-    return return_string;
-}
-
-/*
-
-=item C<static STRING * get_bytes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the bytes in string C<src> at position C<offset> and length C<count>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_bytes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
-    ASSERT_ARGS(get_bytes)
-    STRING * const return_string = Parrot_str_copy(interp, src);
-
-    return_string->encoding = src->encoding;
-    return_string->charset = src->charset;
-
-    return_string->strstart = (char *)return_string->strstart + offset ;
-    return_string->bufused = count;
-
-    return_string->strlen = count;
-    return_string->hashval = 0;
-
-    return return_string;
-}
-
-
-/*
-
-=item C<static UINTVAL codepoints(PARROT_INTERP, const STRING *src)>
-
-Returns the number of codepoints in string C<src>.
-
-=cut
-
-*/
-
-static UINTVAL
-codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(codepoints)
-    return bytes(interp, src);
-}
-
-/*
-
-=item C<static UINTVAL bytes(PARROT_INTERP, const STRING *src)>
-
-Returns the number of bytes in string C<src>.
-
-=cut
-
-*/
-
-static UINTVAL
-bytes(SHIM_INTERP, ARGIN(const STRING *src))
-{
-    ASSERT_ARGS(bytes)
-    return src->bufused;
-}
-
-/*
- * iterator functions
- */
-
-/*
-
-=item C<static UINTVAL fixed8_iter_get(PARROT_INTERP, const STRING *str, const
-String_iter *iter, INTVAL offset)>
-
-Get the character at C<iter> plus C<offset>.
-
-=cut
-
-*/
-
-static UINTVAL
-fixed8_iter_get(PARROT_INTERP,
-    ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset)
-{
-    ASSERT_ARGS(fixed8_iter_get)
-    return get_byte(interp, str, iter->charpos + offset);
-}
-
-/*
-
-=item C<static void fixed8_iter_skip(PARROT_INTERP, const STRING *str,
-String_iter *iter, INTVAL skip)>
-
-Moves the string iterator C<i> by C<skip> characters.
-
-=cut
-
-*/
-
-static void
-fixed8_iter_skip(SHIM_INTERP,
-    ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip)
-{
-    ASSERT_ARGS(fixed8_iter_skip)
-    iter->bytepos += skip;
-    iter->charpos += skip;
-    PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str));
-}
-
-/*
-
-=item C<static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, const STRING
-*str, String_iter *iter)>
-
-Moves the string iterator C<i> to the next codepoint.
-
-=cut
-
-*/
-
-static UINTVAL
-fixed8_iter_get_and_advance(PARROT_INTERP,
-    ARGIN(const STRING *str), ARGMOD(String_iter *iter))
-{
-    ASSERT_ARGS(fixed8_iter_get_and_advance)
-    const UINTVAL c = get_byte(interp, str, iter->charpos++);
-    iter->bytepos++;
-    return c;
-}
-
-/*
-
-=item C<static void fixed8_iter_set_and_advance(PARROT_INTERP, STRING *str,
-String_iter *iter, UINTVAL c)>
-
-With the string iterator C<i>, appends the codepoint C<c> and advances to the
-next position in the string.
-
-=cut
-
-*/
-
-static void
-fixed8_iter_set_and_advance(PARROT_INTERP,
-    ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c)
-{
-    ASSERT_ARGS(fixed8_iter_set_and_advance)
-    set_byte(interp, str, iter->charpos++, c);
-    iter->bytepos++;
-}
-
-/*
-
-=item C<static void fixed8_iter_set_position(PARROT_INTERP, const STRING *str,
-String_iter *iter, UINTVAL pos)>
-
-Moves the string iterator C<i> to the position C<n> in the string.
-
-=cut
-
-*/
-
-static void
-fixed8_iter_set_position(SHIM_INTERP,
-    ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos)
-{
-    ASSERT_ARGS(fixed8_iter_set_position)
-    iter->bytepos = iter->charpos = pos;
-    PARROT_ASSERT(pos <= Buffer_buflen(str));
-}
-
-/*
-
-=item C<static size_t fixed_8_hash(PARROT_INTERP, const STRING *s, size_t
-hashval)>
-
-Returns the hashed value of the string, given a seed in hashval.
-
-=cut
-
-*/
-
-static size_t
-fixed_8_hash(SHIM_INTERP, ARGIN(const STRING *s), size_t hashval)
-{
-    ASSERT_ARGS(fixed_8_hash)
-    const unsigned char *pos = (const unsigned char *)s->strstart;
-    UINTVAL        len = s->strlen;
-
-    while (len--) {
-        hashval += hashval << 5;
-        hashval += *(pos++);
-    }
-
-    return hashval;
-}
-
-
-/*
-
-=item C<void Parrot_encoding_fixed_8_init(PARROT_INTERP)>
-
-Initializes the fixed-8 encoding.
-
-=cut
-
-*/
-
-void
-Parrot_encoding_fixed_8_init(PARROT_INTERP)
-{
-    ASSERT_ARGS(Parrot_encoding_fixed_8_init)
-    ENCODING * const return_encoding = Parrot_new_encoding(interp);
-
-    ENCODING base_encoding = {
-        "fixed_8",
-        1, /* Max bytes per codepoint */
-        to_encoding,
-        get_codepoint,
-        get_byte,
-        set_byte,
-        get_codepoints,
-        get_bytes,
-        codepoints,
-        bytes,
-        find_cclass,
-        fixed_8_hash,
-        fixed8_iter_get,
-        fixed8_iter_skip,
-        fixed8_iter_get_and_advance,
-        fixed8_iter_set_and_advance,
-        fixed8_iter_set_position
-    };
-
-    STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
-    Parrot_register_encoding(interp, "fixed_8", return_encoding);
-
-    return;
-}
-
-
-/*
- * Local variables:
- *   c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */
-


More information about the parrot-commits mailing list