[svn:parrot] r49296 - in branches/html_cleanup: . compilers/data_json compilers/imcc compilers/pct compilers/pge compilers/tge config/auto/sizes config/auto/zlib docs/book/draft docs/book/pct docs/dev docs/pdds examples/embed examples/languages/abc examples/languages/squaak examples/pge examples/tools ext/nqp-rx include/parrot lib/Parrot lib/Parrot/Configure/Step ports/cygwin ports/debian runtime/parrot/languages runtime/parrot/library runtime/parrot/library/Math src/call src/gc src/interp src/pmc src/runcore src/string/charset src/string/encoding t/codingstd t/compilers/tge t/examples t/oo t/pmc t/src t/steps/init/hints tools/build tools/dev tools/release
mikehh at svn.parrot.org
mikehh at svn.parrot.org
Fri Sep 24 13:04:06 UTC 2010
Author: mikehh
Date: Fri Sep 24 13:04:05 2010
New Revision: 49296
URL: https://trac.parrot.org/parrot/changeset/49296
Log:
html_cleanup - some files did not get deleted properly in merge (and property changes)
Deleted:
branches/html_cleanup/src/pmc/imageio.pmc
branches/html_cleanup/src/string/charset/ascii.c
branches/html_cleanup/src/string/charset/binary.c
branches/html_cleanup/src/string/charset/iso-8859-1.c
branches/html_cleanup/src/string/charset/unicode.c
branches/html_cleanup/src/string/encoding/fixed_8.c
Modified:
branches/html_cleanup/ (props changed)
branches/html_cleanup/compilers/data_json/Rules.mak (props changed)
branches/html_cleanup/compilers/imcc/Rules.in (props changed)
branches/html_cleanup/compilers/pct/Rules.mak (props changed)
branches/html_cleanup/compilers/pge/Rules.mak (props changed)
branches/html_cleanup/compilers/tge/Rules.mak (props changed)
branches/html_cleanup/config/auto/sizes/intval_maxmin_c.in (props changed)
branches/html_cleanup/config/auto/zlib/ (props changed)
branches/html_cleanup/docs/book/draft/README (props changed)
branches/html_cleanup/docs/book/draft/appa_glossary.pod (props changed)
branches/html_cleanup/docs/book/draft/appb_patch_submission.pod (props changed)
branches/html_cleanup/docs/book/draft/appc_command_line_options.pod (props changed)
branches/html_cleanup/docs/book/draft/appd_build_options.pod (props changed)
branches/html_cleanup/docs/book/draft/appe_source_code.pod (props changed)
branches/html_cleanup/docs/book/draft/ch01_introduction.pod (props changed)
branches/html_cleanup/docs/book/draft/ch02_getting_started.pod (props changed)
branches/html_cleanup/docs/book/draft/ch07_dynpmcs.pod (props changed)
branches/html_cleanup/docs/book/draft/ch08_dynops.pod (props changed)
branches/html_cleanup/docs/book/draft/ch10_opcode_reference.pod (props changed)
branches/html_cleanup/docs/book/draft/ch11_directive_reference.pod (props changed)
branches/html_cleanup/docs/book/draft/ch12_operator_reference.pod (props changed)
branches/html_cleanup/docs/book/draft/chXX_hlls.pod (props changed)
branches/html_cleanup/docs/book/draft/chXX_library.pod (props changed)
branches/html_cleanup/docs/book/draft/chXX_testing_and_debugging.pod (props changed)
branches/html_cleanup/docs/book/pct/ch01_introduction.pod (props changed)
branches/html_cleanup/docs/book/pct/ch02_getting_started.pod (props changed)
branches/html_cleanup/docs/book/pct/ch03_compiler_tools.pod (props changed)
branches/html_cleanup/docs/book/pct/ch04_pge.pod (props changed)
branches/html_cleanup/docs/book/pct/ch05_nqp.pod (props changed)
branches/html_cleanup/docs/dev/c_functions.pod (props changed)
branches/html_cleanup/docs/pdds/pdd30_install.pod (props changed)
branches/html_cleanup/examples/embed/cotorra.c (props changed)
branches/html_cleanup/examples/languages/abc/ (props changed)
branches/html_cleanup/examples/languages/squaak/ (props changed)
branches/html_cleanup/examples/pge/demo.pir (props changed)
branches/html_cleanup/examples/tools/pgegrep (props changed)
branches/html_cleanup/ext/nqp-rx/Rules.mak (props changed)
branches/html_cleanup/include/parrot/call.h (props changed)
branches/html_cleanup/include/parrot/gc_api.h (props changed)
branches/html_cleanup/include/parrot/runcore_api.h (props changed)
branches/html_cleanup/include/parrot/runcore_profiling.h (props changed)
branches/html_cleanup/include/parrot/runcore_trace.h (props changed)
branches/html_cleanup/lib/Parrot/Configure/Step/Test.pm (props changed)
branches/html_cleanup/lib/Parrot/H2inc.pm (props changed)
branches/html_cleanup/ports/cygwin/parrot-1.0.0-1.cygport (props changed)
branches/html_cleanup/ports/debian/libparrot-dev.install.in (props changed)
branches/html_cleanup/ports/debian/libparrot.install.in (props changed)
branches/html_cleanup/ports/debian/parrot-doc.install.in (props changed)
branches/html_cleanup/ports/debian/parrot.install.in (props changed)
branches/html_cleanup/runtime/parrot/languages/ (props changed)
branches/html_cleanup/runtime/parrot/library/Math/Rand.pir (props changed)
branches/html_cleanup/runtime/parrot/library/Rules.mak (props changed)
branches/html_cleanup/src/call/ops.c (props changed)
branches/html_cleanup/src/call/pcc.c (props changed)
branches/html_cleanup/src/gc/alloc_memory.c (props changed)
branches/html_cleanup/src/gc/alloc_resources.c (props changed)
branches/html_cleanup/src/gc/api.c (props changed)
branches/html_cleanup/src/gc/malloc.c (props changed)
branches/html_cleanup/src/gc/malloc_trace.c (props changed)
branches/html_cleanup/src/gc/mark_sweep.c (props changed)
branches/html_cleanup/src/gc/string_gc.c (props changed)
branches/html_cleanup/src/gc/system.c (props changed)
branches/html_cleanup/src/interp/inter_cb.c (props changed)
branches/html_cleanup/src/interp/inter_create.c (props changed)
branches/html_cleanup/src/interp/inter_misc.c (props changed)
branches/html_cleanup/src/runcore/cores.c (props changed)
branches/html_cleanup/src/runcore/main.c (props changed)
branches/html_cleanup/src/runcore/profiling.c (props changed)
branches/html_cleanup/src/runcore/trace.c (props changed)
branches/html_cleanup/t/codingstd/pmc_docs.t (props changed)
branches/html_cleanup/t/compilers/tge/NoneGrammar.tg (props changed)
branches/html_cleanup/t/examples/pgegrep.t (props changed)
branches/html_cleanup/t/oo/objects.t (props changed)
branches/html_cleanup/t/oo/root_new.t (props changed)
branches/html_cleanup/t/pmc/namespace-old.t (props changed)
branches/html_cleanup/t/src/embed.t (props changed)
branches/html_cleanup/t/steps/init/hints/linux-01.t (props changed)
branches/html_cleanup/tools/build/README (props changed)
branches/html_cleanup/tools/build/h2inc.pl (props changed)
branches/html_cleanup/tools/dev/README (props changed)
branches/html_cleanup/tools/dev/addopstags.pl (props changed)
branches/html_cleanup/tools/dev/dump_pbc.pl (props changed)
branches/html_cleanup/tools/dev/fetch_languages.pl (props changed)
branches/html_cleanup/tools/dev/headerizer.pl (props changed)
branches/html_cleanup/tools/dev/mk_gitignore.pl (props changed)
branches/html_cleanup/tools/dev/ncidef2pasm.pl (props changed)
branches/html_cleanup/tools/dev/parrot-config.pir (props changed)
branches/html_cleanup/tools/dev/perlcritic-cage.conf (props changed)
branches/html_cleanup/tools/dev/perlcritic.conf (props changed)
branches/html_cleanup/tools/dev/perltidy.conf (props changed)
branches/html_cleanup/tools/dev/update_copyright.pl (props changed)
branches/html_cleanup/tools/release/crow.pir (props changed)
branches/html_cleanup/tools/release/gen_release_info.pl (props changed)
branches/html_cleanup/tools/release/inc_ver.pir (props changed)
branches/html_cleanup/tools/release/release.json (props changed)
branches/html_cleanup/tools/release/templates.json (props changed)
Deleted: branches/html_cleanup/src/pmc/imageio.pmc
==============================================================================
--- branches/html_cleanup/src/pmc/imageio.pmc Fri Sep 24 13:04:05 2010 (r49295)
+++ /dev/null 00:00:00 1970 (deleted)
@@ -1,793 +0,0 @@
-/*
-Copyright (C) 2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/pmc/imageio.pmc - ImageIO PMC
-
-=head1 DESCRIPTION
-
-Freezes and thaws other PMCs.
-
-=head1 FUNCTIONS
-
-=over 4
-
-=cut
-
-*/
-
-#define GROW_TO_16_BYTE_BOUNDARY(size) ((size) + ((size) % 16 ? 16 - (size) % 16 : 0))
-
-/* preallocate freeze image for aggregates with this estimation */
-#define FREEZE_BYTES_PER_ITEM 9
-
-/* macros/constants to handle packing/unpacking of PMC IDs and flags
- * the 2 LSBs are used for flags, all other bits are used for PMC ID
- */
-#define PackID_new(id, flags) (((UINTVAL)(id) * 4) | ((UINTVAL)(flags) & 3))
-#define PackID_get_PMCID(id) ((UINTVAL)(id) / 4)
-#define PackID_set_PMCID(lv, id) (lv) = PackID_new((id), PackID_get_FLAGS(lv))
-#define PackID_get_FLAGS(id) ((UINTVAL)(id) & 3)
-#define PackID_set_FLAGS(lv, flags) (lv) = PackID_new(PackID_get_PMCID(lv), (flags))
-
-enum {
- enum_PackID_normal = 0,
- enum_PackID_seen = 1,
-};
-
-/* HEADERIZER HFILE: none */
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-
-static void create_buffer(PARROT_INTERP,
- ARGIN_NULLOK(PMC *pmc),
- ARGMOD(PMC *info))
- __attribute__nonnull__(1)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*info);
-
-PARROT_INLINE
-static void ensure_buffer_size(PARROT_INTERP, ARGIN(PMC *io), size_t len)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_INLINE
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static opcode_t * GET_VISIT_CURSOR(ARGIN(const PMC *pmc))
- __attribute__nonnull__(1);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CAN_RETURN_NULL
-PARROT_INLINE
-static PMC* id_list_get(PARROT_INTERP, ARGIN(const PMC *io), UINTVAL id)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_INLINE
-static void INC_VISIT_CURSOR(ARGMOD(PMC *pmc), UINTVAL inc)
- __attribute__nonnull__(1)
- FUNC_MODIFIES(*pmc);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_INLINE
-static INTVAL INFO_HAS_DATA(ARGIN(const PMC *io))
- __attribute__nonnull__(1);
-
-PARROT_INLINE
-static void SET_VISIT_CURSOR(ARGMOD(PMC *pmc), ARGIN(const char *cursor))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- FUNC_MODIFIES(*pmc);
-
-#define ASSERT_ARGS_create_buffer __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(info))
-#define ASSERT_ARGS_ensure_buffer_size __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(io))
-#define ASSERT_ARGS_GET_VISIT_CURSOR __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(pmc))
-#define ASSERT_ARGS_id_list_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(io))
-#define ASSERT_ARGS_INC_VISIT_CURSOR __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(pmc))
-#define ASSERT_ARGS_INFO_HAS_DATA __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(io))
-#define ASSERT_ARGS_SET_VISIT_CURSOR __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(pmc) \
- , PARROT_ASSERT_ARG(cursor))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-/* HEADERIZER END: static */
-
-/*
-
-=item C<static opcode_t * GET_VISIT_CURSOR(const PMC *pmc)>
-
-=cut
-
-*/
-
-PARROT_INLINE
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static opcode_t *
-GET_VISIT_CURSOR(ARGIN(const PMC *pmc))
-{
- ASSERT_ARGS(GET_VISIT_CURSOR)
-
- char * const buf = (char *)Buffer_bufstart(PARROT_IMAGEIO(pmc)->buffer);
- const size_t pos = PARROT_IMAGEIO(pmc)->pos;
- return (opcode_t *)(buf + pos);
-}
-
-/*
-
-=item C<static void SET_VISIT_CURSOR(PMC *pmc, const char *cursor)>
-
-=cut
-
-*/
-
-
-PARROT_INLINE
-static void
-SET_VISIT_CURSOR(ARGMOD(PMC *pmc), ARGIN(const char *cursor))
-{
- ASSERT_ARGS(SET_VISIT_CURSOR)
-
- const char * const bufstart = (const char *)Buffer_bufstart(PARROT_IMAGEIO(pmc)->buffer);
- PARROT_IMAGEIO(pmc)->pos = (cursor - bufstart);
-}
-
-/*
-
-=item C<static void INC_VISIT_CURSOR(PMC *pmc, UINTVAL inc)>
-
-=cut
-
-*/
-
-
-PARROT_INLINE
-static void
-INC_VISIT_CURSOR(ARGMOD(PMC *pmc), UINTVAL inc)
-{
- ASSERT_ARGS(INC_VISIT_CURSOR)
-
- PARROT_IMAGEIO(pmc)->pos += inc;
-}
-
-
-#define BYTECODE_SHIFT_OK(pmc) PARROT_ASSERT( \
- PARROT_IMAGEIO(pmc)->pos <= PARROT_IMAGEIO(pmc)->input_length)
-
-/*
-
-=item C<static void create_buffer(PARROT_INTERP, PMC *pmc, PMC *info)>
-
-=cut
-
-*/
-
-static void
-create_buffer(PARROT_INTERP, ARGIN_NULLOK(PMC *pmc), ARGMOD(PMC *info))
-{
- ASSERT_ARGS(create_buffer)
-
- INTVAL len;
-
- if (!PMC_IS_NULL(pmc)) {
- STRING * const array = CONST_STRING(interp, "array");
- STRING * const hash = CONST_STRING(interp, "hash");
- INTVAL items = 1;
-
- if (VTABLE_does(interp, pmc, array) || VTABLE_does(interp, pmc, hash))
- items += VTABLE_elements(interp, pmc);
-
- len = items * FREEZE_BYTES_PER_ITEM;
- }
- else
- len = FREEZE_BYTES_PER_ITEM;
-
- PARROT_IMAGEIO(info)->buffer =
- Parrot_gc_new_bufferlike_header(interp, sizeof (Buffer));
- Parrot_gc_allocate_buffer_storage_aligned(interp,
- PARROT_IMAGEIO(info)->buffer, len);
- SET_VISIT_CURSOR(info,
- (const char *)Buffer_bufstart(PARROT_IMAGEIO(info)->buffer));
-}
-
-/*
-
-=item C<static void ensure_buffer_size(PARROT_INTERP, PMC *io, size_t len)>
-
-Checks the size of the buffer to see if it can accommodate C<len> more
-bytes. If not, expands the buffer.
-
-=cut
-
-*/
-
-PARROT_INLINE
-static void
-ensure_buffer_size(PARROT_INTERP, ARGIN(PMC *io), size_t len)
-{
- ASSERT_ARGS(ensure_buffer_size)
-
- Buffer * const buf = PARROT_IMAGEIO(io)->buffer;
- const size_t used = PARROT_IMAGEIO(io)->pos;
- const int need_free = Buffer_buflen(buf) - used - len;
-
- /* grow by factor 1.5 or such */
- if (need_free <= 16) {
- size_t new_size = (size_t) (Buffer_buflen(buf) * 1.5);
-
- if (new_size < Buffer_buflen(buf) - need_free + 512)
- new_size = Buffer_buflen(buf) - need_free + 512;
-
- Parrot_gc_reallocate_buffer_storage(interp, buf, new_size);
- PARROT_ASSERT(Buffer_buflen(buf) - used - len >= 15);
- }
-
-#ifndef DISABLE_GC_DEBUG
- Parrot_gc_compact_memory_pool(interp);
-#endif
-}
-
-/*
-
-=item C<static INTVAL INFO_HAS_DATA(const PMC *io)>
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_INLINE
-static INTVAL
-INFO_HAS_DATA(ARGIN(const PMC *io))
-{
- ASSERT_ARGS(INFO_HAS_DATA)
-
- return PARROT_IMAGEIO(io)->pos < PARROT_IMAGEIO(io)->input_length;
-}
-
-/*
-
-=item C<static PMC* id_list_get(PARROT_INTERP, const PMC *io, UINTVAL id)>
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CAN_RETURN_NULL
-PARROT_INLINE
-static PMC*
-id_list_get(PARROT_INTERP, ARGIN(const PMC *io), UINTVAL id)
-{
- ASSERT_ARGS(id_list_get)
-
- return VTABLE_get_pmc_keyed_int(interp, PARROT_IMAGEIO(io)->todo, id - 1);
-}
-
-pmclass ImageIO auto_attrs {
- ATTR Buffer *buffer; /* buffer to store the image */
- ATTR size_t pos; /* current read/write buf position */
- ATTR size_t input_length;
- ATTR INTVAL what;
- ATTR PMC *seen; /* seen hash */
- ATTR PMC *todo; /* todo list */
- ATTR UINTVAL id; /* freeze ID of PMC */
- ATTR struct PackFile *pf;
- ATTR PackFile_ConstTable *pf_ct;
-
-/*
-
-=back
-
-=head1 VTABLES
-
-=over 4
-
-=cut
-
-*/
-
-/*
-
-=item C<void init()>
-
-Initializes the PMC.
-
-=cut
-
-*/
- VTABLE void init() {
- PARROT_IMAGEIO(SELF)->seen = PMCNULL;
- PARROT_IMAGEIO(SELF)->todo =
- Parrot_pmc_new(INTERP, enum_class_ResizablePMCArray);
-
- PObj_flag_CLEAR(private1, SELF);
-
- PObj_custom_mark_SET(SELF);
- }
-
-
-/*
-
-=item C<void destroy()>
-
-Destroys the PMC.
-
-=cut
-
-*/
- VTABLE void destroy() {
- PackFile_destroy(INTERP, PARROT_IMAGEIO(SELF)->pf);
- PARROT_IMAGEIO(SELF)->pf = NULL;
- }
-
-
-/*
-
-=item C<void mark()>
-
-Marks the PMC as alive.
-
-=cut
-
-*/
- VTABLE void mark() {
- PObj * const buffer = (PObj *)(PARROT_IMAGEIO(SELF)->buffer);
- if (buffer)
- Parrot_gc_mark_PObj_alive(INTERP, buffer);
- Parrot_gc_mark_PMC_alive(INTERP, PARROT_IMAGEIO(SELF)->todo);
- Parrot_gc_mark_PMC_alive(INTERP, PARROT_IMAGEIO(SELF)->seen);
- }
-
-
-/*
-
-=item C<STRING *get_string()>
-
-Returns the content of the image as a string.
-
-=cut
-
-*/
-
- VTABLE STRING *get_string() {
- return Parrot_str_new_from_buffer(INTERP,
- PARROT_IMAGEIO(SELF)->buffer,
- PARROT_IMAGEIO(SELF)->pos);
- }
-
-
-/*
-
-=item C<VTABLE PMC *get_pmc()>
-
-Gets the result PMC after a thaw.
-
-=cut
-
-*/
-
- VTABLE PMC *get_pmc() {
- return VTABLE_get_pmc_keyed_int(INTERP,
- (PARROT_IMAGEIO(SELF))->todo, 0);
- }
-
-
-/*
-
-=item C<VTABLE PMC *get_iter()>
-
-Get the C<todo> list for this freeze/thaw for iterating over.
-
-=cut
-
-*/
-
- VTABLE PMC *get_iter() {
- return PARROT_IMAGEIO(SELF)->todo;
- }
-
-/*
-
-=item C<VTABLE INTVAL get_integer()>
-
-Returns the flags describing the visit action.
-
-=cut
-
-*/
-
- VTABLE INTVAL get_integer() {
- return PARROT_IMAGEIO(SELF)->what;
- }
-
-
-/*
-
-=item C<VTABLE void push_integer(INTVAL v)>
-
-Pushes the integer C<v> onto the end of the image.
-
-=cut
-
-*/
-
- VTABLE void push_integer(INTVAL v) {
- const size_t len = PF_size_integer() * sizeof (opcode_t);
- ensure_buffer_size(INTERP, SELF, len);
- SET_VISIT_CURSOR(SELF,
- (const char *)PF_store_integer(GET_VISIT_CURSOR(SELF), v));
- }
-
-
-/*
-
-=item C<VTABLE void push_float(FLOATVAL v)>
-
-Pushes the float C<v> onto the end of the image.
-
-=cut
-
-*/
-
- VTABLE void push_float(FLOATVAL v) {
- const size_t len = PF_size_number() * sizeof (opcode_t);
- ensure_buffer_size(INTERP, SELF, len);
- SET_VISIT_CURSOR(SELF,
- (const char *)PF_store_number(GET_VISIT_CURSOR(SELF), &v));
- }
-
-
-/*
-
-=item C<VTABLE void push_string(STRING *v)>
-
-Pushes the string C<*v> onto the end of the image.
-
-=cut
-
-*/
-
- VTABLE void push_string(STRING *v) {
- if (PObj_flag_TEST(private1, SELF)) {
- /* store a reference to constant table entry of string */
- PMC * const v_pmc = key_new_string(interp, v);
- PackFile_ConstTable * const table = PARROT_IMAGEIO(SELF)->pf_ct;
- const int idx =
- PackFile_ConstTable_rlookup(INTERP, table, v_pmc, PFC_STRING);
-
- if (idx >= 0) {
- STATICSELF.push_integer(idx);
- return;
- }
-
- /* XXX handle cases where the PMC has changed after
- * Parrot_freeze_strings was called eg: :immediate subs */
- STATICSELF.push_integer(-1);
-
- /* TODO
- * should really be:
- * PANIC(INTERP, "string not previously in constant table "
- * "when freezing to packfile"); */
- }
-
- {
- const size_t len = PF_size_string(v) * sizeof (opcode_t);
- ensure_buffer_size(INTERP, SELF, len);
- SET_VISIT_CURSOR(SELF,
- (const char *)PF_store_string(GET_VISIT_CURSOR(SELF), v));
- }
- }
-
-
-/*
-
-=item C<VTABLE void push_pmc(PMC *v)>
-
-Pushes a reference to pmc C<*v> onto the end of the image. If C<*v>
-hasn't been seen yet, it is also pushed onto the todo list.
-
-=cut
-
-*/
-
- VTABLE void push_pmc(PMC *v) {
- UINTVAL id;
- int packid_type;
-
- PARROT_ASSERT(PARROT_IMAGEIO(SELF)->what == VISIT_FREEZE_NORMAL);
-
- if (PMC_IS_NULL(v)) {
- id = 0;
- packid_type = enum_PackID_seen;
- }
- else {
- Hash * const hash = (Hash *)VTABLE_get_pointer(INTERP,
- PARROT_IMAGEIO(SELF)->seen);
- HashBucket * const b = parrot_hash_get_bucket(INTERP, hash, v);
-
- if (b) {
- id = (UINTVAL)b->value;
- packid_type = enum_PackID_seen;
- }
- else {
- ++PARROT_IMAGEIO(SELF)->id; /* next id to freeze */
- id = PARROT_IMAGEIO(SELF)->id;
- packid_type = enum_PackID_normal;
- }
- }
-
- SELF.push_integer(PackID_new(id, packid_type));
-
- if (packid_type == enum_PackID_normal) {
- Hash * const hash = (Hash *)VTABLE_get_pointer(INTERP,
- PARROT_IMAGEIO(SELF)->seen);
-
- PARROT_ASSERT(v);
-
- /* workaround to keep ParrotInterpreter PBC hack working */
- if (v->vtable->base_type == enum_class_ParrotInterpreter)
- PObj_flag_CLEAR(private1, SELF);
-
- SELF.push_integer(
- PObj_is_object_TEST(v)
- ? (INTVAL) enum_class_Object
- : v->vtable->base_type);
-
- parrot_hash_put(INTERP, hash, v, (void *)id);
- VTABLE_push_pmc(INTERP, PARROT_IMAGEIO(SELF)->todo, v);
- }
- }
-
-
-/*
-
-=item C<void set_pointer(void *value)>
-
-Sets the constant table of this ImageIO PMC.
-
-=cut
-
-*/
-
- VTABLE void set_pointer(void *value) {
- PObj_flag_SET(private1, SELF);
- PARROT_IMAGEIO(SELF)->pf_ct = (PackFile_ConstTable *)value;
- }
-
-
-/*
-
-=item C<VTABLE INTVAL shift_integer()>
-
-Removes and returns an integer from the start of the image.
-
-=cut
-
-*/
-
- VTABLE INTVAL shift_integer() {
- /* inlining PF_fetch_integer speeds up PBC thawing measurably */
- const PackFile *pf = PARROT_IMAGEIO(SELF)->pf;
- const opcode_t *pos = GET_VISIT_CURSOR(SELF);
- const unsigned char *stream = (const unsigned char *)pos;
- const INTVAL i = pf->fetch_iv(stream);
-
- SET_VISIT_CURSOR(SELF, (const char *)pos + pf->header->wordsize);
- BYTECODE_SHIFT_OK(SELF);
- return i;
- }
-
-
-/*
-
-=item C<VTABLE FLOATVAL shift_float()>
-
-Removes and returns a number from the start of the image.
-
-=cut
-
-*/
-
- VTABLE FLOATVAL shift_float() {
- const opcode_t *pos = GET_VISIT_CURSOR(SELF);
- FLOATVAL f = PF_fetch_number(PARROT_IMAGEIO(SELF)->pf, &pos);
- SET_VISIT_CURSOR(SELF, (const char *)pos);
- BYTECODE_SHIFT_OK(SELF);
- return f;
- }
-
-
-/*
-
-=item C<VTABLE STRING* shift_string()>
-
-Removes and returns a string from the start of the image.
-
-=cut
-
-*/
-
- VTABLE STRING *shift_string() {
- if (PObj_flag_TEST(private1, SELF)) {
- const INTVAL i = STATICSELF.shift_integer();
-
- if (i >= 0) {
- PackFile_ConstTable *table = PARROT_IMAGEIO(SELF)->pf_ct;
-
- if (!table->constants[i].type)
- Parrot_ex_throw_from_c_args(interp, NULL,
- EXCEPTION_MALFORMED_PACKFILE,
- "Reference to constant not yet unpacked %d", i);
- return table->constants[i].u.string;
- }
-
- /* XXX
- * only got here because constant table doesn't contain the string
- * fallback on inline strings
- */
- }
-
- {
- const opcode_t * pos = GET_VISIT_CURSOR(SELF);
- STRING * const s = PF_fetch_string(INTERP,
- PARROT_IMAGEIO(SELF)->pf, &pos);
- SET_VISIT_CURSOR(SELF, (const char *)pos);
- BYTECODE_SHIFT_OK(SELF);
- return s;
- }
- }
-
-
-/*
-
-=item C<static PMC *shift_pmc()>
-
-Removes and returns a reference to a pmc from the start of the image.
-
-=cut
-
-*/
-
- VTABLE PMC *shift_pmc() {
- const UINTVAL n = SELF.shift_integer();
- const INTVAL id = PackID_get_PMCID(n);
- const int packid_flags = PackID_get_FLAGS(n);
- PMC *pmc = PMCNULL;
-
- PARROT_ASSERT(PARROT_IMAGEIO(SELF)->what == VISIT_THAW_NORMAL);
-
- switch (packid_flags) {
- case enum_PackID_seen:
- if (id) /* got a non-NULL PMC */
- pmc = id_list_get(INTERP, SELF, id);
- break;
- case enum_PackID_normal:
- {
- PMC * const todo = PARROT_IMAGEIO(SELF)->todo;
- const INTVAL type = VTABLE_shift_integer(INTERP, SELF);
-
- PARROT_ASSERT(id - 1
- == VTABLE_elements(INTERP, PARROT_IMAGEIO(SELF)->todo));
-
- if (type <= 0 || type > INTERP->n_vtable_max)
- Parrot_ex_throw_from_c_args(INTERP, NULL, 1,
- "Unknown PMC type to thaw %d", type);
-
- /* workaround to keep ParrotInterpreter PBC hack working */
- if (type == enum_class_ParrotInterpreter)
- PObj_flag_CLEAR(private1, SELF);
-
- pmc = Parrot_pmc_new_noinit(INTERP, type);
-
- VTABLE_set_pmc_keyed_int(INTERP, todo, id - 1, pmc);
- }
- break;
- default:
- Parrot_ex_throw_from_c_args(INTERP, NULL, 1,
- "Unknown PMC id args thaw %d", packid_flags);
- break;
- }
-
- return pmc;
- }
-
- VTABLE void set_pmc(PMC *p)
- {
- PARROT_IMAGEIO(SELF)->what = VISIT_FREEZE_NORMAL;
-
- create_buffer(INTERP, p, SELF);
- if (PObj_flag_TEST(private1, SELF)) {
- PARROT_IMAGEIO(SELF)->pf = PARROT_IMAGEIO(SELF)->pf_ct->base.pf;
- }
- else {
- const UINTVAL header_length =
- GROW_TO_16_BYTE_BOUNDARY(PACKFILE_HEADER_BYTES);
-
- PARROT_IMAGEIO(SELF)->pf = PackFile_new(INTERP, 0);
- PObj_custom_destroy_SET(SELF);
-
- ensure_buffer_size(INTERP, SELF, header_length);
- mem_sys_memcopy(GET_VISIT_CURSOR(SELF),
- PARROT_IMAGEIO(SELF)->pf->header, PACKFILE_HEADER_BYTES);
- INC_VISIT_CURSOR(SELF, header_length);
- }
-
- PARROT_IMAGEIO(SELF)->seen = Parrot_pmc_new(INTERP, enum_class_Hash);
- VTABLE_set_pointer(INTERP, PARROT_IMAGEIO(SELF)->seen,
- parrot_new_intval_hash(INTERP));
-
- STATICSELF.push_pmc(p);
- Parrot_visit_loop_visit(INTERP, SELF);
- }
-
- VTABLE void set_string_native(STRING *image) {
- PMC *unused;
- PARROT_IMAGEIO(SELF)->what = VISIT_THAW_NORMAL;
- PARROT_IMAGEIO(SELF)->buffer = (Buffer *)image;
-
- PARROT_ASSERT(image->_bufstart == image->strstart);
-
- SET_VISIT_CURSOR(SELF,
- (const char *)Buffer_bufstart(PARROT_IMAGEIO(SELF)->buffer));
- PARROT_IMAGEIO(SELF)->input_length = image->strlen;
-
- if (PObj_flag_TEST(private1, SELF)) {
- PARROT_IMAGEIO(SELF)->pf = PARROT_IMAGEIO(SELF)->pf_ct->base.pf;
- }
- else {
- const UINTVAL header_length =
- GROW_TO_16_BYTE_BOUNDARY(PACKFILE_HEADER_BYTES);
- int unpacked_length;
-
- PARROT_IMAGEIO(SELF)->pf = PackFile_new(INTERP, 0);
- PObj_custom_destroy_SET(SELF);
-
- PARROT_IMAGEIO(SELF)->pf->options |= PFOPT_PMC_FREEZE_ONLY;
- unpacked_length = PackFile_unpack(INTERP, PARROT_IMAGEIO(SELF)->pf,
- GET_VISIT_CURSOR(SELF), PARROT_IMAGEIO(SELF)->input_length);
-
- if (unpacked_length)
- INC_VISIT_CURSOR(SELF, header_length);
- else
- Parrot_ex_throw_from_c_args(INTERP, NULL,
- EXCEPTION_INVALID_STRING_REPRESENTATION,
- "PackFile header failed during unpack");
- }
-
- unused = STATICSELF.shift_pmc();
- Parrot_visit_loop_visit(INTERP, SELF);
-
- /* we're done reading the image */
- PARROT_ASSERT(!INFO_HAS_DATA(SELF));
- Parrot_visit_loop_thawfinish(INTERP, SELF);
- }
-
-
-/*
-
-=back
-
-=cut
-
-*/
-
-}
-
-/*
- * Local variables:
- * c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */
Deleted: branches/html_cleanup/src/string/charset/ascii.c
==============================================================================
--- branches/html_cleanup/src/string/charset/ascii.c Fri Sep 24 13:04:05 2010 (r49295)
+++ /dev/null 00:00:00 1970 (deleted)
@@ -1,876 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/ascii.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for ascii data and common
-charset functionality for similar charsets like iso-8859-1.
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "ascii.h"
-
-/*
- * TODO check interpreter error and warnings setting
- */
-
-#include "tables.h"
-
-/* HEADERIZER HFILE: src/string/charset/ascii.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* compose(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL find_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-static INTVAL find_not_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL is_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_ascii(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_charset(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL validate(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_ascii __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-/* HEADERIZER END: static */
-
-/*
-
-=item C<STRING * ascii_get_graphemes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Retrieves the graphemes for the STRING C<src>, starting at
-C<offset> and ending at C<offset + count>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-STRING *
-ascii_get_graphemes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(ascii_get_graphemes)
- return ENCODING_GET_BYTES(interp, src, offset, count);
-}
-
-/*
-
-=item C<static STRING * to_ascii(PARROT_INTERP, const STRING *src)>
-
-Attempts to convert STRING C<src> to ASCII in STRING C<dest>. Throws
-an exception if unconvertable UNICODE characters are involved.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_ascii(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(to_ascii)
- String_iter iter;
- unsigned char *p;
- const UINTVAL len = src->strlen;
-
- /* the string can't grow. Just clone it */
- STRING * const dest = Parrot_str_clone(interp, src);
-
- p = (unsigned char *)dest->strstart;
- STRING_ITER_INIT(interp, &iter);
- while (iter.charpos < len) {
- const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
- if (c >= 128)
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
- "can't convert unicode string to ascii");
- *p++ = (unsigned char)c;
- }
- dest->bufused = len;
- dest->strlen = len;
- dest->charset = Parrot_ascii_charset_ptr;
- dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest);
- return dest;
-}
-
-/*
-
-=item C<static STRING * to_charset(PARROT_INTERP, const STRING *src)>
-
-Converts STRING C<src> to ASCII charset STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(to_charset)
- const charset_converter_t conversion_func =
- Parrot_find_charset_converter(interp, src->charset, Parrot_ascii_charset_ptr);
-
- if (conversion_func) {
- return conversion_func(interp, src);
- }
- else {
- return to_ascii(interp, src);
- }
-}
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-Can't compose ASCII strings, so performs a string copy on it and
-returns the new string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-compose(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(compose)
-
- STRING * const dest = Parrot_str_copy(interp, src);
-
- return dest;
-}
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-Can't decompose ASCII, so we perform a string copy instead and return
-a pointer to the new string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(decompose)
-
- STRING * const dest = Parrot_str_copy(interp, src);
-
- return dest;
-}
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to all uppercase.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(upcase)
- STRING * const result = Parrot_str_clone(interp, src);
- const UINTVAL n = src->strlen;
-
- if (n) {
- char * const buffer = result->strstart;
- UINTVAL offset;
-
- for (offset = 0; offset < n; ++offset) {
- buffer[offset] = (char)toupper((unsigned char)buffer[offset]);
- }
- }
-
- return result;
-}
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to all lower-case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(downcase)
- STRING *result = Parrot_str_clone(interp, src);
- const UINTVAL n = src->strlen;
-
- if (n) {
- char * const buffer = result->strstart;
- UINTVAL offset;
-
- for (offset = 0; offset < n; ++offset) {
- buffer[offset] = (char)tolower((unsigned char)buffer[offset]);
- }
- }
-
- return result;
-}
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING given by C<src> to title case, where
-the first character is upper case and all the rest of the characters
-are lower-case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(titlecase)
- STRING *result = Parrot_str_clone(interp, src);
- const UINTVAL n = src->strlen;
-
- if (n) {
- char * const buffer = result->strstart;
- UINTVAL offset;
-
- buffer[0] = (char)toupper((unsigned char)buffer[0]);
- for (offset = 1; offset < n; ++offset) {
- buffer[offset] = (char)tolower((unsigned char)buffer[offset]);
- }
- }
-
- return result;
-}
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Sets the first character in the STRING C<src> to upper case,
-but doesn't modify the rest of the string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(upcase_first)
- STRING * const result = Parrot_str_clone(interp, src);
-
- if (result->strlen > 0) {
- char * const buffer = result->strstart;
- buffer[0] = (char)toupper((unsigned char)buffer[0]);
- }
-
- return result;
-}
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Sets the first character of the STRING C<src> to lowercase,
-but doesn't modify the rest of the characters.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(downcase_first)
- STRING * const result = Parrot_str_clone(interp, src);
-
- if (result->strlen > 0) {
- char * const buffer = result->strstart;
- buffer[0] = (char)tolower((unsigned char)buffer[0]);
- }
-
- return result;
-}
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first letter of STRING C<src> to upper case,
-but doesn't modify the rest of the string.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(titlecase_first)
- STRING * const result = Parrot_str_clone(interp, src);
-
- if (result->strlen > 0) {
- char * const buffer = result->strstart;
- buffer[0] = (char)toupper((unsigned char)buffer[0]);
- }
-
- return result;
-}
-
-/*
-
-=item C<INTVAL ascii_compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
-
-Compares two strings as ASCII strings. If STRING C<lhs> > C<rhs>, returns
-1. If C<lhs> == C<rhs> returns 0. If STRING C<lhs> < C<rhs>, returns -1.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-ascii_compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
-{
- ASSERT_ARGS(ascii_compare)
- const UINTVAL l_len = lhs->strlen;
- const UINTVAL r_len = rhs->strlen;
- const UINTVAL min_len = l_len > r_len ? r_len : l_len;
- String_iter iter;
-
- if (lhs->encoding == rhs->encoding) {
- const int ret_val = memcmp(lhs->strstart, rhs->strstart, min_len);
- if (ret_val)
- return ret_val < 0 ? -1 : 1;
- }
- else {
- STRING_ITER_INIT(interp, &iter);
- while (iter.charpos < min_len) {
- const UINTVAL cl = ENCODING_GET_BYTE(interp, lhs, iter.charpos);
- const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &iter);
- if (cl != cr)
- return cl < cr ? -1 : 1;
- }
- }
- if (l_len < r_len) {
- return -1;
- }
- if (l_len > r_len) {
- return 1;
- }
- return 0;
-}
-
-/*
-
-=item C<INTVAL mixed_cs_index(PARROT_INTERP, const STRING *src, const STRING
-*search, UINTVAL offs)>
-
-Searches for the first instance of STRING C<search> in STRING C<src>.
-returns the position where the substring is found if it is indeed found.
-Returns -1 otherwise. Operates on different types of strings, not just
-ASCII.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-mixed_cs_index(PARROT_INTERP, ARGIN(const STRING *src), ARGIN(const STRING *search),
- UINTVAL offs)
-{
- ASSERT_ARGS(mixed_cs_index)
- String_iter start, end;
-
- STRING_ITER_INIT(interp, &start);
- STRING_ITER_SET_POSITION(interp, src, &start, offs);
-
- return Parrot_str_iter_index(interp, src, &start, &end, search);
-}
-
-/*
-
-=item C<INTVAL ascii_cs_index(PARROT_INTERP, const STRING *src, const STRING
-*search_string, UINTVAL offset)>
-
-Searches for the first instance of STRING C<search> in STRING C<src>.
-returns the position where the substring is found if it is indeed found.
-Returns -1 otherwise.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-ascii_cs_index(PARROT_INTERP, ARGIN(const STRING *src),
- ARGIN(const STRING *search_string), UINTVAL offset)
-{
- ASSERT_ARGS(ascii_cs_index)
- INTVAL retval;
- if (src->charset != search_string->charset) {
- return mixed_cs_index(interp, src, search_string, offset);
- }
-
- PARROT_ASSERT(src->encoding == Parrot_fixed_8_encoding_ptr);
- retval = Parrot_byte_index(interp, src,
- search_string, offset);
- return retval;
-}
-
-/*
-
-=item C<INTVAL ascii_cs_rindex(PARROT_INTERP, const STRING *src, const STRING
-*search_string, UINTVAL offset)>
-
-Searches for the last instance of STRING C<search_string> in STRING
-C<src>. Starts searching at C<offset>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-INTVAL
-ascii_cs_rindex(PARROT_INTERP, ARGIN(const STRING *src),
- ARGIN(const STRING *search_string), UINTVAL offset)
-{
- ASSERT_ARGS(ascii_cs_rindex)
- INTVAL retval;
-
- if (src->charset != search_string->charset)
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
- "Cross-charset index not supported");
-
- PARROT_ASSERT(src->encoding == Parrot_fixed_8_encoding_ptr);
- retval = Parrot_byte_rindex(interp, src,
- search_string, offset);
- return retval;
-}
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Verifies that the given string is valid ASCII. Returns 1 if it is ASCII,
-returns 0 otherwise.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-validate(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(validate)
- String_iter iter;
- const INTVAL length = Parrot_str_length(interp, src);
-
- STRING_ITER_INIT(interp, &iter);
- while (iter.charpos < length) {
- const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
- if (codepoint >= 0x80)
- return 0;
- }
- return 1;
-}
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Creates a new STRING object from a single codepoint C<codepoint>. Returns
-the new STRING.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
- ASSERT_ARGS(string_from_codepoint)
- char real_codepoint = (char)codepoint;
- STRING * const return_string = string_make(interp, &real_codepoint, 1, "ascii", 0);
- return return_string;
-}
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL
-is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
- ASSERT_ARGS(is_cclass)
- UINTVAL codepoint;
-
- if (offset >= src->strlen)
- return 0;
- codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
-
- if (codepoint >= sizeof (Parrot_ascii_typetable) / sizeof (Parrot_ascii_typetable[0])) {
- return 0;
- }
- return (Parrot_ascii_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class. Delegates to the find_cclass
-method of the encoding plugin.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static INTVAL
-find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(find_cclass)
- UINTVAL pos = offset;
- UINTVAL end = offset + count;
-
- end = src->strlen < end ? src->strlen : end;
- return ENCODING_FIND_CCLASS(interp, src, Parrot_ascii_typetable,
- flags, pos, end);
-}
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Returns C<INTVAL>.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(PARROT_INTERP,
- INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(find_not_cclass)
- UINTVAL pos = offset;
- UINTVAL end = offset + count;
-
- end = src->strlen < end ? src->strlen : end;
- for (; pos < end; ++pos) {
- const UINTVAL codepoint = ENCODING_GET_CODEPOINT(interp, src, pos);
- if ((Parrot_ascii_typetable[codepoint] & flags) == 0) {
- return pos;
- }
- }
- return end;
-}
-
-/*
-
-=item C<size_t ascii_compute_hash(PARROT_INTERP, const STRING *src, size_t
-seed)>
-
-Computes the hash of STRING C<src> starting with seed value
-C<seed>.
-
-=cut
-
-*/
-
-PARROT_PURE_FUNCTION
-size_t
-ascii_compute_hash(SHIM_INTERP, ARGIN(const STRING *src), size_t seed)
-{
- ASSERT_ARGS(ascii_compute_hash)
- size_t hashval = seed;
- const char *buffptr = (const char *)src->strstart;
- UINTVAL len = src->strlen;
-
- PARROT_ASSERT(src->encoding == Parrot_fixed_8_encoding_ptr);
- while (len--) {
- hashval += hashval << 5;
- hashval += *buffptr++;
- }
- return hashval;
-}
-
-/*
-
-=item C<void Parrot_charset_ascii_init(PARROT_INTERP)>
-
-Initialize the ASCII charset but registering all the necessary
-function pointers and settings.
-
-=cut
-
-*/
-
-void
-Parrot_charset_ascii_init(PARROT_INTERP)
-{
- ASSERT_ARGS(Parrot_charset_ascii_init)
- CHARSET * const return_set = Parrot_new_charset(interp);
- static const CHARSET base_set = {
- "ascii",
- ascii_get_graphemes,
- to_charset,
- compose,
- decompose,
- upcase,
- downcase,
- titlecase,
- upcase_first,
- downcase_first,
- titlecase_first,
- ascii_compare,
- ascii_cs_index,
- ascii_cs_rindex,
- validate,
- is_cclass,
- find_cclass,
- find_not_cclass,
- string_from_codepoint,
- ascii_compute_hash,
- NULL
- };
-
- STRUCT_COPY_FROM_STRUCT(return_set, base_set);
- return_set->preferred_encoding = Parrot_fixed_8_encoding_ptr;
- Parrot_register_charset(interp, "ascii", return_set);
-
- return;
-}
-
-/*
-
-=item C<STRING * charset_cvt_ascii_to_binary(PARROT_INTERP, const STRING *src)>
-
-Converts an ASCII STRING C<src> to a binary STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-STRING *
-charset_cvt_ascii_to_binary(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(charset_cvt_ascii_to_binary)
- STRING * const dest = Parrot_str_clone(interp, src);
- UINTVAL offs;
-
- for (offs = 0; offs < src->strlen; ++offs) {
- const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);
- ENCODING_SET_BYTE(interp, dest, offs, c);
- }
-
- dest->charset = Parrot_binary_charset_ptr;
- return dest;
-}
-
-/*
-
-=item C<STRING * charset_cvt_ascii_to_iso_8859_1(PARROT_INTERP, const STRING
-*src)>
-
-Converts ASCII STRING C<src> to ISO8859-1 STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-STRING *
-charset_cvt_ascii_to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(charset_cvt_ascii_to_iso_8859_1)
- STRING * const dest = Parrot_str_clone(interp, src);
- UINTVAL offs;
-
- for (offs = 0; offs < src->strlen; ++offs) {
- const UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);
- ENCODING_SET_BYTE(interp, dest, offs, c);
- }
-
- dest->charset = Parrot_iso_8859_1_charset_ptr;
- return dest;
-}
-
-/*
-
-=back
-
-=cut
-
-*/
-
-/*
- * Local variables:
- * c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */
Deleted: branches/html_cleanup/src/string/charset/binary.c
==============================================================================
--- branches/html_cleanup/src/string/charset/binary.c Fri Sep 24 13:04:05 2010 (r49295)
+++ /dev/null 00:00:00 1970 (deleted)
@@ -1,494 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/binary.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for binary data
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-
-/* In local src/string/charset/ directory */
-#include "ascii.h"
-#include "binary.h"
-
-/* HEADERIZER HFILE: src/string/charset/binary.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-
-static INTVAL compare(SHIM_INTERP,
- ARGIN(const STRING *lhs),
- ARGIN(const STRING *rhs))
- __attribute__nonnull__(2)
- __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* compose(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-static INTVAL find_cclass(SHIM_INTERP,
- SHIM(INTVAL flags),
- SHIM(const STRING *src),
- UINTVAL offset,
- UINTVAL count);
-
-static INTVAL find_not_cclass(SHIM_INTERP,
- SHIM(INTVAL flags),
- SHIM(const STRING *src),
- UINTVAL offset,
- UINTVAL count);
-
-static INTVAL is_cclass(SHIM_INTERP,
- SHIM(INTVAL flags),
- SHIM(const STRING *src),
- SHIM(UINTVAL offset));
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* to_charset(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-static UINTVAL validate(SHIM_INTERP, SHIM(const STRING *src));
-#define ASSERT_ARGS_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(lhs) \
- , PARROT_ASSERT_ARG(rhs))
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#ifdef EXCEPTION
-# undef EXCEPTION
-#endif
-
-#define EXCEPTION(err, str) \
- Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
-
-
-/*
-
-=item C<static STRING* to_charset(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to STRING C<dest> in binary mode. Throws
-an exception if a suitable conversion function is not found.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(to_charset)
- charset_converter_t conversion_func =
- Parrot_find_charset_converter(interp, src->charset, Parrot_binary_charset_ptr);
-
- if (conversion_func)
- return conversion_func(interp, src);
-
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
- "to_charset for binary not implemented");
-}
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot compose a binary string.
-
-=cut
-
-*/
-
-/* A err. can't compose binary */
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-compose(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(compose)
- EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't compose binary data");
-}
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot decompose a binary string.
-
-=cut
-
-*/
-
-/* A err. can't decompose binary */
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(decompose)
- EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't decompose binary data");
-}
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot convert a binary string to
-upper case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(upcase)
- EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't upcase binary data");
-}
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot convert a binary string to
-lower-case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(downcase)
- EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't downcase binary data");
-}
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot convert a binary string to
-title case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(titlecase)
- EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't titlecase binary data");
-}
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot set the first "character" of the
-binary string to uppercase.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(upcase_first)
- EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't upcase binary data");
-}
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we cannot set the first "character"
-of the binary string to lowercase.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(downcase_first)
- EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't downcase binary data");
-}
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Throws an exception because we can't convert the first "character"
-of binary data to title case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(titlecase_first)
- EXCEPTION(EXCEPTION_INVALID_CHARTYPE, "Can't titlecase binary data");
-}
-
-/*
-
-=item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
-
-Compare the two buffers, first by size, then with memcmp.
-
-=cut
-
-*/
-
-static INTVAL
-compare(SHIM_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
-{
- ASSERT_ARGS(compare)
- const UINTVAL l_len = lhs->strlen;
- const UINTVAL r_len = rhs->strlen;
- if (l_len != r_len)
- return l_len - r_len;
-
- return memcmp(lhs->strstart, rhs->strstart, l_len);
-}
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Returns 1. All sequential data is valid binary data.
-
-=cut
-
-*/
-
-/* Binary's always valid */
-static UINTVAL
-validate(SHIM_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(validate)
- return 1;
-}
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-static INTVAL
-is_cclass(SHIM_INTERP, SHIM(INTVAL flags), SHIM(const STRING *src), SHIM(UINTVAL offset))
-{
- ASSERT_ARGS(is_cclass)
- return 0;
-}
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class.
-
-=cut
-
-*/
-
-static INTVAL
-find_cclass(SHIM_INTERP, SHIM(INTVAL flags),
- SHIM(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(find_cclass)
- return offset + count;
-}
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Returns C<INTVAL>.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(SHIM_INTERP, SHIM(INTVAL flags),
- SHIM(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(find_not_cclass)
- return offset + count;
-}
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Creates a new STRING object from a single codepoint C<codepoint>. Returns
-the new STRING.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
- ASSERT_ARGS(string_from_codepoint)
- STRING *return_string;
- char real_codepoint = (char)codepoint;
- return_string = string_make(interp, &real_codepoint, 1, "binary", 0);
- return return_string;
-}
-
-
-/*
-
-=item C<void Parrot_charset_binary_init(PARROT_INTERP)>
-
-Initialize the binary charset, including function pointers and
-settings.
-
-=cut
-
-*/
-
-void
-Parrot_charset_binary_init(PARROT_INTERP)
-{
- ASSERT_ARGS(Parrot_charset_binary_init)
- CHARSET * const return_set = Parrot_new_charset(interp);
- static const CHARSET base_set = {
- "binary",
- ascii_get_graphemes,
- to_charset,
- compose,
- decompose,
- upcase,
- downcase,
- titlecase,
- upcase_first,
- downcase_first,
- titlecase_first,
- compare,
- ascii_cs_index,
- ascii_cs_rindex,
- validate,
- is_cclass,
- find_cclass,
- find_not_cclass,
- string_from_codepoint,
- ascii_compute_hash,
- NULL
- };
-
- STRUCT_COPY_FROM_STRUCT(return_set, base_set);
- return_set->preferred_encoding = Parrot_fixed_8_encoding_ptr;
- Parrot_register_charset(interp, "binary", return_set);
-
- return;
-
-}
-
-/*
-
-=back
-
-=cut
-
-*/
-
-
-/*
- * Local variables:
- * c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */
Deleted: branches/html_cleanup/src/string/charset/iso-8859-1.c
==============================================================================
--- branches/html_cleanup/src/string/charset/iso-8859-1.c Fri Sep 24 13:04:05 2010 (r49295)
+++ /dev/null 00:00:00 1970 (deleted)
@@ -1,733 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/iso-8859-1.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for iso-8859-1 data
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "iso-8859-1.h"
-#include "ascii.h"
-
-/* HEADERIZER HFILE: src/string/charset/iso-8859-1.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING* compose(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-static INTVAL find_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-static INTVAL find_not_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-static INTVAL is_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING * to_charset(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_unicode(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-static UINTVAL validate(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_iso_8859_1 __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_unicode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#include "tables.h"
-
-/*
-
-=item C<static STRING * to_iso_8859_1(PARROT_INTERP, const STRING *src)>
-
-Converts STRING C<src> to iso-8859-1 in STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_iso_8859_1(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(to_iso_8859_1)
- UINTVAL src_len;
- String_iter iter;
- /* iso-8859-1 is never bigger than source */
- STRING * dest = Parrot_str_clone(interp, src);
-
- STRING_ITER_INIT(interp, &iter);
- src_len = src->strlen;
- dest->bufused = src_len;
- while (iter.charpos < src_len) {
- const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
- if (c >= 0x100)
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
- "lossy conversion to iso-8559-1");
-
- Parrot_fixed_8_encoding_ptr->set_byte(interp, dest, iter.charpos - 1, c);
- }
- dest->charset = Parrot_iso_8859_1_charset_ptr;
- dest->encoding = Parrot_fixed_8_encoding_ptr;
- return dest;
-}
-
-/*
-
-=item C<static STRING * to_unicode(PARROT_INTERP, const STRING *src)>
-
-Converts STRING C<src> to unicode STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_unicode(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(to_unicode)
- STRING * dest = Parrot_str_clone(interp, src);
- UINTVAL offs;
- String_iter iter;
-
- dest->charset = Parrot_unicode_charset_ptr;
- dest->encoding = CHARSET_GET_PREFERRED_ENCODING(interp, dest);
- Parrot_gc_reallocate_string_storage(interp, dest, src->strlen);
- STRING_ITER_INIT(interp, &iter);
- while (iter.charpos < src->strlen) {
- const UINTVAL c = ENCODING_GET_BYTE(interp, src, iter.charpos);
-
- if (iter.bytepos >= Buffer_buflen(dest) - 4) {
- UINTVAL need = (UINTVAL)((src->strlen - iter.charpos) * 1.5);
- if (need < 16)
- need = 16;
- Parrot_gc_reallocate_string_storage(interp, dest,
- Buffer_buflen(dest) + need);
- }
- STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, c);
- }
- dest->bufused = iter.bytepos;
- dest->strlen = iter.charpos;
- return dest;
-}
-
-/*
-
-=item C<static STRING * to_charset(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to an ISO-8859-1 STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING *
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(to_charset)
- const charset_converter_t conversion_func =
- Parrot_find_charset_converter(interp, src->charset, Parrot_iso_8859_1_charset_ptr);
-
- if (conversion_func)
- return conversion_func(interp, src);
- else
- return to_iso_8859_1(interp, src);
-}
-
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-ISO-8859-1 does not support composing, so we just copy the STRING C<src> and return the
-copy.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-static STRING*
-compose(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(compose)
-
- STRING * const dest = Parrot_str_copy(interp, src);
-
- return dest;
-}
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-ISO-8859-1 does not support decomposing, so we throw an exception.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(decompose)
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
- "decompose for iso-8859-1 not implemented");
-}
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Convert all graphemes in the STRING C<src> to upper case, for those
-graphemes that support cases.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(upcase)
- unsigned char *buffer;
- UINTVAL offset = 0;
- STRING *result = Parrot_str_clone(interp, src);
-
- if (!result->strlen)
- return result;
-
- buffer = (unsigned char *)result->strstart;
- for (offset = 0; offset < result->strlen; ++offset) {
- unsigned int c = buffer[offset]; /* XXX use encoding ? */
- if (c >= 0xe0 && c != 0xf7)
- c &= ~0x20;
- else
- c = toupper((unsigned char)c);
- buffer[offset] = (unsigned char)c;
- }
-
- return result;
-}
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Converts all graphemes in STRING C<src> to lower-case, for those graphemes
-that support cases.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(downcase)
- unsigned char *buffer;
- UINTVAL offset = 0;
- STRING *result = Parrot_str_clone(interp, src);
-
- if (!result->strlen)
- return result;
-
- buffer = (unsigned char *)result->strstart;
- for (offset = 0; offset < result->strlen; ++offset) {
- unsigned int c = buffer[offset];
- if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
- c |= 0x20;
- else
- c = tolower((unsigned char)c);
- buffer[offset] = (unsigned char)c;
- }
-
- return result;
-}
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Converts the graphemes in STRING C<src> to title case, for those graphemes
-that support cases.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(titlecase)
- unsigned char *buffer;
- unsigned int c;
- UINTVAL offset;
- STRING *result = Parrot_str_clone(interp, src);
-
- if (!result->strlen)
- return result;
-
- buffer = (unsigned char *)result->strstart;
- c = buffer[0];
- if (c >= 0xe0 && c != 0xf7)
- c &= ~0x20;
- else
- c = toupper((unsigned char)c);
- buffer[0] = (unsigned char)c;
-
- for (offset = 1; offset < result->strlen; ++offset) {
- c = buffer[offset];
- if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
- c |= 0x20;
- else
- c = tolower((unsigned char)c);
- buffer[offset] = (unsigned char)c;
- }
-
- return result;
-}
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in STRING C<src> to upper case, if it
-supports cases.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(upcase_first)
- unsigned char *buffer;
- unsigned int c;
- STRING *result = Parrot_str_clone(interp, src);
-
- if (!result->strlen)
- return result;
-
- buffer = (unsigned char *)result->strstart;
- c = buffer[0];
- if (c >= 0xe0 && c != 0xf7)
- c &= ~0x20;
- else
- c = toupper((unsigned char)c);
- buffer[0] = (unsigned char)c;
-
- return result;
-}
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first character of the STRING C<src> to lower case, if the
-grapheme supports lower case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(downcase_first)
- unsigned char *buffer;
- unsigned int c;
- STRING *result = Parrot_str_clone(interp, src);
-
- if (!result->strlen)
- return result;
-
- buffer = (unsigned char *)result->strstart;
- c = buffer[0];
- if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
- c &= ~0x20;
- else
- c = tolower((unsigned char)c);
- buffer[0] = (unsigned char)c;
-
- return result;
-}
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in STRING C<src> to title case, if the grapheme
-supports case.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(titlecase_first)
- return upcase_first(interp, src);
-}
-
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Returns 1 if the STRING C<src> is a valid ISO-8859-1 STRING. Returns 0 otherwise.
-
-=cut
-
-*/
-
-static UINTVAL
-validate(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(validate)
- INTVAL offset;
- const INTVAL length = Parrot_str_length(interp, src);
-
- for (offset = 0; offset < length; ++offset) {
- const UINTVAL codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
- if (codepoint >= 0x100)
- return 0;
- }
- return 1;
-}
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-static INTVAL
-is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
- ASSERT_ARGS(is_cclass)
- UINTVAL codepoint;
-
- if (offset >= src->strlen) return 0;
- codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
-
- if (codepoint >= sizeof (Parrot_ascii_typetable) /
- sizeof (Parrot_ascii_typetable[0])) {
- return 0;
- }
- return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class. Delegates to the find_cclass
-method of the encoding plugin.
-
-=cut
-
-*/
-
-static INTVAL
-find_cclass(PARROT_INTERP, INTVAL flags,
- ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(find_cclass)
- const UINTVAL pos = offset;
- UINTVAL end = offset + count;
-
- end = src->strlen < end ? src->strlen : end;
- return ENCODING_FIND_CCLASS(interp, src,
- Parrot_iso_8859_1_typetable, flags, pos, end);
-}
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Returns C<INTVAL>.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(PARROT_INTERP, INTVAL flags,
- ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(find_not_cclass)
- UINTVAL pos = offset;
- UINTVAL end = offset + count;
-
- end = src->strlen < end ? src->strlen : end;
- for (; pos < end; ++pos) {
- const UINTVAL codepoint = ENCODING_GET_CODEPOINT(interp, src, pos);
- if ((Parrot_iso_8859_1_typetable[codepoint] & flags) == 0) {
- return pos;
- }
- }
- return end;
-}
-
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Creates a new STRING from the single codepoint C<codepoint>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
- ASSERT_ARGS(string_from_codepoint)
- char real_codepoint = (char)codepoint;
- STRING * const return_string = string_make(interp, &real_codepoint, 1,
- "iso-8859-1", 0);
- return return_string;
-}
-
-/*
-
-=item C<void Parrot_charset_iso_8859_1_init(PARROT_INTERP)>
-
-Initializes the ISO-8859-1 charset by installing all the necessary function pointers.
-
-=cut
-
-*/
-
-void
-Parrot_charset_iso_8859_1_init(PARROT_INTERP)
-{
- ASSERT_ARGS(Parrot_charset_iso_8859_1_init)
- CHARSET * const return_set = Parrot_new_charset(interp);
- static const CHARSET base_set = {
- "iso-8859-1",
- ascii_get_graphemes,
- to_charset,
- compose,
- decompose,
- upcase,
- downcase,
- titlecase,
- upcase_first,
- downcase_first,
- titlecase_first,
- ascii_compare,
- ascii_cs_index,
- ascii_cs_rindex,
- validate,
- is_cclass,
- find_cclass,
- find_not_cclass,
- string_from_codepoint,
- ascii_compute_hash,
- NULL
- };
-
- STRUCT_COPY_FROM_STRUCT(return_set, base_set);
- return_set->preferred_encoding = Parrot_fixed_8_encoding_ptr;
- Parrot_register_charset(interp, "iso-8859-1", return_set);
-
- return;
-}
-
-/*
-
-=item C<STRING * charset_cvt_iso_8859_1_to_ascii(PARROT_INTERP, const STRING
-*src)>
-
-Converts STRING C<src> in ISO-8859-1 to ASCII STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-PARROT_WARN_UNUSED_RESULT
-STRING *
-charset_cvt_iso_8859_1_to_ascii(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(charset_cvt_iso_8859_1_to_ascii)
- UINTVAL offs;
- STRING *dest = Parrot_str_clone(interp, src);
-
- for (offs = 0; offs < src->strlen; ++offs) {
- UINTVAL c = ENCODING_GET_BYTE(interp, src, offs);
- if (c >= 0x80)
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LOSSY_CONVERSION,
- "lossy conversion to ascii");
-
- ENCODING_SET_BYTE(interp, dest, offs, c);
- }
- return dest;
-}
-
-/*
-
-=back
-
-=cut
-
-*/
-
-
-/*
- * Local variables:
- * c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */
Deleted: branches/html_cleanup/src/string/charset/unicode.c
==============================================================================
--- branches/html_cleanup/src/string/charset/unicode.c Fri Sep 24 13:04:05 2010 (r49295)
+++ /dev/null 00:00:00 1970 (deleted)
@@ -1,1075 +0,0 @@
-/*
-Copyright (C) 2005-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/charset/unicode.c
-
-=head1 DESCRIPTION
-
-This file implements the charset functions for unicode data
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "unicode.h"
-#include "ascii.h"
-#include "tables.h"
-
-/* HEADERIZER HFILE: src/string/charset/unicode.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-
-static INTVAL compare(PARROT_INTERP,
- ARGIN(const STRING *lhs),
- ARGIN(const STRING *rhs))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* compose(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-static size_t compute_hash(PARROT_INTERP,
- ARGIN(const STRING *src),
- size_t seed)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-static INTVAL cs_rindex(PARROT_INTERP,
- SHIM(const STRING *src),
- SHIM(const STRING *search_string),
- SHIM(UINTVAL offset))
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* decompose(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* downcase_first(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-static INTVAL find_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-static INTVAL find_not_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_graphemes(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-static INTVAL is_cclass(PARROT_INTERP,
- INTVAL flags,
- ARGIN(const STRING *src),
- UINTVAL offset)
- __attribute__nonnull__(1)
- __attribute__nonnull__(3);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* to_charset(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-static int u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
- __attribute__nonnull__(1);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_CANNOT_RETURN_NULL
-static STRING* upcase_first(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-static UINTVAL validate(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-#define ASSERT_ARGS_compare __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(lhs) \
- , PARROT_ASSERT_ARG(rhs))
-#define ASSERT_ARGS_compose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_compute_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_cs_rindex __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_decompose __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_downcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_downcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_not_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_graphemes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_is_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_string_from_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_titlecase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_titlecase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_charset __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_u_iscclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_upcase __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_upcase_first __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_validate __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#ifdef EXCEPTION
-# undef EXCEPTION
-#endif
-
-#if PARROT_HAS_ICU
-# include <unicode/ucnv.h>
-# include <unicode/utypes.h>
-# include <unicode/uchar.h>
-# include <unicode/ustring.h>
-# include <unicode/unorm.h>
-#endif
-#define EXCEPTION(err, str) \
- Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
-
-#define UNIMPL EXCEPTION(EXCEPTION_UNIMPLEMENTED, "unimplemented unicode")
-
-
-/*
-
-=item C<static STRING * get_graphemes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Gets the graphemes from STRING C<src> starting at C<offset>. Gets
-C<count> graphemes total.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_graphemes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(get_graphemes)
- return ENCODING_GET_CODEPOINTS(interp, src, offset, count);
-}
-
-
-/*
-
-=item C<static STRING* to_charset(PARROT_INTERP, const STRING *src)>
-
-Converts input STRING C<src> to unicode STRING C<dest>.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-to_charset(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(to_charset)
- const charset_converter_t conversion_func =
- Parrot_find_charset_converter(interp, src->charset,
- Parrot_unicode_charset_ptr);
-
- if (conversion_func)
- return conversion_func(interp, src);
-
- return Parrot_utf8_encoding_ptr->to_encoding(interp, src);
-}
-
-
-/*
-
-=item C<static STRING* compose(PARROT_INTERP, const STRING *src)>
-
-If Parrot is built with ICU, composes the STRING C<src>. Attempts to
-normalize the STRING into the ICU default, NFC.
-
-If Parrot does not have ICU included, throws an exception.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-compose(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(compose)
-#if PARROT_HAS_ICU
- STRING *dest;
- int src_len, dest_len;
- UErrorCode err;
- /*
- U_STABLE int32_t U_EXPORT2
- unorm_normalize(const UChar *source, int32_t sourceLength,
- UNormalizationMode mode, int32_t options,
- UChar *result, int32_t resultLength,
- UErrorCode *status);
- */
- dest_len = src_len = src->strlen;
- dest = Parrot_str_new_init(interp, NULL, src_len * sizeof (UChar),
- src->encoding, src->charset, 0);
-
- err = U_ZERO_ERROR;
- dest_len = unorm_normalize((UChar *)src->strstart, src_len,
- UNORM_DEFAULT, /* default is NFC */
- 0, /* options 0 default - no specific icu
- * version */
- (UChar *)dest->strstart, dest_len, &err);
-
- dest->bufused = dest_len * sizeof (UChar);
-
- if (!U_SUCCESS(err)) {
- err = U_ZERO_ERROR;
- Parrot_gc_reallocate_string_storage(interp, dest, dest->bufused);
- dest_len = unorm_normalize((UChar *)src->strstart, src_len,
- UNORM_DEFAULT, /* default is NFC */
- 0, /* options 0 default - no specific
- * icu version */
- (UChar *)dest->strstart, dest_len, &err);
- PARROT_ASSERT(U_SUCCESS(err));
- dest->bufused = dest_len * sizeof (UChar);
- }
- dest->strlen = dest_len;
- return dest;
-#else
- UNUSED(src);
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
- "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* decompose(PARROT_INTERP, const STRING *src)>
-
-Decompose function for unicode charset. This function is not yet implemented.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-decompose(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(decompose)
- /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
- UNIMPL;
-}
-
-
-/*
-
-=item C<static STRING* upcase(PARROT_INTERP, const STRING *src)>
-
-Converts the STRING C<src> to all upper-case graphemes, for those characters
-which support upper-case versions.
-
-Throws an exception if ICU is not installed.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(upcase)
-#if PARROT_HAS_ICU
- UErrorCode err;
- int dest_len, src_len, needed;
- STRING *res;
-#endif
-
- if (src->bufused == src->strlen
- && src->encoding == Parrot_utf8_encoding_ptr) {
- return Parrot_ascii_charset_ptr->upcase(interp, src);
- }
-
-#if PARROT_HAS_ICU
- /* to_encoding will allocate new string */
- res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
- /*
- U_CAPI int32_t U_EXPORT2
- u_strToUpper(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode);
- */
- err = U_ZERO_ERROR;
-
- /* use all available space - see below XXX */
- /* TODO downcase, titlecase too */
- dest_len = Buffer_buflen(res) / sizeof (UChar);
- src_len = res->bufused / sizeof (UChar);
-
- /*
- * XXX troubles:
- * t/op/string_cs_45 upcase unicode:"\u01f0"
- * this creates \u004a \u030c J+NON-SPACING HACEK
- * the string needs resizing, *if* the src buffer is
- * too short. *But* with icu 3.2/3.4 the src string is
- * overwritten with partial result, despite the icu docs sayeth:
- *
- * The source string and the destination buffer
- * are allowed to overlap.
- *
- * Workaround: 'preflighting' returns needed length
- * Alternative: forget about inplace operation - create new result
- *
- * TODO downcase, titlecase
- */
- needed = u_strToUpper(NULL, 0,
- (UChar *)res->strstart, src_len,
- NULL, /* locale = default */
- &err);
-
- if (needed > dest_len) {
- Parrot_gc_reallocate_string_storage(interp, res, needed * sizeof (UChar));
- dest_len = needed;
- }
-
- err = U_ZERO_ERROR;
- dest_len = u_strToUpper((UChar *)res->strstart, dest_len,
- (UChar *)res->strstart, src_len,
- NULL, /* locale = default */
- &err);
- PARROT_ASSERT(U_SUCCESS(err));
- res->bufused = dest_len * sizeof (UChar);
-
- /* downgrade if possible */
- if (dest_len == (int)src->strlen)
- res->encoding = Parrot_ucs2_encoding_ptr;
- else {
- /* string is likely still ucs2 if it was earlier
- * but strlen changed due to combining char
- */
- res->strlen = dest_len;
- }
-
- return res;
-
-#else
- UNUSED(src);
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
- "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* downcase(PARROT_INTERP, const STRING *src)>
-
-Converts all graphemes to lower-case, for those graphemes which have cases.
-
-Throws an exception if ICU is not installed.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(downcase)
-#if PARROT_HAS_ICU
- UErrorCode err;
- int dest_len, src_len;
- STRING *res;
-#endif
-
- if (src->bufused == src->strlen
- && src->encoding == Parrot_utf8_encoding_ptr) {
- return Parrot_ascii_charset_ptr->downcase(interp, src);
- }
-
-#if PARROT_HAS_ICU
- /* to_encoding will allocate new string */
- res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
- /*
-U_CAPI int32_t U_EXPORT2
-u_strToLower(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode);
- */
- err = U_ZERO_ERROR;
- src_len = res->bufused / sizeof (UChar);
- dest_len = u_strToLower((UChar *)res->strstart, src_len,
- (UChar *)res->strstart, src_len,
- NULL, /* locale = default */
- &err);
- res->bufused = dest_len * sizeof (UChar);
-
- if (!U_SUCCESS(err)) {
- err = U_ZERO_ERROR;
- Parrot_gc_reallocate_string_storage(interp, res, res->bufused);
- dest_len = u_strToLower((UChar *)res->strstart, dest_len,
- (UChar *)res->strstart, src_len,
- NULL, /* locale = default */
- &err);
- PARROT_ASSERT(U_SUCCESS(err));
- }
-
- /* downgrade if possible */
- if (dest_len == (int)res->strlen)
- res->encoding = Parrot_ucs2_encoding_ptr;
-
- return res;
-
-#else
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
- "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* titlecase(PARROT_INTERP, const STRING *src)>
-
-Converts the string to title case, for those characters which support cases.
-
-Throws an exception if ICU is not installed.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(titlecase)
-#if PARROT_HAS_ICU
-
- UErrorCode err;
- int dest_len, src_len;
- STRING *res;
-
- if (src->bufused == src->strlen
- && src->encoding == Parrot_utf8_encoding_ptr) {
- return Parrot_ascii_charset_ptr->titlecase(interp, src);
- }
-
- /* to_encoding will allocate new string */
- res = Parrot_utf16_encoding_ptr->to_encoding(interp, src);
-
- /*
-U_CAPI int32_t U_EXPORT2
-u_strToTitle(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UBreakIterator *titleIter,
- const char *locale,
- UErrorCode *pErrorCode);
- */
-
- err = U_ZERO_ERROR;
- src_len = res->bufused / sizeof (UChar);
- dest_len = u_strToTitle((UChar *)res->strstart, src_len,
- (UChar *)res->strstart, src_len,
- NULL, /* default titleiter */
- NULL, /* locale = default */
- &err);
- res->bufused = dest_len * sizeof (UChar);
-
- if (!U_SUCCESS(err)) {
- err = U_ZERO_ERROR;
- Parrot_gc_reallocate_string_storage(interp, res, res->bufused);
- dest_len = u_strToTitle((UChar *)res->strstart, dest_len,
- (UChar *)res->strstart, src_len,
- NULL, NULL,
- &err);
- PARROT_ASSERT(U_SUCCESS(err));
- }
-
- /* downgrade if possible */
- if (dest_len == (int)res->strlen)
- res->encoding = Parrot_ucs2_encoding_ptr;
-
- return res;
-
-#else
- UNUSED(src);
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
- "no ICU lib loaded");
-#endif
-}
-
-
-/*
-
-=item C<static STRING* upcase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in the STRING C<src> to uppercase, if the
-grapheme supports it. Not implemented.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-upcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(upcase_first)
- /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
- UNIMPL;
-}
-
-
-/*
-
-=item C<static STRING* downcase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in the STRING C<src> to lower-case, if
-the grapheme supports it. Not implemented
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-downcase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(downcase_first)
- /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
- UNIMPL;
-}
-
-
-/*
-
-=item C<static STRING* titlecase_first(PARROT_INTERP, const STRING *src)>
-
-Converts the first grapheme in STRING C<src> to title case, if the
-string supports it. Not implemented.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING*
-titlecase_first(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(titlecase_first)
- /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
- UNIMPL;
-}
-
-
-/*
-
-=item C<static INTVAL compare(PARROT_INTERP, const STRING *lhs, const STRING
-*rhs)>
-
-Compares two STRINGs, C<lhs> and C<rhs>. Returns -1 if C<lhs> < C<rhs>. Returns
-0 if C<lhs> = C<rhs>. Returns 1 if C<lhs> > C<rhs>.
-
-=cut
-
-*/
-
-static INTVAL
-compare(PARROT_INTERP, ARGIN(const STRING *lhs), ARGIN(const STRING *rhs))
-{
- ASSERT_ARGS(compare)
- String_iter l_iter, r_iter;
- UINTVAL min_len, l_len, r_len;
-
- /* TODO make optimized equal - strings are equal length then already */
- STRING_ITER_INIT(interp, &l_iter);
- STRING_ITER_INIT(interp, &r_iter);
-
- l_len = lhs->strlen;
- r_len = rhs->strlen;
-
- min_len = l_len > r_len ? r_len : l_len;
-
- while (l_iter.charpos < min_len) {
- const UINTVAL cl = STRING_ITER_GET_AND_ADVANCE(interp, lhs, &l_iter);
- const UINTVAL cr = STRING_ITER_GET_AND_ADVANCE(interp, rhs, &r_iter);
-
- if (cl != cr)
- return cl < cr ? -1 : 1;
- }
-
- if (l_len < r_len)
- return -1;
-
- if (l_len > r_len)
- return 1;
-
- return 0;
-}
-
-
-/*
-
-=item C<static INTVAL cs_rindex(PARROT_INTERP, const STRING *src, const STRING
-*search_string, UINTVAL offset)>
-
-Finds the last index of substring C<search_string> in STRING C<src>,
-starting from C<offset>. Not implemented.
-
-=cut
-
-*/
-
-static INTVAL
-cs_rindex(PARROT_INTERP, SHIM(const STRING *src),
- SHIM(const STRING *search_string), SHIM(UINTVAL offset))
-{
- ASSERT_ARGS(cs_rindex)
- /* TODO: https://trac.parrot.org/parrot/wiki/StringsTasklist Implement this. */
- UNIMPL;
-}
-
-
-/*
-
-=item C<static UINTVAL validate(PARROT_INTERP, const STRING *src)>
-
-Returns 1 if the STRING C<src> is a valid unicode string, returns 0 otherwise.
-
-=cut
-
-*/
-
-static UINTVAL
-validate(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(validate)
- String_iter iter;
- const INTVAL length = Parrot_str_length(interp, src);
-
- STRING_ITER_INIT(interp, &iter);
- while (iter.charpos < length) {
- const UINTVAL codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
- /* Check for Unicode non-characters */
- if (codepoint >= 0xfdd0
- && (codepoint <= 0xfdef || (codepoint & 0xfffe) == 0xfffe)
- && codepoint <= 0x10ffff)
- return 0;
- }
-
- return 1;
-}
-
-
-/*
-
-=item C<static int u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-static int
-u_iscclass(PARROT_INTERP, UINTVAL codepoint, INTVAL flags)
-{
- ASSERT_ARGS(u_iscclass)
-#if PARROT_HAS_ICU
- UNUSED(interp);
- /* XXX which one
- return u_charDigitValue(codepoint);
- */
- if ((flags & enum_cclass_uppercase) && u_isupper(codepoint)) return 1;
- if ((flags & enum_cclass_lowercase) && u_islower(codepoint)) return 1;
- if ((flags & enum_cclass_alphabetic) && u_isalpha(codepoint)) return 1;
- if ((flags & enum_cclass_numeric) && u_isdigit(codepoint)) return 1;
- if ((flags & enum_cclass_hexadecimal) && u_isxdigit(codepoint)) return 1;
- if ((flags & enum_cclass_whitespace) && u_isspace(codepoint)) return 1;
- if ((flags & enum_cclass_printing) && u_isprint(codepoint)) return 1;
- if ((flags & enum_cclass_graphical) && u_isgraph(codepoint)) return 1;
- if ((flags & enum_cclass_blank) && u_isblank(codepoint)) return 1;
- if ((flags & enum_cclass_control) && u_iscntrl(codepoint)) return 1;
- if ((flags & enum_cclass_alphanumeric) && u_isalnum(codepoint)) return 1;
- if ((flags & enum_cclass_word) &&
- (u_isalnum(codepoint) || codepoint == '_')) return 1;
-
- return 0;
-#else
- if (codepoint < 256)
- return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-
- if (flags == enum_cclass_any)
- return 1;
-
- /* All codepoints from u+0100 to u+02af are alphabetic, so we
- * cheat on the WORD and ALPHABETIC properties to include these
- * (and incorrectly exclude all others). This is a stopgap until
- * ICU is everywhere, or we have better non-ICU unicode support. */
- if (flags == enum_cclass_word || flags == enum_cclass_alphabetic)
- return (codepoint < 0x2b0);
-
- if (flags & enum_cclass_whitespace) {
- /* from http://www.unicode.org/Public/UNIDATA/PropList.txt */
- switch (codepoint) {
- case 0x1680: case 0x180e: case 0x2000: case 0x2001:
- case 0x2002: case 0x2003: case 0x2004: case 0x2005:
- case 0x2006: case 0x2007: case 0x2008: case 0x2009:
- case 0x200a: case 0x2028: case 0x2029: case 0x202f:
- case 0x205f: case 0x3000:
- return 1;
- default:
- break;
- }
- }
-
- if (flags & enum_cclass_numeric) {
- /* from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
- if (codepoint >= 0x0660 && codepoint <= 0x0669) return 1;
- if (codepoint >= 0x06f0 && codepoint <= 0x06f9) return 1;
- if (codepoint >= 0x07c0 && codepoint <= 0x07c9) return 1;
- if (codepoint >= 0x0966 && codepoint <= 0x096f) return 1;
- if (codepoint >= 0x09e6 && codepoint <= 0x09ef) return 1;
- if (codepoint >= 0x0a66 && codepoint <= 0x0a6f) return 1;
- if (codepoint >= 0x0ae6 && codepoint <= 0x0aef) return 1;
- if (codepoint >= 0x0b66 && codepoint <= 0x0b6f) return 1;
- if (codepoint >= 0x0be6 && codepoint <= 0x0bef) return 1;
- if (codepoint >= 0x0c66 && codepoint <= 0x0c6f) return 1;
- if (codepoint >= 0x0ce6 && codepoint <= 0x0cef) return 1;
- if (codepoint >= 0x0d66 && codepoint <= 0x0d6f) return 1;
- if (codepoint >= 0x0e50 && codepoint <= 0x0e59) return 1;
- if (codepoint >= 0x0ed0 && codepoint <= 0x0ed9) return 1;
- if (codepoint >= 0x0f20 && codepoint <= 0x0f29) return 1;
- if (codepoint >= 0x1040 && codepoint <= 0x1049) return 1;
- if (codepoint >= 0x17e0 && codepoint <= 0x17e9) return 1;
- if (codepoint >= 0x1810 && codepoint <= 0x1819) return 1;
- if (codepoint >= 0x1946 && codepoint <= 0x194f) return 1;
- if (codepoint >= 0x19d0 && codepoint <= 0x19d9) return 1;
- if (codepoint >= 0x1b50 && codepoint <= 0x1b59) return 1;
- if (codepoint >= 0xff10 && codepoint <= 0xff19) return 1;
- }
-
- if (flags & ~(enum_cclass_whitespace | enum_cclass_numeric | enum_cclass_newline))
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
- "no ICU lib loaded");
-
- return 0;
-#endif
-}
-
-
-/*
-
-=item C<static INTVAL is_cclass(PARROT_INTERP, INTVAL flags, const STRING *src,
-UINTVAL offset)>
-
-Returns Boolean.
-
-=cut
-
-*/
-
-static INTVAL
-is_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset)
-{
- ASSERT_ARGS(is_cclass)
- UINTVAL codepoint;
-
- if (offset >= src->strlen)
- return 0;
-
- codepoint = ENCODING_GET_CODEPOINT(interp, src, offset);
-
- if (codepoint >= 256)
- return u_iscclass(interp, codepoint, flags) != 0;
-
- return (Parrot_iso_8859_1_typetable[codepoint] & flags) ? 1 : 0;
-}
-
-
-/*
-
-=item C<static INTVAL find_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Find a character in the given character class.
-
-=cut
-
-*/
-
-static INTVAL
-find_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(find_cclass)
- String_iter iter;
- UINTVAL codepoint;
- UINTVAL end = offset + count;
-
- STRING_ITER_INIT(interp, &iter);
- STRING_ITER_SET_POSITION(interp, src, &iter, offset);
-
- end = src->strlen < end ? src->strlen : end;
-
- while (iter.charpos < end) {
- codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
- if (codepoint >= 256) {
- if (u_iscclass(interp, codepoint, flags))
- return iter.charpos - 1;
- }
- else {
- if (Parrot_iso_8859_1_typetable[codepoint] & flags)
- return iter.charpos - 1;
- }
- }
-
- return end;
-}
-
-
-/*
-
-=item C<static INTVAL find_not_cclass(PARROT_INTERP, INTVAL flags, const STRING
-*src, UINTVAL offset, UINTVAL count)>
-
-Returns C<INTVAL>.
-
-=cut
-
-*/
-
-static INTVAL
-find_not_cclass(PARROT_INTERP, INTVAL flags, ARGIN(const STRING *src),
- UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(find_not_cclass)
- String_iter iter;
- UINTVAL codepoint;
- UINTVAL end = offset + count;
- int bit;
-
- if (offset > src->strlen) {
- /* XXX: Throw in this case? */
- return offset + count;
- }
-
- STRING_ITER_INIT(interp, &iter);
-
- if (offset)
- STRING_ITER_SET_POSITION(interp, src, &iter, offset);
-
- end = src->strlen < end ? src->strlen : end;
-
- if (flags == enum_cclass_any)
- return end;
-
- while (iter.charpos < end) {
- codepoint = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
- if (codepoint >= 256) {
- for (bit = enum_cclass_uppercase;
- bit <= enum_cclass_word ; bit <<= 1) {
- if ((bit & flags) && !u_iscclass(interp, codepoint, bit))
- return iter.charpos - 1;
- }
- }
- else {
- if (!(Parrot_iso_8859_1_typetable[codepoint] & flags))
- return iter.charpos - 1;
- }
- }
-
- return end;
-}
-
-
-/*
-
-=item C<static STRING * string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)>
-
-Returns a one-codepoint string for the given codepoint.
-
-=cut
-
-*/
-
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-string_from_codepoint(PARROT_INTERP, UINTVAL codepoint)
-{
- ASSERT_ARGS(string_from_codepoint)
- String_iter iter;
- STRING * const dest = string_make(interp, "", 1, "unicode", 0);
-
- dest->strlen = 1;
-
- STRING_ITER_INIT(interp, &iter);
- STRING_ITER_SET_AND_ADVANCE(interp, dest, &iter, codepoint);
- dest->bufused = iter.bytepos;
-
- return dest;
-}
-
-
-/*
-
-=item C<static size_t compute_hash(PARROT_INTERP, const STRING *src, size_t
-seed)>
-
-Computes the hash of the given STRING C<src> with starting seed value C<seed>.
-
-=cut
-
-*/
-
-static size_t
-compute_hash(PARROT_INTERP, ARGIN(const STRING *src), size_t seed)
-{
- ASSERT_ARGS(compute_hash)
- String_iter iter;
- size_t hashval = seed;
-
- STRING_ITER_INIT(interp, &iter);
-
- while (iter.charpos < src->strlen) {
- const UINTVAL c = STRING_ITER_GET_AND_ADVANCE(interp, src, &iter);
- hashval += hashval << 5;
- hashval += c;
- }
-
- return hashval;
-}
-
-
-/*
-
-=item C<void Parrot_charset_unicode_init(PARROT_INTERP)>
-
-Initializes the Unicode charset by installing all the necessary function
-pointers.
-
-=cut
-
-*/
-
-void
-Parrot_charset_unicode_init(PARROT_INTERP)
-{
- ASSERT_ARGS(Parrot_charset_unicode_init)
- CHARSET * const return_set = Parrot_new_charset(interp);
- static const CHARSET base_set = {
- "unicode",
- get_graphemes,
- to_charset,
- compose,
- decompose,
- upcase,
- downcase,
- titlecase,
- upcase_first,
- downcase_first,
- titlecase_first,
- compare,
- mixed_cs_index,
- cs_rindex,
- validate,
- is_cclass,
- find_cclass,
- find_not_cclass,
- string_from_codepoint,
- compute_hash,
- NULL
- };
-
- STRUCT_COPY_FROM_STRUCT(return_set, base_set);
-
- /*
- * for now use utf8
- * TODO replace it with a fixed uint_16 or uint_32 encoding
- * XXX if this is changed, modify string_make so it
- * still takes "utf8" when fed "unicode" as charset!
- */
- return_set->preferred_encoding = Parrot_utf8_encoding_ptr;
- Parrot_register_charset(interp, "unicode", return_set);
-
- return;
-}
-
-
-/*
- * Local variables:
- * c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */
Deleted: branches/html_cleanup/src/string/encoding/fixed_8.c
==============================================================================
--- branches/html_cleanup/src/string/encoding/fixed_8.c Fri Sep 24 13:04:05 2010 (r49295)
+++ /dev/null 00:00:00 1970 (deleted)
@@ -1,578 +0,0 @@
-/*
-Copyright (C) 2004-2010, Parrot Foundation.
-$Id$
-
-=head1 NAME
-
-src/string/encoding/fixed_8.c
-
-=head1 DESCRIPTION
-
-This file implements the encoding functions for fixed-width 8-bit codepoints
-
-=over 4
-
-=cut
-
-*/
-
-#include "parrot/parrot.h"
-#include "fixed_8.h"
-
-/* HEADERIZER HFILE: src/string/encoding/fixed_8.h */
-
-/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-
-static UINTVAL bytes(SHIM_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(2);
-
-static UINTVAL codepoints(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL find_cclass(SHIM_INTERP,
- ARGIN(const STRING *s),
- ARGIN(const INTVAL *typetable),
- INTVAL flags,
- UINTVAL pos,
- UINTVAL end)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3);
-
-static UINTVAL fixed8_iter_get(PARROT_INTERP,
- ARGIN(const STRING *str),
- ARGIN(const String_iter *iter),
- INTVAL offset)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3);
-
-static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP,
- ARGIN(const STRING *str),
- ARGMOD(String_iter *iter))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*iter);
-
-static void fixed8_iter_set_and_advance(PARROT_INTERP,
- ARGMOD(STRING *str),
- ARGMOD(String_iter *iter),
- UINTVAL c)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*str)
- FUNC_MODIFIES(*iter);
-
-static void fixed8_iter_set_position(SHIM_INTERP,
- ARGIN(const STRING *str),
- ARGMOD(String_iter *iter),
- UINTVAL pos)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*iter);
-
-static void fixed8_iter_skip(SHIM_INTERP,
- ARGIN(const STRING *str),
- ARGMOD(String_iter *iter),
- INTVAL skip)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*iter);
-
-static size_t fixed_8_hash(SHIM_INTERP,
- ARGIN(const STRING *s),
- size_t hashval)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL get_byte(SHIM_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_bytes(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL get_codepoint(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_codepoints(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-static void set_byte(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL byte)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_DOES_NOT_RETURN
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_encoding(PARROT_INTERP, SHIM(const STRING *src))
- __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(s) \
- , PARROT_ASSERT_ARG(typetable))
-#define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
- , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
- , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(str) \
- , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(str) \
- , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(str) \
- , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_fixed_8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(s))
-#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-/* HEADERIZER END: static */
-
-#define UNIMPL Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED, \
- "unimpl fixed_8")
-
-/*
-
-=item C<static STRING * to_encoding(PARROT_INTERP, const STRING *src)>
-
-Converts the string C<src> to this particular encoding. If C<dest> is
-provided, it will contain the result. Otherwise this function operates in
-place.
-
-
-=cut
-
-*/
-
-PARROT_DOES_NOT_RETURN
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-to_encoding(PARROT_INTERP, SHIM(const STRING *src))
-{
- ASSERT_ARGS(to_encoding)
- UNIMPL;
-}
-
-
-/*
-
-=item C<static UINTVAL get_codepoint(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
-
-codepoints are bytes, so delegate
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-get_codepoint(PARROT_INTERP, ARGIN(const STRING *src),
- UINTVAL offset)
-{
- ASSERT_ARGS(get_codepoint)
- return get_byte(interp, src, offset);
-}
-
-
-/*
-
-=item C<static UINTVAL find_cclass(PARROT_INTERP, const STRING *s, const INTVAL
-*typetable, INTVAL flags, UINTVAL pos, UINTVAL end)>
-
-codepoints are bytes, so delegate
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-find_cclass(SHIM_INTERP, ARGIN(const STRING *s), ARGIN(const INTVAL *typetable),
-INTVAL flags, UINTVAL pos, UINTVAL end)
-{
- ASSERT_ARGS(find_cclass)
- const unsigned char *contents = (const unsigned char *)s->strstart;
- for (; pos < end; ++pos) {
- if ((typetable[contents[pos]] & flags) != 0) {
- return pos;
- }
- }
- return end;
-}
-
-/*
-
-=item C<static UINTVAL get_byte(PARROT_INTERP, const STRING *src, UINTVAL
-offset)>
-
-Returns the byte in string C<src> at position C<offset>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL
-get_byte(SHIM_INTERP, ARGIN(const STRING *src), UINTVAL offset)
-{
- ASSERT_ARGS(get_byte)
- const unsigned char *contents = (const unsigned char *)src->strstart;
-
- if (offset >= src->bufused) {
-/* Parrot_ex_throw_from_c_args(interp, NULL, 0,
- "get_byte past the end of the buffer (%i of %i)",
- offset, src->bufused); */
- return 0;
- }
-
- return contents[offset];
-}
-
-/*
-
-=item C<static void set_byte(PARROT_INTERP, const STRING *src, UINTVAL offset,
-UINTVAL byte)>
-
-Sets, in string C<src> at position C<offset>, the byte C<byte>.
-
-=cut
-
-*/
-
-static void
-set_byte(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL byte)
-{
- ASSERT_ARGS(set_byte)
- unsigned char *contents;
-
- if (offset >= src->bufused)
- Parrot_ex_throw_from_c_args(interp, NULL, 0,
- "set_byte past the end of the buffer");
-
- contents = (unsigned char *)src->strstart;
- contents[offset] = (unsigned char)byte;
-}
-
-/*
-
-=item C<static STRING * get_codepoints(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the codepoints in string C<src> at position C<offset> and length
-C<count>. (Delegates to C<get_bytes>.)
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_codepoints(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(get_codepoints)
- STRING * const return_string = get_bytes(interp, src, offset, count);
- return_string->charset = src->charset;
- return return_string;
-}
-
-/*
-
-=item C<static STRING * get_bytes(PARROT_INTERP, const STRING *src, UINTVAL
-offset, UINTVAL count)>
-
-Returns the bytes in string C<src> at position C<offset> and length C<count>.
-
-=cut
-
-*/
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING *
-get_bytes(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
-{
- ASSERT_ARGS(get_bytes)
- STRING * const return_string = Parrot_str_copy(interp, src);
-
- return_string->encoding = src->encoding;
- return_string->charset = src->charset;
-
- return_string->strstart = (char *)return_string->strstart + offset ;
- return_string->bufused = count;
-
- return_string->strlen = count;
- return_string->hashval = 0;
-
- return return_string;
-}
-
-
-/*
-
-=item C<static UINTVAL codepoints(PARROT_INTERP, const STRING *src)>
-
-Returns the number of codepoints in string C<src>.
-
-=cut
-
-*/
-
-static UINTVAL
-codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(codepoints)
- return bytes(interp, src);
-}
-
-/*
-
-=item C<static UINTVAL bytes(PARROT_INTERP, const STRING *src)>
-
-Returns the number of bytes in string C<src>.
-
-=cut
-
-*/
-
-static UINTVAL
-bytes(SHIM_INTERP, ARGIN(const STRING *src))
-{
- ASSERT_ARGS(bytes)
- return src->bufused;
-}
-
-/*
- * iterator functions
- */
-
-/*
-
-=item C<static UINTVAL fixed8_iter_get(PARROT_INTERP, const STRING *str, const
-String_iter *iter, INTVAL offset)>
-
-Get the character at C<iter> plus C<offset>.
-
-=cut
-
-*/
-
-static UINTVAL
-fixed8_iter_get(PARROT_INTERP,
- ARGIN(const STRING *str), ARGIN(const String_iter *iter), INTVAL offset)
-{
- ASSERT_ARGS(fixed8_iter_get)
- return get_byte(interp, str, iter->charpos + offset);
-}
-
-/*
-
-=item C<static void fixed8_iter_skip(PARROT_INTERP, const STRING *str,
-String_iter *iter, INTVAL skip)>
-
-Moves the string iterator C<i> by C<skip> characters.
-
-=cut
-
-*/
-
-static void
-fixed8_iter_skip(SHIM_INTERP,
- ARGIN(const STRING *str), ARGMOD(String_iter *iter), INTVAL skip)
-{
- ASSERT_ARGS(fixed8_iter_skip)
- iter->bytepos += skip;
- iter->charpos += skip;
- PARROT_ASSERT(iter->bytepos <= Buffer_buflen(str));
-}
-
-/*
-
-=item C<static UINTVAL fixed8_iter_get_and_advance(PARROT_INTERP, const STRING
-*str, String_iter *iter)>
-
-Moves the string iterator C<i> to the next codepoint.
-
-=cut
-
-*/
-
-static UINTVAL
-fixed8_iter_get_and_advance(PARROT_INTERP,
- ARGIN(const STRING *str), ARGMOD(String_iter *iter))
-{
- ASSERT_ARGS(fixed8_iter_get_and_advance)
- const UINTVAL c = get_byte(interp, str, iter->charpos++);
- iter->bytepos++;
- return c;
-}
-
-/*
-
-=item C<static void fixed8_iter_set_and_advance(PARROT_INTERP, STRING *str,
-String_iter *iter, UINTVAL c)>
-
-With the string iterator C<i>, appends the codepoint C<c> and advances to the
-next position in the string.
-
-=cut
-
-*/
-
-static void
-fixed8_iter_set_and_advance(PARROT_INTERP,
- ARGMOD(STRING *str), ARGMOD(String_iter *iter), UINTVAL c)
-{
- ASSERT_ARGS(fixed8_iter_set_and_advance)
- set_byte(interp, str, iter->charpos++, c);
- iter->bytepos++;
-}
-
-/*
-
-=item C<static void fixed8_iter_set_position(PARROT_INTERP, const STRING *str,
-String_iter *iter, UINTVAL pos)>
-
-Moves the string iterator C<i> to the position C<n> in the string.
-
-=cut
-
-*/
-
-static void
-fixed8_iter_set_position(SHIM_INTERP,
- ARGIN(const STRING *str), ARGMOD(String_iter *iter), UINTVAL pos)
-{
- ASSERT_ARGS(fixed8_iter_set_position)
- iter->bytepos = iter->charpos = pos;
- PARROT_ASSERT(pos <= Buffer_buflen(str));
-}
-
-/*
-
-=item C<static size_t fixed_8_hash(PARROT_INTERP, const STRING *s, size_t
-hashval)>
-
-Returns the hashed value of the string, given a seed in hashval.
-
-=cut
-
-*/
-
-static size_t
-fixed_8_hash(SHIM_INTERP, ARGIN(const STRING *s), size_t hashval)
-{
- ASSERT_ARGS(fixed_8_hash)
- const unsigned char *pos = (const unsigned char *)s->strstart;
- UINTVAL len = s->strlen;
-
- while (len--) {
- hashval += hashval << 5;
- hashval += *(pos++);
- }
-
- return hashval;
-}
-
-
-/*
-
-=item C<void Parrot_encoding_fixed_8_init(PARROT_INTERP)>
-
-Initializes the fixed-8 encoding.
-
-=cut
-
-*/
-
-void
-Parrot_encoding_fixed_8_init(PARROT_INTERP)
-{
- ASSERT_ARGS(Parrot_encoding_fixed_8_init)
- ENCODING * const return_encoding = Parrot_new_encoding(interp);
-
- ENCODING base_encoding = {
- "fixed_8",
- 1, /* Max bytes per codepoint */
- to_encoding,
- get_codepoint,
- get_byte,
- set_byte,
- get_codepoints,
- get_bytes,
- codepoints,
- bytes,
- find_cclass,
- fixed_8_hash,
- fixed8_iter_get,
- fixed8_iter_skip,
- fixed8_iter_get_and_advance,
- fixed8_iter_set_and_advance,
- fixed8_iter_set_position
- };
-
- STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
- Parrot_register_encoding(interp, "fixed_8", return_encoding);
-
- return;
-}
-
-
-/*
- * Local variables:
- * c-file-style: "parrot"
- * End:
- * vim: expandtab shiftwidth=4:
- */
-
More information about the parrot-commits
mailing list