[svn:parrot] r47094 - in branches/gsoc_nfg: . src/string/encoding
darbelo at svn.parrot.org
darbelo at svn.parrot.org
Fri May 28 15:37:45 UTC 2010
Author: darbelo
Date: Fri May 28 15:37:45 2010
New Revision: 47094
URL: https://trac.parrot.org/parrot/changeset/47094
Log:
Add base files for the NFG encoding. For the time being, it is just UCS-4 with the serial numbers filed off.
Added:
branches/gsoc_nfg/src/string/encoding/nfg.c
- copied, changed from r47092, branches/gsoc_nfg/src/string/encoding/ucs4.c
branches/gsoc_nfg/src/string/encoding/nfg.h
- copied, changed from r47092, branches/gsoc_nfg/src/string/encoding/ucs4.h
Modified:
branches/gsoc_nfg/MANIFEST
Modified: branches/gsoc_nfg/MANIFEST
==============================================================================
--- branches/gsoc_nfg/MANIFEST Fri May 28 15:29:38 2010 (r47093)
+++ branches/gsoc_nfg/MANIFEST Fri May 28 15:37:45 2010 (r47094)
@@ -1,7 +1,7 @@
# ex: set ro:
# $Id$
#
-# generated by tools/dev/mk_manifest_and_skip.pl Wed May 26 01:29:10 2010 UT
+# generated by tools/dev/mk_manifest_and_skip.pl Fri May 28 15:36:43 2010 UT
#
# See below for documentation on the format of this file.
#
@@ -1452,6 +1452,8 @@
src/string/encoding.c []
src/string/encoding/fixed_8.c []
src/string/encoding/fixed_8.h []
+src/string/encoding/nfg.c []
+src/string/encoding/nfg.h []
src/string/encoding/ucs2.c []
src/string/encoding/ucs2.h []
src/string/encoding/ucs4.c []
Copied and modified: branches/gsoc_nfg/src/string/encoding/nfg.c (from r47092, branches/gsoc_nfg/src/string/encoding/ucs4.c)
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/ucs4.c Fri May 28 15:25:07 2010 (r47092, copy source)
+++ branches/gsoc_nfg/src/string/encoding/nfg.c Fri May 28 15:37:45 2010 (r47094)
@@ -4,11 +4,11 @@
=head1 NAME
-src/string/encoding/ucs4.c - UCS-4 encoding
+src/string/encoding/nfg.c - NFG encoding
=head1 DESCRIPTION
-UCS-4 encoding with the help of the ICU library.
+NFG encoding with the help of the ICU library.
=head2 Functions
@@ -31,149 +31,12 @@
}
#endif
-/* HEADERIZER HFILE: src/string/encoding/ucs4.h */
+/* HEADERIZER HFILE: src/string/encoding/nfg.h */
/* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL bytes(SHIM_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL codepoints(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL find_cclass(PARROT_INTERP,
- ARGIN(const STRING *s),
- ARGIN(const INTVAL *typetable),
- INTVAL flags,
- UINTVAL pos,
- UINTVAL end)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3);
-
-static UINTVAL get_byte(PARROT_INTERP,
- SHIM(const STRING *src),
- SHIM(UINTVAL offset))
- __attribute__nonnull__(1);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_bytes(PARROT_INTERP,
- SHIM(const STRING *src),
- SHIM(UINTVAL offset),
- SHIM(UINTVAL count))
- __attribute__nonnull__(1);
-
-static UINTVAL get_codepoint(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_codepoints(PARROT_INTERP,
- ARGIN(const STRING *src),
- UINTVAL offset,
- UINTVAL count)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-static void iter_init(PARROT_INTERP,
- ARGIN(const STRING *src),
- ARGOUT(String_iter *iter))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- __attribute__nonnull__(3)
- FUNC_MODIFIES(*iter);
-
-static void set_byte(PARROT_INTERP,
- SHIM(const STRING *src),
- SHIM(UINTVAL offset),
- SHIM(UINTVAL byte))
- __attribute__nonnull__(1);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-static UINTVAL ucs4_decode_and_advance(PARROT_INTERP,
- ARGMOD(String_iter *i))
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- FUNC_MODIFIES(*i);
-
-static void ucs4_encode_and_advance(PARROT_INTERP,
- ARGMOD(String_iter *i),
- UINTVAL c)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- FUNC_MODIFIES(*i);
-
-static size_t ucs4_hash(PARROT_INTERP,
- ARGIN(const STRING *s),
- size_t hashval)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2);
-
-static void ucs4_set_position(PARROT_INTERP,
- ARGMOD(String_iter *i),
- UINTVAL n)
- __attribute__nonnull__(1)
- __attribute__nonnull__(2)
- FUNC_MODIFIES(*i);
-
-#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(s) \
- , PARROT_ASSERT_ARG(typetable))
-#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_get_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src) \
- , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ucs4_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(i))
-#define ASSERT_ARGS_ucs4_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(i))
-#define ASSERT_ARGS_ucs4_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(s))
-#define ASSERT_ARGS_ucs4_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp) \
- , PARROT_ASSERT_ARG(i))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
/* HEADERIZER END: static */
-#include "ucs4.h"
+#include "nfg.h"
#if PARROT_HAS_ICU
# include <unicode/ustring.h>
@@ -196,13 +59,13 @@
{
ASSERT_ARGS(to_encoding)
#if PARROT_HAS_ICU
- if (src->encoding == Parrot_ucs4_encoding_ptr) {
+ if (src->encoding == Parrot_nfg_encoding_ptr) {
return Parrot_str_clone(interp, src);
}
else {
UINTVAL len = Parrot_str_length(interp, src);
STRING *res = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
- Parrot_ucs4_encoding_ptr, Parrot_unicode_charset_ptr, 0);
+ Parrot_nfg_encoding_ptr, Parrot_unicode_charset_ptr, 0);
UChar32 *buf = (UChar32 *) res->strstart;
UINTVAL offs;
for (offs = 0; offs < len; offs++){
@@ -293,7 +156,7 @@
ASSERT_ARGS(get_byte)
Parrot_ex_throw_from_c_args(interp, NULL,
EXCEPTION_UNIMPLEMENTED,
- "No get_byte for UCS-4");
+ "No get_byte for NFG");
}
/*
@@ -314,7 +177,7 @@
ASSERT_ARGS(set_byte)
Parrot_ex_throw_from_c_args(interp, NULL,
EXCEPTION_UNIMPLEMENTED,
- "No set_byte for UCS-4");
+ "No set_byte for NFG");
}
/*
@@ -366,7 +229,7 @@
ASSERT_ARGS(get_bytes)
Parrot_ex_throw_from_c_args(interp, NULL,
EXCEPTION_UNIMPLEMENTED,
- "No get_bytes for UCS-4");
+ "No get_bytes for NFG");
}
@@ -414,18 +277,18 @@
/*
-=item C<static UINTVAL ucs4_decode_and_advance(PARROT_INTERP, String_iter *i)>
+=item C<static UINTVAL nfg_decode_and_advance(PARROT_INTERP, String_iter *i)>
-Moves the string iterator C<i> to the next UCS-4 codepoint.
+Moves the string iterator C<i> to the next NFG codepoint.
=cut
*/
static UINTVAL
-ucs4_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))
+nfg_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))
{
- ASSERT_ARGS(ucs4_decode_and_advance)
+ ASSERT_ARGS(nfg_decode_and_advance)
#if PARROT_HAS_ICU
const UChar32 * const s = (const UChar32 *) i->str->strstart;
size_t pos = i->bytepos / sizeof (UChar32);
@@ -441,7 +304,7 @@
/*
-=item C<static void ucs4_encode_and_advance(PARROT_INTERP, String_iter *i,
+=item C<static void nfg_encode_and_advance(PARROT_INTERP, String_iter *i,
UINTVAL c)>
With the string iterator C<i>, appends the codepoint C<c> and advances to the
@@ -452,9 +315,9 @@
*/
static void
-ucs4_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)
+nfg_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)
{
- ASSERT_ARGS(ucs4_encode_and_advance)
+ ASSERT_ARGS(nfg_encode_and_advance)
#if PARROT_HAS_ICU
UChar32 *s = (UChar32 *) i->str->strstart;
size_t pos = i->bytepos / sizeof (UChar32);
@@ -470,7 +333,7 @@
#if PARROT_HAS_ICU
/*
-=item C<static size_t ucs4_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
+=item C<static size_t nfg_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
Returns the hashed value of the string, given a seed in hashval.
@@ -479,9 +342,9 @@
*/
static size_t
-ucs4_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
+nfg_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
{
- ASSERT_ARGS(ucs4_hash)
+ ASSERT_ARGS(nfg_hash)
const UChar32 *pos = (const UChar32*) s->strstart;
UINTVAL len = s->strlen;
UNUSED(interp);
@@ -497,7 +360,7 @@
/*
-=item C<static void ucs4_set_position(PARROT_INTERP, String_iter *i, UINTVAL n)>
+=item C<static void nfg_set_position(PARROT_INTERP, String_iter *i, UINTVAL n)>
Moves the string iterator C<i> to the position C<n> in the string.
@@ -506,9 +369,9 @@
*/
static void
-ucs4_set_position(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL n)
+nfg_set_position(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL n)
{
- ASSERT_ARGS(ucs4_set_position)
+ ASSERT_ARGS(nfg_set_position)
#if PARROT_HAS_ICU
i->charpos = n;
i->bytepos = n * sizeof (UChar32);
@@ -540,9 +403,9 @@
iter->str = src;
iter->bytepos = 0;
iter->charpos = 0;
- iter->get_and_advance = ucs4_decode_and_advance;
- iter->set_and_advance = ucs4_encode_and_advance;
- iter->set_position = ucs4_set_position;
+ iter->get_and_advance = nfg_decode_and_advance;
+ iter->set_and_advance = nfg_encode_and_advance;
+ iter->set_position = nfg_set_position;
#else
UNUSED(src);
UNUSED(iter);
@@ -552,22 +415,22 @@
/*
-=item C<void Parrot_encoding_ucs4_init(PARROT_INTERP)>
+=item C<void Parrot_encoding_nfg_init(PARROT_INTERP)>
-Initializes the UCS-4 encoding.
+Initializes the NFG encoding.
=cut
*/
void
-Parrot_encoding_ucs4_init(PARROT_INTERP)
+Parrot_encoding_nfg_init(PARROT_INTERP)
{
- ASSERT_ARGS(Parrot_encoding_ucs4_init)
+ ASSERT_ARGS(Parrot_encoding_nfg_init)
ENCODING * const return_encoding = Parrot_new_encoding(interp);
static const ENCODING base_encoding = {
- "ucs4",
+ "nfg",
4, /* Max bytes per codepoint */
to_encoding,
get_codepoint,
@@ -580,13 +443,13 @@
iter_init,
find_cclass,
#if PARROT_HAS_ICU
- ucs4_hash
+ nfg_hash
#else
NULL
#endif
};
STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
- Parrot_register_encoding(interp, "ucs4", return_encoding);
+ Parrot_register_encoding(interp, "nfg", return_encoding);
return;
}
@@ -601,6 +464,7 @@
F<src/string/encoding/utf8.c>,
F<src/string/encoding/utf16.c>,
F<src/string/encoding/ucs2.c>,
+F<src/string/encoding/ucs4.c>,
F<src/string.c>,
F<include/parrot/string.h>,
F<docs/string.pod>.
Copied and modified: branches/gsoc_nfg/src/string/encoding/nfg.h (from r47092, branches/gsoc_nfg/src/string/encoding/ucs4.h)
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/ucs4.h Fri May 28 15:25:07 2010 (r47092, copy source)
+++ branches/gsoc_nfg/src/string/encoding/nfg.h Fri May 28 15:37:45 2010 (r47094)
@@ -3,21 +3,13 @@
$Id$
*/
-#ifndef PARROT_ENCODING_UCS4_H_GUARD
-#define PARROT_ENCODING_UCS4_H_GUARD
+#ifndef PARROT_ENCODING_NFG_H_GUARD
+#define PARROT_ENCODING_NFG_H_GUARD
-/* HEADERIZER BEGIN: src/string/encoding/ucs4.c */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
+/* HEADERIZER BEGIN: src/string/encoding/nfg.c */
+/* HEADERIZER END: src/string/encoding/nfg.c */
-void Parrot_encoding_ucs4_init(PARROT_INTERP)
- __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_Parrot_encoding_ucs4_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
- PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
-/* HEADERIZER END: src/string/encoding/ucs4.c */
-
-#endif /* PARROT_ENCODING_UCS4_H_GUARD */
+#endif /* PARROT_ENCODING_NFG_H_GUARD */
/*
* Local variables:
More information about the parrot-commits
mailing list