[svn:parrot] r47094 - in branches/gsoc_nfg: . src/string/encoding

darbelo at svn.parrot.org darbelo at svn.parrot.org
Fri May 28 15:37:45 UTC 2010


Author: darbelo
Date: Fri May 28 15:37:45 2010
New Revision: 47094
URL: https://trac.parrot.org/parrot/changeset/47094

Log:
Add base files for the NFG encoding. For now, it's just UCS-4 with the serial numbers filed off.

Added:
   branches/gsoc_nfg/src/string/encoding/nfg.c
      - copied, changed from r47092, branches/gsoc_nfg/src/string/encoding/ucs4.c
   branches/gsoc_nfg/src/string/encoding/nfg.h
      - copied, changed from r47092, branches/gsoc_nfg/src/string/encoding/ucs4.h
Modified:
   branches/gsoc_nfg/MANIFEST

Modified: branches/gsoc_nfg/MANIFEST
==============================================================================
--- branches/gsoc_nfg/MANIFEST	Fri May 28 15:29:38 2010	(r47093)
+++ branches/gsoc_nfg/MANIFEST	Fri May 28 15:37:45 2010	(r47094)
@@ -1,7 +1,7 @@
 # ex: set ro:
 # $Id$
 #
-# generated by tools/dev/mk_manifest_and_skip.pl Wed May 26 01:29:10 2010 UT
+# generated by tools/dev/mk_manifest_and_skip.pl Fri May 28 15:36:43 2010 UT
 #
 # See below for documentation on the format of this file.
 #
@@ -1452,6 +1452,8 @@
 src/string/encoding.c                                       []
 src/string/encoding/fixed_8.c                               []
 src/string/encoding/fixed_8.h                               []
+src/string/encoding/nfg.c                                   []
+src/string/encoding/nfg.h                                   []
 src/string/encoding/ucs2.c                                  []
 src/string/encoding/ucs2.h                                  []
 src/string/encoding/ucs4.c                                  []

Copied and modified: branches/gsoc_nfg/src/string/encoding/nfg.c (from r47092, branches/gsoc_nfg/src/string/encoding/ucs4.c)
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/ucs4.c	Fri May 28 15:25:07 2010	(r47092, copy source)
+++ branches/gsoc_nfg/src/string/encoding/nfg.c	Fri May 28 15:37:45 2010	(r47094)
@@ -4,11 +4,11 @@
 
 =head1 NAME
 
-src/string/encoding/ucs4.c - UCS-4 encoding
+src/string/encoding/nfg.c - NFG encoding
 
 =head1 DESCRIPTION
 
-UCS-4 encoding with the help of the ICU library.
+NFG encoding with the help of the ICU library.
 
 =head2 Functions
 
@@ -31,149 +31,12 @@
 }
 #endif
 
-/* HEADERIZER HFILE: src/string/encoding/ucs4.h */
+/* HEADERIZER HFILE: src/string/encoding/nfg.h */
 
 /* HEADERIZER BEGIN: static */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL bytes(SHIM_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL codepoints(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-static UINTVAL find_cclass(PARROT_INTERP,
-    ARGIN(const STRING *s),
-    ARGIN(const INTVAL *typetable),
-    INTVAL flags,
-    UINTVAL pos,
-    UINTVAL end)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3);
-
-static UINTVAL get_byte(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset))
-        __attribute__nonnull__(1);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_bytes(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset),
-    SHIM(UINTVAL count))
-        __attribute__nonnull__(1);
-
-static UINTVAL get_codepoint(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * get_codepoints(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    UINTVAL offset,
-    UINTVAL count)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static void iter_init(PARROT_INTERP,
-    ARGIN(const STRING *src),
-    ARGOUT(String_iter *iter))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        __attribute__nonnull__(3)
-        FUNC_MODIFIES(*iter);
-
-static void set_byte(PARROT_INTERP,
-    SHIM(const STRING *src),
-    SHIM(UINTVAL offset),
-    SHIM(UINTVAL byte))
-        __attribute__nonnull__(1);
-
-PARROT_WARN_UNUSED_RESULT
-PARROT_CANNOT_RETURN_NULL
-static STRING * to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static UINTVAL ucs4_decode_and_advance(PARROT_INTERP,
-    ARGMOD(String_iter *i))
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        FUNC_MODIFIES(*i);
-
-static void ucs4_encode_and_advance(PARROT_INTERP,
-    ARGMOD(String_iter *i),
-    UINTVAL c)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        FUNC_MODIFIES(*i);
-
-static size_t ucs4_hash(PARROT_INTERP,
-    ARGIN(const STRING *s),
-    size_t hashval)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2);
-
-static void ucs4_set_position(PARROT_INTERP,
-    ARGMOD(String_iter *i),
-    UINTVAL n)
-        __attribute__nonnull__(1)
-        __attribute__nonnull__(2)
-        FUNC_MODIFIES(*i);
-
-#define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(s) \
-    , PARROT_ASSERT_ARG(typetable))
-#define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_get_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_get_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src) \
-    , PARROT_ASSERT_ARG(iter))
-#define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-#define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(src))
-#define ASSERT_ARGS_ucs4_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(i))
-#define ASSERT_ARGS_ucs4_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(i))
-#define ASSERT_ARGS_ucs4_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(s))
-#define ASSERT_ARGS_ucs4_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp) \
-    , PARROT_ASSERT_ARG(i))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: static */
 
-#include "ucs4.h"
+#include "nfg.h"
 
 #if PARROT_HAS_ICU
 #  include <unicode/ustring.h>
@@ -196,13 +59,13 @@
 {
     ASSERT_ARGS(to_encoding)
 #if PARROT_HAS_ICU
-    if (src->encoding == Parrot_ucs4_encoding_ptr) {
+    if (src->encoding == Parrot_nfg_encoding_ptr) {
         return Parrot_str_clone(interp, src);
     }
     else {
         UINTVAL len = Parrot_str_length(interp, src);
         STRING *res = Parrot_str_new_init(interp, NULL, len * sizeof (UChar32),
-                           Parrot_ucs4_encoding_ptr, Parrot_unicode_charset_ptr, 0);
+                           Parrot_nfg_encoding_ptr, Parrot_unicode_charset_ptr, 0);
         UChar32 *buf = (UChar32 *) res->strstart;
         UINTVAL offs;
         for (offs = 0; offs < len; offs++){
@@ -293,7 +156,7 @@
     ASSERT_ARGS(get_byte)
     Parrot_ex_throw_from_c_args(interp, NULL,
         EXCEPTION_UNIMPLEMENTED,
-        "No get_byte for UCS-4");
+        "No get_byte for NFG");
 }
 
 /*
@@ -314,7 +177,7 @@
     ASSERT_ARGS(set_byte)
     Parrot_ex_throw_from_c_args(interp, NULL,
         EXCEPTION_UNIMPLEMENTED,
-        "No set_byte for UCS-4");
+        "No set_byte for NFG");
 }
 
 /*
@@ -366,7 +229,7 @@
     ASSERT_ARGS(get_bytes)
     Parrot_ex_throw_from_c_args(interp, NULL,
         EXCEPTION_UNIMPLEMENTED,
-        "No get_bytes for UCS-4");
+        "No get_bytes for NFG");
 }
 
 
@@ -414,18 +277,18 @@
 
 /*
 
-=item C<static UINTVAL ucs4_decode_and_advance(PARROT_INTERP, String_iter *i)>
+=item C<static UINTVAL nfg_decode_and_advance(PARROT_INTERP, String_iter *i)>
 
-Moves the string iterator C<i> to the next UCS-4 codepoint.
+Moves the string iterator C<i> to the next NFG codepoint.
 
 =cut
 
 */
 
 static UINTVAL
-ucs4_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))
+nfg_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))
 {
-    ASSERT_ARGS(ucs4_decode_and_advance)
+    ASSERT_ARGS(nfg_decode_and_advance)
 #if PARROT_HAS_ICU
     const UChar32 * const s = (const UChar32 *) i->str->strstart;
     size_t pos              = i->bytepos / sizeof (UChar32);
@@ -441,7 +304,7 @@
 
 /*
 
-=item C<static void ucs4_encode_and_advance(PARROT_INTERP, String_iter *i,
+=item C<static void nfg_encode_and_advance(PARROT_INTERP, String_iter *i,
 UINTVAL c)>
 
 With the string iterator C<i>, appends the codepoint C<c> and advances to the
@@ -452,9 +315,9 @@
 */
 
 static void
-ucs4_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)
+nfg_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)
 {
-    ASSERT_ARGS(ucs4_encode_and_advance)
+    ASSERT_ARGS(nfg_encode_and_advance)
 #if PARROT_HAS_ICU
     UChar32 *s   = (UChar32 *) i->str->strstart;
     size_t   pos = i->bytepos / sizeof (UChar32);
@@ -470,7 +333,7 @@
 #if PARROT_HAS_ICU
 /*
 
-=item C<static size_t ucs4_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
+=item C<static size_t nfg_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
 
 Returns the hashed value of the string, given a seed in hashval.
 
@@ -479,9 +342,9 @@
 */
 
 static size_t
-ucs4_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
+nfg_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
 {
-    ASSERT_ARGS(ucs4_hash)
+    ASSERT_ARGS(nfg_hash)
     const UChar32 *pos = (const UChar32*) s->strstart;
     UINTVAL len = s->strlen;
     UNUSED(interp);
@@ -497,7 +360,7 @@
 
 /*
 
-=item C<static void ucs4_set_position(PARROT_INTERP, String_iter *i, UINTVAL n)>
+=item C<static void nfg_set_position(PARROT_INTERP, String_iter *i, UINTVAL n)>
 
 Moves the string iterator C<i> to the position C<n> in the string.
 
@@ -506,9 +369,9 @@
 */
 
 static void
-ucs4_set_position(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL n)
+nfg_set_position(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL n)
 {
-    ASSERT_ARGS(ucs4_set_position)
+    ASSERT_ARGS(nfg_set_position)
 #if PARROT_HAS_ICU
     i->charpos = n;
     i->bytepos = n * sizeof (UChar32);
@@ -540,9 +403,9 @@
     iter->str             = src;
     iter->bytepos         = 0;
     iter->charpos         = 0;
-    iter->get_and_advance = ucs4_decode_and_advance;
-    iter->set_and_advance = ucs4_encode_and_advance;
-    iter->set_position    = ucs4_set_position;
+    iter->get_and_advance = nfg_decode_and_advance;
+    iter->set_and_advance = nfg_encode_and_advance;
+    iter->set_position    = nfg_set_position;
 #else
     UNUSED(src);
     UNUSED(iter);
@@ -552,22 +415,22 @@
 
 /*
 
-=item C<void Parrot_encoding_ucs4_init(PARROT_INTERP)>
+=item C<void Parrot_encoding_nfg_init(PARROT_INTERP)>
 
-Initializes the UCS-4 encoding.
+Initializes the NFG encoding.
 
 =cut
 
 */
 
 void
-Parrot_encoding_ucs4_init(PARROT_INTERP)
+Parrot_encoding_nfg_init(PARROT_INTERP)
 {
-    ASSERT_ARGS(Parrot_encoding_ucs4_init)
+    ASSERT_ARGS(Parrot_encoding_nfg_init)
     ENCODING * const return_encoding = Parrot_new_encoding(interp);
 
     static const ENCODING base_encoding = {
-        "ucs4",
+        "nfg",
         4, /* Max bytes per codepoint */
         to_encoding,
         get_codepoint,
@@ -580,13 +443,13 @@
         iter_init,
         find_cclass,
 #if PARROT_HAS_ICU
-        ucs4_hash
+        nfg_hash
 #else
         NULL
 #endif
     };
     STRUCT_COPY_FROM_STRUCT(return_encoding, base_encoding);
-    Parrot_register_encoding(interp, "ucs4", return_encoding);
+    Parrot_register_encoding(interp, "nfg", return_encoding);
 
     return;
 }
@@ -601,6 +464,7 @@
 F<src/string/encoding/utf8.c>,
 F<src/string/encoding/utf16.c>,
 F<src/string/encoding/ucs2.c>,
+F<src/string/encoding/ucs4.c>,
 F<src/string.c>,
 F<include/parrot/string.h>,
 F<docs/string.pod>.

Copied and modified: branches/gsoc_nfg/src/string/encoding/nfg.h (from r47092, branches/gsoc_nfg/src/string/encoding/ucs4.h)
==============================================================================
--- branches/gsoc_nfg/src/string/encoding/ucs4.h	Fri May 28 15:25:07 2010	(r47092, copy source)
+++ branches/gsoc_nfg/src/string/encoding/nfg.h	Fri May 28 15:37:45 2010	(r47094)
@@ -3,21 +3,13 @@
 $Id$
 */
 
-#ifndef PARROT_ENCODING_UCS4_H_GUARD
-#define PARROT_ENCODING_UCS4_H_GUARD
+#ifndef PARROT_ENCODING_NFG_H_GUARD
+#define PARROT_ENCODING_NFG_H_GUARD
 
-/* HEADERIZER BEGIN: src/string/encoding/ucs4.c */
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
+/* HEADERIZER BEGIN: src/string/encoding/nfg.c */
+/* HEADERIZER END: src/string/encoding/nfg.c */
 
-void Parrot_encoding_ucs4_init(PARROT_INTERP)
-        __attribute__nonnull__(1);
-
-#define ASSERT_ARGS_Parrot_encoding_ucs4_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
-       PARROT_ASSERT_ARG(interp))
-/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
-/* HEADERIZER END: src/string/encoding/ucs4.c */
-
-#endif /* PARROT_ENCODING_UCS4_H_GUARD */
+#endif /* PARROT_ENCODING_NFG_H_GUARD */
 
 /*
  * Local variables:


More information about the parrot-commits mailing list