[svn:parrot] r46480 - in trunk: . include/parrot src/pmc src/string

NotFound at svn.parrot.org NotFound at svn.parrot.org
Mon May 10 22:32:47 UTC 2010


Author: NotFound
Date: Mon May 10 22:32:45 2010
New Revision: 46480
URL: https://trac.parrot.org/parrot/changeset/46480

Log:
experimental function Parrot_str_unescape_string and String.unescape method, TT #1628

Modified:
   trunk/DEPRECATED.pod
   trunk/include/parrot/string_funcs.h
   trunk/src/pmc/string.pmc
   trunk/src/string/api.c

Modified: trunk/DEPRECATED.pod
==============================================================================
--- trunk/DEPRECATED.pod	Mon May 10 22:28:56 2010	(r46479)
+++ trunk/DEPRECATED.pod	Mon May 10 22:32:45 2010	(r46480)
@@ -124,6 +124,12 @@
 
 L<http://trac.parrot.org/parrot/ticket/1606>
 
+=item Method unescape on String [experimental]
+
+This is a helper method for testing of Parrot_str_unescape_string.
+
+L<https://trac.parrot.org/parrot/ticket/1628>
+
 =back
 
 =head1 Opcodes
@@ -330,6 +336,13 @@
 
 L<https://trac.parrot.org/parrot/ticket/1588>
 
+=item Parrot_str_unescape_string [experimental]
+
+This function is an experimental addition to enhance and maybe replace
+Parrot_str_unescape.
+
+L<https://trac.parrot.org/parrot/ticket/1628>
+
 =back
 
 =head1 Compiler tools

Modified: trunk/include/parrot/string_funcs.h
==============================================================================
--- trunk/include/parrot/string_funcs.h	Mon May 10 22:28:56 2010	(r46479)
+++ trunk/include/parrot/string_funcs.h	Mon May 10 22:32:45 2010	(r46480)
@@ -373,6 +373,17 @@
         __attribute__nonnull__(2);
 
 PARROT_EXPORT
+PARROT_CANNOT_RETURN_NULL
+STRING * Parrot_str_unescape_string(PARROT_INTERP,
+    ARGIN(const STRING *src),
+    ARGIN(const CHARSET *charset),
+    ARGIN(const ENCODING *encoding))
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(2)
+        __attribute__nonnull__(3)
+        __attribute__nonnull__(4);
+
+PARROT_EXPORT
 void Parrot_str_unpin(PARROT_INTERP, ARGMOD(STRING *s))
         __attribute__nonnull__(1)
         __attribute__nonnull__(2)
@@ -593,6 +604,11 @@
 #define ASSERT_ARGS_Parrot_str_unescape __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(cstring))
+#define ASSERT_ARGS_Parrot_str_unescape_string __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp) \
+    , PARROT_ASSERT_ARG(src) \
+    , PARROT_ASSERT_ARG(charset) \
+    , PARROT_ASSERT_ARG(encoding))
 #define ASSERT_ARGS_Parrot_str_unpin __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(interp) \
     , PARROT_ASSERT_ARG(s))

Modified: trunk/src/pmc/string.pmc
==============================================================================
--- trunk/src/pmc/string.pmc	Mon May 10 22:28:56 2010	(r46479)
+++ trunk/src/pmc/string.pmc	Mon May 10 22:32:45 2010	(r46480)
@@ -801,6 +801,29 @@
         res = CHARSET_RINDEX(INTERP, src, substring, (UINTVAL)start);
         RETURN(INTVAL res);
     }
+
+/*
+
+=item C<STRING *unescape(STRING *charsetname, STRING *encodingname)>
+
+EXPERIMENTAL, for testing only. Unescapes SELF into the given charset/encoding. See TT #1628
+
+=cut
+
+*/
+
+    METHOD unescape(STRING *charsetname, STRING *encodingname)
+    {
+        const CHARSET *charset = Parrot_get_charset(INTERP,
+            Parrot_charset_number(INTERP, charsetname));
+        const ENCODING *encoding = Parrot_get_encoding(INTERP,
+            Parrot_encoding_number(INTERP, encodingname));
+        STRING * const src = VTABLE_get_string(INTERP, SELF);
+        STRING * const dest = Parrot_str_unescape_string(INTERP, src,
+                charset, encoding);
+        RETURN(STRING *dest);
+    }
+
 }
 
 /*

Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c	Mon May 10 22:28:56 2010	(r46479)
+++ trunk/src/string/api.c	Mon May 10 22:32:45 2010	(r46480)
@@ -55,10 +55,17 @@
         __attribute__nonnull__(4)
         FUNC_MODIFIES(*e);
 
+PARROT_DOES_NOT_RETURN
+PARROT_COLD
+static void throw_illegal_escape(PARROT_INTERP)
+        __attribute__nonnull__(1);
+
 #define ASSERT_ARGS_string_rep_compatible __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
        PARROT_ASSERT_ARG(a) \
     , PARROT_ASSERT_ARG(b) \
     , PARROT_ASSERT_ARG(e))
+#define ASSERT_ARGS_throw_illegal_escape __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+       PARROT_ASSERT_ARG(interp))
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
 /* HEADERIZER END: static */
 
@@ -2494,6 +2501,174 @@
     return result;
 }
 
+/*
+
+=item C<static void throw_illegal_escape(PARROT_INTERP)>
+
+Helper to avoid repeated throw calls: throws EXCEPTION_UNIMPLEMENTED, "Illegal escape sequence".
+
+=cut
+
+*/
+
+PARROT_DOES_NOT_RETURN
+PARROT_COLD
+static void
+throw_illegal_escape(PARROT_INTERP)
+{
+    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
+            "Illegal escape sequence");
+}
+
+/*
+
+=item C<STRING * Parrot_str_unescape_string(PARROT_INTERP, const STRING *src,
+const CHARSET *charset, const ENCODING *encoding)>
+
+EXPERIMENTAL, see TT #1628
+
+Unescapes the src string, returning a new string with the charset and
+encoding specified. Malformed \c, \x, \u and \U sequences throw "Illegal escape
+sequence". NOTE(review): the char after a backslash is read without a length check.
+
+=cut
+
+*/
+
+PARROT_EXPORT
+PARROT_CANNOT_RETURN_NULL
+STRING *
+Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
+        ARGIN(const CHARSET *charset),
+        ARGIN(const ENCODING *encoding))
+{
+    ASSERT_ARGS(Parrot_str_unescape_string)
+
+    UINTVAL srclen = Parrot_str_byte_length(interp, src);
+    STRING *result = Parrot_gc_new_string_header(interp, 0);
+    String_iter itersrc;
+    String_iter iterdest;
+    UINTVAL reserved;
+    int digcount;
+    char digbuf[9];
+    int pending;
+
+    result->charset = charset;
+    result->encoding = encoding;
+    reserved = string_max_bytes(interp, result, srclen);
+    Parrot_gc_allocate_string_storage(interp, result, reserved);
+    result->bufused = reserved;
+
+    src->encoding->iter_init(interp, src, &itersrc);
+    encoding->iter_init(interp, result, &iterdest);
+    while (itersrc.bytepos < srclen) {
+        INTVAL c = itersrc.get_and_advance(interp, &itersrc);
+        INTVAL next;
+
+        do {
+            pending = 0;
+            next = c;
+            if (c == '\\') {
+                c = itersrc.get_and_advance(interp, &itersrc);
+                switch (c) {
+                /* Common one char sequences */
+                case 'a': next = '\a'; break;
+                case 'b': next = '\b'; break;
+                case 't': next = '\t'; break;
+                case 'n': next = '\n'; break;
+                case 'v': next = '\v'; break;
+                case 'f': next = '\f'; break;
+                case 'r': next = '\r'; break;
+                case 'e': next = '\e'; break;
+                /* Control character: \cA..\cZ map to 1..26 */
+                case 'c':
+                    c = itersrc.get_and_advance(interp, &itersrc);
+                    /* This assumes ascii-alike encoding */
+                    if (c < 'A' || c > 'Z')
+                        throw_illegal_escape(interp);
+                    next = c - 'A' + 1;
+                    break;
+                case 'x':
+                    digcount = 0;
+                    c = itersrc.get_and_advance(interp, &itersrc);
+                    if (c == '{') {
+                        /* \x{h..h} 1..8 hex digits */
+                        while (itersrc.bytepos < srclen) {
+                            c = itersrc.get_and_advance(interp, &itersrc);
+                            if (c == '}')
+                                break;
+                            if (!isxdigit(c))
+                                throw_illegal_escape(interp);
+                            if (digcount == 8)
+                                break;
+                            digbuf[digcount++] = c;
+                        }
+                        if (c != '}')
+                            throw_illegal_escape(interp);
+                    }
+                    else {
+                        /* \xhh 1..2 hex digits; c is left holding the char after them */
+                        for (digcount = 0; digcount < 2; ++digcount) {
+                            if (!isxdigit(c))
+                                break;
+                            digbuf[digcount] = c;
+                            c = itersrc.get_and_advance(interp, &itersrc);
+                        }
+                        pending = 1;
+                    }
+                    if (digcount == 0)
+                        throw_illegal_escape(interp);
+                    digbuf[digcount] = '\0';
+                    next = strtol(digbuf, NULL, 16);
+                    break;
+                case 'u':
+                    /* \uhhhh 4 hex digits */
+                    for (digcount = 0; digcount < 4; ++digcount) {
+                        c = itersrc.get_and_advance(interp, &itersrc);
+                        if (!isxdigit(c))
+                            throw_illegal_escape(interp);
+                        digbuf[digcount] = c;
+                    }
+                    digbuf[digcount] = '\0';
+                    next = strtol(digbuf, NULL, 16);
+                    break;
+                case 'U':
+                    /* \Uhhhhhhhh 8 hex digits */
+                    for (digcount = 0; digcount < 8; ++digcount) {
+                        c = itersrc.get_and_advance(interp, &itersrc);
+                        if (!isxdigit(c))
+                            throw_illegal_escape(interp);
+                        digbuf[digcount] = c;
+                    }
+                    digbuf[digcount] = '\0';
+                    next = strtol(digbuf, NULL, 16);
+                    break;
+                case '0': case '1': case '2': case '3':
+                case '4': case '5': case '6': case '7':
+                    /* \ooo 1..3 oct digits; fewer than 3 leaves the next char in c */
+                    digbuf[0] = c;
+                    for (digcount = 1; digcount < 3; ++digcount) {
+                        c = itersrc.get_and_advance(interp, &itersrc);
+                        if (c < '0' || c > '7')
+                            break;
+                        digbuf[digcount] = c;
+                    }
+                    digbuf[digcount] = '\0';
+                    next = strtol(digbuf, NULL, 8);
+                    if (digcount < 3)
+                        pending = 1;
+                    break;
+                default:
+                    next = c;
+                }
+            }
+            iterdest.set_and_advance(interp, &iterdest, next);
+        } while (pending);
+    }
+    result->bufused = iterdest.bytepos;
+    result->strlen = iterdest.charpos;
+    return result;
+}
 
 /*
 


More information about the parrot-commits mailing list