[svn:parrot] r46480 - in trunk: . include/parrot src/pmc src/string
NotFound at svn.parrot.org
NotFound at svn.parrot.org
Mon May 10 22:32:47 UTC 2010
Author: NotFound
Date: Mon May 10 22:32:45 2010
New Revision: 46480
URL: https://trac.parrot.org/parrot/changeset/46480
Log:
experimental function Parrot_str_unescape_string and String.unescape method, TT #1628
Modified:
trunk/DEPRECATED.pod
trunk/include/parrot/string_funcs.h
trunk/src/pmc/string.pmc
trunk/src/string/api.c
Modified: trunk/DEPRECATED.pod
==============================================================================
--- trunk/DEPRECATED.pod Mon May 10 22:28:56 2010 (r46479)
+++ trunk/DEPRECATED.pod Mon May 10 22:32:45 2010 (r46480)
@@ -124,6 +124,12 @@
L<http://trac.parrot.org/parrot/ticket/1606>
+=item Method unescape on String [experimental]
+
+This is a helper method for testing of Parrot_str_unescape_string.
+
+L<https://trac.parrot.org/parrot/ticket/1628>
+
=back
=head1 Opcodes
@@ -330,6 +336,13 @@
L<https://trac.parrot.org/parrot/ticket/1588>
+=item Parrot_str_unescape_string [experimental]
+
+This function is an experimental addition to enhance and maybe replace
+Parrot_str_unescape.
+
+L<https://trac.parrot.org/parrot/ticket/1628>
+
=back
=head1 Compiler tools
Modified: trunk/include/parrot/string_funcs.h
==============================================================================
--- trunk/include/parrot/string_funcs.h Mon May 10 22:28:56 2010 (r46479)
+++ trunk/include/parrot/string_funcs.h Mon May 10 22:32:45 2010 (r46480)
@@ -373,6 +373,17 @@
__attribute__nonnull__(2);
PARROT_EXPORT
+PARROT_CANNOT_RETURN_NULL
+STRING * Parrot_str_unescape_string(PARROT_INTERP,
+ ARGIN(const STRING *src),
+ ARGIN(const CHARSET *charset),
+ ARGIN(const ENCODING *encoding))
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3)
+ __attribute__nonnull__(4);
+
+PARROT_EXPORT
void Parrot_str_unpin(PARROT_INTERP, ARGMOD(STRING *s))
__attribute__nonnull__(1)
__attribute__nonnull__(2)
@@ -593,6 +604,11 @@
#define ASSERT_ARGS_Parrot_str_unescape __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(cstring))
+#define ASSERT_ARGS_Parrot_str_unescape_string __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp) \
+ , PARROT_ASSERT_ARG(src) \
+ , PARROT_ASSERT_ARG(charset) \
+ , PARROT_ASSERT_ARG(encoding))
#define ASSERT_ARGS_Parrot_str_unpin __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(interp) \
, PARROT_ASSERT_ARG(s))
Modified: trunk/src/pmc/string.pmc
==============================================================================
--- trunk/src/pmc/string.pmc Mon May 10 22:28:56 2010 (r46479)
+++ trunk/src/pmc/string.pmc Mon May 10 22:32:45 2010 (r46480)
@@ -801,6 +801,29 @@
res = CHARSET_RINDEX(INTERP, src, substring, (UINTVAL)start);
RETURN(INTVAL res);
}
+
+/*
+
+=item C<STRING *unescape(STRING *charsetname, STRING *encodingname)>
+
+EXPERIMENTAL, for testing only. See TT #1628
+
+Looks up the charset and the encoding by name, passes this PMC's string
+value through C<Parrot_str_unescape_string> with them, and returns the
+resulting string.
+
+=cut
+
+*/
+
+    METHOD unescape(STRING *charsetname, STRING *encodingname)
+    {
+        /* Resolve each name to its registry number, then to its descriptor. */
+        const INTVAL           cs_number  = Parrot_charset_number(INTERP, charsetname);
+        const CHARSET  * const cs         = Parrot_get_charset(INTERP, cs_number);
+        const INTVAL           enc_number = Parrot_encoding_number(INTERP, encodingname);
+        const ENCODING * const enc        = Parrot_get_encoding(INTERP, enc_number);
+
+        /* The escaped text is this PMC's own string value. */
+        STRING * const escaped   = VTABLE_get_string(INTERP, SELF);
+        STRING * const unescaped = Parrot_str_unescape_string(INTERP, escaped,
+                cs, enc);
+
+        RETURN(STRING *unescaped);
+    }
+
}
/*
Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c Mon May 10 22:28:56 2010 (r46479)
+++ trunk/src/string/api.c Mon May 10 22:32:45 2010 (r46480)
@@ -55,10 +55,17 @@
__attribute__nonnull__(4)
FUNC_MODIFIES(*e);
+PARROT_DOES_NOT_RETURN
+PARROT_COLD
+static void throw_illegal_escape(PARROT_INTERP)
+ __attribute__nonnull__(1);
+
#define ASSERT_ARGS_string_rep_compatible __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
PARROT_ASSERT_ARG(a) \
, PARROT_ASSERT_ARG(b) \
, PARROT_ASSERT_ARG(e))
+#define ASSERT_ARGS_throw_illegal_escape __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
+ PARROT_ASSERT_ARG(interp))
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
/* HEADERIZER END: static */
@@ -2494,6 +2501,174 @@
return result;
}
+/*
+
+=item C<static void throw_illegal_escape(PARROT_INTERP)>
+
+Helper function to avoid repeated throw calls. Throws an
+C<EXCEPTION_UNIMPLEMENTED> exception carrying the message
+"Illegal escape sequence". Declared as not returning.
+
+=cut
+
+*/
+
+PARROT_DOES_NOT_RETURN
+PARROT_COLD
+static void
+throw_illegal_escape(PARROT_INTERP)
+{
+    /* Use the generated argument check, like the other functions here. */
+    ASSERT_ARGS(throw_illegal_escape)
+
+    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
+            "Illegal escape sequence");
+}
+
+/*
+
+=item C<STRING * Parrot_str_unescape_string(PARROT_INTERP, const STRING *src,
+const CHARSET *charset, const ENCODING *encoding)>
+
+EXPERIMENTAL, see TT #1628
+
+Unescapes the C<src> string, returning a new string with the charset
+and encoding specified.
+
+Recognized sequences are the one-character escapes C<\a \b \t \n \v \f \r \e>,
+control characters C<\cA> .. C<\cZ>, hexadecimal C<\xh>, C<\xhh> and
+C<\x{h..h}> (1 to 8 digits), C<\uhhhh>, C<\Uhhhhhhhh>, and octal C<\o>,
+C<\oo>, C<\ooo>. A backslash followed by any other character yields that
+character. A malformed escape, or one cut short by the end of C<src>,
+makes C<throw_illegal_escape> throw.
+
+=cut
+
+*/
+
+PARROT_EXPORT
+PARROT_CANNOT_RETURN_NULL
+STRING *
+Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
+        ARGIN(const CHARSET *charset),
+        ARGIN(const ENCODING *encoding))
+{
+    ASSERT_ARGS(Parrot_str_unescape_string)
+
+    UINTVAL srclen = Parrot_str_byte_length(interp, src);
+    STRING *result = Parrot_gc_new_string_header(interp, 0);
+    String_iter itersrc;
+    String_iter iterdest;
+    UINTVAL reserved;
+    int digcount;
+    char digbuf[9];
+    int pending;
+
+    result->charset  = charset;
+    result->encoding = encoding;
+    reserved = string_max_bytes(interp, result, srclen);
+    Parrot_gc_allocate_string_storage(interp, result, reserved);
+    result->bufused = reserved;
+
+    src->encoding->iter_init(interp, src, &itersrc);
+    encoding->iter_init(interp, result, &iterdest);
+    while (itersrc.bytepos < srclen) {
+        INTVAL c = itersrc.get_and_advance(interp, &itersrc);
+        INTVAL next;
+
+        /* Variable-length escapes (\xhh, octal) may consume one character
+         * beyond their last digit. When that happens "pending" is set and
+         * the loop runs again to process that look-ahead character, which
+         * may itself start a new escape. */
+        do {
+            pending = 0;
+            next = c;
+            if (c == '\\') {
+                /* A backslash must be followed by something. */
+                if (itersrc.bytepos >= srclen)
+                    throw_illegal_escape(interp);
+                c = itersrc.get_and_advance(interp, &itersrc);
+                switch (c) {
+                  /* Common one char sequences */
+                  case 'a': next = '\a'; break;
+                  case 'b': next = '\b'; break;
+                  case 't': next = '\t'; break;
+                  case 'n': next = '\n'; break;
+                  case 'v': next = '\v'; break;
+                  case 'f': next = '\f'; break;
+                  case 'r': next = '\r'; break;
+                  /* '\e' is a compiler extension, not ISO C: spell out ESC */
+                  case 'e': next = '\x1B'; break;
+                  /* Escape character */
+                  case 'c':
+                    if (itersrc.bytepos >= srclen)
+                        throw_illegal_escape(interp);
+                    c = itersrc.get_and_advance(interp, &itersrc);
+                    /* This assumes ascii-alike encoding */
+                    if (c < 'A' || c > 'Z')
+                        throw_illegal_escape(interp);
+                    next = c - 'A' + 1;
+                    break;
+                  case 'x':
+                    digcount = 0;
+                    if (itersrc.bytepos >= srclen)
+                        throw_illegal_escape(interp);
+                    c = itersrc.get_and_advance(interp, &itersrc);
+                    if (c == '{') {
+                        /* \x{h..h} 1..8 hex digits */
+                        while (itersrc.bytepos < srclen) {
+                            c = itersrc.get_and_advance(interp, &itersrc);
+                            if (c == '}')
+                                break;
+                            /* isxdigit is only defined for unsigned char
+                             * values, so range check the code point first */
+                            if (c < 0 || c > 0x7F || !isxdigit((int)c))
+                                throw_illegal_escape(interp);
+                            /* a ninth digit: leave with c != '}' so that
+                             * the check below rejects the sequence */
+                            if (digcount == 8)
+                                break;
+                            digbuf[digcount++] = (char)c;
+                        }
+                        if (c != '}')
+                            throw_illegal_escape(interp);
+                    }
+                    else {
+                        /* \xhh 1..2 hex digits */
+                        pending = 1;
+                        for (digcount = 0; digcount < 2;) {
+                            if (c < 0 || c > 0x7F || !isxdigit((int)c))
+                                break;
+                            digbuf[digcount++] = (char)c;
+                            /* The digits end the source: there is no
+                             * look-ahead character to read or to emit. */
+                            if (itersrc.bytepos >= srclen) {
+                                pending = 0;
+                                break;
+                            }
+                            c = itersrc.get_and_advance(interp, &itersrc);
+                        }
+                    }
+                    if (digcount == 0)
+                        throw_illegal_escape(interp);
+                    digbuf[digcount] = '\0';
+                    next = strtol(digbuf, NULL, 16);
+                    break;
+                  case 'u':
+                    /* \uhhhh 4 hex digits */
+                    for (digcount = 0; digcount < 4; ++digcount) {
+                        if (itersrc.bytepos >= srclen)
+                            throw_illegal_escape(interp);
+                        c = itersrc.get_and_advance(interp, &itersrc);
+                        if (c < 0 || c > 0x7F || !isxdigit((int)c))
+                            throw_illegal_escape(interp);
+                        digbuf[digcount] = (char)c;
+                    }
+                    digbuf[digcount] = '\0';
+                    next = strtol(digbuf, NULL, 16);
+                    break;
+                  case 'U':
+                    /* \Uhhhhhhhh 8 hex digits */
+                    for (digcount = 0; digcount < 8; ++digcount) {
+                        if (itersrc.bytepos >= srclen)
+                            throw_illegal_escape(interp);
+                        c = itersrc.get_and_advance(interp, &itersrc);
+                        if (c < 0 || c > 0x7F || !isxdigit((int)c))
+                            throw_illegal_escape(interp);
+                        digbuf[digcount] = (char)c;
+                    }
+                    digbuf[digcount] = '\0';
+                    next = strtol(digbuf, NULL, 16);
+                    break;
+                  case '0': case '1': case '2': case '3':
+                  case '4': case '5': case '6': case '7':
+                    /* \ooo 1..3 oct digits */
+                    digbuf[0] = (char)c;
+                    for (digcount = 1; digcount < 3; ++digcount) {
+                        /* Source exhausted: the digits seen so far are the
+                         * whole sequence and nothing is left pending. */
+                        if (itersrc.bytepos >= srclen)
+                            break;
+                        c = itersrc.get_and_advance(interp, &itersrc);
+                        if (c < '0' || c > '7') {
+                            /* consumed a non-digit: process it next */
+                            pending = 1;
+                            break;
+                        }
+                        digbuf[digcount] = (char)c;
+                    }
+                    digbuf[digcount] = '\0';
+                    next = strtol(digbuf, NULL, 8);
+                    break;
+                  default:
+                    /* Unknown escape: the character stands for itself */
+                    next = c;
+                    break;
+                }
+            }
+            iterdest.set_and_advance(interp, &iterdest, next);
+        } while (pending);
+    }
+    /* Record the size actually written instead of the reserved maximum. */
+    result->bufused = iterdest.bytepos;
+    result->strlen  = iterdest.charpos;
+    return result;
+}
/*
More information about the parrot-commits
mailing list