[svn:parrot] r39296 - branches/tt24_unicode_numifications/src/string

bacek at svn.parrot.org bacek at svn.parrot.org
Sun May 31 22:04:56 UTC 2009


Author: bacek
Date: Sun May 31 22:04:56 2009
New Revision: 39296
URL: https://trac.parrot.org/parrot/changeset/39296

Log:
[core] Refactor Parrot_str_to_int to be closer to Parrot_str_to_num.

Now it handles unicode strings properly.

Modified:
   branches/tt24_unicode_numifications/src/string/api.c

Modified: branches/tt24_unicode_numifications/src/string/api.c
==============================================================================
--- branches/tt24_unicode_numifications/src/string/api.c	Sun May 31 22:04:33 2009	(r39295)
+++ branches/tt24_unicode_numifications/src/string/api.c	Sun May 31 22:04:56 2009	(r39296)
@@ -2043,6 +2043,21 @@
     return output;
 }
 
+/*
+States of the FSM used while parsing a numeric value out of a string.
+
+Integer uses only parse_start, parse_before_dot and parse_end.
+The remaining states are needed only by float parsing (Parrot_str_to_num).
+*/
+typedef enum number_parse_state {
+    parse_start,         /* initial state: leading whitespace is skipped here */
+    parse_before_dot,    /* a sign or first digit was seen; reading integer digits */
+    parse_after_dot,     /* float only; presumably reading fractional digits */
+    parse_after_e,       /* float only; presumably just past the exponent marker */
+    parse_after_e_sign,  /* float only; presumably reading exponent digits */
+    parse_end            /* terminal state: the parsing loop stops */
+} number_parse_state;
+
 
 /*
 
@@ -2076,45 +2091,60 @@
     if (s == NULL)
         return 0;
     {
-        const char         *start     = s->strstart;
-        const char * const  end       = start + s->bufused;
         const INTVAL        max_safe  = PARROT_INTVAL_MAX / 10;
         const INTVAL        last_dig  = PARROT_INTVAL_MAX % 10;
         int                 sign      = 1;
-        INTVAL              in_number = 0;
         INTVAL              i         = 0;
+        String_iter         iter;
+        UINTVAL             offs;
+        number_parse_state  state = parse_start;
+
+        ENCODING_ITER_INIT(interp, s, &iter);
+
+        for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {
+            const UINTVAL c = iter.get_and_advance(interp, &iter);
+
+            switch (state) {
+                case parse_start:
+                    if (isdigit(c)) {
+                        const INTVAL nextval = c - '0';
+                        if (i < max_safe || (i == max_safe && nextval <= last_dig))
+                            i = i * 10 + nextval;
+                        else
+                            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ERR_OVERFLOW,
+                                "Integer value of String '%S' too big", s);
+                        state = parse_before_dot;
+                    }
+                    else if (c == '-') {
+                        sign      = -1;
+                        state = parse_before_dot;
+                    }
+                    else if (c == '+')
+                        state = parse_before_dot;
+                    else if (isspace((unsigned char)c))
+                        ; /* Do nothing */
+                    else
+                        state = parse_end;
 
-        PARROT_ASSERT(s);
+                    break;
 
-        while (start < end) {
-            const unsigned char c = *start;
+                case parse_before_dot:
+                    if (isdigit(c)) {
+                        const INTVAL nextval = c - '0';
+                        if (i < max_safe || (i == max_safe && nextval <= last_dig))
+                            i = i * 10 + nextval;
+                        else
+                            Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ERR_OVERFLOW,
+                                "Integer value of String '%S' too big", s);
+                    }
+                    else
+                        state = parse_end;
+                    break;
 
-            if (isdigit((unsigned char)c)) {
-                const INTVAL nextval = c - '0';
-                in_number = 1;
-                if (i < max_safe || (i == max_safe && nextval <= last_dig))
-                    i = i * 10 + nextval;
-                else
-                    Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ERR_OVERFLOW,
-                        "Integer value of String '%S' too big", s);
-            }
-            else if (!in_number) {
-                /* we've not yet seen any digits */
-                if (c == '-') {
-                    sign      = -1;
-                    in_number = 1;
-                }
-                else if (c == '+')
-                    in_number = 1;
-                else if (isspace((unsigned char)c))
-                    ;
-                else
+                default:
+                    /* Pacify compiler */
                     break;
             }
-            else {
-                break;
-            }
-            ++start;
         }
 
         i *= sign;
@@ -2124,18 +2154,6 @@
 }
 
 /*
- State of FSM during float value parsing
- */
-typedef enum float_parse_state {
-    parse_start,
-    parse_before_dot,
-    parse_after_dot,
-    parse_after_e,
-    parse_after_e_sign,
-    parse_end
-} float_parse_state;
-
-/*
 
 =item C<FLOATVAL Parrot_str_to_num(PARROT_INTERP, const STRING *s)>
 
@@ -2158,7 +2176,7 @@
     INTVAL      e_sign = 1; /* -1 for '-' */
     String_iter iter;
     UINTVAL     offs;
-    float_parse_state state = parse_start;
+    number_parse_state state = parse_start;
 
     if (Parrot_str_equal(interp, s, CONST_STRING(interp, "Inf")))
         return PARROT_FLOATVAL_INF_POSITIVE;


More information about the parrot-commits mailing list