[svn:parrot] r39529 - in trunk: config/gen/makefiles src/string t/op
bacek at svn.parrot.org
bacek at svn.parrot.org
Fri Jun 12 23:23:29 UTC 2009
Author: bacek
Date: Fri Jun 12 23:23:26 2009
New Revision: 39529
URL: https://trac.parrot.org/parrot/changeset/39529
Log:
Merge tt24_unicode_numifications branch into trunk.
Modified:
trunk/config/gen/makefiles/root.in
trunk/src/string/api.c
trunk/t/op/number.t
trunk/t/op/sprintf.t
trunk/t/op/string.t
trunk/t/op/stringu.t
Modified: trunk/config/gen/makefiles/root.in
==============================================================================
--- trunk/config/gen/makefiles/root.in Fri Jun 12 18:06:27 2009 (r39528)
+++ trunk/config/gen/makefiles/root.in Fri Jun 12 23:23:26 2009 (r39529)
@@ -618,6 +618,7 @@
$(SRC_DIR)/scheduler.str \
$(SRC_DIR)/spf_render.str \
$(SRC_DIR)/spf_vtable.str \
+ $(SRC_DIR)/string/api.str \
$(SRC_DIR)/sub.str \
$(SRC_DIR)/stacks.str \
\
Modified: trunk/src/string/api.c
==============================================================================
--- trunk/src/string/api.c Fri Jun 12 18:06:27 2009 (r39528)
+++ trunk/src/string/api.c Fri Jun 12 23:23:26 2009 (r39529)
@@ -27,6 +27,7 @@
#include "parrot/compiler.h"
#include "parrot/string_funcs.h"
#include "private_cstring.h"
+#include "api.str"
#define nonnull_encoding_name(s) (s) ? (s)->encoding->name : "null string"
#define saneify_string(s) \
@@ -2041,6 +2042,21 @@
return output;
}
+/*
+State of FSM during number value parsing.
+
+Integer uses only parse_start, parse_before_dot and parse_end.
+
+*/
+typedef enum number_parse_state {
+ parse_start,
+ parse_before_dot,
+ parse_after_dot,
+ parse_after_e,
+ parse_after_e_sign,
+ parse_end
+} number_parse_state;
+
/*
@@ -2074,45 +2090,60 @@
if (s == NULL)
return 0;
{
- const char *start = s->strstart;
- const char * const end = start + s->bufused;
const INTVAL max_safe = PARROT_INTVAL_MAX / 10;
const INTVAL last_dig = PARROT_INTVAL_MAX % 10;
int sign = 1;
- INTVAL in_number = 0;
INTVAL i = 0;
+ String_iter iter;
+ UINTVAL offs;
+ number_parse_state state = parse_start;
- PARROT_ASSERT(s);
+ ENCODING_ITER_INIT(interp, s, &iter);
- while (start < end) {
- const unsigned char c = *start;
+ for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {
+ const UINTVAL c = iter.get_and_advance(interp, &iter);
+
+ switch (state) {
+ case parse_start:
+ if (isdigit(c)) {
+ const INTVAL nextval = c - '0';
+ if (i < max_safe || (i == max_safe && nextval <= last_dig))
+ i = i * 10 + nextval;
+ else
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ERR_OVERFLOW,
+ "Integer value of String '%S' too big", s);
+ state = parse_before_dot;
+ }
+ else if (c == '-') {
+ sign = -1;
+ state = parse_before_dot;
+ }
+ else if (c == '+')
+ state = parse_before_dot;
+ else if (isspace((unsigned char)c))
+ ; /* Do nothing */
+ else
+ state = parse_end;
- if (isdigit((unsigned char)c)) {
- const INTVAL nextval = c - '0';
- in_number = 1;
- if (i < max_safe || (i == max_safe && nextval <= last_dig))
- i = i * 10 + nextval;
- else
- Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ERR_OVERFLOW,
- "Integer value of String '%S' too big", s);
- }
- else if (!in_number) {
- /* we've not yet seen any digits */
- if (c == '-') {
- sign = -1;
- in_number = 1;
- }
- else if (c == '+')
- in_number = 1;
- else if (isspace((unsigned char)c))
- ;
- else
+ break;
+
+ case parse_before_dot:
+ if (isdigit(c)) {
+ const INTVAL nextval = c - '0';
+ if (i < max_safe || (i == max_safe && nextval <= last_dig))
+ i = i * 10 + nextval;
+ else
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ERR_OVERFLOW,
+ "Integer value of String '%S' too big", s);
+ }
+ else
+ state = parse_end;
+ break;
+
+ default:
+ /* Pacify compiler */
break;
}
- else {
- break;
- }
- ++start;
}
i *= sign;
@@ -2121,7 +2152,6 @@
}
}
-
/*
=item C<FLOATVAL Parrot_str_to_num(PARROT_INTERP, const STRING *s)>
@@ -2138,39 +2168,158 @@
Parrot_str_to_num(PARROT_INTERP, ARGIN(const STRING *s))
{
ASSERT_ARGS(Parrot_str_to_num)
- FLOATVAL f;
- char *cstr;
- const char *p;
+ FLOATVAL f = 0.0;
+ FLOATVAL mantissa = 0.0;
+ FLOATVAL sign = 1.0; /* -1 for '-' */
+ FLOATVAL divider = 0.1;
+ INTVAL e = 0;
+ INTVAL e_sign = 1; /* -1 for '-' */
+ /* How many digits it's safe to parse */
+ const INTVAL max_safe = PARROT_INTVAL_MAX / 10;
+ INTVAL m = 0; /* Integer mantissa */
+ int m_is_safe = 1; /* We can use integer mantissa */
+ INTVAL d = 0; /* Integer descriminator */
+ int d_is_safe = 1; /* We can use integer mantissa */
+ int d_length = 0;
+ int check_nan = 0; /* Check for NaN and Inf after main loop */
+ String_iter iter;
+ UINTVAL offs;
+ number_parse_state state = parse_start;
- /*
- * XXX C99 atof interprets 0x prefix
- * XXX would strtod() be better for detecting malformed input?
- */
- cstr = Parrot_str_to_cstring(interp, s);
- p = cstr;
+ if (!s)
+ return 0.0;
- while (isspace((unsigned char)*p))
- p++;
+ ENCODING_ITER_INIT(interp, s, &iter);
- if (STREQ(p, PARROT_CSTRING_INF_POSITIVE))
- f = PARROT_FLOATVAL_INF_POSITIVE;
- else if (STREQ(p, PARROT_CSTRING_INF_NEGATIVE))
- f = PARROT_FLOATVAL_INF_NEGATIVE;
- else if (STREQ(p, PARROT_CSTRING_NAN_QUIET))
- f = PARROT_FLOATVAL_NAN_QUIET;
- else
- f = atof(p);
+ /* Handcrafter FSM to read float value */
+ for (offs = 0; (state != parse_end) && (offs < s->strlen); ++offs) {
+ const UINTVAL c = iter.get_and_advance(interp, &iter);
+ switch (state) {
+ case parse_start:
+ if (isdigit(c)) {
+ f = c - '0';
+ m = c - '0';
+ state = parse_before_dot;
+ }
+ else if (c == '-') {
+ sign = -1.0;
+ state = parse_before_dot;
+ }
+ else if (c == '+')
+ state = parse_before_dot;
+ else if (c == '.')
+ state = parse_after_dot;
+ else if (isspace(c))
+ ; /* Do nothing */
+ else {
+ check_nan = 1;
+ state = parse_end;
+ }
+ break;
- /* Not all atof()s return -0 from "-0" */
- if (*p == '-' && FLOAT_IS_ZERO(f))
-#if defined(_MSC_VER)
- /* Visual C++ compiles -0.0 to 0.0, so we need to trick
- the compiler. */
- f = 0.0 * -1;
-#else
- f = -0.0;
-#endif
- Parrot_str_free_cstring(cstr);
+ case parse_before_dot:
+ if (isdigit(c)) {
+ f = f*10.0 + (c-'0');
+ m = m*10 + (c-'0');
+ /* Integer overflow for mantissa */
+ if (m >= max_safe)
+ m_is_safe = 0;
+ }
+ else if (c == '.') {
+ state = parse_after_dot;
+ /*
+ * Throw gathered result. Recalulate from integer mantissa
+ * to preserve precision.
+ */
+ if (m_is_safe)
+ f = m;
+ mantissa = f;
+ }
+ else if (c == 'e' || c == 'E') {
+ state = parse_after_e;
+ /* See comment above */
+ if (m_is_safe)
+ f = m;
+ mantissa = f;
+ }
+ else {
+ check_nan = 1;
+ state = parse_end;
+ }
+ break;
+
+ case parse_after_dot:
+ if (isdigit(c)) {
+ f += (c-'0') * divider;
+ divider /= 10.0;
+ d = d*10 + (c-'0');
+ if (d >= max_safe)
+ d_is_safe = 0;
+ d_length++;
+ }
+ else if (c == 'e' || c == 'E')
+ state = parse_after_e;
+ else
+ state = parse_end;
+ break;
+
+ case parse_after_e:
+ if (isdigit(c)) {
+ e = e*10 + (c-'0');
+ state = parse_after_e_sign;
+ }
+ else if (c == '-') {
+ e_sign = -1;
+ state = parse_after_e_sign;
+ }
+ else if (c == '+')
+ state = parse_after_e_sign;
+ else
+ state = parse_end;
+ break;
+
+ case parse_after_e_sign:
+ if (isdigit(c))
+ e = e*10 + (c-'0');
+ else
+ state = parse_end;
+ break;
+
+ case parse_end:
+ default:
+ /* Pacify compiler */
+ break;
+ }
+ }
+
+ /* Support for non-canonical NaN and Inf */
+ /* charpos <=2 because for "-i" iter will be advanced to next char already */
+ if (check_nan && (iter.charpos <= 2)) {
+ STRING *t = Parrot_str_upcase(interp, s);
+ if (Parrot_str_equal(interp, t, CONST_STRING(interp, "NAN")))
+ return PARROT_FLOATVAL_NAN_QUIET;
+ else if (Parrot_str_equal(interp, t, CONST_STRING(interp, "INF"))
+ || Parrot_str_equal(interp, t, CONST_STRING(interp, "INFINITY")))
+ return PARROT_FLOATVAL_INF_POSITIVE;
+ else if (Parrot_str_equal(interp, t, CONST_STRING(interp, "-INF"))
+ || Parrot_str_equal(interp, t, CONST_STRING(interp, "-INFINITY")))
+ return PARROT_FLOATVAL_INF_NEGATIVE;
+ else
+ return 0.0;
+ }
+
+ if (d && d_is_safe) {
+ f = mantissa + (1.0 * d / powl(10, d_length));
+ }
+
+ f = f * sign;
+
+ if (e) {
+ if (e_sign == 1)
+ f *= powl(10, e);
+ else
+ f /= powl(10, e);
+ }
return f;
}
Modified: trunk/t/op/number.t
==============================================================================
--- trunk/t/op/number.t Fri Jun 12 18:06:27 2009 (r39528)
+++ trunk/t/op/number.t Fri Jun 12 23:23:26 2009 (r39529)
@@ -1081,14 +1081,7 @@
0.5
OUTPUT
-# long double succeeds
-$output = $PConfig{numvalsize} == 8
- ? '1.4142135623731
-1.41421356237309
-' : '1.4142135623731
-1.4142135623731
-';
-pasm_output_is( <<'CODE', $output, "sqrt_n_n" );
+pasm_output_is( <<'CODE', <<OUTPUT, "sqrt_n_n" );
set N1, 2
sqrt N2, N1
say N2
@@ -1096,6 +1089,9 @@
say N2
end
CODE
+1.4142135623731
+1.4142135623731
+OUTPUT
pasm_error_output_like( <<'CODE', <<OUTPUT, "div_n_n by zero" );
set N0, 0
Modified: trunk/t/op/sprintf.t
==============================================================================
--- trunk/t/op/sprintf.t Fri Jun 12 18:06:27 2009 (r39528)
+++ trunk/t/op/sprintf.t Fri Jun 12 23:23:26 2009 (r39529)
@@ -198,6 +198,9 @@
description .= ' actual: >'
description .= actual
description .= '<'
+ description .= ' expected: >'
+ description .= expected
+ description .= '<'
goto is_nok
# remove /'s
Modified: trunk/t/op/string.t
==============================================================================
--- trunk/t/op/string.t Fri Jun 12 18:06:27 2009 (r39528)
+++ trunk/t/op/string.t Fri Jun 12 23:23:26 2009 (r39529)
@@ -7,7 +7,7 @@
use lib qw( . lib ../lib ../../lib );
use Test::More;
-use Parrot::Test tests => 163;
+use Parrot::Test tests => 165;
use Parrot::Config;
=head1 NAME
@@ -2926,6 +2926,38 @@
Foo/Bar
OUT
+pir_output_is( <<'CODE', <<'OUT', 'Corner cases of numification' );
+.sub main :main
+ say 2147483647.0
+ say -2147483648.0
+.end
+CODE
+2147483647
+-2147483648
+OUT
+pir_output_is( <<'CODE', <<'OUT', 'Non canonical nan and inf' );
+.sub main :main
+ $N0 = 'nan'
+ say $N0
+ $N0 = 'iNf'
+ say $N0
+ $N0 = 'INFINITY'
+ say $N0
+ $N0 = '-INF'
+ say $N0
+ $N0 = '-Infinity'
+ say $N0
+.end
+CODE
+NaN
+Inf
+Inf
+-Inf
+-Inf
+OUT
+
+
+
# Local Variables:
# mode: cperl
# cperl-indent-level: 4
Modified: trunk/t/op/stringu.t
==============================================================================
--- trunk/t/op/stringu.t Fri Jun 12 18:06:27 2009 (r39528)
+++ trunk/t/op/stringu.t Fri Jun 12 23:23:26 2009 (r39529)
@@ -6,7 +6,7 @@
use warnings;
use lib qw( . lib ../lib ../../lib );
use Test::More;
-use Parrot::Test tests => 28;
+use Parrot::Test tests => 31;
use Parrot::Config;
=head1 NAME
@@ -502,6 +502,51 @@
OUTPUT
+pir_output_is( <<'CODE', <<'OUT', 'numification of unicode strings' );
+.sub main :main
+ $S0 = "140"
+ $I0 = $S0
+ say $I0
+ $I0 = find_encoding 'ucs2'
+ $S0 = trans_encoding $S0, $I0
+ $I0 = $S0
+ say $I0
+.end
+CODE
+140
+140
+OUT
+
+pir_output_is( <<'CODE', <<'OUT', 'numification of unicode strings' );
+.sub main :main
+ $S0 = "140"
+ $N0 = $S0
+ say $N0
+ $I0 = find_encoding 'ucs2'
+ $S0 = trans_encoding $S0, $I0
+ $N0 = $S0
+ say $N0
+.end
+CODE
+140
+140
+OUT
+
+pir_output_is( <<'CODE', <<'OUT', 'numification of unicode strings' );
+.sub main :main
+ $S0 = unicode:"140 r\x{e9}sum\x{e9}s"
+ $N0 = $S0
+ say $N0
+ $I0 = find_encoding 'ucs2'
+ $S0 = trans_encoding $S0, $I0
+ $N0 = $S0
+ say $N0
+.end
+CODE
+140
+140
+OUT
+
# Local Variables:
More information about the parrot-commits
mailing list