[svn:parrot] r37857 - branches/p6strings/compilers/pge/PGE

pmichaud at svn.parrot.org pmichaud at svn.parrot.org
Wed Apr 1 21:06:16 UTC 2009


Author: pmichaud
Date: Wed Apr  1 21:06:15 2009
New Revision: 37857
URL: https://trac.parrot.org/parrot/changeset/37857

Log:
[pge]:  Refactor handling of perl 6 character escapes
* easier to re-use in other perl6-like languages
* handles \c[YOUR CHARACTER NAME HERE]
* handles \cC

Modified:
   branches/p6strings/compilers/pge/PGE/Perl6Regex.pir

Modified: branches/p6strings/compilers/pge/PGE/Perl6Regex.pir
==============================================================================
--- branches/p6strings/compilers/pge/PGE/Perl6Regex.pir	Wed Apr  1 20:38:34 2009	(r37856)
+++ branches/p6strings/compilers/pge/PGE/Perl6Regex.pir	Wed Apr  1 21:06:15 2009	(r37857)
@@ -145,6 +145,121 @@
     .return (match)
 .end
 
+=item C<p6escapes>
+
+Parse and calculate various Perl 6 string escapes, such as \n, \r,
+\x, \o, and \c.  For the latter escapes, also handle the bracketed
+forms and other special forms.
+
+Note that this function is used directly by PCT::Grammar and Rakudo,
+and someday may be refactored to a different location.
+
+=cut
+
+.sub 'p6escapes'
+    .param pmc mob
+    .param pmc adverbs         :slurpy :named
+    .local string target, backchar, literal
+    .local int pos, lastpos
+    $P0 = get_hll_global ['PGE'], '$!MATCH'
+    (mob, pos, target) = $P0.'new'(mob, adverbs :flat :named)
+    lastpos = length target
+    if pos >= lastpos goto fail
+    $S0 = substr target, pos, 1
+    inc pos
+    if $S0 != "\\" goto fail
+    if pos >= lastpos goto fail
+    backchar = substr target, pos, 1
+    inc pos
+    backchar = downcase backchar
+    $I0 = index "\\0abefnrtxco", backchar
+    if $I0 < 0 goto fail
+    if $I0 >= 9 goto scan_xco
+    literal = substr "\\\0\a\b\e\f\n\r\t", $I0, 1
+    goto succeed
+  scan_xco:
+    ##  Handle \x, \c, and \o escapes.  Start by converting
+    ##  backchar into the appropriate radix, then loop through
+    ##  the characters that follow to compute the decimal value
+    ##  of codepoints, and concatenate the codepoints into a
+    ##  literal.
+    .local int base, decnum, isbracketed
+    base = index '        o c     x', backchar
+    literal = ''
+    $S0 = substr target, pos, 1
+    isbracketed = iseq $S0, '['
+    pos += isbracketed
+    ##  Handle the case of \cC (control escape).
+    if base != 10 goto scan_xco_char
+    if isbracketed goto scan_xco_char
+    $I0 = is_cclass .CCLASS_NUMERIC, $S0, 0
+    if $I0 goto scan_xco_char
+    ##  xor the 64 bit
+    $I0 = ord $S0
+    bxor $I0, 64
+    literal = chr $I0
+    inc pos
+    goto succeed
+  scan_xco_char:
+    decnum = 0
+    if base != 10 goto scan_xco_char_digits
+    unless isbracketed goto scan_xco_char_digits
+    $I0 = is_cclass .CCLASS_NUMERIC, $S0, pos
+    if $I0 goto scan_xco_char_digits
+    ##  look up character by name
+    .local int namepos
+    namepos = index target, ']', pos
+    if namepos < 0 goto err_missing_bracket
+    $I0 = index target, ',', pos
+    if $I0 < 0 goto have_namepos
+    if namepos < $I0 goto have_namepos
+    namepos = $I0
+  have_namepos:
+    $I0 = namepos - pos
+    $S0 = substr target, pos, $I0
+    say $S0
+    $P0 = new 'CodeString'
+    decnum = $P0.'charname_to_ord'($S0)
+    if decnum < 0 goto err_unicode_name
+    pos = namepos
+    goto scan_xco_char_end
+  scan_xco_char_digits:
+    $S0 = substr target, pos, 1
+    $I0 = index "0123456789abcdef0123456789ABCDEF", $S0
+    if $I0 < 0 goto scan_xco_char_end
+    $I0 %= 16
+    if $I0 >= base goto scan_xco_char_end
+    decnum *= base
+    decnum += $I0
+    inc pos
+    goto scan_xco_char_digits
+  scan_xco_char_end:
+    $S1 = chr decnum
+    concat literal, $S1
+    $S0 = substr target, pos, 1
+    unless isbracketed goto scan_xco_end
+    if $S0 == ']' goto scan_xco_end
+    if $S0 == '' goto err_missing_bracket
+    if $S0 != ',' goto err_digit
+    inc pos
+    goto scan_xco_char
+  scan_xco_end:
+    pos += isbracketed
+  succeed:
+    mob.'!make'(literal)
+    mob.'to'(pos)
+  fail:
+    .return (mob)
+
+  err_unicode_name:
+    'parse_error'(mob, pos, "Unrecognized character name")
+  err_missing_bracket:
+    'parse_error'(mob, pos, "Missing close bracket for \\x[...], \\o[...], or \\c[...]")
+  err_digit:
+    'parse_error'(mob, pos, "Invalid digit in \\x[...], \\o[...], or \\c[...]")
+.end
+
+
 =item C<onload()>
 
 Initializes the Perl6Regex parser and other data structures
@@ -362,39 +477,50 @@
     .param pmc adverbs         :slurpy :named
 
     .local string target
-    .local int pos, lastpos
+    .local int pos, lastpos, isnegated
     $P0 = getattribute mob, '$.target'
     target = $P0
     $P0 = getattribute mob, '$.pos'
     pos = $P0
     lastpos = length target
+    isnegated = 0
 
-    .local string initchar
-    initchar = substr target, pos, 1
-    $I0 = is_cclass .CCLASS_WORD, initchar, 0
-    if $I0 goto term_metachar
-  quoted_metachar:
-    inc pos
-    mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
-    mob.'!make'(initchar)
-    mob.'to'(pos)
-    .return (mob)
-
-  term_metachar:
-    .local int isnegated
-    isnegated = is_cclass .CCLASS_UPPERCASE, initchar, 0
-    ## $S0 = downcase     FIXME: RT# 48108
-            $I0 = ord initchar
+    .local string backchar, charlist
+    ##  get whatever follows the backslash
+    backchar = substr target, pos, 1
+    charlist = backchar
+    inc pos
+
+    ##  if it's not a word character, it's a quoted metachar
+    $I0 = is_cclass .CCLASS_WORD, backchar, 0
+    unless $I0 goto term_literal
+
+    ##  if it's a word character, it may be negated
+    isnegated = is_cclass .CCLASS_UPPERCASE, backchar, 0
+    ##  $S0 = downcase charlist   FIXME: RT #48108
+            $I0 = ord backchar
             $S0 = chr $I0
-            $S0 = downcase $S0
-    if $S0 == 'x' goto scan_xdo
-    if $S0 == 'o' goto scan_xdo
+            backchar = downcase $S0
+
+    ##  if it's \x, \c, or \o, parse as string escape
+    $I0 = index 'xco', backchar
+    if $I0 < 0 goto meta_esclist
+  meta_xco:
+    $I0 = pos - 2
+    $P0 = 'p6escapes'(mob, 'pos' => $I0)
+    unless $P0 goto err_xcoparse
+    pos = $P0.'to'()
+    charlist = $P0.'ast'()
+    unless isnegated goto term_literal
+    $I0 = length charlist
+    if $I0 > 1 goto err_negated_brackets
+    goto term_charlist
+
+  meta_esclist:
     $P0 = get_global '%esclist'
-    $I0 = exists $P0[$S0]
-    if $I0 == 0 goto err_reserved_metachar
-    inc pos
-    .local string charlist
-    charlist = $P0[$S0]
+    $I0 = exists $P0[backchar]
+    unless $I0 goto err_reserved_metachar
+    charlist = $P0[backchar]
     if isnegated goto term_charlist
     $I0 = length charlist
     if $I0 > 1 goto term_charlist
@@ -412,48 +538,13 @@
     mob.'to'(pos)
     .return (mob)
 
-  scan_xdo:
-    inc pos
-    .local int base, decnum, isbracketed
-    charlist = ''
-    base = index '        o d     x', $S0
-    decnum = 0
-    $S0 = substr target, pos, 1
-    isbracketed = iseq $S0, '['
-    pos += isbracketed
-  scan_xdo_char_loop:
-    $S0 = substr target, pos, 1
-    $I0 = index '0123456789abcdef', $S0
-    if $I0 < 0 goto scan_xdo_char_end
-    if $I0 >= base goto scan_xdo_char_end
-    decnum *= base
-    decnum += $I0
-    inc pos
-    goto scan_xdo_char_loop
-  scan_xdo_char_end:
-    $S1 = chr decnum
-    concat charlist, $S1
-    unless isbracketed goto scan_xdo_end
-    if $S0 == ']' goto scan_xdo_end
-    if $S0 == '' goto err_missing_bracket
-    if $S0 != ',' goto err_bracketed
-    if isnegated goto err_negated_brackets
-    inc pos
-    decnum = 0
-    goto scan_xdo_char_loop
-  scan_xdo_end:
-    pos += isbracketed
-    if isnegated goto term_charlist
-    goto term_literal
-
-  err_reserved_metachar:
-    parse_error(mob, pos, 'Alphanumeric metacharacters are reserved')
-  err_missing_bracket:
-    parse_error(mob, pos, 'Missing close bracket for \\x[...] or \\o[...]')
-  err_bracketed:
-    parse_error(mob, pos, 'Invalid digit in \\x[...] or \\o[...]')
+  err_xcoparse:
+    parse_error(mob, pos, 'Unable to parse \x, \c, or \o value')
   err_negated_brackets:
+    pos = mob.'from'()
     parse_error(mob, pos, 'Cannot use comma in \\X[...] or \\O[...]')
+  err_reserved_metachar:
+    parse_error(mob, pos, 'Alphanumeric metacharacters are reserved')
 .end
 
 


More information about the parrot-commits mailing list