[svn:parrot] r41590 - in branches/pct-rx: compilers/pct/src compilers/pct/src/Regex t/compilers/pct/regex

pmichaud at svn.parrot.org pmichaud at svn.parrot.org
Thu Oct 1 15:16:29 UTC 2009


Author: pmichaud
Date: Thu Oct  1 15:16:27 2009
New Revision: 41590
URL: https://trac.parrot.org/parrot/changeset/41590

Log:
[pct-rx]:
* Create and initialize cursors and match objects.
* Helper method for generating "match a token" regexes.
* Add initial code for protoregexes.
* Start a test suite.

Added:
   branches/pct-rx/t/compilers/pct/regex/
   branches/pct-rx/t/compilers/pct/regex/03-symtoken.t
Modified:
   branches/pct-rx/compilers/pct/src/Regex.pir
   branches/pct-rx/compilers/pct/src/Regex/Cursor.pir

Modified: branches/pct-rx/compilers/pct/src/Regex.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/Regex.pir	Thu Oct  1 09:30:40 2009	(r41589)
+++ branches/pct-rx/compilers/pct/src/Regex.pir	Thu Oct  1 15:16:27 2009	(r41590)
@@ -13,6 +13,8 @@
 
 .include 'src/Regex/Cursor.pir'
 
+.include 'src/Regex/Match.pir'
+
 =head1 AUTHOR
 
 Patrick Michaud <pmichaud at pobox.com> is the author and maintainer.

Modified: branches/pct-rx/compilers/pct/src/Regex/Cursor.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/Regex/Cursor.pir	Thu Oct  1 09:30:40 2009	(r41589)
+++ branches/pct-rx/compilers/pct/src/Regex/Cursor.pir	Thu Oct  1 15:16:27 2009	(r41590)
@@ -19,14 +19,95 @@
     load_bytecode 'P6object.pbc'
     .local pmc p6meta
     p6meta = new 'P6metaclass'
-    $P0 = p6meta.'new_class'('Regex::Cursor', 'attr'=>'$!target $!from $!pos @!pstack')
+    $P0 = p6meta.'new_class'('Regex::Cursor', 'attr'=>'$!target $!from $!pos $!match @!bstack @!mstack')
     .return ()
 .end
 
+=head2 Methods
+
+=over 4
+
+=item MATCH()
+
+Return this cursor's current Match object.
+
+=cut
+
+.sub 'MATCH' :method
+    .local pmc match
+    match = getattribute self, '$!match'
+    .return (match)
+.end
+
+
+=item pos()
+
+Return the cursor's current position.
+
+=cut
+
+.sub 'pos' :method
+    $P0 = getattribute self, '$!pos'
+    .return ($P0)
+.end
+
 =head2 Private methods
 
 =over 4
 
+=item !cursor_init(target, pos)
+
+Create a new cursor for matching C<target>.
+
+=cut
+
+.sub '!cursor_init' :method
+    .param string target
+
+    .local pmc parrotclass, cur
+    $P0 = self.'HOW'()
+    parrotclass = getattribute $P0, 'parrotclass'
+    cur = new parrotclass
+
+    $P0 = new ['CodeString']
+    $P0 = target
+    setattribute cur, '$!target', $P0
+
+    $P0 = box 0
+    setattribute cur, '$!from', $P0
+    $P0 = box 0 
+    setattribute cur, '$!pos', $P0
+    .return (cur)
+.end
+
+=item !cursor_start()
+
+Create and initialize a new cursor from C<self>.
+
+=cut
+
+.sub '!cursor_start' :method
+    .local pmc parrotclass, cur
+    $P0 = self.'HOW'()
+    parrotclass = getattribute $P0, 'parrotclass'
+    cur = new parrotclass
+
+    .local pmc from, pos, target
+    from = getattribute self, '$!pos'
+    from = clone from
+    setattribute cur, '$!from', from
+
+    pos = box -1
+    setattribute cur, '$!pos', pos
+
+    target = getattribute self, '$!target'
+    setattribute cur, '$!target', target
+
+    .return (cur, from, target)
+.end
+
+
+
 =item !mark_push(rep, pos, mark)
 
 Push a new backtracking point onto the cursor with the given
@@ -47,10 +128,11 @@
     push bstack, mark
     push bstack, pos
     push bstack, rep
-    $I0 = mstack
+    $I0 = -1
     push bstack, $I0
 .end
 
+
 =item !mark_fail([mark])
 
 Remove the most recent C<mark> and backtrack the cursor to the
@@ -66,7 +148,7 @@
 
     .local pmc bstack, mstack
     bstack = getattribute self, '@!bstack'
-    bstack = getattribute self, '@!mstack'
+    mstack = getattribute self, '@!mstack'
 
     # get the frame associated with the mark
     .local int bptr, rep, pos, mark, mptr
@@ -83,17 +165,13 @@
 
     # release the backtracked stack elements
     assign bstack, bptr
-    assign mstack, mptr
-
-    .local pmc match
-    match = mstack[-1]
 
     # return mark values
-    .return (rep, pos, mark, match)
+    .return (rep, pos, mark)
 .end
 
 
-=item '!mark_commit'(mark)
+=item !mark_commit(mark)
 
 Like C<!mark_fail> above this backtracks the cursor to C<mark>
 (releasing any intermediate marks), but preserves the current 
@@ -104,27 +182,341 @@
 .sub '!mark_commit' :method
     .param int mark
 
-    # preserve the current match object
-    .local pmc mstack, match
-    mstack = getattribute self, '@!mstack'
-
     # backtrack
     .local int rep, pos, mark
     (rep, pos, mark, $P0) = self.'!mark_fail'(mark)
 
-    # if match we backtracked to is the same as this one,
-    # we don't need to preserve it specially
-    $I0 = issame $P0, match
-    if $I0 goto done
-    # save current match state
-    push mstack, match
+    .return (rep, pos, mark)
+.end
+
+
+=item !matchify(pos)
+
+Generate a successful match at pos.
+
+=cut
+
+.sub '!matchify' :method
+    .param int pos
+
+    .local pmc match
+    match = new ['Regex';'Match']
+    setattribute self, '$!match', match
+
+    $P0 = box pos
+    setattribute self, '$!pos', $P0
+    setattribute match, '$!to', $P0
+    $P0 = getattribute self, '$!from'
+    setattribute match, '$!from', $P0
+    $P0 = getattribute self, '$!target'
+    setattribute match, '$!target', $P0
+    .return (match)
+.end
+
+
+=item !protoregex(name)
+
+Perform a match for protoregex C<name>.
+
+=cut
+
+.sub '!protoregex' :method
+    .param string name
+
+    .local pmc generation
+    generation = get_global '$!generation'
 
+    .local pmc parrotclass, prototable
+    parrotclass = typeof self
+    prototable = getprop '%!prototable', parrotclass
+    if null prototable goto make_prototable
+    $P0 = getprop '$!generation', prototable
+    $I0 = issame $P0, generation
+    if $I0 goto have_prototable
+  make_prototable:
+    prototable = self.'!protoregex_gen_table'(parrotclass)
+  have_prototable:
+
+    .local pmc tokrx, toklen
+    $S0 = concat name, '.tokrx'
+    tokrx = prototable[$S0]
+    $S0 = concat name, '.toklen'
+    toklen = prototable[$S0]
+    unless null tokrx goto have_tokrx
+    (tokrx, toklen) = self.'!protoregex_gen_tokrx'(prototable, name)
+  have_tokrx:
+
+    .local pmc target
+    .local int pos
+    target = getattribute self, '$!target'
+    $P1 = getattribute self, '$!pos'
+    pos = $P1
+
+    # Create a hash to keep track of the methods we've already called,
+    # so that we don't end up calling it twice.  
+    .local pmc mcalled
+    mcalled = new ['Hash']
+
+    .local string token
+    $S0 = substr target, pos, 1
+    $I0 = toklen[$S0]
+    token = substr target, pos, $I0
+
+
+  token_loop:
+    .local pmc rx, result
+    rx = tokrx[token]
+    if null rx goto token_next
+    $I0 = isa rx, ['ResizablePMCArray']
+    if $I0 goto rx_array
+    .local int rxaddr
+    rxaddr = get_addr rx
+    result = mcalled[rxaddr]
+    unless null result goto token_next
+    result = self.rx()
+    mcalled[rxaddr] = result
+    if result goto done
+    goto token_next
+  rx_array:
+    .local pmc rx_it
+    rx_it = iter rx
+  cand_loop:
+    unless rx_it goto cand_done
+    rx = shift rx_it
+    rxaddr = get_addr rx
+    result = mcalled[rxaddr]
+    unless null result goto token_next
+    result = self.rx()
+    mcalled[rxaddr] = result
+    if result goto done
+    goto cand_loop
+  cand_done:
+  token_next:
+    unless token goto fail
+    chopn token, 1
+    goto token_loop
+
+  fail:
+    .tailcall self.'!cursor_start'()
   done:
-    .return (rep, pos, mark, match)
+    .return (result)
+.end
+
+
+=item !protoregex_gen_table(parrotclass)
+
+Generate a new protoregex table for C<parrotclass>.  This involves
+creating a hash keyed with method names containing ':sym<' from
+C<parrotclass> and all of its superclasses.  This new hash is
+then given the current C<$!generate> property so we can avoid
+recreating it.
+
+The categorization of the protoregex candidate lists 
+for individual protoregexes is handled (lazily) by 
+C<!protoregex_gen_tokrx> below.
+
+=cut
+
+.sub '!protoregex_gen_table' :method
+    .param pmc parrotclass
+
+    .local pmc prototable
+    prototable = new ['Hash']
+    .local pmc class_it, method_it
+    $P0 = parrotclass.'inspect'('all_parents')
+    class_it = iter $P0
+  class_loop:
+    unless class_it goto class_done
+    $P0 = shift class_it
+    $P0 = $P0.'methods'()
+    method_it = iter $P0
+  method_loop:
+    unless method_it goto class_loop
+    $S0 = shift method_it
+    $I0 = index $S0, ':sym<'
+    if $I0 < 0 goto method_loop
+    prototable[$S0] = prototable
+    goto method_loop
+  class_done:
+    $P0 = get_global '$!generation'
+    setprop prototable, '$!generation', $P0
+    setprop parrotclass, '%!prototable', prototable
+    .return (prototable)
+.end
+    
+
+=item !protoregex_gen_tokrx(prototable, name)
+
+Generate this class' token list in prototable for the protoregex 
+called C<name>.
+
+=cut
+
+.sub '!protoregex_gen_tokrx' :method
+    .param pmc prototable
+    .param string name
+
+    .local pmc toklen, tokrx
+    toklen = new ['Hash']
+    tokrx  = new ['Hash']
+
+    # The prototable has already collected all of the names of
+    # protoregex methods into C<prototable>.  We set up a loop
+    # to find all of the method names that begin with "name:sym<".
+    .local string mprefix
+    .local int mlen
+    mprefix = concat name, ':sym<'
+    mlen   = length mprefix
+    
+    .local pmc method_it, method
+    .local string method_name
+    method_it = iter prototable
+  method_loop:
+    unless method_it goto method_done
+    method_name = shift method_it
+    $S0 = substr method_name, 0, mlen
+    if $S0 != mprefix goto method_loop
+
+    # Okay, we've found a method name intended for this protoregex.
+    # Now we look up the method itself, and ask it for its prefix tokens.
+    # If it doesn't return any, we use '' as its only prefix.
+    .local pmc rx, tokens, tokens_it
+    rx = find_method self, method_name
+    (tokens :slurpy) = self.rx('peek'=>prototable)
+    if tokens goto have_tokens
+    push tokens, ''
+  have_tokens:
+
+    # Now loop through all of the tokens for the method, updating
+    # the longest initial key and adding it to the tokrx hash.
+    # We automatically promote entries in tokrx to arrays when
+    # there's more than one method candidate for a given token.
+  tokens_loop:
+    unless tokens goto tokens_done
+    .local string tkey, tfirst
+    tkey = shift tokens
+    tfirst = substr tkey, 0, 1
+    $I0 = length tkey
+    $I1 = toklen[tfirst]
+    if $I0 <= $I1 goto toklen_done
+    toklen[tfirst] = $I0
+  toklen_done:
+    .local pmc rxlist
+    rxlist = tokrx[tkey]
+    if null rxlist goto rxlist_0
+    $I0 = isa rxlist, ['ResizablePMCArray']
+    if $I0 goto rxlist_n
+  rxlist_1:
+    $I0 = issame rx, rxlist
+    if $I0 goto tokens_loop
+    $P0 = rxlist
+    rxlist = new ['ResizablePMCArray']
+    push rxlist, $P0
+    push rxlist, rx
+    tokrx[tkey] = rxlist
+    goto tokens_loop
+  rxlist_n:
+    push rxlist, rx
+    goto tokens_loop
+  rxlist_0:
+    tokrx[tkey] = rx
+    goto tokens_loop
+  tokens_done:
+    goto method_loop
+  method_done:
+
+    # It's built!  Now store the tokrx and toklen hashes in the
+    # prototable and return them to the caller.
+    $S0 = concat name, '.tokrx'
+    prototable[$S0] = tokrx
+    $S0 = concat name, '.toklen'
+    prototable[$S0] = toklen
+    .return (tokrx, toklen)
+.end
+
+
+=item !symtoken_add(name, sym)
+
+Add a regex C<name> for matching fixed-string C<sym> tokens to
+the current grammar.
+
+=cut
+
+.sub '!symtoken_add' :method
+    .param string name
+    .param string sym
+
+    # create a new symtoken method instance, name it, attach <sym>
+    .local pmc symtoken
+    .const 'Sub' $P99 = 'symtoken'
+    symtoken = clone $P99
+    # symtoken = name
+    $P0 = box sym
+    setprop symtoken, '$!sym', $P0
+
+    # add method to our class
+    $P0 = self.'HOW'()
+    $P0.'add_method'(self, name, symtoken)
+
+    # self.'!regenerate'()
+.end
+
+.sub '' :method :subid('symtoken')
+    .param pmc action          :named('action') :optional
+    .param pmc peek            :named('peek')   :optional
+
+    # get the string to be matched
+    .local pmc sym
+    $P0 = getinterp
+    $P0 = $P0['sub';0]
+    sym = getprop '$!sym', $P0
+
+    if null peek goto peek_done
+    .return (sym)
+  peek_done:
+
+    # get a fresh cursor for matching
+    .local pmc cur
+    .local string target
+    .local int pos
+    (cur, pos, target) = self.'!cursor_start'()
+
+    # compare target with sym
+    $S0 = sym
+    $I0 = length $S0
+    $S1 = substr target, pos, $I0
+    if $S0 != $S1 goto fail
+    pos += $I0
+  pass:
+    .local pmc match
+    match = cur.'!matchify'(pos)
+    match['sym'] = sym
+  fail:
+    .return (cur)
 .end
+    
 
 =back
 
+=head2 Vtable functions
+
+=over 4
+
+=item get_bool
+
+=cut
+
+.sub '' :vtable('get_bool') :method
+    .local pmc match
+    match = getattribute self, '$!match'
+    if null match goto false
+    $I0 = istrue match
+    .return ($I0)
+  false:
+    .return (0)
+.end
+
+
 =head1 AUTHORS
 
 Patrick Michaud <pmichaud at pobox.com> is the author and maintainer.

Added: branches/pct-rx/t/compilers/pct/regex/03-symtoken.t
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/pct-rx/t/compilers/pct/regex/03-symtoken.t	Thu Oct  1 15:16:27 2009	(r41590)
@@ -0,0 +1,73 @@
+#! parrot
+# Copyright (C) 2009, Patrick R. Michaud
+# $Id$
+
+=head1 NAME
+
+t/compilers/pct/regex/03-symtoken.t - tests for PCT::Regex symtokens
+
+=head1 SYNOPSIS
+
+    % prove t/compilers/pct/regex/03-symtoken.t
+
+=cut
+
+.sub 'main' :main
+    load_bytecode 'Test/More.pbc'
+
+    .local pmc exports, curr_namespace, test_namespace
+    curr_namespace = get_namespace
+    test_namespace = get_namespace ['Test';'More']
+    exports        = split ' ', 'plan diag ok nok is todo'
+    test_namespace.'export_to'(curr_namespace, exports)
+
+    load_bytecode 'PCT/Regex.pbc'
+
+    # create a grammar for us to test
+    .local pmc p6meta, xyz
+    p6meta = get_hll_global 'P6metaclass'
+    xyz = p6meta.'new_class'('XYZ', 'parent'=>'Regex::Cursor')
+
+    # add some symtokens
+    xyz.'!symtoken_add'('term:sym<abc>', 'abc')
+    xyz.'!symtoken_add'('infix:sym<+>', '+')
+    xyz.'!symtoken_add'('infix:sym<->', '-')
+    xyz.'!symtoken_add'('infix:sym<++>', '++')
+    xyz.'!symtoken_add'('infix:sym<+->', '+-')
+
+    # create a cursor for matching
+    .local pmc cur
+    cur = xyz.'!cursor_init'('abc++-')
+
+    $I0 = p6meta.'isa'(cur, xyz)
+    ok($I0, 'match cursor isa XYZ')
+
+    .local pmc abc, match
+    abc = cur.'term:sym<abc>'()
+    ok(abc, 'Matched initial abc')
+    match = abc.'MATCH'()
+    ok(match, '?$/')
+    $I0 = match.'from'()
+    is($I0, 0, "$/.from")
+    $I0 = match.'to'()
+    is($I0, 3, "$/.to")
+    $S0 = match['sym']
+    is($S0, 'abc', "$<sym>")
+
+    .local pmc abc2
+    abc2 = abc.'term:sym<abc>'()
+    nok(abc2, 'No second match for abc')
+
+    .local pmc plus
+    plus = abc.'infix:sym<+>'()
+    ok(plus, 'Matched plus following abc')
+    match = plus.'MATCH'()
+    ok(match, '?$/')
+    $I0 = match.'from'()
+    is($I0, 3, "$/.from")
+    $I0 = match.'to'()
+    is($I0, 4, "$/.to")
+    $S0 = match['sym']
+    is($S0, '+', "$<sym>")
+    
+.end


More information about the parrot-commits mailing list