[svn:parrot] r41590 - in branches/pct-rx: compilers/pct/src compilers/pct/src/Regex t/compilers/pct/regex
pmichaud at svn.parrot.org
pmichaud at svn.parrot.org
Thu Oct 1 15:16:29 UTC 2009
Author: pmichaud
Date: Thu Oct 1 15:16:27 2009
New Revision: 41590
URL: https://trac.parrot.org/parrot/changeset/41590
Log:
[pct-rx]:
* Create and initialize cursors and match objects.
* Helper method for generating "match a token" regexes.
* Add initial code for protoregexes.
* Start a test suite.
Added:
branches/pct-rx/t/compilers/pct/regex/
branches/pct-rx/t/compilers/pct/regex/03-symtoken.t
Modified:
branches/pct-rx/compilers/pct/src/Regex.pir
branches/pct-rx/compilers/pct/src/Regex/Cursor.pir
Modified: branches/pct-rx/compilers/pct/src/Regex.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/Regex.pir Thu Oct 1 09:30:40 2009 (r41589)
+++ branches/pct-rx/compilers/pct/src/Regex.pir Thu Oct 1 15:16:27 2009 (r41590)
@@ -13,6 +13,8 @@
.include 'src/Regex/Cursor.pir'
+.include 'src/Regex/Match.pir'
+
=head1 AUTHOR
Patrick Michaud <pmichaud at pobox.com> is the author and maintainer.
Modified: branches/pct-rx/compilers/pct/src/Regex/Cursor.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/Regex/Cursor.pir Thu Oct 1 09:30:40 2009 (r41589)
+++ branches/pct-rx/compilers/pct/src/Regex/Cursor.pir Thu Oct 1 15:16:27 2009 (r41590)
@@ -19,14 +19,95 @@
load_bytecode 'P6object.pbc'
.local pmc p6meta
p6meta = new 'P6metaclass'
- $P0 = p6meta.'new_class'('Regex::Cursor', 'attr'=>'$!target $!from $!pos @!pstack')
+ $P0 = p6meta.'new_class'('Regex::Cursor', 'attr'=>'$!target $!from $!pos $!match @!bstack @!mstack')
.return ()
.end
+=head2 Methods
+
+=over 4
+
+=item MATCH()
+
+Return this cursor's current Match object.
+
+=cut
+
+.sub 'MATCH' :method
+ .local pmc match
+ match = getattribute self, '$!match'
+ .return (match)
+.end
+
+
+=item pos()
+
+Return the cursor's current position.
+
+=cut
+
+.sub 'pos' :method
+ $P0 = getattribute self, '$!pos'
+ .return ($P0)
+.end
+
=head2 Private methods
=over 4
+=item !cursor_init(target, pos)
+
+Create a new cursor for matching C<target>.
+
+=cut
+
+.sub '!cursor_init' :method
+ .param string target
+
+ .local pmc parrotclass, cur
+ $P0 = self.'HOW'()
+ parrotclass = getattribute $P0, 'parrotclass'
+ cur = new parrotclass
+
+ $P0 = new ['CodeString']
+ $P0 = target
+ setattribute cur, '$!target', $P0
+
+ $P0 = box 0
+ setattribute cur, '$!from', $P0
+ $P0 = box 0
+ setattribute cur, '$!pos', $P0
+ .return (cur)
+.end
+
+=item !cursor_start()
+
+Create and initialize a new cursor from C<self>.
+
+=cut
+
+.sub '!cursor_start' :method
+ .local pmc parrotclass, cur
+ $P0 = self.'HOW'()
+ parrotclass = getattribute $P0, 'parrotclass'
+ cur = new parrotclass
+
+ .local pmc from, pos, target
+ from = getattribute self, '$!pos'
+ from = clone from
+ setattribute cur, '$!from', from
+
+ pos = box -1
+ setattribute cur, '$!pos', pos
+
+ target = getattribute self, '$!target'
+ setattribute cur, '$!target', target
+
+ .return (cur, from, target)
+.end
+
+
+
=item !mark_push(rep, pos, mark)
Push a new backtracking point onto the cursor with the given
@@ -47,10 +128,11 @@
push bstack, mark
push bstack, pos
push bstack, rep
- $I0 = mstack
+ $I0 = -1
push bstack, $I0
.end
+
=item !mark_fail([mark])
Remove the most recent C<mark> and backtrack the cursor to the
@@ -66,7 +148,7 @@
.local pmc bstack, mstack
bstack = getattribute self, '@!bstack'
- bstack = getattribute self, '@!mstack'
+ mstack = getattribute self, '@!mstack'
# get the frame associated with the mark
.local int bptr, rep, pos, mark, mptr
@@ -83,17 +165,13 @@
# release the backtracked stack elements
assign bstack, bptr
- assign mstack, mptr
-
- .local pmc match
- match = mstack[-1]
# return mark values
- .return (rep, pos, mark, match)
+ .return (rep, pos, mark)
.end
-=item '!mark_commit'(mark)
+=item !mark_commit(mark)
Like C<!mark_fail> above this backtracks the cursor to C<mark>
(releasing any intermediate marks), but preserves the current
@@ -104,27 +182,341 @@
.sub '!mark_commit' :method
.param int mark
- # preserve the current match object
- .local pmc mstack, match
- mstack = getattribute self, '@!mstack'
-
# backtrack
.local int rep, pos, mark
(rep, pos, mark, $P0) = self.'!mark_fail'(mark)
- # if match we backtracked to is the same as this one,
- # we don't need to preserve it specially
- $I0 = issame $P0, match
- if $I0 goto done
- # save current match state
- push mstack, match
+ .return (rep, pos, mark)
+.end
+
+
+=item !matchify(pos)
+
+Generate a successful match at pos.
+
+=cut
+
+.sub '!matchify' :method
+ .param int pos
+
+ .local pmc match
+ match = new ['Regex';'Match']
+ setattribute self, '$!match', match
+
+ $P0 = box pos
+ setattribute self, '$!pos', $P0
+ setattribute match, '$!to', $P0
+ $P0 = getattribute self, '$!from'
+ setattribute match, '$!from', $P0
+ $P0 = getattribute self, '$!target'
+ setattribute match, '$!target', $P0
+ .return (match)
+.end
+
+
+=item !protoregex(name)
+
+Perform a match for protoregex C<name>.
+
+=cut
+
+.sub '!protoregex' :method
+ .param string name
+
+ .local pmc generation
+ generation = get_global '$!generation'
+ .local pmc parrotclass, prototable
+ parrotclass = typeof self
+ prototable = getprop '%!prototable', parrotclass
+ if null prototable goto make_prototable
+ $P0 = getprop '$!generation', prototable
+ $I0 = issame $P0, generation
+ if $I0 goto have_prototable
+ make_prototable:
+ prototable = self.'!protoregex_gen_table'(parrotclass)
+ have_prototable:
+
+ .local pmc tokrx, toklen
+ $S0 = concat name, '.tokrx'
+ tokrx = prototable[$S0]
+ $S0 = concat name, '.toklen'
+ toklen = prototable[$S0]
+ unless null tokrx goto have_tokrx
+ (tokrx, toklen) = self.'!protoregex_gen_tokrx'(prototable, name)
+ have_tokrx:
+
+ .local pmc target
+ .local int pos
+ target = getattribute self, '$!target'
+ $P1 = getattribute self, '$!pos'
+ pos = $P1
+
+ # Create a hash to keep track of the methods we've already called,
+ # so that we don't end up calling it twice.
+ .local pmc mcalled
+ mcalled = new ['Hash']
+
+ .local string token
+ $S0 = substr target, pos, 1
+ $I0 = toklen[$S0]
+ token = substr target, pos, $I0
+
+
+ token_loop:
+ .local pmc rx, result
+ rx = tokrx[token]
+ if null rx goto token_next
+ $I0 = isa rx, ['ResizablePMCArray']
+ if $I0 goto rx_array
+ .local int rxaddr
+ rxaddr = get_addr rx
+ result = mcalled[rxaddr]
+ unless null result goto token_next
+ result = self.rx()
+ mcalled[rxaddr] = result
+ if result goto done
+ goto token_next
+ rx_array:
+ .local pmc rx_it
+ rx_it = iter rx
+ cand_loop:
+ unless rx_it goto cand_done
+ rx = shift rx_it
+ rxaddr = get_addr rx
+ result = mcalled[rxaddr]
+ unless null result goto token_next
+ result = self.rx()
+ mcalled[rxaddr] = result
+ if result goto done
+ goto cand_loop
+ cand_done:
+ token_next:
+ unless token goto fail
+ chopn token, 1
+ goto token_loop
+
+ fail:
+ .tailcall self.'!cursor_start'()
done:
- .return (rep, pos, mark, match)
+ .return (result)
+.end
+
+
+=item !protoregex_gen_table(parrotclass)
+
+Generate a new protoregex table for C<parrotclass>. This involves
+creating a hash keyed with method names containing ':sym<' from
+C<parrotclass> and all of its superclasses. This new hash is
+then given the current C<$!generate> property so we can avoid
+recreating it.
+
+The categorization of the protoregex candidate lists
+for individual protoregexes is handled (lazily) by
+C<!protoregex_gen_tokrx> below.
+
+=cut
+
+.sub '!protoregex_gen_table' :method
+ .param pmc parrotclass
+
+ .local pmc prototable
+ prototable = new ['Hash']
+ .local pmc class_it, method_it
+ $P0 = parrotclass.'inspect'('all_parents')
+ class_it = iter $P0
+ class_loop:
+ unless class_it goto class_done
+ $P0 = shift class_it
+ $P0 = $P0.'methods'()
+ method_it = iter $P0
+ method_loop:
+ unless method_it goto class_loop
+ $S0 = shift method_it
+ $I0 = index $S0, ':sym<'
+ if $I0 < 0 goto method_loop
+ prototable[$S0] = prototable
+ goto method_loop
+ class_done:
+ $P0 = get_global '$!generation'
+ setprop prototable, '$!generation', $P0
+ setprop parrotclass, '%!prototable', prototable
+ .return (prototable)
+.end
+
+
+=item !protoregex_gen_tokrx(prototable, name)
+
+Generate this class' token list in prototable for the protoregex
+called C<name>.
+
+=cut
+
+.sub '!protoregex_gen_tokrx' :method
+ .param pmc prototable
+ .param string name
+
+ .local pmc toklen, tokrx
+ toklen = new ['Hash']
+ tokrx = new ['Hash']
+
+ # The prototable has already collected all of the names of
+ # protoregex methods into C<prototable>. We set up a loop
+ # to find all of the method names that begin with "name:sym<".
+ .local string mprefix
+ .local int mlen
+ mprefix = concat name, ':sym<'
+ mlen = length mprefix
+
+ .local pmc method_it, method
+ .local string method_name
+ method_it = iter prototable
+ method_loop:
+ unless method_it goto method_done
+ method_name = shift method_it
+ $S0 = substr method_name, 0, mlen
+ if $S0 != mprefix goto method_loop
+
+ # Okay, we've found a method name intended for this protoregex.
+ # Now we look up the method itself, and ask it for its prefix tokens.
+ # If it doesn't return any, we use '' as its only prefix.
+ .local pmc rx, tokens, tokens_it
+ rx = find_method self, method_name
+ (tokens :slurpy) = self.rx('peek'=>prototable)
+ if tokens goto have_tokens
+ push tokens, ''
+ have_tokens:
+
+ # Now loop through all of the tokens for the method, updating
+ # the longest initial key and adding it to the tokrx hash.
+ # We automatically promote entries in tokrx to arrays when
+ # there's more than one method candidate for a given token.
+ tokens_loop:
+ unless tokens goto tokens_done
+ .local string tkey, tfirst
+ tkey = shift tokens
+ tfirst = substr tkey, 0, 1
+ $I0 = length tkey
+ $I1 = toklen[tfirst]
+ if $I0 <= $I1 goto toklen_done
+ toklen[tfirst] = $I0
+ toklen_done:
+ .local pmc rxlist
+ rxlist = tokrx[tkey]
+ if null rxlist goto rxlist_0
+ $I0 = isa rxlist, ['ResizablePMCArray']
+ if $I0 goto rxlist_n
+ rxlist_1:
+ $I0 = issame rx, rxlist
+ if $I0 goto tokens_loop
+ $P0 = rxlist
+ rxlist = new ['ResizablePMCArray']
+ push rxlist, $P0
+ push rxlist, rx
+ tokrx[tkey] = rxlist
+ goto tokens_loop
+ rxlist_n:
+ push rxlist, rx
+ goto tokens_loop
+ rxlist_0:
+ tokrx[tkey] = rx
+ goto tokens_loop
+ tokens_done:
+ goto method_loop
+ method_done:
+
+ # It's built! Now store the tokrx and toklen hashes in the
+ # prototable and return them to the caller.
+ $S0 = concat name, '.tokrx'
+ prototable[$S0] = tokrx
+ $S0 = concat name, '.toklen'
+ prototable[$S0] = toklen
+ .return (tokrx, toklen)
+.end
+
+
+=item !symtoken_add(name, sym)
+
+Add a regex C<name> for matching fixed-string C<sym> tokens to
+the current grammar.
+
+=cut
+
+.sub '!symtoken_add' :method
+ .param string name
+ .param string sym
+
+ # create a new symtoken method instance, name it, attach <sym>
+ .local pmc symtoken
+ .const 'Sub' $P99 = 'symtoken'
+ symtoken = clone $P99
+ # symtoken = name
+ $P0 = box sym
+ setprop symtoken, '$!sym', $P0
+
+ # add method to our class
+ $P0 = self.'HOW'()
+ $P0.'add_method'(self, name, symtoken)
+
+ # self.'!regenerate'()
+.end
+
+.sub '' :method :subid('symtoken')
+ .param pmc action :named('action') :optional
+ .param pmc peek :named('peek') :optional
+
+ # get the string to be matched
+ .local pmc sym
+ $P0 = getinterp
+ $P0 = $P0['sub';0]
+ sym = getprop '$!sym', $P0
+
+ if null peek goto peek_done
+ .return (sym)
+ peek_done:
+
+ # get a fresh cursor for matching
+ .local pmc cur
+ .local string target
+ .local int pos
+ (cur, pos, target) = self.'!cursor_start'()
+
+ # compare target with sym
+ $S0 = sym
+ $I0 = length $S0
+ $S1 = substr target, pos, $I0
+ if $S0 != $S1 goto fail
+ pos += $I0
+ pass:
+ .local pmc match
+ match = cur.'!matchify'(pos)
+ match['sym'] = sym
+ fail:
+ .return (cur)
.end
+
=back
+=head2 Vtable functions
+
+=over 4
+
+=item get_bool
+
+=cut
+
+.sub '' :vtable('get_bool') :method
+ .local pmc match
+ match = getattribute self, '$!match'
+ if null match goto false
+ $I0 = istrue match
+ .return ($I0)
+ false:
+ .return (0)
+.end
+
+
=head1 AUTHORS
Patrick Michaud <pmichaud at pobox.com> is the author and maintainer.
Added: branches/pct-rx/t/compilers/pct/regex/03-symtoken.t
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ branches/pct-rx/t/compilers/pct/regex/03-symtoken.t Thu Oct 1 15:16:27 2009 (r41590)
@@ -0,0 +1,73 @@
+#! parrot
+# Copyright (C) 2009, Patrick R. Michaud
+# $Id$
+
+=head1 NAME
+
+t/compilers/pct/regex/03-symtoken.t - tests for PCT::Regex symtokens
+
+=head1 SYNOPSIS
+
+ % prove t/compilers/pct/regex/03-symtoken.t
+
+=cut
+
+.sub 'main' :main
+ load_bytecode 'Test/More.pbc'
+
+ .local pmc exports, curr_namespace, test_namespace
+ curr_namespace = get_namespace
+ test_namespace = get_namespace ['Test';'More']
+ exports = split ' ', 'plan diag ok nok is todo'
+ test_namespace.'export_to'(curr_namespace, exports)
+
+ load_bytecode 'PCT/Regex.pbc'
+
+ # create a grammar for us to test
+ .local pmc p6meta, xyz
+ p6meta = get_hll_global 'P6metaclass'
+ xyz = p6meta.'new_class'('XYZ', 'parent'=>'Regex::Cursor')
+
+ # add some symtokens
+ xyz.'!symtoken_add'('term:sym<abc>', 'abc')
+ xyz.'!symtoken_add'('infix:sym<+>', '+')
+ xyz.'!symtoken_add'('infix:sym<->', '-')
+ xyz.'!symtoken_add'('infix:sym<++>', '++')
+ xyz.'!symtoken_add'('infix:sym<+->', '+-')
+
+ # create a cursor for matching
+ .local pmc cur
+ cur = xyz.'!cursor_init'('abc++-')
+
+ $I0 = p6meta.'isa'(cur, xyz)
+ ok($I0, 'match cursor isa XYZ')
+
+ .local pmc abc, match
+ abc = cur.'term:sym<abc>'()
+ ok(abc, 'Matched initial abc')
+ match = abc.'MATCH'()
+ ok(match, '?$/')
+ $I0 = match.'from'()
+ is($I0, 0, "$/.from")
+ $I0 = match.'to'()
+ is($I0, 3, "$/.to")
+ $S0 = match['sym']
+ is($S0, 'abc', "$<sym>")
+
+ .local pmc abc2
+ abc2 = abc.'term:sym<abc>'()
+ nok(abc2, 'No second match for abc')
+
+ .local pmc plus
+ plus = abc.'infix:sym<+>'()
+ ok(plus, 'Matched plus following abc')
+ match = plus.'MATCH'()
+ ok(match, '?$/')
+ $I0 = match.'from'()
+ is($I0, 3, "$/.from")
+ $I0 = match.'to'()
+ is($I0, 4, "$/.to")
+ $S0 = match['sym']
+ is($S0, '+', "$<sym>")
+
+.end
More information about the parrot-commits
mailing list