[svn:parrot] r41545 - in branches/pct-rx: . compilers/pct/src/PAST examples/regex
pmichaud at svn.parrot.org
pmichaud at svn.parrot.org
Tue Sep 29 03:48:36 UTC 2009
Author: pmichaud
Date: Tue Sep 29 03:48:34 2009
New Revision: 41545
URL: https://trac.parrot.org/parrot/changeset/41545
Log:
[pct-rx]: Updates to add greedy + ratchet quantifiers; improve alt.
Add some example code to review the output.
Added:
branches/pct-rx/examples/regex/
branches/pct-rx/examples/regex/01-literal-past.nqp
branches/pct-rx/examples/regex/02-greedy-past.nqp
branches/pct-rx/examples/regex/03-ratchet-past.nqp
branches/pct-rx/examples/regex/05-alt-past.nqp
Modified:
branches/pct-rx/ (props changed)
branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir
branches/pct-rx/compilers/pct/src/PAST/Node.pir
Modified: branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir Tue Sep 29 01:49:46 2009 (r41544)
+++ branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir Tue Sep 29 03:48:34 2009 (r41545)
@@ -37,7 +37,7 @@
.local string prefix, rname, rtype
prefix = self.'unique'('rx')
concat prefix, '_'
- $P0 = split ' ', 'tgt string pos int off int len int cur pmc'
+ $P0 = split ' ', 'tgt string pos int off int len int rep int cur pmc'
$P1 = iter $P0
iter_loop:
unless $P1 goto iter_done
@@ -55,17 +55,17 @@
faillabel = self.'post_new'('Label', 'result'=>$S1)
reghash['fail'] = faillabel
- .local string cur, pos
- (cur, pos) = self.'!rxregs'('cur pos')
+ .local string cur, rep, pos
+ (cur, rep, pos) = self.'!rxregs'('cur rep pos')
$P0 = self.'post_regex'(node)
ops.'push'($P0)
ops.'push'(faillabel)
- $S0 = concat '(', cur
+ $S0 = concat '(', rep
concat $S0, ','
concat $S0, pos
concat $S0, ',$I10)'
- ops.'push_pirop'('callmethod', "'!popmark'", cur, 'result'=>$S0)
+ ops.'push_pirop'('callmethod', "'!mark_cut'", cur, 'result'=>$S0)
ops.'push_pirop'('jump', '$I10')
.return (ops)
.end
@@ -100,7 +100,8 @@
Return the POST representation of the regex component given by C<node>.
Normally this is handled by redispatching to a method corresponding to
-the node's "pasttype" and "backtrack" attributes.
+the node's "pasttype" and "backtrack" attributes. If no "pasttype" is
+given, then "concat" is assumed.
=cut
@@ -111,6 +112,9 @@
.local string pasttype
pasttype = node.'pasttype'()
+ if pasttype goto have_pasttype
+ pasttype = 'concat'
+ have_pasttype:
$P0 = find_method self, pasttype
$P1 = self.$P0(node)
unless have_cur goto done
@@ -135,70 +139,52 @@
.end
-=item regex_mark(prefix)
-
-Create a label starting with C<prefix> and POST instructions
-to set a backtrack to the label in the current cursor.
-
-=cut
-
-.sub 'regex_mark' :method
- .param string prefix
-
- .local pmc cur, pos, ops, backlabel
- (cur, pos) = self.'!rxregs'('cur pos')
- ops = self.'post_new'('Ops')
- backlabel = self.'post_new'('Label', 'name'=>prefix)
- ops.'push_pirop'('set_addr', '$I10', backlabel)
- ops.'push_pirop'('callmethod', "'!pushmark'", cur, pos, '$I10')
- .return (ops, backlabel)
-.end
-
-
=item alt(PAST::Regex node)
-Create POST to alternate among child regexes of C<node>, including
-backtracking.
-
=cut
.sub 'alt' :method :multi(_, ['PAST';'Regex'])
.param pmc node
- .local pmc cur
- cur = self.'!rxregs'('cur')
+ .local pmc cur, pos
+ (cur, pos) = self.'!rxregs'('cur pos')
- .local pmc ops, iter, cpast, cpost
- ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+ .local string name
+ name = self.'unique'('alt')
+ concat name, '_'
+ .local pmc ops, iter
+ ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
iter = node.'iterator'()
unless iter goto done
- # get post for first alternative
- .local pmc apast, apost, amark, alabel, endlabel
- apast = shift iter
- apost = self.'post_regex'(apast, cur)
- ops.'push'(apost)
- unless iter goto done
-
- endlabel = self.'post_new'('Label', 'name'=>'rx_alt_end_')
+ .local int acount
+ .local pmc alabel, endlabel
+ acount = 0
+ $S0 = acount
+ $S0 = concat name, $S0
+ alabel = self.'post_new'('Label', 'result'=>$S0)
+ $S0 = concat name, 'end'
+ endlabel = self.'post_new'('Label', 'result'=>$S0)
- # for all remaining alternatives, we put a label at the end
- # of the previous alternative, generate a label and backtracking
- # mark for the new alternative, and add those to our ops list
iter_loop:
- ops.'push_pirop'('goto', endlabel)
+ ops.'push'(alabel)
+ .local pmc apast, apost
apast = shift iter
apost = self.'post_regex'(apast, cur)
- (amark, alabel) = self.'regex_mark'('rx_alt_')
- ops.'unshift'(amark)
- ops.'push'(alabel)
+ unless iter goto iter_done
+ inc acount
+ $S0 = acount
+ $S0 = concat name, $S0
+ alabel = self.'post_new'('Label', 'result'=>$S0)
+ ops.'push_pirop'('set_addr', '$I10', alabel)
+ ops.'push_pirop'('callmethod', '"!mark_push"', cur, 0, pos, '$I10')
+ ops.'push'(apost)
+ ops.'push_pirop'('goto', endlabel)
+ goto iter_loop
+ iter_done:
ops.'push'(apost)
- if iter goto iter_loop
-
- d1:
ops.'push'(endlabel)
-
done:
.return (ops)
.end
@@ -284,6 +270,10 @@
.end
+=item pass(PAST::Regex node)
+
+=cut
+
.sub 'pass' :method :multi(_,['PAST';'Regex'])
.param pmc node
@@ -293,7 +283,94 @@
ops.'push_pirop'('yield', cur)
.return (ops)
.end
-
+
+
+=item quant(PAST::Regex node)
+
+=cut
+
+.sub 'quant' :method :multi(_,['PAST';'Regex'])
+ .param pmc node
+
+ .local string backtrack
+ backtrack = node.'backtrack'()
+ if backtrack goto have_backtrack
+ backtrack = 'g'
+ have_backtrack:
+
+ .local int min, max
+ min = node.'min'()
+ $P0 = node.'max'()
+ max = $P0
+ $I0 = defined $P0
+ if $I0 goto have_max
+ max = -1 # -1 represents Inf
+ have_max:
+
+ .local pmc cur, pos, rep, fail
+ (cur, pos, rep, fail) = self.'!rxregs'('cur pos rep fail')
+
+ .local string qname
+ .local pmc ops, q1label, q2label, q2reg, cpost
+ $S0 = concat 'quant', backtrack
+ qname = self.'unique'($S0)
+ ops = self.'post_new'('Ops', 'node'=>node)
+ $S0 = concat qname, '_loop'
+ q1label = self.'post_new'('Label', 'result'=>$S0)
+ $S0 = concat qname, '_done'
+ q2label = self.'post_new'('Label', 'result'=>$S0)
+ q2reg = self.'uniquereg'('I')
+ cpost = self.'concat'(node)
+
+ $S0 = max
+ .local int needrep
+ $I0 = isgt min, 1
+ $I1 = isgt max, 1
+ needrep = or $I0, $I1
+
+ unless max < 0 goto have_s0
+ $S0 = '*'
+ have_s0:
+ ops.'push_pirop'('inline', qname, min, $S0, 'inline'=>' # rx %0 ** %1..%2')
+ ops.'push_pirop'('set_addr', q2reg, q2label)
+
+
+ greedy:
+ .local int needmark
+ .local string peekcut
+ needmark = needrep
+ peekcut = '"!mark_peek"'
+ if backtrack != 'r' goto greedy_1
+ needmark = 1
+ peekcut = '"!mark_cut"'
+ greedy_1:
+ if min == 0 goto greedy_2
+ unless needmark goto greedy_loop
+ ops.'push_pirop'('callmethod', '"!mark_push"', cur, 0, -1, q2reg)
+ goto greedy_loop
+ greedy_2:
+ ops.'push_pirop'('callmethod', '"!mark_push"', cur, 0, pos, q2reg)
+ greedy_loop:
+ ops.'push'(q1label)
+ ops.'push'(cpost)
+ unless needmark goto greedy_3
+ ops.'push_pirop'('callmethod', peekcut, cur, q2reg, 'result'=>rep)
+ unless needrep goto greedy_3
+ ops.'push_pirop'('inc', rep)
+ greedy_3:
+ unless max > 1 goto greedy_4
+ ops.'push_pirop'('ge', rep, max, q2label)
+ greedy_4:
+ unless max != 1 goto greedy_5
+ ops.'push_pirop'('callmethod', '"!mark_push"', cur, rep, pos, q2reg)
+ ops.'push_pirop'('goto', q1label)
+ greedy_5:
+ ops.'push'(q2label)
+ unless min > 1 goto greedy_6
+ ops.'push_pirop'('lt', rep, min, fail)
+ greedy_6:
+ .return (ops)
+.end
=back
Modified: branches/pct-rx/compilers/pct/src/PAST/Node.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/PAST/Node.pir Tue Sep 29 01:49:46 2009 (r41544)
+++ branches/pct-rx/compilers/pct/src/PAST/Node.pir Tue Sep 29 03:48:34 2009 (r41545)
@@ -743,6 +743,27 @@
.namespace ['PAST';'Regex']
+.sub 'backtrack' :method
+ .param pmc value :optional
+ .param int has_value :opt_flag
+ .tailcall self.'attr'('backtrack', value, has_value)
+.end
+
+
+.sub 'min' :method
+ .param pmc value :optional
+ .param int has_value :opt_flag
+ .tailcall self.'attr'('min', value, has_value)
+.end
+
+
+.sub 'max' :method
+ .param pmc value :optional
+ .param int has_value :opt_flag
+ .tailcall self.'attr'('max', value, has_value)
+.end
+
+
.sub 'pasttype' :method
.param pmc value :optional
.param int has_value :opt_flag
Added: branches/pct-rx/examples/regex/01-literal-past.nqp
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ branches/pct-rx/examples/regex/01-literal-past.nqp Tue Sep 29 03:48:34 2009 (r41545)
@@ -0,0 +1,14 @@
+# nqp
+
+# regex { 'foo' 'bar' }
+
+my $past :=
+ PAST::Regex.new(
+ PAST::Regex.new('foo', :pasttype('literal')),
+ PAST::Regex.new('bar', :pasttype('literal')),
+ PAST::Regex.new(:pasttype('pass')),
+ :pasttype('concat')
+ );
+
+say(PAST::Compiler.compile($past, :target('pir')));
+
Added: branches/pct-rx/examples/regex/02-greedy-past.nqp
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ branches/pct-rx/examples/regex/02-greedy-past.nqp Tue Sep 29 03:48:34 2009 (r41545)
@@ -0,0 +1,51 @@
+# nqp
+
+say("# regex { 'foo'*! }");
+my $past :=
+ PAST::Regex.new(
+ PAST::Regex.new(
+ PAST::Regex.new('foo', :pasttype('literal')),
+ :pasttype('quant')
+ ),
+ PAST::Regex.new(:pasttype('pass')),
+ );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo'+! }");
+my $past :=
+ PAST::Regex.new(
+ PAST::Regex.new(
+ PAST::Regex.new('foo', :pasttype('literal')),
+ :pasttype('quant'),
+ :min(1)
+ ),
+ PAST::Regex.new(:pasttype('pass')),
+ );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo'?! }");
+my $past :=
+ PAST::Regex.new(
+ PAST::Regex.new(
+ PAST::Regex.new('foo', :pasttype('literal')),
+ :pasttype('quant'),
+ :max(1)
+ ),
+ PAST::Regex.new(:pasttype('pass')),
+ );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo' **! 5..9 }");
+my $past :=
+ PAST::Regex.new(
+ PAST::Regex.new(
+ PAST::Regex.new('foo', :pasttype('literal')),
+ :pasttype('quant'),
+ :min(5),
+ :max(9)
+ ),
+ PAST::Regex.new(:pasttype('pass')),
+ );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+
Added: branches/pct-rx/examples/regex/03-ratchet-past.nqp
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ branches/pct-rx/examples/regex/03-ratchet-past.nqp Tue Sep 29 03:48:34 2009 (r41545)
@@ -0,0 +1,55 @@
+# nqp
+
+say("# regex { 'foo'*: }");
+my $past :=
+ PAST::Regex.new(
+ PAST::Regex.new(
+ PAST::Regex.new('foo', :pasttype('literal')),
+ :pasttype('quant'),
+ :backtrack('r')
+ ),
+ PAST::Regex.new(:pasttype('pass')),
+ );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo'+: }");
+my $past :=
+ PAST::Regex.new(
+ PAST::Regex.new(
+ PAST::Regex.new('foo', :pasttype('literal')),
+ :pasttype('quant'),
+ :backtrack('r'),
+ :min(1)
+ ),
+ PAST::Regex.new(:pasttype('pass')),
+ );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo'?: }");
+my $past :=
+ PAST::Regex.new(
+ PAST::Regex.new(
+ PAST::Regex.new('foo', :pasttype('literal')),
+ :pasttype('quant'),
+ :backtrack('r'),
+ :max(1)
+ ),
+ PAST::Regex.new(:pasttype('pass')),
+ );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo' **: 5..9 }");
+my $past :=
+ PAST::Regex.new(
+ PAST::Regex.new(
+ PAST::Regex.new('foo', :pasttype('literal')),
+ :pasttype('quant'),
+ :backtrack('r'),
+ :min(5),
+ :max(9)
+ ),
+ PAST::Regex.new(:pasttype('pass')),
+ );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+
Added: branches/pct-rx/examples/regex/05-alt-past.nqp
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ branches/pct-rx/examples/regex/05-alt-past.nqp Tue Sep 29 03:48:34 2009 (r41545)
@@ -0,0 +1,17 @@
+# nqp
+
+
+say("# regex { 'foo' | 'bar' | 'baz' }");
+my $past :=
+ PAST::Regex.new(
+ PAST::Regex.new(
+ PAST::Regex.new('foo', :pasttype('literal')),
+ PAST::Regex.new('bar', :pasttype('literal')),
+ PAST::Regex.new('baz', :pasttype('literal')),
+ :pasttype('alt')
+ ),
+ PAST::Regex.new(:pasttype('pass')),
+ );
+
+say(PAST::Compiler.compile($past, :target('pir')));
+
More information about the parrot-commits
mailing list