[svn:parrot] r41545 - in branches/pct-rx: . compilers/pct/src/PAST examples/regex

pmichaud at svn.parrot.org pmichaud at svn.parrot.org
Tue Sep 29 03:48:36 UTC 2009


Author: pmichaud
Date: Tue Sep 29 03:48:34 2009
New Revision: 41545
URL: https://trac.parrot.org/parrot/changeset/41545

Log:
[pct-rx]:  Updates to add greedy + ratchet quantifiers; improve alt.
Add some example code to review the output.

Added:
   branches/pct-rx/examples/regex/
   branches/pct-rx/examples/regex/01-literal-past.nqp
   branches/pct-rx/examples/regex/02-greedy-past.nqp
   branches/pct-rx/examples/regex/03-ratchet-past.nqp
   branches/pct-rx/examples/regex/05-alt-past.nqp
Modified:
   branches/pct-rx/   (props changed)
   branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir
   branches/pct-rx/compilers/pct/src/PAST/Node.pir

Modified: branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir	Tue Sep 29 01:49:46 2009	(r41544)
+++ branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir	Tue Sep 29 03:48:34 2009	(r41545)
@@ -37,7 +37,7 @@
     .local string prefix, rname, rtype
     prefix = self.'unique'('rx')
     concat prefix, '_'
-    $P0 = split ' ', 'tgt string pos int off int len int cur pmc'
+    $P0 = split ' ', 'tgt string pos int off int len int rep int cur pmc'
     $P1 = iter $P0
   iter_loop:
     unless $P1 goto iter_done
@@ -55,17 +55,17 @@
     faillabel = self.'post_new'('Label', 'result'=>$S1)
     reghash['fail'] = faillabel
 
-    .local string cur, pos
-    (cur, pos) = self.'!rxregs'('cur pos')
+    .local string cur, rep, pos
+    (cur, rep, pos) = self.'!rxregs'('cur rep pos')
 
     $P0 = self.'post_regex'(node)
     ops.'push'($P0)
     ops.'push'(faillabel)
-    $S0 = concat '(', cur
+    $S0 = concat '(', rep
     concat $S0, ','
     concat $S0, pos
     concat $S0, ',$I10)'
-    ops.'push_pirop'('callmethod', "'!popmark'", cur, 'result'=>$S0)
+    ops.'push_pirop'('callmethod', "'!mark_cut'", cur, 'result'=>$S0)
     ops.'push_pirop'('jump', '$I10')
     .return (ops)
 .end
@@ -100,7 +100,8 @@
 
 Return the POST representation of the regex component given by C<node>.
 Normally this is handled by redispatching to a method corresponding to
-the node's "pasttype" and "backtrack" attributes.
+the node's "pasttype" and "backtrack" attributes.  If no "pasttype" is
+given, then "concat" is assumed.
 
 =cut
 
@@ -111,6 +112,9 @@
 
     .local string pasttype
     pasttype = node.'pasttype'()
+    if pasttype goto have_pasttype
+    pasttype = 'concat'
+  have_pasttype:
     $P0 = find_method self, pasttype
     $P1 = self.$P0(node)
     unless have_cur goto done
@@ -135,70 +139,52 @@
 .end
 
 
-=item regex_mark(prefix)
-
-Create a label starting with C<prefix> and POST instructions
-to set a backtrack to the label in the current cursor.
-
-=cut
-
-.sub 'regex_mark' :method
-    .param string prefix
-
-    .local pmc cur, pos, ops, backlabel
-    (cur, pos) = self.'!rxregs'('cur pos')
-    ops = self.'post_new'('Ops')
-    backlabel = self.'post_new'('Label', 'name'=>prefix)
-    ops.'push_pirop'('set_addr', '$I10', backlabel)
-    ops.'push_pirop'('callmethod', "'!pushmark'", cur, pos, '$I10')
-    .return (ops, backlabel)
-.end
-
-
 =item alt(PAST::Regex node)
 
-Create POST to alternate among child regexes of C<node>, including
-backtracking.
-
 =cut
 
 .sub 'alt' :method :multi(_, ['PAST';'Regex'])
     .param pmc node
 
-    .local pmc cur
-    cur = self.'!rxregs'('cur')
+    .local pmc cur, pos
+    (cur, pos) = self.'!rxregs'('cur pos')
 
-    .local pmc ops, iter, cpast, cpost
-    ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+    .local string name
+    name = self.'unique'('alt')
+    concat name, '_'
 
+    .local pmc ops, iter
+    ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
     iter = node.'iterator'()
     unless iter goto done
 
-    # get post for first alternative
-    .local pmc apast, apost, amark, alabel, endlabel
-    apast = shift iter
-    apost = self.'post_regex'(apast, cur)
-    ops.'push'(apost)
-    unless iter goto done
-
-    endlabel = self.'post_new'('Label', 'name'=>'rx_alt_end_')
+    .local int acount
+    .local pmc alabel, endlabel
+    acount = 0
+    $S0 = acount
+    $S0 = concat name, $S0
+    alabel = self.'post_new'('Label', 'result'=>$S0)
+    $S0 = concat name, 'end'
+    endlabel = self.'post_new'('Label', 'result'=>$S0)
 
-    # for all remaining alternatives, we put a label at the end
-    # of the previous alternative, generate a label and backtracking
-    # mark for the new alternative, and add those to our ops list
   iter_loop:
-    ops.'push_pirop'('goto', endlabel)
+    ops.'push'(alabel)
+    .local pmc apast, apost
     apast = shift iter
     apost = self.'post_regex'(apast, cur)
-    (amark, alabel) = self.'regex_mark'('rx_alt_')
-    ops.'unshift'(amark)
-    ops.'push'(alabel)
+    unless iter goto iter_done
+    inc acount
+    $S0 = acount
+    $S0 = concat name, $S0
+    alabel = self.'post_new'('Label', 'result'=>$S0)
+    ops.'push_pirop'('set_addr', '$I10', alabel)
+    ops.'push_pirop'('callmethod', '"!mark_push"', cur, 0, pos, '$I10')
+    ops.'push'(apost)
+    ops.'push_pirop'('goto', endlabel)
+    goto iter_loop
+  iter_done:
     ops.'push'(apost)
-    if iter goto iter_loop
-
-  d1:
     ops.'push'(endlabel)
-
   done:
     .return (ops)
 .end
@@ -284,6 +270,10 @@
 .end
 
 
+=item pass(PAST::Regex node)
+
+=cut
+
 .sub 'pass' :method :multi(_,['PAST';'Regex'])
     .param pmc node
 
@@ -293,7 +283,94 @@
     ops.'push_pirop'('yield', cur)
     .return (ops)
 .end
-    
+
+
+=item quant(PAST::Regex node)
+
+=cut
+
+.sub 'quant' :method :multi(_,['PAST';'Regex'])
+    .param pmc node
+
+    .local string backtrack
+    backtrack = node.'backtrack'()
+    if backtrack goto have_backtrack
+    backtrack = 'g'
+  have_backtrack:
+
+     .local int min, max
+     min = node.'min'()
+     $P0 = node.'max'()
+     max = $P0
+     $I0 = defined $P0
+     if $I0 goto have_max
+     max = -1                          # -1 represents Inf
+   have_max:
+
+    .local pmc cur, pos, rep, fail
+    (cur, pos, rep, fail) = self.'!rxregs'('cur pos rep fail')
+
+    .local string qname
+    .local pmc ops, q1label, q2label, q2reg, cpost
+    $S0 = concat 'quant', backtrack
+    qname = self.'unique'($S0)
+    ops = self.'post_new'('Ops', 'node'=>node)
+    $S0 = concat qname, '_loop'
+    q1label = self.'post_new'('Label', 'result'=>$S0)
+    $S0 = concat qname, '_done'
+    q2label = self.'post_new'('Label', 'result'=>$S0)
+    q2reg = self.'uniquereg'('I')
+    cpost = self.'concat'(node)
+
+    $S0 = max
+    .local int needrep
+    $I0 = isgt min, 1
+    $I1 = isgt max, 1
+    needrep = or $I0, $I1
+
+    unless max < 0 goto have_s0
+    $S0 = '*'
+  have_s0:
+    ops.'push_pirop'('inline', qname, min, $S0, 'inline'=>'  # rx %0 ** %1..%2')
+    ops.'push_pirop'('set_addr', q2reg, q2label)
+
+
+  greedy:
+    .local int needmark
+    .local string peekcut
+    needmark = needrep
+    peekcut = '"!mark_peek"'
+    if backtrack != 'r' goto greedy_1
+    needmark = 1
+    peekcut = '"!mark_cut"'
+  greedy_1:
+    if min == 0 goto greedy_2
+    unless needmark goto greedy_loop
+    ops.'push_pirop'('callmethod', '"!mark_push"', cur, 0, -1, q2reg)
+    goto greedy_loop
+  greedy_2:
+    ops.'push_pirop'('callmethod', '"!mark_push"', cur, 0, pos, q2reg)
+  greedy_loop:
+    ops.'push'(q1label)
+    ops.'push'(cpost)
+    unless needmark goto greedy_3
+    ops.'push_pirop'('callmethod', peekcut, cur, q2reg, 'result'=>rep)
+    unless needrep goto greedy_3
+    ops.'push_pirop'('inc', rep)
+  greedy_3:
+    unless max > 1 goto greedy_4
+    ops.'push_pirop'('ge', rep, max, q2label)
+  greedy_4:
+    unless max != 1 goto greedy_5
+    ops.'push_pirop'('callmethod', '"!mark_push"', cur, rep, pos, q2reg)
+    ops.'push_pirop'('goto', q1label)
+  greedy_5:
+    ops.'push'(q2label)
+    unless min > 1 goto greedy_6
+    ops.'push_pirop'('lt', rep, min, fail)
+  greedy_6:
+    .return (ops)
+.end
 
 =back
 

Modified: branches/pct-rx/compilers/pct/src/PAST/Node.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/PAST/Node.pir	Tue Sep 29 01:49:46 2009	(r41544)
+++ branches/pct-rx/compilers/pct/src/PAST/Node.pir	Tue Sep 29 03:48:34 2009	(r41545)
@@ -743,6 +743,27 @@
 
 .namespace ['PAST';'Regex']
 
+.sub 'backtrack' :method
+    .param pmc value           :optional
+    .param int has_value       :opt_flag
+    .tailcall self.'attr'('backtrack', value, has_value)
+.end
+
+
+.sub 'min' :method
+    .param pmc value           :optional
+    .param int has_value       :opt_flag
+    .tailcall self.'attr'('min', value, has_value)
+.end
+
+
+.sub 'max' :method
+    .param pmc value           :optional
+    .param int has_value       :opt_flag
+    .tailcall self.'attr'('max', value, has_value)
+.end
+
+
 .sub 'pasttype' :method
     .param pmc value           :optional
     .param int has_value       :opt_flag

Added: branches/pct-rx/examples/regex/01-literal-past.nqp
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/pct-rx/examples/regex/01-literal-past.nqp	Tue Sep 29 03:48:34 2009	(r41545)
@@ -0,0 +1,14 @@
+# nqp
+
+# regex { 'foo' 'bar' }
+
+my $past :=
+  PAST::Regex.new(
+    PAST::Regex.new('foo', :pasttype('literal')),
+    PAST::Regex.new('bar', :pasttype('literal')),
+    PAST::Regex.new(:pasttype('pass')),
+    :pasttype('concat')
+  );
+
+say(PAST::Compiler.compile($past, :target('pir')));
+

Added: branches/pct-rx/examples/regex/02-greedy-past.nqp
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/pct-rx/examples/regex/02-greedy-past.nqp	Tue Sep 29 03:48:34 2009	(r41545)
@@ -0,0 +1,51 @@
+# nqp
+
+say("# regex { 'foo'*! }");
+my $past :=
+  PAST::Regex.new(
+    PAST::Regex.new(
+      PAST::Regex.new('foo', :pasttype('literal')),
+      :pasttype('quant')
+    ),
+    PAST::Regex.new(:pasttype('pass')),
+  );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo'+! }");
+my $past :=
+  PAST::Regex.new(
+    PAST::Regex.new(
+      PAST::Regex.new('foo', :pasttype('literal')),
+      :pasttype('quant'),
+      :min(1)
+    ),
+    PAST::Regex.new(:pasttype('pass')),
+  );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo'?! }");
+my $past :=
+  PAST::Regex.new(
+    PAST::Regex.new(
+      PAST::Regex.new('foo', :pasttype('literal')),
+      :pasttype('quant'),
+      :max(1)
+    ),
+    PAST::Regex.new(:pasttype('pass')),
+  );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo' **! 5..9 }");
+my $past :=
+  PAST::Regex.new(
+    PAST::Regex.new(
+      PAST::Regex.new('foo', :pasttype('literal')),
+      :pasttype('quant'),
+      :min(5),
+      :max(9)
+    ),
+    PAST::Regex.new(:pasttype('pass')),
+  );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+

Added: branches/pct-rx/examples/regex/03-ratchet-past.nqp
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/pct-rx/examples/regex/03-ratchet-past.nqp	Tue Sep 29 03:48:34 2009	(r41545)
@@ -0,0 +1,55 @@
+# nqp
+
+say("# regex { 'foo'*: }");
+my $past :=
+  PAST::Regex.new(
+    PAST::Regex.new(
+      PAST::Regex.new('foo', :pasttype('literal')),
+      :pasttype('quant'),
+      :backtrack('r')
+    ),
+    PAST::Regex.new(:pasttype('pass')),
+  );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo'+: }");
+my $past :=
+  PAST::Regex.new(
+    PAST::Regex.new(
+      PAST::Regex.new('foo', :pasttype('literal')),
+      :pasttype('quant'),
+      :backtrack('r'),
+      :min(1)
+    ),
+    PAST::Regex.new(:pasttype('pass')),
+  );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo'?: }");
+my $past :=
+  PAST::Regex.new(
+    PAST::Regex.new(
+      PAST::Regex.new('foo', :pasttype('literal')),
+      :pasttype('quant'),
+      :backtrack('r'),
+      :max(1)
+    ),
+    PAST::Regex.new(:pasttype('pass')),
+  );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+say("# regex { 'foo' **: 5..9 }");
+my $past :=
+  PAST::Regex.new(
+    PAST::Regex.new(
+      PAST::Regex.new('foo', :pasttype('literal')),
+      :pasttype('quant'),
+      :backtrack('r'),
+      :min(5),
+      :max(9)
+    ),
+    PAST::Regex.new(:pasttype('pass')),
+  );
+say(PAST::Compiler.compile($past, :target('pir')));
+
+

Added: branches/pct-rx/examples/regex/05-alt-past.nqp
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/pct-rx/examples/regex/05-alt-past.nqp	Tue Sep 29 03:48:34 2009	(r41545)
@@ -0,0 +1,17 @@
+# nqp
+
+
+say("# regex { 'foo' | 'bar' | 'baz' }");
+my $past :=
+  PAST::Regex.new(
+    PAST::Regex.new(
+      PAST::Regex.new('foo', :pasttype('literal')),
+      PAST::Regex.new('bar', :pasttype('literal')),
+      PAST::Regex.new('baz', :pasttype('literal')),
+      :pasttype('alt')
+    ),
+    PAST::Regex.new(:pasttype('pass')),
+  );
+
+say(PAST::Compiler.compile($past, :target('pir')));
+


More information about the parrot-commits mailing list