[svn:parrot] r41496 - in branches/pct-rx: . compilers/pct/src compilers/pct/src/PAST compilers/pct/src/POST compilers/pct/src/Regex config/gen/makefiles

pmichaud at svn.parrot.org pmichaud at svn.parrot.org
Sat Sep 26 13:45:13 UTC 2009


Author: pmichaud
Date: Sat Sep 26 13:45:06 2009
New Revision: 41496
URL: https://trac.parrot.org/parrot/changeset/41496

Log:
[pct-rx]  First pieces of code for the PCT-based regex implementation.

Added:
   branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir
   branches/pct-rx/compilers/pct/src/Regex/
   branches/pct-rx/compilers/pct/src/Regex.pir
   branches/pct-rx/compilers/pct/src/Regex/Cursor.pir
Modified:
   branches/pct-rx/MANIFEST
   branches/pct-rx/compilers/pct/src/PAST.pir
   branches/pct-rx/compilers/pct/src/PAST/Compiler.pir
   branches/pct-rx/compilers/pct/src/PAST/Node.pir
   branches/pct-rx/compilers/pct/src/POST/Compiler.pir
   branches/pct-rx/config/gen/makefiles/pct.in

Modified: branches/pct-rx/MANIFEST
==============================================================================
--- branches/pct-rx/MANIFEST	Sat Sep 26 12:57:12 2009	(r41495)
+++ branches/pct-rx/MANIFEST	Sat Sep 26 13:45:06 2009	(r41496)
@@ -1,7 +1,7 @@
 # ex: set ro:
 # $Id$
 #
-# generated by tools/dev/mk_manifest_and_skip.pl Sat Sep 19 08:24:23 2009 UT
+# generated by tools/dev/mk_manifest_and_skip.pl Sat Sep 26 13:40:35 2009 UT
 #
 # See below for documentation on the format of this file.
 #
@@ -122,6 +122,7 @@
 compilers/pct/PCT.pir                                       [pct]
 compilers/pct/README.pod                                    []doc
 compilers/pct/src/PAST.pir                                  [pct]
+compilers/pct/src/PAST/Compiler-Regex.pir                   [pct]
 compilers/pct/src/PAST/Compiler.pir                         [pct]
 compilers/pct/src/PAST/Node.pir                             [pct]
 compilers/pct/src/PCT/Dumper.pir                            [pct]
@@ -130,6 +131,8 @@
 compilers/pct/src/PCT/Node.pir                              [pct]
 compilers/pct/src/POST/Compiler.pir                         [pct]
 compilers/pct/src/POST/Node.pir                             [pct]
+compilers/pct/src/Regex.pir                                 [pct]
+compilers/pct/src/Regex/Cursor.pir                          [pct]
 compilers/pge/P6Rule.grammar                                [pge]
 compilers/pge/PGE.pir                                       [pge]
 compilers/pge/PGE/Exp.pir                                   [pge]

Modified: branches/pct-rx/compilers/pct/src/PAST.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/PAST.pir	Sat Sep 26 12:57:12 2009	(r41495)
+++ branches/pct-rx/compilers/pct/src/PAST.pir	Sat Sep 26 13:45:06 2009	(r41496)
@@ -17,6 +17,8 @@
 
 .include 'src/PAST/Compiler.pir'
 
+.include 'src/PAST/Compiler-Regex.pir'
+
 .include 'src/POST/Node.pir'
 
 .include 'src/POST/Compiler.pir'

Added: branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir	Sat Sep 26 13:45:06 2009	(r41496)
@@ -0,0 +1,315 @@
+# $Id$
+
+=head1 NAME
+
+PAST::Compiler-Regex - Compiler for PAST::Regex nodes
+
+=head1 DESCRIPTION
+
+PAST::Compiler-Regex implements the transformations to convert 
+PAST::Regex nodes into POST.  It's still a part of PAST::Compiler;
+we've separated out the regex-specific transformations here for
+better code management and debugging.
+
+=head2 Compiler methods
+
+=head3 C<PAST::Regex>
+
+=over 4
+
+=item as_post(PAST::Regex node)
+
+Return the POST representation of the regex AST rooted by C<node>.
+
+=cut
+
+.sub 'as_post' :method :multi(_, ['PAST';'Regex'])
+    .param pmc node
+    .param pmc options         :slurpy :named
+    # NOTE(review): 'options' is accepted but never read in this sub.
+    .local pmc ops
+    ops = self.'post_new'('Ops', 'node'=>node)
+
+    .local pmc reghash
+    reghash = new ['Hash']
+    .lex '$*REG', reghash
+    # '$*REG' is the table that '!rxregs' looks up with find_dynamic_lex.
+    .local string prefix, rname, rtype
+    prefix = self.'unique'('rx')
+    concat prefix, '_'
+    $P0 = split ' ', 'tgt string pos int off int len int cur pmc'   # (name, type) pairs
+    $P1 = iter $P0
+  iter_loop:
+    unless $P1 goto iter_done
+    rname = shift $P1                   # table key, e.g. 'tgt'
+    rtype = shift $P1                   # PIR type used in the declaration
+    $S1 = concat prefix, rname
+    reghash[rname] = $S1
+    $S2 = concat '.local ', rtype
+    ops.'push_pirop'($S2, $S1)          # pirop name is '.local <type>'
+    goto iter_loop
+  iter_done:
+    # the failure label is stored in the same table under the key 'fail'
+    .local pmc faillabel
+    $S1 = concat prefix, 'fail'
+    faillabel = self.'post_new'('Label', 'result'=>$S1)
+    reghash['fail'] = faillabel
+
+    .local string cur, pos
+    (cur, pos) = self.'!rxregs'('cur pos')
+    # regex body first, then the failure label followed by the popmark/jump pair
+    $P0 = self.'post_regex'(node)
+    ops.'push'($P0)
+    ops.'push'(faillabel)
+    $S0 = concat '(', cur
+    concat $S0, ','
+    concat $S0, pos
+    concat $S0, ',$I10)'                # $S0 is now '(<cur>,<pos>,$I10)'
+    ops.'push_pirop'('callmethod', "'!popmark'", cur, 'result'=>$S0)
+    ops.'push_pirop'('jump', '$I10')
+    .return (ops)
+.end
+
+=item !rxregs(keystr)
+
+Helper function -- looks up the current regex register table
+in the dynamic scope and returns a slice based on the keys
+given in C<keystr>.
+
+=cut
+
+.sub '!rxregs' :method
+    .param string keystr
+    # collect the '$*REG' entries named in keystr, in the order given
+    .local pmc names, regtable, slice
+    regtable = find_dynamic_lex '$*REG'
+    slice = new ['ResizablePMCArray']
+    names = split ' ', keystr
+    unless names goto slice_done
+  next_name:
+    $S1 = shift names
+    $P1 = regtable[$S1]
+    push slice, $P1
+    if names goto next_name
+  slice_done:
+    .return (slice :flat)
+.end
+
+
+=item post_regex(PAST::Regex node)
+
+Return the POST representation of the regex component given by C<node>.
+Normally this is handled by redispatching to a method corresponding to
+the node's "pasttype" and "backtrack" attributes.
+
+=cut
+
+.sub 'post_regex' :method :multi(_,['PAST';'Regex'])
+    .param pmc node
+    .param string cur          :optional
+    .param int have_cur        :opt_flag
+    # the node's pasttype names the compiler method that handles it
+    .local pmc post
+    $S1 = node.'pasttype'()
+    $P2 = find_method self, $S1
+    post = self.$P2(node)
+    unless have_cur goto finish
+    $S2 = post.'result'()
+    if $S2 == cur goto finish
+    post = self.'coerce'(post, cur)
+  finish:
+    .return (post)
+.end
+
+
+.sub 'post_regex' :method :multi(_, _)
+    .param pmc node
+    .param string cur          :optional
+    .param int have_cur        :opt_flag
+    $P1 = self.'as_post'(node)          # fallback variant: plain as_post
+    if have_cur goto want_cur
+    .return ($P1)
+  want_cur:
+    $P1 = self.'coerce'($P1, cur)       # only when the caller passed 'cur'
+    .return ($P1)
+.end
+
+
+=item regex_mark(prefix)
+
+Create a label starting with C<prefix> and POST instructions
+to push a backtrack mark for that label onto the current cursor.
+
+=cut
+
+.sub 'regex_mark' :method
+    .param string prefix
+    # returns (ops that register the mark, label the mark points at)
+    .local pmc curreg, posreg, markops, target
+    (curreg, posreg) = self.'!rxregs'('cur pos')
+    markops = self.'post_new'('Ops')
+    target = self.'post_new'('Label', 'name'=>prefix)
+    markops.'push_pirop'('set_addr', '$I10', target)
+    markops.'push_pirop'('callmethod', "'!pushmark'", curreg, posreg, '$I10')
+    .return (markops, target)
+.end
+
+
+=item alt(PAST::Regex node)
+
+Create POST to alternate among child regexes of C<node>, including
+backtracking.
+
+=cut
+
+.sub 'alt' :method :multi(_, ['PAST';'Regex'])
+    .param pmc node
+
+    .local pmc cur
+    cur = self.'!rxregs'('cur')
+
+    .local pmc ops, iter
+    ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+
+    iter = node.'iterator'()
+    unless iter goto done
+
+    # get post for first alternative; with a single child we are finished
+    .local pmc apast, apost, amark, alabel, endlabel
+    apast = shift iter
+    apost = self.'post_regex'(apast, cur)
+    ops.'push'(apost)
+    unless iter goto done
+
+    endlabel = self.'post_new'('Label', 'name'=>'rx_alt_end_')
+
+    # for each remaining alternative: end the previous alternative with a
+    # jump to endlabel, then emit the new alternative behind its own label.
+    # Its backtracking mark is unshifted to the front of the ops list.
+  iter_loop:
+    ops.'push_pirop'('goto', endlabel)
+    apast = shift iter
+    apost = self.'post_regex'(apast, cur)
+    (amark, alabel) = self.'regex_mark'('rx_alt_')
+    ops.'unshift'(amark)
+    ops.'push'(alabel)
+    ops.'push'(apost)
+    if iter goto iter_loop
+
+    # target of the 'goto' emitted after each non-final alternative
+    ops.'push'(endlabel)
+
+  done:
+    .return (ops)
+.end
+
+
+=item concat(PAST::Regex node)
+
+Handle a concatenation of regexes.
+
+=cut
+
+.sub 'concat' :method :multi(_, ['PAST';'Regex'])
+    .param pmc node
+    # append the POST of every child, in order, to a single Ops node
+    .local pmc cur, seq, children, child, childpost
+    (cur) = self.'!rxregs'('cur')
+    seq = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+    children = node.'iterator'()
+    unless children goto emit_done
+
+  next_child:
+    child = shift children
+    childpost = self.'post_regex'(child, cur)
+    seq.'push'(childpost)
+    if children goto next_child
+  emit_done:
+    # with no children the Ops node is returned empty
+
+    .return (seq)
+.end
+
+
+=item literal(PAST::Regex node)
+
+Generate POST for matching a literal string provided as the
+first child of this node.
+
+=cut
+
+.sub 'literal' :method :multi(_,['PAST';'Regex'])
+    .param pmc node
+
+    .local pmc cur, pos, len, tgt, fail, off
+    (cur, pos, len, tgt, fail, off) = self.'!rxregs'('cur pos len tgt fail off')
+    .local pmc ops, lpast, lpost
+    ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+
+    # literal to be matched is our first child
+    lpast = node[0]
+    lpost = self.'as_post'(lpast, 'rtype'=>'~')
+
+    $S0 = lpost.'result'()
+    ops.'push_pirop'('inline', $S0, 'inline'=>'  # rx literal %0')
+    ops.'push'(lpost)
+    # use the constant length of a String literal at compile time;
+    # otherwise emit a run-time 'length' into $I10
+    .local string litlen
+    $I0 = isa lpast, ['String']
+    if $I0 goto literal_string
+    litlen = '$I10'
+    ops.'push_pirop'('length', '$I10', lpost)
+    goto have_litlen
+  literal_string:
+    $S0 = lpast
+    $I0 = length $S0
+    litlen = $I0
+    if $I0 > 0 goto have_litlen
+    .return (ops)                       # empty literal: no checks emitted, pos unchanged
+  have_litlen:
+
+    # fail if there aren't enough characters left in string
+    ops.'push_pirop'('add', '$I11', pos, litlen)
+    ops.'push_pirop'('gt', '$I11', len, fail)
+
+    # compute string to be matched and fail if mismatch
+    ops.'push_pirop'('sub', '$I11', pos, off)
+    ops.'push_pirop'('substr', '$S10', tgt, '$I11', litlen)
+    ops.'push_pirop'('ne', '$S10', lpost, fail)
+
+    # increase position by literal length and move on
+    ops.'push_pirop'('add', pos, litlen)
+    .return (ops)
+.end
+
+
+.sub 'pass' :method :multi(_,['PAST';'Regex'])
+    .param pmc node
+    # 'node' is not inspected; the only op pushed is a 'yield' of the cursor
+    .local pmc curreg, yieldops
+    (curreg) = self.'!rxregs'('cur')
+    yieldops = self.'post_new'('Ops', 'result'=>curreg)
+    yieldops.'push_pirop'('yield', curreg)
+    .return (yieldops)
+.end
+    
+
+=back
+
+=head1 AUTHOR
+
+Patrick Michaud <pmichaud at pobox.com> is the author and maintainer.
+
+=head1 COPYRIGHT
+
+Copyright (C) 2009, Patrick R. Michaud.
+
+=cut
+
+# Local Variables:
+#   mode: pir
+#   fill-column: 100
+# End:
+# vim: expandtab shiftwidth=4 ft=pir:
+

Modified: branches/pct-rx/compilers/pct/src/PAST/Compiler.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/PAST/Compiler.pir	Sat Sep 26 12:57:12 2009	(r41495)
+++ branches/pct-rx/compilers/pct/src/PAST/Compiler.pir	Sat Sep 26 13:45:06 2009	(r41496)
@@ -179,6 +179,24 @@
     .tailcall self.'as_post'(past, 'rtype'=>'v')
 .end
 
+=item post_new(type, args :slurpy, options :slurpy :named)
+
+Helper function to quickly create POST nodes; looks up
+the POST protoobject corresponding to C<type>, then invokes
+the ".new" method on that protoobject passing C<args> and
+C<options>.
+
+=cut
+
+.sub 'post_new' :method
+    .param string type
+    .param pmc args            :slurpy
+    .param pmc options         :slurpy :named
+    .local pmc proto
+    proto = get_hll_global ['POST'], type       # looked up by name under ['POST']
+    .tailcall proto.'new'(args :flat, options :flat :named)
+.end
+
 =item escape(str)
 
 Return C<str> as a PIR constant string.

Modified: branches/pct-rx/compilers/pct/src/PAST/Node.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/PAST/Node.pir	Sat Sep 26 12:57:12 2009	(r41495)
+++ branches/pct-rx/compilers/pct/src/PAST/Node.pir	Sat Sep 26 13:45:06 2009	(r41496)
@@ -26,6 +26,7 @@
     p6meta.'new_class'('PAST::Block', 'parent'=>base)
     p6meta.'new_class'('PAST::Control', 'parent'=>base)
     p6meta.'new_class'('PAST::VarList', 'parent'=>base)
+    p6meta.'new_class'('PAST::Regex', 'parent'=>base)
 
     .return ()
 .end
@@ -740,6 +741,15 @@
 .end
 
 
+.namespace ['PAST';'Regex']
+
+.sub 'pasttype' :method                # accessor for the node's 'pasttype' attribute
+    .param pmc value           :optional
+    .param int has_value       :opt_flag   # true only when 'value' was supplied
+    .tailcall self.'attr'('pasttype', value, has_value)   # delegates to 'attr'; presumably get-or-set -- confirm there
+.end
+
+
 =back
 
 =head1 AUTHOR

Modified: branches/pct-rx/compilers/pct/src/POST/Compiler.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/POST/Compiler.pir	Sat Sep 26 12:57:12 2009	(r41495)
+++ branches/pct-rx/compilers/pct/src/POST/Compiler.pir	Sat Sep 26 13:45:06 2009	(r41496)
@@ -136,6 +136,7 @@
     if pirop == 'callmethod' goto pirop_callmethod
     if pirop == 'return' goto pirop_return
     if pirop == 'tailcall' goto pirop_tailcall
+    if pirop == 'yield' goto pirop_yield
     if pirop == 'inline' goto pirop_inline
 
   pirop_opcode:
@@ -163,6 +164,10 @@
     fmt = '    .tailcall %n(%,)'
     goto pirop_emit
 
+  pirop_yield:
+    fmt = '    .yield (%,)'
+    goto pirop_emit
+
   pirop_inline:
     fmt = node.'inline'()
     result = node.'result'()

Added: branches/pct-rx/compilers/pct/src/Regex.pir
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/pct-rx/compilers/pct/src/Regex.pir	Sat Sep 26 13:45:06 2009	(r41496)
@@ -0,0 +1,30 @@
+# $Id$
+
+=head1 NAME
+
+Regex - Parrot Regex library
+
+=head1 DESCRIPTION
+
+This file brings together the various Regex modules needed for
+Regex.pbc .
+
+=cut
+
+.include 'src/Regex/Cursor.pir'
+
+=head1 AUTHOR
+
+Patrick Michaud <pmichaud at pobox.com> is the author and maintainer.
+
+=head1 COPYRIGHT
+
+Copyright (C) 2009, Patrick R. Michaud
+
+=cut
+
+# Local Variables:
+#   mode: pir
+#   fill-column: 100
+# End:
+# vim: expandtab shiftwidth=4 ft=pir:

Added: branches/pct-rx/compilers/pct/src/Regex/Cursor.pir
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/pct-rx/compilers/pct/src/Regex/Cursor.pir	Sat Sep 26 13:45:06 2009	(r41496)
@@ -0,0 +1,42 @@
+# Copyright (C) 2009, Patrick R. Michaud
+# $Id$
+
+=head1 NAME
+
+RX::Cursor - RX Cursor nodes
+
+=head1 DESCRIPTION
+
+This file implements the RX::Cursor class, used for managing regular
+expression control flow.  RX::Cursor is also a base class for
+grammars.
+
+=cut
+
+.namespace ['RX';'Cursor']
+
+.sub '' :anon :load :init
+    load_bytecode 'P6object.pbc'
+    .local pmc meta
+    meta = new 'P6metaclass'
+    $P1 = meta.'new_class'('RX::Cursor', 'attr'=>'$!target $!from $!pos @!pstack')   # result is not used
+    .return ()
+.end
+
+=head2 Methods
+
+=over 4
+
+=back
+
+=head1 AUTHOR
+
+Patrick Michaud <pmichaud at pobox.com> is the author and maintainer.
+
+=cut
+
+# Local Variables:
+#   mode: pir
+#   fill-column: 100
+# End:
+# vim: expandtab shiftwidth=4 ft=pir:

Modified: branches/pct-rx/config/gen/makefiles/pct.in
==============================================================================
--- branches/pct-rx/config/gen/makefiles/pct.in	Sat Sep 26 12:57:12 2009	(r41495)
+++ branches/pct-rx/config/gen/makefiles/pct.in	Sat Sep 26 13:45:06 2009	(r41496)
@@ -19,6 +19,7 @@
   $(PARROT_LIBRARY)/PCT/PAST.pbc \
   $(PARROT_LIBRARY)/PCT/Grammar.pbc \
   $(PARROT_LIBRARY)/PCT/HLLCompiler.pbc \
+  $(PARROT_LIBRARY)/PCT/Regex.pbc \
   $(PARROT_LIBRARY)/PCT/Dumper.pbc
 
 PCT_SOURCES := \
@@ -28,9 +29,11 @@
   src/PCT/HLLCompiler.pir \
   src/PCT/Node.pir \
   src/PAST/Compiler.pir \
+  src/PAST/Compiler-Regex.pir \
   src/PAST/Node.pir \
   src/POST/Compiler.pir \
-  src/POST/Node.pir
+  src/POST/Node.pir \
+  src/Regex/Cursor.pir
 
 # the default target
 all: $(PARROT_LIBRARY)/PCT.pbc
@@ -41,6 +44,7 @@
 	$(PARROT) -o $(PARROT_LIBRARY)/PCT/Grammar.pbc --output-pbc src/PCT/Grammar.pir
 	$(PARROT) -o $(PARROT_LIBRARY)/PCT/HLLCompiler.pbc --output-pbc src/PCT/HLLCompiler.pir
 	$(PARROT) -o $(PARROT_LIBRARY)/PCT/Dumper.pbc --output-pbc src/PCT/Dumper.pir
+	$(PARROT) -o $(PARROT_LIBRARY)/PCT/Regex.pbc --output-pbc src/Regex.pir
 
 # This is a listing of all targets, that are meant to be called by users
 help:


More information about the parrot-commits mailing list