[svn:parrot] r41496 - in branches/pct-rx: . compilers/pct/src compilers/pct/src/PAST compilers/pct/src/POST compilers/pct/src/Regex config/gen/makefiles
pmichaud at svn.parrot.org
pmichaud at svn.parrot.org
Sat Sep 26 13:45:13 UTC 2009
Author: pmichaud
Date: Sat Sep 26 13:45:06 2009
New Revision: 41496
URL: https://trac.parrot.org/parrot/changeset/41496
Log:
[pct-rx] First pieces of code for the PCT-based regex implementation.
Added:
branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir
branches/pct-rx/compilers/pct/src/Regex/
branches/pct-rx/compilers/pct/src/Regex.pir
branches/pct-rx/compilers/pct/src/Regex/Cursor.pir
Modified:
branches/pct-rx/MANIFEST
branches/pct-rx/compilers/pct/src/PAST.pir
branches/pct-rx/compilers/pct/src/PAST/Compiler.pir
branches/pct-rx/compilers/pct/src/PAST/Node.pir
branches/pct-rx/compilers/pct/src/POST/Compiler.pir
branches/pct-rx/config/gen/makefiles/pct.in
Modified: branches/pct-rx/MANIFEST
==============================================================================
--- branches/pct-rx/MANIFEST Sat Sep 26 12:57:12 2009 (r41495)
+++ branches/pct-rx/MANIFEST Sat Sep 26 13:45:06 2009 (r41496)
@@ -1,7 +1,7 @@
# ex: set ro:
# $Id$
#
-# generated by tools/dev/mk_manifest_and_skip.pl Sat Sep 19 08:24:23 2009 UT
+# generated by tools/dev/mk_manifest_and_skip.pl Sat Sep 26 13:40:35 2009 UT
#
# See below for documentation on the format of this file.
#
@@ -122,6 +122,7 @@
compilers/pct/PCT.pir [pct]
compilers/pct/README.pod []doc
compilers/pct/src/PAST.pir [pct]
+compilers/pct/src/PAST/Compiler-Regex.pir [pct]
compilers/pct/src/PAST/Compiler.pir [pct]
compilers/pct/src/PAST/Node.pir [pct]
compilers/pct/src/PCT/Dumper.pir [pct]
@@ -130,6 +131,8 @@
compilers/pct/src/PCT/Node.pir [pct]
compilers/pct/src/POST/Compiler.pir [pct]
compilers/pct/src/POST/Node.pir [pct]
+compilers/pct/src/Regex.pir [pct]
+compilers/pct/src/Regex/Cursor.pir [pct]
compilers/pge/P6Rule.grammar [pge]
compilers/pge/PGE.pir [pge]
compilers/pge/PGE/Exp.pir [pge]
Modified: branches/pct-rx/compilers/pct/src/PAST.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/PAST.pir Sat Sep 26 12:57:12 2009 (r41495)
+++ branches/pct-rx/compilers/pct/src/PAST.pir Sat Sep 26 13:45:06 2009 (r41496)
@@ -17,6 +17,8 @@
.include 'src/PAST/Compiler.pir'
+.include 'src/PAST/Compiler-Regex.pir'
+
.include 'src/POST/Node.pir'
.include 'src/POST/Compiler.pir'
Added: branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ branches/pct-rx/compilers/pct/src/PAST/Compiler-Regex.pir Sat Sep 26 13:45:06 2009 (r41496)
@@ -0,0 +1,315 @@
+# $Id$
+
+=head1 NAME
+
+PAST::Compiler-Regex - Compiler for PAST::Regex nodes
+
+=head1 DESCRIPTION
+
+PAST::Compiler-Regex implements the transformations to convert
+PAST::Regex nodes into POST. It's still a part of PAST::Compiler;
+we've separated out the regex-specific transformations here for
+better code management and debugging.
+
+=head2 Compiler methods
+
+=head3 C<PAST::Regex>
+
+=over 4
+
+=item as_post(PAST::Regex node)
+
+Return the POST representation of the regex AST rooted by C<node>.
+
+=cut
+
+.sub 'as_post' :method :multi(_, ['PAST';'Regex'])
+    .param pmc node
+    .param pmc options :slurpy :named
+
+    .local pmc ops, reghash, faillabel, spec, spec_it, body
+    .local string prefix, rname, rtype, regname, decl, cur, pos, popres
+
+    # container for everything emitted for this regex
+    ops = self.'post_new'('Ops', 'node'=>node)
+
+    # register table; '!rxregs' looks it up as the dynamic lexical $*REG
+    reghash = new ['Hash']
+    .lex '$*REG', reghash
+
+    # for each (name, type) pair: record the uniquely prefixed register
+    # name in the table and push a '.local <type>' pirop for it
+    prefix = self.'unique'('rx')
+    prefix = concat prefix, '_'
+    spec = split ' ', 'tgt string pos int off int len int cur pmc'
+    spec_it = iter spec
+  declare_next:
+    unless spec_it goto declare_done
+    rname = shift spec_it
+    rtype = shift spec_it
+    regname = concat prefix, rname
+    reghash[rname] = regname
+    decl = concat '.local ', rtype
+    ops.'push_pirop'(decl, regname)
+    goto declare_next
+  declare_done:
+
+    # the failure label is stored in the same table under 'fail'
+    regname = concat prefix, 'fail'
+    faillabel = self.'post_new'('Label', 'result'=>regname)
+    reghash['fail'] = faillabel
+
+    # regex body first, then the fail label followed by code that calls
+    # '!popmark' on the cursor and jumps to the address it hands back in $I10
+    (cur, pos) = self.'!rxregs'('cur pos')
+    body = self.'post_regex'(node)
+    ops.'push'(body)
+    ops.'push'(faillabel)
+    popres = concat '(', cur
+    popres = concat popres, ','
+    popres = concat popres, pos
+    popres = concat popres, ',$I10)'
+    ops.'push_pirop'('callmethod', "'!popmark'", cur, 'result'=>popres)
+    ops.'push_pirop'('jump', '$I10')
+    .return (ops)
+.end
+
+=item !rxregs(keystr)
+
+Helper function -- looks up the current regex register table
+in the dynamic scope and returns a slice based on the keys
+given in C<keystr>.
+
+=cut
+
+.sub '!rxregs' :method
+    .param string keystr
+
+    .local pmc names, table, found, entry
+    .local string name
+    names = split ' ', keystr
+    table = find_dynamic_lex '$*REG'
+    found = new ['ResizablePMCArray']
+
+    # consume the key list front to back, collecting the table entry for each
+  next_name:
+    unless names goto all_found
+    name = shift names
+    entry = table[name]
+    push found, entry
+    goto next_name
+
+  all_found:
+    # flattened, so callers can bind the entries as separate return values
+    .return (found :flat)
+.end
+
+
+=item post_regex(PAST::Regex node)
+
+Return the POST representation of the regex component given by C<node>.
+Normally this is handled by redispatching to a method corresponding to
+the node's "pasttype" and "backtrack" attributes.
+
+=cut
+
+.sub 'post_regex' :method :multi(_,['PAST';'Regex'])
+    .param pmc node
+    .param string cur :optional
+    .param int have_cur :opt_flag
+
+    # look up and invoke the compiler method named by the node's pasttype
+    .local pmc post
+    $S0 = node.'pasttype'()
+    $P0 = find_method self, $S0
+    post = self.$P0(node)
+
+    # without a requested register the result is returned untouched
+    if have_cur goto check_result
+    .return (post)
+
+  check_result:
+    # coerce only when the node's result differs from the requested register
+    $S1 = post.'result'()
+    if $S1 == cur goto same_result
+    post = self.'coerce'(post, cur)
+  same_result:
+    .return (post)
+.end
+
+
+.sub 'post_regex' :method :multi(_, _)
+    .param pmc node
+    .param string cur :optional
+    .param int have_cur :opt_flag
+
+    # generic variant: compile the node through the ordinary as_post dispatch
+    .local pmc post
+    post = self.'as_post'(node)
+    if have_cur goto coerce_result
+    .return (post)
+
+  coerce_result:
+    # a requested register always triggers a coerce here (no equality check)
+    post = self.'coerce'(post, cur)
+    .return (post)
+.end
+
+
+=item regex_mark(prefix)
+
+Create a label starting with C<prefix> and POST instructions
+to set a backtrack to the label in the current cursor.
+
+=cut
+
+.sub 'regex_mark' :method
+    .param string prefix
+
+    .local pmc curreg, posreg, markops, target
+    (curreg, posreg) = self.'!rxregs'('cur pos')
+    markops = self.'post_new'('Ops')
+    target = self.'post_new'('Label', 'name'=>prefix)
+
+    # take the label's address into $I10, then pass it with the current
+    # position to the cursor's '!pushmark' method
+    markops.'push_pirop'('set_addr', '$I10', target)
+    markops.'push_pirop'('callmethod', "'!pushmark'", curreg, posreg, '$I10')
+
+    # the caller decides where to place the label itself
+    .return (markops, target)
+.end
+
+
+=item alt(PAST::Regex node)
+
+Create POST to alternate among child regexes of C<node>, including
+backtracking.
+
+=cut
+
+.sub 'alt' :method :multi(_, ['PAST';'Regex'])
+    .param pmc node
+
+    .local pmc cur
+    cur = self.'!rxregs'('cur')
+
+    .local pmc ops, iter
+    ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+
+    # no children: return the empty ops node
+    iter = node.'iterator'()
+    unless iter goto done
+
+    # the first alternative needs neither a label nor a backtrack mark
+    .local pmc apast, apost, amark, alabel, endlabel
+    apast = shift iter
+    apost = self.'post_regex'(apast, cur)
+    ops.'push'(apost)
+    unless iter goto done
+
+    endlabel = self.'post_new'('Label', 'name'=>'rx_alt_end_')
+
+    # For each remaining alternative:
+    #   * end the previous alternative with a goto to the end label,
+    #   * prepend the mark-setting code to the very front of ops, so
+    #     every mark is set before the first alternative runs,
+    #   * append the alternative's own label followed by its code.
+    # Marks for later alternatives end up in front of marks for earlier
+    # ones in the emitted code.
+  iter_loop:
+    ops.'push_pirop'('goto', endlabel)
+    apast = shift iter
+    apost = self.'post_regex'(apast, cur)
+    (amark, alabel) = self.'regex_mark'('rx_alt_')
+    ops.'unshift'(amark)
+    ops.'push'(alabel)
+    ops.'push'(apost)
+    if iter goto iter_loop
+
+    ops.'push'(endlabel)
+
+  done:
+    .return (ops)
+.end
+
+
+=item concat(PAST::Regex node)
+
+Handle a concatenation of regexes.
+
+=cut
+
+.sub 'concat' :method :multi(_, ['PAST';'Regex'])
+    .param pmc node
+
+    .local pmc cur, ops, children, child, piece
+    (cur) = self.'!rxregs'('cur')
+    ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+
+    # append the POST of each child, in order, to a single ops node
+    children = node.'iterator'()
+    unless children goto finished
+  next_child:
+    child = shift children
+    piece = self.'post_regex'(child, cur)
+    ops.'push'(piece)
+    if children goto next_child
+
+  finished:
+    .return (ops)
+.end
+
+
+=item literal(PAST::Regex node)
+
+Generate POST for matching a literal string provided as the
+first child of this node.
+
+=cut
+
+.sub 'literal' :method :multi(_,['PAST';'Regex'])
+    .param pmc node
+
+    .local pmc cur, pos, len, tgt, fail, off
+    (cur, pos, len, tgt, fail, off) = self.'!rxregs'('cur pos len tgt fail off')
+    .local pmc ops, lpast, lpost
+    ops = self.'post_new'('Ops', 'node'=>node, 'result'=>cur)
+
+    # literal to be matched is our first child
+    lpast = node[0]
+    lpost = self.'as_post'(lpast, 'rtype'=>'~')
+
+    # emit a comment naming the literal, then the code that produces it
+    $S0 = lpost.'result'()
+    ops.'push_pirop'('inline', $S0, 'inline'=>' # rx literal %0')
+    ops.'push'(lpost)
+
+    # compute constant literal length at compile time; otherwise emit
+    # code that measures the literal into $I10 at run time
+    .local string litlen
+    $I0 = isa lpast, ['String']
+    if $I0 goto literal_string
+    litlen = '$I10'
+    ops.'push_pirop'('length', '$I10', lpost)
+    goto have_litlen
+  literal_string:
+    $S0 = lpast
+    $I0 = length $S0
+    litlen = $I0
+    if $I0 > 0 goto have_litlen
+    # An empty constant literal needs no length check, comparison or
+    # position update.  Return the ops built so far; the register this
+    # path used to return was never assigned.
+    .return (ops)
+  have_litlen:
+
+    # fail if there aren't enough characters left in string
+    ops.'push_pirop'('add', '$I11', pos, litlen)
+    ops.'push_pirop'('gt', '$I11', len, fail)
+
+    # compute string to be matched and fail if mismatch
+    ops.'push_pirop'('sub', '$I11', pos, off)
+    ops.'push_pirop'('substr', '$S10', tgt, '$I11', litlen)
+    ops.'push_pirop'('ne', '$S10', lpost, fail)
+
+    # increase position by literal length and move on
+    ops.'push_pirop'('add', pos, litlen)
+    .return (ops)
+.end
+
+
+.sub 'pass' :method :multi(_,['PAST';'Regex'])
+    .param pmc node
+
+    # build an ops node whose result is the cursor register and whose
+    # only instruction is a 'yield' pirop on that register
+    .local pmc curreg, post
+    (curreg) = self.'!rxregs'('cur')
+    post = self.'post_new'('Ops', 'result'=>curreg)
+    post.'push_pirop'('yield', curreg)
+    .return (post)
+.end
+
+
+=back
+
+=head1 AUTHOR
+
+Patrick Michaud <pmichaud at pobox.com> is the author and maintainer.
+
+=head1 COPYRIGHT
+
+Copyright (C) 2009, Patrick R. Michaud.
+
+=cut
+
+# Local Variables:
+# mode: pir
+# fill-column: 100
+# End:
+# vim: expandtab shiftwidth=4 ft=pir:
+
Modified: branches/pct-rx/compilers/pct/src/PAST/Compiler.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/PAST/Compiler.pir Sat Sep 26 12:57:12 2009 (r41495)
+++ branches/pct-rx/compilers/pct/src/PAST/Compiler.pir Sat Sep 26 13:45:06 2009 (r41496)
@@ -179,6 +179,24 @@
.tailcall self.'as_post'(past, 'rtype'=>'v')
.end
+=item post_new(type, args :slurpy, options :slurpy :named)
+
+Helper function to quickly create POST nodes; looks up
+the POST protoobject corresponding to C<type>, then invokes
+the ".new" method on that protoobject passing C<args> and
+C<options>.
+
+=cut
+
+.sub 'post_new' :method
+    .param string type
+    .param pmc args :slurpy
+    .param pmc options :slurpy :named
+
+    # fetch the symbol named by 'type' from the POST namespace and
+    # forward every positional and named argument to its 'new' method
+    .local pmc proto
+    proto = get_hll_global ['POST'], type
+    .tailcall proto.'new'(args :flat, options :flat :named)
+.end
+
=item escape(str)
Return C<str> as a PIR constant string.
Modified: branches/pct-rx/compilers/pct/src/PAST/Node.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/PAST/Node.pir Sat Sep 26 12:57:12 2009 (r41495)
+++ branches/pct-rx/compilers/pct/src/PAST/Node.pir Sat Sep 26 13:45:06 2009 (r41496)
@@ -26,6 +26,7 @@
p6meta.'new_class'('PAST::Block', 'parent'=>base)
p6meta.'new_class'('PAST::Control', 'parent'=>base)
p6meta.'new_class'('PAST::VarList', 'parent'=>base)
+ p6meta.'new_class'('PAST::Regex', 'parent'=>base)
.return ()
.end
@@ -740,6 +741,15 @@
.end
+.namespace ['PAST';'Regex']
+
+# Accessor for the node's 'pasttype' attribute.  Forwards the optional
+# value and its presence flag to self.'attr' and returns whatever that
+# call returns.
+# NOTE(review): 'attr' is defined outside this hunk; presumably it stores
+# the value when has_value is true and returns the current value -- confirm.
+.sub 'pasttype' :method
+ .param pmc value :optional
+ .param int has_value :opt_flag
+ .tailcall self.'attr'('pasttype', value, has_value)
+.end
+
+
=back
=head1 AUTHOR
Modified: branches/pct-rx/compilers/pct/src/POST/Compiler.pir
==============================================================================
--- branches/pct-rx/compilers/pct/src/POST/Compiler.pir Sat Sep 26 12:57:12 2009 (r41495)
+++ branches/pct-rx/compilers/pct/src/POST/Compiler.pir Sat Sep 26 13:45:06 2009 (r41496)
@@ -136,6 +136,7 @@
if pirop == 'callmethod' goto pirop_callmethod
if pirop == 'return' goto pirop_return
if pirop == 'tailcall' goto pirop_tailcall
+ if pirop == 'yield' goto pirop_yield
if pirop == 'inline' goto pirop_inline
pirop_opcode:
@@ -163,6 +164,10 @@
fmt = ' .tailcall %n(%,)'
goto pirop_emit
+ pirop_yield:
+ fmt = ' .yield (%,)'
+ goto pirop_emit
+
pirop_inline:
fmt = node.'inline'()
result = node.'result'()
Added: branches/pct-rx/compilers/pct/src/Regex.pir
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ branches/pct-rx/compilers/pct/src/Regex.pir Sat Sep 26 13:45:06 2009 (r41496)
@@ -0,0 +1,30 @@
+# $Id$
+
+=head1 NAME
+
+Regex - Parrot Regex library
+
+=head1 DESCRIPTION
+
+This file brings together the various Regex modules needed for
+Regex.pbc .
+
+=cut
+
+.include 'src/Regex/Cursor.pir'
+
+=head1 AUTHOR
+
+Patrick Michaud <pmichaud at pobox.com> is the author and maintainer.
+
+=head1 COPYRIGHT
+
+Copyright (C) 2009, Patrick R. Michaud
+
+=cut
+
+# Local Variables:
+# mode: pir
+# fill-column: 100
+# End:
+# vim: expandtab shiftwidth=4 ft=pir:
Added: branches/pct-rx/compilers/pct/src/Regex/Cursor.pir
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ branches/pct-rx/compilers/pct/src/Regex/Cursor.pir Sat Sep 26 13:45:06 2009 (r41496)
@@ -0,0 +1,42 @@
+# Copyright (C) 2009, Patrick R. Michaud
+# $Id$
+
+=head1 NAME
+
+RX::Cursor - RX Cursor nodes
+
+=head1 DESCRIPTION
+
+This file implements the RX::Cursor class, used for managing regular
+expression control flow. RX::Cursor is also a base class for
+grammars.
+
+=cut
+
+.namespace ['RX';'Cursor']
+
+.sub '' :anon :load :init
+    # runs at load and init time: make sure P6object is available, then
+    # register the RX::Cursor class with its four attributes
+    load_bytecode 'P6object.pbc'
+
+    .local pmc meta
+    meta = new 'P6metaclass'
+    meta.'new_class'('RX::Cursor', 'attr'=>'$!target $!from $!pos @!pstack')
+    .return ()
+.end
+
+=head2 Methods
+
+=over 4
+
+=back
+
+=head1 AUTHOR
+
+Patrick Michaud <pmichaud at pobox.com> is the author and maintainer.
+
+=cut
+
+# Local Variables:
+# mode: pir
+# fill-column: 100
+# End:
+# vim: expandtab shiftwidth=4 ft=pir:
Modified: branches/pct-rx/config/gen/makefiles/pct.in
==============================================================================
--- branches/pct-rx/config/gen/makefiles/pct.in Sat Sep 26 12:57:12 2009 (r41495)
+++ branches/pct-rx/config/gen/makefiles/pct.in Sat Sep 26 13:45:06 2009 (r41496)
@@ -19,6 +19,7 @@
$(PARROT_LIBRARY)/PCT/PAST.pbc \
$(PARROT_LIBRARY)/PCT/Grammar.pbc \
$(PARROT_LIBRARY)/PCT/HLLCompiler.pbc \
+ $(PARROT_LIBRARY)/PCT/Regex.pbc \
$(PARROT_LIBRARY)/PCT/Dumper.pbc
PCT_SOURCES := \
@@ -28,9 +29,11 @@
src/PCT/HLLCompiler.pir \
src/PCT/Node.pir \
src/PAST/Compiler.pir \
+ src/PAST/Compiler-Regex.pir \
src/PAST/Node.pir \
src/POST/Compiler.pir \
- src/POST/Node.pir
+ src/POST/Node.pir \
+ src/Regex/Cursor.pir
# the default target
all: $(PARROT_LIBRARY)/PCT.pbc
@@ -41,6 +44,7 @@
$(PARROT) -o $(PARROT_LIBRARY)/PCT/Grammar.pbc --output-pbc src/PCT/Grammar.pir
$(PARROT) -o $(PARROT_LIBRARY)/PCT/HLLCompiler.pbc --output-pbc src/PCT/HLLCompiler.pir
$(PARROT) -o $(PARROT_LIBRARY)/PCT/Dumper.pbc --output-pbc src/PCT/Dumper.pir
+ $(PARROT) -o $(PARROT_LIBRARY)/PCT/Regex.pbc --output-pbc src/Regex.pir
# This is a listing of all targets, that are meant to be called by users
help:
More information about the parrot-commits
mailing list