[svn:parrot] r40477 - in branches/auto_attrs: . compilers/pge compilers/pge/PGE config/auto/sizes docs docs/book/draft docs/book/module docs/book/pct docs/book/pir docs/dev docs/pdds examples/config examples/config/file examples/embed examples/languages/abc examples/languages/squaak examples/pge include/parrot lib/Parrot/Docs/Section lib/Parrot/Test ports/cpan ports/cygwin ports/debian ports/fedora ports/mandriva ports/suse runtime/parrot/languages runtime/parrot/library/Math src src/call src/gc src/interp src/runcore t/codingstd t/compilers/tge t/configure t/oo t/src t/tools tools/dev tools/util xconf

NotFound at svn.parrot.org NotFound at svn.parrot.org
Mon Aug 10 11:28:08 UTC 2009


Author: NotFound
Date: Mon Aug 10 11:28:05 2009
New Revision: 40477
URL: https://trac.parrot.org/parrot/changeset/40477

Log:
merge from trunk r40476

Added:
   branches/auto_attrs/docs/book/module/
      - copied from r40476, trunk/docs/book/module/
   branches/auto_attrs/docs/book/pct/
      - copied from r40476, trunk/docs/book/pct/
   branches/auto_attrs/examples/config/
      - copied from r40476, trunk/examples/config/
Replaced:
   branches/auto_attrs/docs/book/pct/ch01_introduction.pod
      - copied unchanged from r40476, trunk/docs/book/pct/ch01_introduction.pod
   branches/auto_attrs/docs/book/pct/ch02_getting_started.pod
      - copied unchanged from r40476, trunk/docs/book/pct/ch02_getting_started.pod
   branches/auto_attrs/docs/book/pct/ch03_compiler_tools.pod
      - copied unchanged from r40476, trunk/docs/book/pct/ch03_compiler_tools.pod
   branches/auto_attrs/docs/book/pct/ch04_pge.pod
      - copied unchanged from r40476, trunk/docs/book/pct/ch04_pge.pod
   branches/auto_attrs/docs/book/pct/ch05_nqp.pod
      - copied unchanged from r40476, trunk/docs/book/pct/ch05_nqp.pod
   branches/auto_attrs/examples/config/file/
      - copied from r40476, trunk/examples/config/file/
   branches/auto_attrs/examples/config/file/configcompiler
      - copied unchanged from r40476, trunk/examples/config/file/configcompiler
   branches/auto_attrs/examples/config/file/configwithfatalstep
      - copied unchanged from r40476, trunk/examples/config/file/configwithfatalstep
Deleted:
   branches/auto_attrs/docs/book/draft/ch04_compiler_tools.pod
   branches/auto_attrs/docs/book/draft/ch05_pge.pod
   branches/auto_attrs/docs/book/draft/ch06_nqp.pod
   branches/auto_attrs/xconf/
Modified:
   branches/auto_attrs/   (props changed)
   branches/auto_attrs/Configure.pl
   branches/auto_attrs/MANIFEST
   branches/auto_attrs/compilers/pge/PGE/OPTable.pir
   branches/auto_attrs/compilers/pge/STATUS
   branches/auto_attrs/config/auto/sizes/intval_maxmin_c.in   (props changed)
   branches/auto_attrs/docs/book/draft/README   (props changed)
   branches/auto_attrs/docs/book/draft/appa_glossary.pod   (props changed)
   branches/auto_attrs/docs/book/draft/appb_patch_submission.pod   (props changed)
   branches/auto_attrs/docs/book/draft/appc_command_line_options.pod   (props changed)
   branches/auto_attrs/docs/book/draft/appd_build_options.pod   (props changed)
   branches/auto_attrs/docs/book/draft/appe_source_code.pod   (props changed)
   branches/auto_attrs/docs/book/draft/ch01_introduction.pod   (props changed)
   branches/auto_attrs/docs/book/draft/ch02_getting_started.pod   (props changed)
   branches/auto_attrs/docs/book/draft/ch07_dynpmcs.pod   (props changed)
   branches/auto_attrs/docs/book/draft/ch08_dynops.pod   (props changed)
   branches/auto_attrs/docs/book/draft/ch10_opcode_reference.pod   (props changed)
   branches/auto_attrs/docs/book/draft/ch11_directive_reference.pod   (props changed)
   branches/auto_attrs/docs/book/draft/ch12_operator_reference.pod   (props changed)
   branches/auto_attrs/docs/book/draft/chXX_hlls.pod   (props changed)
   branches/auto_attrs/docs/book/draft/chXX_library.pod   (props changed)
   branches/auto_attrs/docs/book/draft/chXX_testing_and_debugging.pod   (props changed)
   branches/auto_attrs/docs/book/pir/ch04_variables.pod
   branches/auto_attrs/docs/book/pir/ch06_subroutines.pod
   branches/auto_attrs/docs/debugger.pod
   branches/auto_attrs/docs/dev/c_functions.pod   (props changed)
   branches/auto_attrs/docs/pdds/pdd30_install.pod   (props changed)
   branches/auto_attrs/examples/embed/cotorra.c   (props changed)
   branches/auto_attrs/examples/languages/abc/   (props changed)
   branches/auto_attrs/examples/languages/squaak/   (props changed)
   branches/auto_attrs/examples/pge/demo.pir   (props changed)
   branches/auto_attrs/include/parrot/call.h   (props changed)
   branches/auto_attrs/include/parrot/gc_api.h   (props changed)
   branches/auto_attrs/include/parrot/runcore_api.h   (props changed)
   branches/auto_attrs/include/parrot/runcore_trace.h   (props changed)
   branches/auto_attrs/lib/Parrot/Docs/Section/Parrot.pm
   branches/auto_attrs/lib/Parrot/Test/Pod.pm
   branches/auto_attrs/ports/cpan/pause_guide.pod   (props changed)
   branches/auto_attrs/ports/cygwin/parrot-1.0.0-1.cygport   (props changed)
   branches/auto_attrs/ports/debian/libparrot-dev.install.in   (props changed)
   branches/auto_attrs/ports/debian/libparrot.install.in   (props changed)
   branches/auto_attrs/ports/debian/parrot-doc.install.in   (props changed)
   branches/auto_attrs/ports/debian/parrot.install.in   (props changed)
   branches/auto_attrs/ports/fedora/parrot.spec.fedora   (props changed)
   branches/auto_attrs/ports/mandriva/parrot.spec.mandriva   (props changed)
   branches/auto_attrs/ports/suse/parrot.spec.suse   (props changed)
   branches/auto_attrs/runtime/parrot/languages/   (props changed)
   branches/auto_attrs/runtime/parrot/library/Math/Rand.pir   (props changed)
   branches/auto_attrs/src/call/ops.c   (props changed)
   branches/auto_attrs/src/call/pcc.c   (props changed)
   branches/auto_attrs/src/debug.c
   branches/auto_attrs/src/gc/alloc_memory.c   (props changed)
   branches/auto_attrs/src/gc/alloc_register.c   (props changed)
   branches/auto_attrs/src/gc/alloc_resources.c   (props changed)
   branches/auto_attrs/src/gc/api.c   (props changed)
   branches/auto_attrs/src/gc/generational_ms.c   (props changed)
   branches/auto_attrs/src/gc/incremental_ms.c   (props changed)
   branches/auto_attrs/src/gc/malloc.c   (props changed)
   branches/auto_attrs/src/gc/malloc_trace.c   (props changed)
   branches/auto_attrs/src/gc/mark_sweep.c   (props changed)
   branches/auto_attrs/src/gc/system.c   (props changed)
   branches/auto_attrs/src/interp/inter_cb.c   (props changed)
   branches/auto_attrs/src/interp/inter_create.c   (props changed)
   branches/auto_attrs/src/interp/inter_misc.c   (props changed)
   branches/auto_attrs/src/runcore/cores.c   (props changed)
   branches/auto_attrs/src/runcore/main.c   (props changed)
   branches/auto_attrs/src/runcore/trace.c   (props changed)
   branches/auto_attrs/t/codingstd/svn_id.t
   branches/auto_attrs/t/compilers/tge/NoneGrammar.tg   (props changed)
   branches/auto_attrs/t/configure/008-file_based_configuration.t
   branches/auto_attrs/t/oo/root_new.t   (props changed)
   branches/auto_attrs/t/src/embed.t   (props changed)
   branches/auto_attrs/t/tools/parrot_debugger.t
   branches/auto_attrs/tools/dev/fetch_languages.pl   (props changed)
   branches/auto_attrs/tools/dev/mk_gitignore.pl   (props changed)
   branches/auto_attrs/tools/dev/mk_manifest_and_skip.pl
   branches/auto_attrs/tools/util/perlcritic-cage.conf   (props changed)

Modified: branches/auto_attrs/Configure.pl
==============================================================================
--- branches/auto_attrs/Configure.pl	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/Configure.pl	Mon Aug 10 11:28:05 2009	(r40477)
@@ -652,11 +652,11 @@
 
 You may see how this works in practice by calling:
 
-    perl Configure.pl --file=xconf/samples/yourfoobar
+    perl Configure.pl --file=examples/config/file/configcompiler
 
 or
 
-    perl Configure.pl --file=xconf/samples/testfoobar
+    perl Configure.pl --file=examples/config/file/configwithfatalstep
 
 =head1 SEE ALSO
 

Modified: branches/auto_attrs/MANIFEST
==============================================================================
--- branches/auto_attrs/MANIFEST	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/MANIFEST	Mon Aug 10 11:28:05 2009	(r40477)
@@ -1,7 +1,7 @@
 # ex: set ro:
 # $Id$
 #
-# generated by tools/dev/mk_manifest_and_skip.pl Tue Aug  4 20:40:56 2009 UT
+# generated by tools/dev/mk_manifest_and_skip.pl Sun Aug  9 22:34:52 2009 UT
 #
 # See below for documentation on the format of this file.
 #
@@ -418,9 +418,6 @@
 docs/book/draft/appe_source_code.pod                        []
 docs/book/draft/ch01_introduction.pod                       []
 docs/book/draft/ch02_getting_started.pod                    []
-docs/book/draft/ch04_compiler_tools.pod                     []
-docs/book/draft/ch05_pge.pod                                []
-docs/book/draft/ch06_nqp.pod                                []
 docs/book/draft/ch07_dynpmcs.pod                            []
 docs/book/draft/ch08_dynops.pod                             []
 docs/book/draft/ch10_opcode_reference.pod                   []
@@ -429,6 +426,11 @@
 docs/book/draft/chXX_hlls.pod                               []
 docs/book/draft/chXX_library.pod                            []
 docs/book/draft/chXX_testing_and_debugging.pod              []
+docs/book/pct/ch01_introduction.pod                         []
+docs/book/pct/ch02_getting_started.pod                      []
+docs/book/pct/ch03_compiler_tools.pod                       []
+docs/book/pct/ch04_pge.pod                                  []
+docs/book/pct/ch05_nqp.pod                                  []
 docs/book/pir/ch01_introduction.pod                         []
 docs/book/pir/ch02_getting_started.pod                      []
 docs/book/pir/ch03_basic_syntax.pod                         []
@@ -642,6 +644,8 @@
 examples/c/test_main.c                                      [examples]
 examples/compilers/Makefile                                 [examples]
 examples/compilers/japhc.c                                  [examples]
+examples/config/file/configcompiler                         [examples]
+examples/config/file/configwithfatalstep                    [examples]
 examples/embed/Makefile                                     [examples]
 examples/embed/Makefile.msvc                                [examples]
 examples/embed/cotorra.c                                    [examples]
@@ -2165,8 +2169,6 @@
 tools/util/release.json                                     []
 tools/util/templates.json                                   []
 tools/util/update_copyright.pl                              []
-xconf/samples/testfoobar                                    []
-xconf/samples/yourfoobar                                    []
 # Local variables:
 #   mode: text
 #   buffer-read-only: t

Modified: branches/auto_attrs/compilers/pge/PGE/OPTable.pir
==============================================================================
--- branches/auto_attrs/compilers/pge/PGE/OPTable.pir	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/compilers/pge/PGE/OPTable.pir	Mon Aug 10 11:28:05 2009	(r40477)
@@ -52,7 +52,7 @@
     .return ()
 .end
 
-=item C<syncat(string name, adverbs :slurpy :named)>
+=item C<sctable(string name, adverbs :slurpy :named)>
 
 Adds (or replaces) a syntactic category's defaults.
 

Modified: branches/auto_attrs/compilers/pge/STATUS
==============================================================================
--- branches/auto_attrs/compilers/pge/STATUS	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/compilers/pge/STATUS	Mon Aug 10 11:28:05 2009	(r40477)
@@ -13,9 +13,11 @@
 * capturing and non-capturing groups
 * modifiers :i, :ignorecase, :s, :sigspace (with synonyms :w and :words)
 * enumerated character lists (<[abcdef]>, <-[a..f]>)
+* composed character classes (<+alpha-[aeiou]>)
 * character classes (., \d, \w, \s, \n, \e, \f, \r, \t, \x[hhh], \o[nnn])
 * anchors (^, $, ^^, $$, \b, \B)
 * negated character classes 
+* \c[...] and \C[...]
 * capturing subrules (<expr>)
 * non-capturing subrules  (<.expr>)
 * negated subrules (<!expr>)
@@ -34,7 +36,6 @@
 
 Perl 6 rule features to-do
 --------------------------
-* rule composition (<+alpha-[aeiou]>)
 * lookbehind according to S05 definition 
 * <cut> assertion
 * versions of <commit> and cuts that release backtracking state
@@ -42,7 +43,6 @@
 * literal assertions (<'literal'>)
 * interpolated assertions (<"literal">)
 * array and hash aliasing (@<foo>:=(...), %<foo>:=(...))
-* \c[...] and \C[...]
 * don't generate unneeded internal data structures (e.g., gpad)
 
 

Deleted: branches/auto_attrs/docs/book/draft/ch04_compiler_tools.pod
==============================================================================
--- branches/auto_attrs/docs/book/draft/ch04_compiler_tools.pod	Mon Aug 10 11:28:05 2009	(r40476)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,273 +0,0 @@
-=pod
-
-=head1 Parrot Compiler Tools
-
-Z<CHP-4>
-
-The previous chapters demonstrated low-level Parrot programming in PIR.  That's
-fun, but Parrot's true power is to host programs written in high level
-languages such as Perl 6, Python, Ruby, Tcl, and PHP.
-
-Parrot's language neutrality was a conscious design decision.  Parrot and Perl
-6 hewed closely in the early days; it would have been easy for the two to
-overlap and intermingle.
-
-By keeping the two projects separate and encapsulated, the possibility arose to
-support many other dynamic languages equally well. This modular design also
-benefits designers of future languages.  Instead of having to reimplement
-low-level features such as garbage collection and dynamic data types, language
-designers and compiler implementers can leave these details of infrastructure
-to Parrot and focus on the syntax, capabilities, and libraries of their
-high-level languages instead.
-
-Parrot exposes a rich interface for these languages to use, offering several
-important features: a robust exceptions system, compilation into
-platform-independent bytecode, a clean extension and embedding interface,
-just-in-time compilation to machine code, native library interface mechanisms,
-garbage collection, support for objects and classes, and a robust concurrency
-model.  Designing a new language or implementing a new compiler for an old
-language is easier with all of these features designed, implemented, tested,
-and supported in a VM already.
-
-Language interoperability is a core goal for Parrot. Different languages are
-suited to different tasks; heated debates explode across the Internet about
-which language is right for which project.  There's rarely a perfect fit.
-Developers often settle for one particular language if only because it offers
-the fewest I<disadvantages>.  Parrot changes this game by allowing developers
-to combine multiple languages seamlessly within a single project.  Well-tested
-libraries written in one language can interoperate with clean problem-domain
-expression in a second language, glued together by a third language which
-elegantly describes the entire system's architecture.  You can use the
-strengths of multiple languages and mitigate their weaknesses.
-
-For language hosting and interoperability to work, language developers need to
-write compilers that convert source code written in high level languages to
-bytecode.  This process is analogous to how a compiler such as GCC converts C
-or C++ into machine code -- though instead of targeting machine code for a
-specific hardware platform, compilers written in Parrot produce Parrot code
-which can run on any hardware platform that can run Parrot.
-
-Parrot includes a suite of compiler tools for every step of this conversion:
-lexical analysis, parsing, optimization, resource allocation, and code
-generation.  Instead of using traditional low-level languages -- such as the C
-produced by C<lex> and C<yacc> -- to write compilers, Parrot can use any
-language hosted on Parrot in its compiler process.  As a practical matter, the
-prevalent tool uses a subset of the Perl 6 programming language called I<Not
-Quite Perl>X<Not Quite Perl> (NQP) and an implementation of the Perl 6 Grammar
-Engine X<Perl 6 Grammar Engine> (PGE) to build compilers for Parrot.
-
-=begin notetip
-
-Yes, the Perl 6 compiler on Parrot is itself written in Perl 6.  This
-X<bootstrapping> I<bootstrapping> process is mind-boggling at first.
-
-=end notetip
-
-PGE and NQP are part of the Parrot Compiler Tools.  A<CHP-5> Chapter 5
-discusses PGE and A<CHP-6> Chapter 6 explains NQP.
-
-=head2 PCT Overview
-
-The X<Parrot Compiler Tools;PCT> Parrot Compiler Tools (PCT) enable the
-creation of high-level language compilers and runtimes.  Though the Perl 6
-development team originally created these tools to produce Rakudo (Perl 6 on
-Parrot), several other Parrot-hosted compilers use them to great effect.
-Writing a compiler using Perl 6 syntax and dynamic language tools is much
-easier than writing a compiler in C, C<lex>, and C<yacc>.
-
-PCT contains several classes that implement various parts of a compiler. HLL
-developers write language-specific subclasses to fill in the details their
-languages require.  The X<HLLCompiler> C<PCT::HLLCompiler> class specifies the
-compiler's interface and represents the object used to parse and execute code.
-The X<Parrot Compiler Tools;PCT::Grammar> C<PCT::Grammar> and X<Parrot Compiler
-Tools;PCT::Grammar::Actions> C<PCT::Grammar::Actions> classes represent the
-parser and syntax tree generators, respectively. Creating a new HLL compiler is
-as easy as subclassing these three entities with methods specific to your
-language.
-
-=head3 Grammars and Action Files
-
-A PCT-based compiler requires three basic files: the main entry point file, the
-grammar specification file, and the grammar actions file. In addition,
-compilers and the languages they implement often use large libraries of built-in
-routines to provide language-specific behaviors.
-
-=over 4
-
-=item * The main file
-
-The main file is (often) a PIR program which contains the C<:main> function
-that creates and executes the compiler object. This program instantiates a
-C<PCT::HLLCompiler> subclass, loads any necessary support libraries, and
-initializes any compiler- or language-specific data.
-
-The main file tends to be short.  The guts of the compiler logic is in the
-grammar and actions files.  Runtime support and auxiliary functions often
-appear in other files, by convention.  This separation of concerns tends to
-make compilers easier to maintain.
-
-=item * A grammar file
-
-The high-level language's grammar appears in a F<.pg> file.  This file
-subclasses the C<PCT::Grammar> class and implements all of the necessary rules --
-written using PGE -- to parse the language.
-
-=item * An actions file
-
-Actions contains methods -- written in NQP -- on the C<PCT::Grammar::Actions>
-object which receive parse data from the grammar rules and construct an
-X<Abstract Syntax Tree;Parrot Abstract Syntax Tree;AST;PAST> Abstract Syntax
-Tree (AST).N<The Parrot version of an AST is, of course, the Parrot Abstract
-Syntax Tree, or PAST.>
-
-=back
-
-PCT's workflow is customizable, but simple.  The compiler passes the source
-code of the HLL into the grammar engine.  The grammar engine parses this code
-and returns a X<PGE;Match Object> special Match object which represents a
-parsed version of the code.  The compiler then passes this match object to the
-action methods, which convert it in stages into PAST.  The compiler finally
-converts this PAST into PIR code, which it can save to a file, convert to
-bytecode, or execute directly.
-
-=head3 C<mk_language_shell.pl>
-
-The only way creating a new language compiler could be easier is if these files
-created themselves. PCT includes a tool to do just that:
-C<mk_language_shell.pl>.  This program automatically creates a new directory in
-F<languages/> for your new language, the necessary three files, starter files
-for libraries, a F<Makefile> to automate the build process, and a basic test
-harness to demonstrate that your language works as expected.
-
-These generated files are all stubs which will require extensive editing to
-implement a full language, but they are a well-understood and working starting
-point.  With this single command you can create a working compiler.  It's up to
-you to fill in the details.
-
-C<mk_language_shell.pl> prefers to run from within a working Parrot repository.
-It requires a single argument, the name of the new project to create.  There
-are no hard-and-fast rules about names, but the Parrot developers recommend
-that Parrot-based implementations of existing languages use unique names.
-
-Consider the names of Perl 5 distributions: Active Perl and Strawberry Perl.
-Python implementations are IronPython (running on the CLR) and Jython (running
-on the JVM).  The Ruby-on-Parrot compiler isn't just "Ruby": it's Cardinal.
-The Tcl compiler on Parrot is Partcl.
-
-An entirely new language has no such constraints.
-
-From the Parrot directory, invoke C<mk_language_shell.pl> like:
-
-  $ B<cd languages/>
-  $ B<perl ../tools/build/mk_language_shell.pl <project name>>
-
-=head3 Parsing Fundamentals
-
-An important part of a compiler is the parser and lexical analyzer.  The
-lexical analyzer converts the HLL input file into individual tokens. A token
-may consist of an individual punctuation ("+"), an identifier ("myVar"), a
-keyword ("while"), or any other artifact that stands on its own as a single
-unit.  The parser attempts to match a stream of these input tokens against a
-given pattern, or grammar. The matching process orders the input tokens into an
-abstract syntax tree which the other portions of the compiler can process.
-
-X<top-down parser>
-X<bottom-up parser>
-X<parsers; top-down>
-X<parsers; bottom-up>
-Parsers come in top-down and bottom-up varieties. Top-down parsers start with a
-top-level rule which represents the entire input. They attempt to match various
-combinations of subrules until they have consumed the entire input.  Bottom-up
-parsers start with individual tokens from the lexical analyzer and attempt to
-combine them together into larger and larger patterns until they produce a
-top-level token.
-
-PGE is a top-down parser, although it also contains a bottom-up I<operator
-precedence> parser to make processing token clusters such as mathematical
-expressions more efficient.
-
-=head2 Driver Programs
-
-The driver program for the new compiler must create instances of the various
-necessary classes that run the parser. It must also include the standard
-function libraries, create global variables, and handle commandline options.
-PCT provides several useful command-line options, but driver programs may need
-to override several behaviors.
-
-PCT programs can run in two ways.  An interactive mode runs one statement at a
-time in the console.  A file mode loads and runs an entire file at once.  A
-driver program may specify information about the interactive prompt and
-environment, as well as help and error messages.
-
-=head3 C<HLLCompiler> class
-
-The C<HLLCompiler> class implements a compiler object. This object contains
-references to language-specific parser grammar and actions files, as well as
-the steps involved in the compilation process.  The stub compiler created by
-C<mk_language_shell.pl> might resemble:
-
-  .sub 'onload' :anon :load :init
-      load_bytecode 'PCT.pbc'
-      $P0 = get_hll_global ['PCT'], 'HLLCompiler'
-      $P1 = $P0.'new'()
-      $P1.'language'('MyCompiler')
-      $P1.'parsegrammar'('MyCompiler::Grammar')
-      $P1.'parseactions'('MyCompiler::Grammar::Actions')
-  .end
-
-  .sub 'main' :main
-      .param pmc args
-      $P0 = compreg 'MyCompiler'
-      $P1 = $P0.'command_line'(args)
-  .end
-
-The C<:onload> function creates the driver object as an instance of
-C<HLLCompiler>, sets the necessary options, and registers the compiler with
-Parrot. The C<:main> function drives parsing and execution. It calls the
-C<compreg> opcode to retrieve the registered compiler object for the language
-"MyCompiler" and invokes that compiler object using the options received from
-the commandline.
-
-The C<compreg> opcode hides some of Parrot's magic; you can use it multiple
-times in a program to compile and run different languages. You can create
-multiple instances of a compiler object for a single language (such as for
-runtime C<eval>) or you can create compiler objects for multiple languages for
-easy interoperability. The Rakudo Perl 6 C<eval> function uses this mechanism
-to allow runtime eval of code snippets in other languages:
-
-  eval("puts 'Konnichiwa'", :lang<Ruby>);
-
-=head3 C<HLLCompiler> methods
-
-The previous example showed the use of several HLLCompiler methods:
-C<language>, C<parsegrammar>, and C<parseactions>.  These three methods are the
-bare minimum interface any PCT-based compiler should provide.  The C<language>
-method takes a string argument that is the name of the compiler. The
-HLLCompiler object uses this name to register the compiler object with Parrot.
-The C<parsegrammar> method creates a reference to the grammar file that you
-write with PGE. The C<parseactions> method takes the class name of the NQP file
-used to create the AST-generator for the compiler.
-
-If your compiler needs additional features, there are several other available
-methods:
-
-=over 4
-
-=item * C<commandline_prompt>
-
-The C<commandline_prompt> method allows you to specify a custom prompt to
-display to users in interactive mode.
-
-=item * C<commandline_banner>
-
-The C<commandline_banner> method allows you to specify a banner message that
-displays at the beginning of interactive mode.
-
-=back
-
-=cut
-
-# Local variables:
-#   c-file-style: "parrot"
-# End:
-# vim: expandtab shiftwidth=4:

Deleted: branches/auto_attrs/docs/book/draft/ch05_pge.pod
==============================================================================
--- branches/auto_attrs/docs/book/draft/ch05_pge.pod	Mon Aug 10 11:28:05 2009	(r40476)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,1558 +0,0 @@
-=pod
-
-=head1 Grammar Engine
-
-X<Parrot Grammar Engine>
-X<PGE (Parrot Grammar Engine)>
-The Parrot Grammar Engine (PGE) is a parser generator, one of the key
-components of the Parrot Compiler Toolkit. It reads grammar files written in
-the PGE rules format and generates parser modules written in PIR code. PGE
-rules provide the full power of I<recursive descent parsing> and I<operator
-precedence parsing>. Fortunately, you don't need to know what those terms
-mean in order to make good use of PGE. We'll introduce the necessary
-concepts as we talk about various features in this chapter.
-
-=head2 Grammars
-
-The ultimate goal of a parser is to match patterns in a source language and
-convert them to an internal data structure for later manipulations. As a
-programmer, you're probably already familiar with some of these types of
-patterns: function declarations, function calls, statements, and assignments.
-Each of these different concepts has a particular form called a I<syntax>.
-In C for example, the syntax to define a function looks something like this:
-
-  <return_type> <function_name> ( <arguments> ) { <function_body> }
-
-Things that fit this pattern, so long as all the sub-patterns use the proper
-syntax also, are valid subroutines in C. Similarly, we can use a slightly
-different pattern to create a subroutine:
-
-  sub <function_name> { <function_body> }
-
-A grammar is a collection of rules like the ones above that specify all the
-acceptable patterns in a language. Grammars group together these rules in
-much the same way that a class groups together related data fields and methods
-N<In languages like Perl 6 for instance, a grammar is just a special kind
-of class and a rule is just a special kind of method.>. Each rule defines
-a pattern for matching one unit of text, and can be made up of various other
-rules which are called recursively to make a complete match.
-
-A rule can contain regular expressions to match patterns of characters:
-
-  rule id { \d+ }
-
-A rule can also contain patterns of references to other rules:
-
-  rule record { <id> <name> <phone> }
-
-A grammar contains a group of rules that work together to match the entire
-language:
-
-  grammar Contacts;
-
-  rule name { 'John' | 'Bob ' | 'Fred' }
-
-  rule id   { \d+ }
-
-  rule record { <id> <name> }
-
-  ...
-
-=head3 Rules and Tokens
-
-X<rule>
-X<token>
-There are two different kinds of rules: C<rule>, which we saw above, and
-C<token>. A C<rule> performs smart whitespace matching between the various
-pieces of the pattern. The C<record> rule given previously would match
-"6355 John" or "6355      John" but not "6355John".
-
-A C<token> matches whitespace only if you specifically request it. To get the
-same effect with a token, add the C<\s> (match a space character) and C<+>
-(match the preceding atom -- the space character, in this case -- one or more
-times) pattern to the rule:
-
-  token record { <id> \s+ <name> }
-
-=head3 The Start Rule
-
-X<top>
-X<top-down parser>
-A recursive descent parser is what's called a I<top-down parser>. It starts
-at the highest-level rule, called C<TOP>, and works its way down through
-individual rules to match an entire string or file. Real Perl 6 allows any
-name for the top-level rule, but PCT expects a rule called C<TOP>. If PCT
-was as fully-featured as Perl 6, people would use it instead! Here's an
-example of a TOP rule:
-
-  rule TOP { <record> }
-
-This rule matches a single C<record> pattern in a string or file. Once the
-parser has succeeded in matching the entire string or file passed to the
-start rule, it returns a parse tree. If it cannot match the entire input
-with the rules provided, it can either return a partial match, or it can
-throw a parse error.
-
-=head3 Testing a Grammar
-
-Let's do a small example grammar. Save this example to a file called
-F<Contacts.pg>:
-
-  grammar Contacts is PGE::Grammar;
-
-  rule  TOP    { <record> }
-  rule  record { <id> <name> }
-  token name   { 'John' | 'Bob ' | 'Fred' }
-  token id     { \d+ }
-
-Then compile the grammar:
-
-  $ B<parrot Perl6Grammar.pbc --output=Contacts.pir Contacts.pg>
-
-=for author
-
-Assume an installed Parrot for all examples?  Anyone working from the source
-tree should be able to mangle paths appropriately.
-
-=end for
-
-The path to F<parrot> and to the F<Perl6Grammar.pbc> file will vary on
-different systems. If you compiled Parrot from source, it will be:
-
-  $ B<./parrot runtime/parrot/library/PGE/Perl6Grammar.pbc \>
-        B<--output=Contacts.pir Contacts.pg>
-
-Next, create a small PIR script to run your grammar. Save it as
-F<grammar_test.pir>:
-
-=begin PIR
-
-  .sub main :main
-      load_bytecode 'PGE.pbc'        # load some required modules
-      load_bytecode 'dumper.pbc'
-      load_bytecode 'PGE/Dumper.pbc'
-
-      load_bytecode 'Contacts.pir'   # load your grammar
-
-      .local string source
-      source  = "3 John"
-
-      .local pmc top, grammar, match
-      top     = get_hll_global ['Contacts'], 'TOP'
-      grammar = get_class 'Contacts'
-      match   = top(source, 'grammar' => grammar)
-
-      _dumper(match, "match")
-  .end
-
-=end PIR
-
-Run the test script:
-
-  $ B<parrot grammar_test.pir>
-
-It will print out a text representation of the raw parse tree stored in the
-C<match> variable:
-
-  "match" => PMC 'Contacts' => "3 John" @ 0 {
-      <record> => PMC 'Contacts' => "3 John" @ 0 {
-          <id> => PMC 'Contacts' => "3" @ 0
-          <name> => PMC 'Contacts' => "John" @ 2
-      }
-  }
-
-Each node in the tree corresponds to a rule in the grammar.  The top-level
-match variable contains one child named C<record>, which contains two children
-named C<id> and C<name>.  C<id> contains the number 3, and C<name> contains the
-string "John". This is exactly what the simple grammar should have matched.
-
-=head2 Rule Syntax
-
-Every language has a set of basic components (words or parts of words) and
-syntax conventions for combining them. The "words" in rules are literal
-characters or symbols, some X<metacharacters> metacharacters (or metasymbols),
-and X<rules;escape sequences>X<escape sequences, rules> escape sequences, while
-the combining syntax includes other metacharacters, X<quantifiers, rules>
-X<rules;quantifiers> quantifiers, bracketing characters, and assertions.
-
-=head3 Metacharacters
-
-The C<.> metacharacter matches any single character, even a newline character.
-The C<^> and C<$> metacharacters are zero-width matches which represent the
-beginning and end of a string. They each have doubled alternates C<^^> and
-C<$$> that match at the beginning and end of every (newline-delimited) line
-within a string.
-
-The C<|>, C<&>, C<\>, C<#>, and C<:=> metacharacters are all syntax structure
-elements. C<|> alternates between two options. C<&> matches two patterns
-simultaneously (the patterns must be the same length). C<\> turns literal
-characters into metacharacters (producing escape sequences). C<#> starts a
-comment which proceeds until the end of the line. You can start a comment at
-any point on any line in a rule. C<:=> binds a hypothetical variable to the
-result of a subrule or grouped pattern (see L<Hypothetical Variables>).
-
-The metacharacters C<()>, C<[]>, C<{}> and C<E<lt>E<gt>> are bracketing pairs.
-Bracketing pairs must always be balanced within the rule; to use a literal
-character, escape it with a C<\>.  The C<()> and C<[]> pairs group patterns as
-a single atom. They often capture a result, mark the boundaries of an
-alternation, or mark a group of patterns with a quantifier. Parentheses C<()>
-capture, but square brackets C<[]> do not. The C<{}> brackets define a section
-of code (a closure) within a rule. These closures are always a successful
-zero-width match. The C<E<lt>...E<gt>> brackets mark assertions, which handle a
-variety of constructs including character classes and user-defined quantifiers
-(see L<Assertions>).
-
-Table 7-2 summarizes the basic metacharacters.
-
-=begin table picture Metacharacters
-
-Z<CHP-7-TABLE-2>
-
-=headrow
-
-=row
-
-=cell Symbol
-
-=cell Meaning
-
-=bodyrows
-
-=row
-
-=cell C<.>
-
-=cell Match any single character, including a newline.
-X<. (dot);. match single character (rules)>
-
-=row
-
-=cell C<^>
-
-=cell Match the beginning of a string.
-X<^ (caret);^ beginning of string (rules)>
-
-=row
-
-=cell C<$>
-
-=cell Match the end of a string.
-X<$ (dollar sign);$ end of string (rules)>
-
-=row
-
-=cell C<^^>
-
-=cell Match the beginning of a line within the string.
-X<^ (caret);^^ beginning of line (rules)>
-
-=row
-
-=cell C<$$>
-
-=cell Match the end of a line within the string.
-X<$ (dollar sign);$$ end of line (rules)>
-
-=row
-
-=cell C<|>
-
-=cell Match alternate patterns (OR).
-
-=row
-
-=cell C<&>
-
-=cell Match multiple patterns (AND).
-
-=row
-
-=cell C<\>
-
-=cell Escape a metacharacter to get a literal character, or escape a
-literal character to get a metacharacter.
-X<\ (backslash);\ escape sequences (rules)>
-X<\ (backslash);\ to escape metacharacters (rules)>
-
-=row
-
-=cell C<#>
-
-=cell Mark a comment (to the end of the line).
-
-=row
-
-=cell C<:=>
-
-=cell Bind the result of a match to a hypothetical variable.
-X<: (colon);:= (binding);in rules>
-
-=row
-
-=cell C<(...)>
-
-=cell Group patterns and capture the result.
-
-=row
-
-=cell C<[...]>
-
-=cell Group patterns without capturing.
-
-=row
-
-=cell C<{...}>
-
-=cell Execute a closure (Perl 6 code) within a rule.
-
-=row
-
-=cell C<E<lt>...E<gt>>
-
-=cell Match an assertion.
-
-=end table
-
-=head3 Escape Sequences
-
-Z<CHP-7-SECT-2.2>
-
-X<escape sequences, rules>
-X<rules;escape sequences>
-X<\ (backslash);\ escape sequences (rules)>
-
-Escape sequences are literal characters acting as metacharacters.  A preceding
-backslash (C<\>) identifies them as escapes. Some escape sequences represent
-single characters that are difficult to represent literally, such as C<\t> for
-tab, or C<\x[...]> to specify a character by its hexadecimal number.  Some
-represent limited character classes, such as C<\d> for digits or C<\w> for word
-characters. Some represent zero-width positions in a match, such as C<\b> for a
-word boundary.
-
-X<variable interpolation in rules>
-X<rules;variable interpolation>
-If you've used Perl 5 regexps, you may remember the C<\Q> escape sequence which
-treats everything until the following C<\E> sequence as literal text,
-containing no escape sequences.  Because ordinary variables now interpolate as
-literal strings by default, the C<\Q> escape sequence is rarely needed.
-
-A<CHP-7-TABLE-3>Table 7-3 shows the escape sequences for rules.
-
-=begin table picture Escape sequences
-
-Z<CHP-7-TABLE-3>
-
-=headrow
-
-=row
-
-=cell Escape
-
-=cell Meaning
-
-=bodyrows
-
-=row
-
-=cell C<\0[...]>
-
-=cell Match a character given in octal (brackets optional).
-
-=row
-
-=cell C<\b>
-
-=cell Match a word boundary.
-
-=row
-
-=cell C<\B>
-
-=cell Match when not on a word boundary.
-
-=row
-
-=cell C<\c[...]>
-
-=cell Match a named character or control character.
-
-=row
-
-=cell C<\C[...]>
-
-=cell Match any character except the bracketed named or control character.
-
-=row
-
-=cell C<\d>
-
-=cell Match a digit.
-
-=row
-
-=cell C<\D>
-
-=cell Match a non-digit.
-
-=row
-
-=cell C<\e>
-
-=cell Match an escape character.
-
-=row
-
-=cell C<\E>
-
-=cell Match anything but an escape character.
-
-=row
-
-=cell C<\f>
-
-=cell Match the form feed character.
-
-=row
-
-=cell C<\F>
-
-=cell Match anything but a form feed.
-
-=row
-
-=cell C<\n>
-
-=cell Match a (logical) newline.
-
-=row
-
-=cell C<\N>
-
-=cell Match anything but a (logical) newline.
-
-=row
-
-=cell C<\h>
-
-=cell Match horizontal whitespace.
-
-=row
-
-=cell C<\H>
-
-=cell Match anything but horizontal whitespace.
-
-=row
-
-=cell C<\L[...]>
-
-=cell Everything within the brackets is lowercase.
-
-=row
-
-=cell C<\Q[...]>
-
-=cell All metacharacters within the brackets match as literal characters.
-
-=row
-
-=cell C<\r>
-
-=cell Match a return.
-
-=row
-
-=cell C<\R>
-
-=cell Match anything but a return.
-
-=row
-
-=cell C<\s>
-
-=cell Match any whitespace character.
-
-=row
-
-=cell C<\S>
-
-=cell Match anything but whitespace.
-
-=row
-
-=cell C<\t>
-
-=cell Match a tab.
-
-=row
-
-=cell C<\T>
-
-=cell Match anything but a tab.
-
-=row
-
-=cell C<\U[...]>
-
-=cell Everything within the brackets is uppercase.
-
-=row
-
-=cell C<\v>
-
-=cell Match vertical whitespace.
-
-=row
-
-=cell C<\V>
-
-=cell Match anything but vertical whitespace.
-
-=row
-
-=cell C<\w>
-
-=cell Match a word character (Unicode alphanumeric characters plus the
-underscore C<_>).
-
-=row
-
-=cell C<\W>
-
-=cell Match anything but a word character.
-
-=row
-
-=cell C<\x[...]>
-
-=cell Match a character given in hexadecimal (brackets optional).
-
-=row
-
-=cell C<\X[...]>
-
-=cell Match anything but the character given in hexadecimal (brackets
-optional).
-
-=end table
-
-=head3 Quantifiers
-
-Z<CHP-7-SECT-2.3>
-
-Quantifiers specify the number of times an atom (a single character,
-metacharacter, escape sequence, grouped pattern, assertion, etc) will match.
-
-X<. (dot);.. (range);quantifier (rules)>
-X<. (dot);... (infinite range);quantifier (rules)>
-The numeric quantifiers use assertion syntax. A single number (C<E<lt>3E<gt>>)
-requires exactly that many matches. A numeric range quantifier
-(C<E<lt>3C<..>5E<gt>>) succeeds if the number of matches is between the minimum
-and maximum numbers, inclusive. A range with three trailing dots
-(C<E<lt>2...E<gt>>) is shorthand for C<E<lt>2..InfE<gt>>; it matches as many
-times as possible.
-
-Each quantifier has a minimal alternate form -- marked with a trailing C<?> --
-which matches the shortest possible sequence first.  That is, given the string
-C<aaaaaa>, C<aE<lt>3C<..>5E<gt>> will match C<aaaaa> and C<aE<lt>3C<..>5E<gt>?>
-will match C<aaa>.
-
-A<CHP-7-TABLE-4>Table 7-4 shows the built-in
-X<quantifiers, rules> X<rules;quantifiers> quantifiers.
-
-=begin table picture Quantifiers
-
-Z<CHP-7-TABLE-4>
-
-=headrow
-
-=row
-
-=cell Maximal
-
-=cell Minimal
-
-=cell Meaning
-
-=bodyrows
-
-=row
-
-=cell C<*>
-
-=cell C<*?>
-
-=cell Match 0 or more times.
-
-=row
-
-=cell C<+>
-
-=cell C<+?>
-
-=cell Match 1 or more times.
-
-=row
-
-=cell C<?>
-
-=cell C<??>
-
-=cell Match 0 or 1 times.
-
-=row
-
-=cell C<E<lt>>R<n>C<E<gt>>
-
-=cell C<E<lt>>R<n>C<E<gt>?>
-
-=cell Match exactly R<n> times.
-
-=row
-
-=cell C<E<lt>>R<n>C<..>R<m>C<E<gt>>
-
-=cell C<E<lt>>R<n>C<..>R<m>C<E<gt>?>
-
-=cell Match at least R<n> and no more than R<m> times.
-
-=row
-
-=cell C<E<lt>>R<n>C<...E<gt>>
-
-=cell C<E<lt>>R<n>C<...E<gt>?>
-
-=cell Match at least R<n> times.
-
-=end table
-
-=head3 Assertions
-
-Z<CHP-7-SECT-2.4>
-
-X<assertions, rules>
-X<rules;assertions>
-An assertion states that some condition or state is true. The match fails when
-that assertion is false.
-
-X<variable interpolation in rules>
-X<rules;variable interpolation>
-
-Assertions match named and anonymous rules, arrays or hashes containing
-anonymous rules, and subroutines or closures that return anonymous rules.
-
-To interpolate a variable in assertion rules, enclose it in assertion
-delimiters.
-A bare scalar in a pattern
-interpolates as a literal string, while a scalar variable in assertion
-brackets interpolates as an anonymous rule. A bare array in a pattern
-matches as a series of alternate literal strings, while an array in
-assertion brackets interpolates as a series of alternate anonymous
-rules.
-
-A bare hash in a pattern matches a word (C<\w+>) if and only if that word is
-one of its keysN<The effect is similar to matching the keys as a series of
-alternates, but it prefers to match the longest possible key, instead of the
-first potential match.>, while a hash in assertion brackets also matches the
-associated value as an anonymous rule.
-
-X<fail keyword>
-A bare closure in a pattern always matches (unless it calls C<fail>), but a
-closure in assertion brackets C<E<lt>{...}E<gt>> must return an anonymous rule
-to match.
-
-An assertion with parentheses C<E<lt>(...)E<gt>> resembles a bare closure in a
-pattern in that it allows you to include Perl code within a rule.
-C<E<lt>(...)E<gt>> evaluates the return value of the closure in boolean
-context. The match succeeds or fails based on that return value.
-
-Assertions match character classes, both named and enumerated. A named rule
-character class is often more accurate than an enumerated character class. The
-common C<E<lt>[a-zA-Z]E<gt>> idiom matches ASCII alphabetic characters, but the
-more comprehensive built-in rule C<E<lt>alphaE<gt>> matches the full set of
-Unicode alphabetic characters.
-
-A<CHP-7-TABLE-5>Table 7-5 shows the syntax of assertions.
-
-=begin table picture Assertions
-
-Z<CHP-7-TABLE-5>
-
-=headrow
-
-=row
-
-=cell Syntax
-
-=cell Meaning
-
-=bodyrows
-
-=row
-
-=cell C<E<lt>...E<gt>>
-
-=cell Generic assertion delimiter.
-
-=row
-
-=cell C<E<lt>!...E<gt>>
-
-=cell Negate any assertion.
-
-=row
-
-=cell C<E<lt>>R<name>C<E<gt>>
-
-=cell Match a named rule or character class.
-
-=row
-
-=cell C<E<lt>[...]E<gt>>
-
-=cell Match an enumerated character class.
-
-=row
-
-=cell C<E<lt>-...E<gt>>
-
-=cell Complement a character class (named or enumerated).
-
-=row
-
-=cell C<E<lt>"..."E<gt>>
-
-=cell Match a literal string (interpolated at match time).
-
-=row
-
-=cell C<E<lt>'...'E<gt>>
-
-=cell Match a literal string (not interpolated).
-
-=row
-
-=cell C<E<lt>(...)E<gt>>
-
-=cell Boolean assertion. Execute a closure and match if it returns a true
-result.
-
-=row
-
-=cell C<E<lt>$scalarE<gt>>
-
-=cell Match an anonymous rule.
-
-=row
-
-=cell C<E<lt>@arrayE<gt>>
-
-=cell Match a series of anonymous rules as alternates.
-
-=row
-
-=cell C<E<lt>%hashE<gt>>
-
-=cell Match a key from the hash, then its value (as an anonymous rule).
-
-=row
-
-=cell C<E<lt>E<amp>sub()E<gt>>
-
-=cell Match an anonymous rule returned by a sub.
-
-=row
-
-=cell C<E<lt>{>R<code>C<}E<gt>>
-
-=cell Match an anonymous rule returned by a closure.
-
-=row
-
-=cell C<E<lt>.E<gt>>
-
-=cell Match any logical grapheme, including combining character sequences.
-
-=end table
-
-=head3 Modifiers
-
-Z<CHP-7-SECT-2.5>
-
-X<modifiers>
-X<: (colon);: modifier delimiter in rules>
-Modifiers alter the meaning of a pattern. The standard position for modifiers
-is at the beginning of the rule, right after the C<m>, C<s>, or C<rx>, or after
-the name in a named rule. Modifiers cannot attach to the outside of a bare
-C</.../>. For example:
-
-  m:i/marvin/ # case insensitive
-  rule names :i { marvin | ford | arthur }
-
-You may group single-character modifiers, but you must separate longer
-modifiers by colons:
-
-  m:wig/ zaphod /                        # OK
-  m:words:ignorecase:globally / zaphod / # OK
-  m:wordsignorecaseglobally / zaphod /   # Not OK
-
-Most modifiers can also appear inside the rule when attached to rule or
-grouping delimiters. Internal modifiers are lexically scoped to their enclosing
-delimiters, so they can alter subpatterns:
-
-  m/:w I saw [:i zaphod] / # only 'zaphod' is case insensitive
-
-The repetition modifiers (C<:R<N>x>, C<:R<N>th>, C<:once>, C<:globally>, and
-C<:exhaustive>) and the continue modifier (C<:cont>) alter the return value of
-the rule as a whole, so you cannot use them lexically inside a rule.
-
-The C<:R<N>x> modifier matches the rule a specific number of times. If the
-modifier expects more matches than the string has, the match fails.  Its
-alternate form (C<:x(R<N>)>) can take a variable in place of the number.
-
-The C<:once> modifier on a rule only allows it to match once. The rule will not
-match again until you call the C<.reset> method on the rule object.
-
-The C<:globally> modifier matches as many times as possible. The C<:exhaustive>
-modifier also matches as many times as possible, in as many different ways as
-possible.
-
-The C<:R<N>th> modifier preserves one result from a particular counted match.
-If the rule matches fewer times than the modifier expects, the match fails. It
-has several alternate forms. One form, C<:th(R<N>)>, takes a variable in place
-of the number. The other forms -- C<:R<N>st>, C<:R<N>nd>, and C<:R<N>rd> --
-allow you to write more naturally C<:1st>, C<:2nd>, C<:3rd>.  The other way is
-valid as well; choose whichever is most comfortable.
-
-By default, rules ignore literal whitespace within the pattern.  The C<:w>
-modifier makes rules sensitive to literal whitespace, but in an intelligent
-way. Any cluster of literal whitespace acts like an explicit C<\s+> when it
-separates two identifiers and C<\s*> everywhere else.
-
-I<No> modifiers exist to treat the matched string as a single line or multiple
-lines.  Instead, use the "beginning of string" and "end of string" or
-"beginning of line" and "end of line" metacharacters.
-
-A<CHP-7-TABLE-6>Table 7-6 lists the available modifiers.
-
-=begin table picture Modifiers
-
-Z<CHP-7-TABLE-6>
-
-=headrow
-
-=row
-
-=cell Short
-
-=cell Long
-
-=cell Meaning
-
-=bodyrows
-
-=row
-
-=cell C<:i>
-
-=cell C<:ignorecase>
-
-=cell Case-insensitive match.
-
-=row
-
-=cell C<:I>
-
-=cell
-
-=cell Case-sensitive match (on by default).
-
-=row
-
-=cell C<:c>
-
-=cell C<:cont>
-
-=cell Continue where the previous match on the string left off.
-
-=row
-
-=cell C<:w>
-
-=cell C<:words>
-
-=cell Literal whitespace in the pattern matches as C<\s+>
-or C<\s*>.
-
-=row
-
-=cell C<:W>
-
-=cell
-
-=cell Turn off intelligent whitespace matching (return to default).
-
-=row
-
-=cell
-
-=cell C<:>R<N>C<x>/C<:x(>R<N>C<)>
-
-=cell Match the pattern R<N> times.
-
-=row
-
-=cell
-
-=cell C<:>R<N>C<th>/C<:nth(>R<N>C<)>
-
-=cell Match the R<N>th occurrence of a pattern.
-
-=row
-
-=cell
-
-=cell C<:once>
-
-=cell Match the pattern once and only once.
-
-=row
-
-=cell C<:g>
-
-=cell C<:globally>
-
-=cell Match the pattern as many times as possible without overlapping
-possibilities.
-
-=row
-
-=cell C<:e>
-
-=cell C<:exhaustive>
-
-=cell Match every possible occurrence of a pattern, including overlapping
-possibilities.
-
-=row
-
-=cell
-
-=cell C<:u0>
-
-=cell . is a byte.
-
-=row
-
-=cell
-
-=cell C<:u1>
-
-=cell . is a Unicode codepoint.
-
-=row
-
-=cell
-
-=cell C<:u2>
-
-=cell . is a Unicode grapheme.
-
-=row
-
-=cell
-
-=cell C<:u3>
-
-=cell . is language dependent.
-
-=row
-
-=cell
-
-=cell C<:p5>
-
-=cell The pattern uses Perl 5 regex syntax.
-
-=end table
-
-=head3 Built-in Rules
-
-Z<CHP-7-SECT-3>
-
-X<rules;built-in>
-PGE provides several named rules, including a complete set of X<POSIX-style
-classes> POSIX-style classes, and X<Unicode property classes> Unicode property
-classes. The list isn't fully defined yet, but A<CHP-7-TABLE-7>Table 7-7 shows
-a few you're likely to see.
-
-The C<E<lt>nullE<gt>> rule matches a zero-width string (it always matches) and
-C<E<lt>priorE<gt>> matches whatever the most recent successful rule matched.
-These replace the two behaviors of X</ (slash);// invalid null pattern>
-X<invalid null pattern //> the Perl 5 null pattern C<//>, which is no longer
-valid syntax for rules.
-
-=begin table picture Built-in rules
-
-Z<CHP-7-TABLE-7>
-
-=headrow
-
-=row
-
-=cell Rule
-
-=cell Meaning
-
-=bodyrows
-
-=row
-
-=cell C<E<lt>alphaE<gt>>
-
-=cell Match a Unicode alphabetic character.
-
-=row
-
-=cell C<E<lt>digitE<gt>>
-
-=cell Match a Unicode digit.
-
-=row
-
-=cell C<E<lt>spE<gt>>
-
-=cell Match a single space character (the same as C<\s>).
-
-=row
-
-=cell C<E<lt>wsE<gt>>
-
-=cell Match any whitespace (the same as C<\s+>).
-
-=row
-
-=cell C<E<lt>nullE<gt>>
-
-=cell Match the null string.
-
-=row
-
-=cell C<E<lt>priorE<gt>>
-
-=cell Match the same thing as the previous match.
-
-=row
-
-=cell C<E<lt>before ...E<gt>>
-
-=cell Zero-width lookahead. Assert that the current position I<precedes> a
-pattern.
-
-=row
-
-=cell C<E<lt>after ...E<gt>>
-
-=cell Zero-width lookbehind. Assert that the current position I<follows> a
-pattern.
-
-=row
-
-=cell C<E<lt>prop ...E<gt>>
-
-=cell Match any character with the named property.
-
-=row
-
-=cell C<E<lt>replace(...)E<gt>>
-
-=cell Replace everything matched so far in the rule or subrule with the
-given string (under consideration).
-
-=end table
-
-=head3 Backtracking Control
-
-Z<CHP-7-SECT-4>
-
-X<backtracking controls>
-X<fail keyword>
-Whenever part of the pattern fails to match, PGE performs backtracking --
-backing up to the previous point at which the match could succeed and trying
-again.  You can explicitly trigger backtracking by calling the C<fail> function
-within a closure. A<CHP-7-TABLE-8>Table 7-8 displays metacharacters and
-built-in rules relevant to backtracking.
-
-=for author
-
-This could use an example.
-
-=end for
-
-=begin table picture Backtracking controls
-
-Z<CHP-7-TABLE-8>
-
-=headrow
-
-=row
-
-=cell Operator
-
-=cell Meaning
-
-=bodyrows
-
-=row
-
-=cell C<:>
-
-=cell Don't retry the previous atom.  Instead, fail to the next earlier atom.
-X<: (colon);: fail to atom before last (rules)>
-X<backtracking controls;: fail to atom before last>
-
-=row
-
-=cell C<::>
-
-=cell Don't backtrack over this point. Instead fail out of the closest
-enclosing group (C<(...)>, C<[...]>, or the rule delimiters).
-X<: (colon);:: fail out of group (rules)>
-X<backtracking controls;: fail out of group>
-
-=row
-
-=cell C<:::>
-
-=cell Don't backtrack over this point.  Instead, fail out of the current rule
-or subrule.
-X<: (colon);::: fail out of rule (rules)>
-X<backtracking controls;: fail out of rule>
-
-=row
-
-=cell C<E<lt>commitE<gt>>
-
-=cell Don't backtrack over this point. Instead, fail out of the entire match
-(even from within a subrule).
-
-=row
-
-=cell C<E<lt>cutE<gt>>
-
-=cell Like C<E<lt>commitE<gt>>, but also cuts the string matched. The current
-matching position at this point becomes the new beginning of the string.
-
-=end table
-
-=head3 Calling Actions
-
-Once the parser has matched the entire input N<a source code file, or a line of
-input at the terminal in interactive mode>, the parse has succeeded.  The
-generated AST is now available to the code generator for conversion into PIR.
-
-=for author
-
-Please review.  The forward declaration is awkward here, but a little bit of
-explanation might ameliorate this.
-
-=end for
-
-This AST gets built up by actions -- code snippets attached to rules and
-tokens.  To call an action, insert the C<{*}> token into the rule. When PGE
-encounters C<{*}>, it will call the associated action method with the current
-match object as an argument.
-
-The best way to demonstrate this is by example.  Sprinkle the C<persons_name>
-rule liberally with action calls:
-
- rule persons_name {
-    {*} <first_name> {*} <last_name> {*}
- }
-
-The first call to the action method contains an empty match object because the
-parser hasn't matched anything yet.  The second call contains only the first
-name of the match. The third and final call contains both the matched first and
-last name.
-
-If the match fails halfway through, PGE will still call the actions that have
-succeeded; it will not call the actions after the failure.  If you try to match
-the string "Leia", PGE will call the first two action methods.  When the rule
-tries to match the last name, it fails, and PGE will not call the third action
-method.
-
-=head3 Alternations and Keys
-
-In addition to sub-rules, groups, and quantifiers, you can also express
-either-or alternations between options. The vertical bar token (C<|>)
-distinguishes between options where only one may match:
-
- rule hero {
-    ['Luke' | 'Leia'] 'Skywalker'
- }
-
-This rule will match either "Luke Skywalker" or "Leia Skywalker" but won't
-match "Luke Leia Skywalker"N<nor anything else.>.  Given alternations and
-action methods, it's often important to distinguish which alternation matched:
-
- rule hero {
-    [
-      'Luke' {*}    #= Luke
-    | 'Leia' {*}    #= Leia
-    ]
-    'Skywalker'
- }
-
-This is the same rule, except now it passes two arguments to its action method:
-the match object and the name of the person who matched.
-
-=head3 Warning: Left Recursion
-
-If you've worked with parsers before, you may have seen this coming.  If not,
-don't fear.  Like functions in ordinary procedural or functional languages, the
-methods in the PGE parser grammar can call themselves recursively.  Consider
-some rules derived in part from the grammar for the C programming language:
-
- rule if_statement {
-    'if' <condition> '{' <statement>* '}' <else_block>?
- }
-
- rule statement {
-    <if_statement> | <expression>
- }
-
- rule else_block {
-    'else' '{' <statement>* '}'
- }
-
-An C<if_statement> can contain a list of C<statement>s, and each statement
-may itself be an C<if_statement>.  This is I<recursion> X<Recursion>; it's one
-of the reasons PGE is a "Recursive descent" parser.
-
-Consider the more direct example of a comma-separated list of integer digits
-which form a list.  A recursive definition might be:
-
- rule list {
-     <list> ',' <digit> | <digit>
- }
-
-If there is only one digit, the second option in the alternation matches.  If
-there are multiple digits, recursion will match them through the first
-alternation.
-
-That's the intention.  The results are insidious.
-
-The recursive descent parser enters the C<list> rule. Its first option is to
-enter the list rule again, so it does.  Recursive descent is a X<depth-first
-algorithm> depth-first algorithm; PGE will continue to descend down a
-particular path until it finds a successful match or a match failure. In this
-case, it matches C<list>, then it matches C<list> again, then it matches
-C<list> again, and so on.  This rule forms an infinite loop -- a pattern called
-X<left recursion> I<left recursion>.  The problem is that the left-most item of
-the left-most alternation is itself a recursion.
-
-The rule above does not recurse infinitely when rewritten as:
-
- rule list {
-    <digit> | <list> ',' <digit>
- }
-
-... or even:
-
- rule list {
-    <digit> ',' <list> | <digit>
- }
-
-Both options ensure that the left-most item in the rule is not recursive.
-
-Left recursion may be trickier.  It's not immediately obvious in this grammar:
-
- rule term {
-    <expression> '*' <term> | <digit>
- }
-
- rule expression {
-    <term> '+' <expression> | <term>
- }
-
-Even this common, limited subset of mathematical equations has the same
-problem.  To match a C<term>, the parser first tries to match an C<expression>,
-which in turn matches a C<term> and then an C<expression> ....
-
-Again, the solution is simple.  Rewrite at least one of the rules so that the
-first condition it tries to match is not itself a recursive situation.
-
-=head3 Operator Precedence Parser
-
-Recursive descent parsing can be inefficient where statements have lots of
-little tokens and many possible options to match.  For example, mathematical
-expressions are very open-ended, with many valid forms which are difficult to
-anticipate.  Consider the expression:
-
- a + b * c + d
-
-A recursive descent parser will undergo significant trial and error to parse
-this statement.  Recursive descent parsing is not ideal for these situations.
-Instead, a type of bottom-up parser called an I<operator precedence> X<Parser,
-Operator precedence> parser is much better.
-
-=for author
-
-Is this a categorization of all opps or just PGE's opp?
-
-=end for
-
-Operator precedence parsers work similarly to more versatile bottom-up parsers
-such as Lex or Yacc, but are optimized for use with expressions and equations.
-Equations have two subtypes, I<terms> and I<operators>. Operators themselves
-have several subtypes, including prefix (C<-a>), postfix (C<i++>), infix (C<x +
-y>), circumfix (C<[z]>), postcircumfix (C<a[b]>), and list (C<1, 2, 3>). Each
-operator gets its own precedence number that specifies how closely it binds to
-the terms. The previous example should parse as:
-
- a + (b * c) + d
-
-... because the C<*> operator has a higher precedence -- binding more tightly
-to its terms -- than the C<+> operator.
-
-Within a grammar, switch from the top-down recursive descent parser to the
-bottom-up operator precedence parser with an C<optable> X<Parser, optable>
-rule:
-
- rule expression is optable { ... }
-
-The C<...> ellipsis isn't an editorial shortcut, it's the Perl 6 operator to
-define a function signature. The C<...> indicates that this is just a
-signature; the actual implementation is elsewhere.  In this case, that location
-is in the definition of the optable.
-
-=head3 Protofunction Definitions
-
-X<Protofunctions>
-
-Protofunctions define operators in the optable in the same way that rules and
-tokens make up the grammar. A proto declares a rule, defined elsewhere, which
-other code may override dynamically.  In this case, PCT takes information from
-the proto declaration and fills in the details. The "dynamic overriding"
-implies that a high-level language itself can modify its own grammar at
-run time, by overriding the proto definitions for its operator table. Some
-languages call this process X<operator overloading> I<operator overloading>.
-
-A proto definition resembles:
-
- 'proto' <proto_name> [ 'is' <property> ] '{' '...' '}'
-
-The name of the operator, noted as C<< <proto_name> >>, contains both a
-location part and an identifier part. The location is the type of the operator,
-such as infix, postfix, prefix, circumfix, and postcircumfix. The name of the
-operator is the symbol used for the operator in any of the quotes that Perl 6
-understands:
-
- proto infix:<+>                  # a + b
- proto postfix:'--'               # i--
- proto circumfix:«<>»             # <x>
-
-The C<is> X<Parser, is> keyword defines a property of the rule. Examples
-include:
-
- is precedence(1)     # Specifies an exact precedence
- is equiv('+')        # Has the same precedence as the "+" operator
- is assoc('right')    # Right associative. May also be "left" or "list"
- is pirop('add')      # Operands are passed to the PIR operator "add"
- is subname('mySub')  # Operands are passed to the function "mySub"
- is pasttype('if')    # Operands are passed as children to an "if" PAST node in
-                      # the parse tree
- is parsed(&myRule)   # The token is parsed and identified using the rule
-                      # "myRule" from the top-down parser
-
-=for author
-
-Please review.
-
-=end for
-
-Protofunction definitions are function signatures; you can override them with
-multimethod dispatch. This means that you can write functions I<with the same
-name> as the rule to implement the behavior of the operator.  Here's a proto:
-
- rule infix:"+" { ... }
-
-... and its corresponding PIR rule:
-
-=begin PIR
-
- .sub 'infix:+'
-    .param pmc a
-    .param pmc b
-    .local pmc c
-    c = a + b
-    .return(c)
- .end
-
-=end PIR
-
-You may ask "Why have an C<is subname()> property, if you can define all
-operators as subroutines?" Using the C<is subname()> property allows PCT to
-call a subroutine of a different name than the operator.  This is a good idea
-if there is already a built-in function in the language that duplicates the
-functionality of the operator.  There is no sense in duplicating behavior.
-
-The great thing about protos being overloadable is that you can specify
-different functions to call with different signatures:
-
-=begin PIR
-
- .sub 'infix:+' :multi('Integer', 'Integer')
-    #...
- .end
-
- .sub 'infix:+' :multi('CLispRatio', 'Number')
-    #...
- .end
-
- .sub 'infix:+' :multi('Perl6Double', 'PythonInteger')
-    #...
- .end
-
-=end PIR
-
-This list can be a bit intimidating, and it's hard to imagine that it would be
-necessary to write up a new function to handle addition between every
-conceivable pair of operands. Fortunately, this is rarely the case in Parrot,
-because all these data types support the common VTABLE interface. For most data
-types Parrot already has basic arithmetic operations built in, and it's only
-necessary to override for those data types with special needs.
-
-=head3 Hypothetical Variables
-
-Z<CHP-7-SECT-5>
-
-X<variables;hypothetical>
-X<hypothetical variables>
-X<rules;captures>
-Hypothetical variables are a powerful way of building up data structures from
-within a match. Ordinary captures with C<()> store the result of the captures
-in C<$1>, C<$2>, etc. PGE stores values in these variables if the match is
-successful, but throws them away if the match fails.  The numbered capture
-variables are accessible outside the match, but only within the immediate
-surrounding lexical scope:
-
-  "Zaphod Beeblebrox" ~~ m:w/ (\w+) (\w+) /;
-
-  print $1; # prints Zaphod
-
-You can also capture into any user-defined variable with the binding operator
-C<:=> -- I<if> you have declared these variables in a lexical scope enclosing
-the rule:
-
-  my $person;
-  "Zaphod's just this guy." ~~ / ^ $person := (\w+) /;
-  print $person; # prints Zaphod
-
-You may capture repeated matches into an array:
-
-  my @words;
-  "feefifofum" ~~ / @words := (f<-[f]>+)* /;
-  # @words contains ("fee", "fi", "fo", "fum")
-
-You may capture pairs of repeated matches into a hash:
-
-  my %customers;
-  $records ~~ m:w/ %customers := [ E<lt>idE<gt> = E<lt>nameE<gt> \n]* /;
-
-If you don't need the captured value outside the rule, use a C<$?> variable
-instead. These are only directly accessible within the rule:
-
-  "Zaphod saw Zaphod" ~~ m:w/ $?name := (\w+) \w+ $?name/;
-
-A match of a named rule stores the result in a C<$?> variable with the same
-name as the rule. These variables are also accessible only within the rule:
-
-  "Zaphod saw Zaphod" ~~ m:w/ E<lt>nameE<gt> \w+ $?name /;
-
-=for author
-
-This next paragraph feels out of place; is there more?
-
-=end for
-
-When a rule matches a sequence of input tokens, PCT calls an associated method
-within NQP to convert that match into an AST node, which it inserts into the
-I<parse tree>.
-
-=head3 Basic Rules
-
-Consider the simple example rule:
-
- rule persons_name {
-    <first_name> <last_name>
- }
-
-... and two example tokens:
-
- token first_name { <alpha>+ }
- token last_name  { <alpha>+ }
-
-The special token C<< <alpha> >> is a built-in construct that only accepts
-upper case and lower case letters. The C<+> after the C<< <alpha> >> tag is a
-short way of saying "one or more". The rule will match names like C<Darth
-Vader>N<It also matches many strings that I<aren't> real names>, but won't
-match something like C<C 3P0>.
-
-This rule I<will> match C<Jar Jar Binks>, but not in the way you might
-expect: It would match the first "Jar" as C<< <first_name> >>, the second
-"Jar" as C<< <last_name> >>, and ignore "Binks"N<You should ignore the whole
-thing.>.
-
-=for author
-
-The rest seems vestigial.  An example like this should precede the rest of the
-chapter.  There are forward references, but it's a decent overview for people
-who haven't used similar systems before -- if you avoid going out in the weeds.
-
-=end for
-
-This example shows another new construct, the square brackets. Square
-brackets are ways to group things together. The star at the end means
-that we take all the things inside the brackets zero or more times.
-This is similar to the plus, except the plus matches one or more times.
-Notice, however, that the above rule always matches a comma at the end,
-so we would need to have something like:
-
- Darth Vader, Luke Skywalker,
-
-Instead of something more natural like:
-
- Darth Vader, Luke Skywalker
-
-We can modify the rule a little bit so that it always ends with a name
-instead of a comma:
-
- rule TOP {
-    [ <persons_name> ',' ]* <persons_name>
- }
-
-Now we don't need a trailing comma, but at the same time we can't match
-an empty file because it always expects to have at least one name at the
-end. If we still want to match empty files successfully, we need to make
-the whole rule optional:
-
- rule TOP {
-    [ [ <persons_name> ',' ]* <persons_name> ]?
- }
-
-We've grouped the whole rule together in another set of brackets, and
-put a "?" question mark at the end. The question mark means zero or
-one of the prior item.
-
-The symbols "*" (zero or more), "+" (one or more) and "?" are called
-I<quantifiers>, and allow an item in the rule to match a variable
-number of times. These aren't the only quantifiers, but they are the
-most common. We will talk about other quantifiers later on.
-
-=cut
-
-# Local variables:
-#   c-file-style: "parrot"
-# End:
-# vim: expandtab shiftwidth=4:

Deleted: branches/auto_attrs/docs/book/draft/ch06_nqp.pod
==============================================================================
--- branches/auto_attrs/docs/book/draft/ch06_nqp.pod	Mon Aug 10 11:28:05 2009	(r40476)
+++ /dev/null	00:00:00 1970	(deleted)
@@ -1,219 +0,0 @@
-=pod
-
-=head1 Grammar Actions
-
-The job of the grammar is to match input patterns from the source language.
-These patterns then need to get converted to nodes in the abstract syntax
-tree for manipulation in other stages of the compiler. We've already seen
-one example of a subroutine structure that takes a match and produces a
-tree node: Protofunctions. Protofunction signatures aren't the only way to
-apply functions to rules matched by a parser.  They are limited and are
-slightly primitive, but effective for handling operators.  There is an
-easier and more powerful way to write subroutines to convert match objects
-into parse tree nodes, using a language that's almost, but Not Quite Perl.
-
-X<NQP>
-X<Not Quite Perl>
-NQP (Not Quite Perl) is a small language which offers a limited subset of
-Perl 6's syntax and semantics. Though it originated as a bootstrapping tool
-for the Rakudo Perl 6 compiler, several other Parrot-based compilers use it
-as well.  It has become a permanent member of PCT, and therefore a permanent
-part of the Parrot code base. 
-
-NQP represents almost the smallest subset of the Perl 6 language necessary to
-implement parser transformations, plus a few syntactic convenience features
-that developers have requested. NQP's Perl 6 subset shows its Perl 5 roots,
-so existing Perl 5 programmers should find much of it familiar and should be
-able to leverage their existing skills for writing compilers.
-
-In PGE, at the time of a match the grammar can invoke an action using the
-special C<{*}> symbol. In general, these action methods are
-written in NQP, although it is possible for them to be written in PIR N<In
-fact, this is how the NQP compiler itself is written>. We won't discuss the
-PIR case here because it's uncommon and needlessly difficult. NQP is the
-standard and preferred choice for this.
-
-=head2 NQP Basics
-
-X<sigils>
-X<scalar>
-Like all flavors and versions of Perl, NQP uses special prefix symbols called
-I<sigils> to distinguish variable types. The C<$> sigil represents scalars,
-C<@> arrays, and C<%> hashes.  A scalar is any single value which can
-interchangeably contain a string value, an integer value, or an object
-reference. Simple NQP assignments are:
-
- $scalar := "This is a string"
- $x      := 123
- $pi     := 3.1415      # rounding
-
-X<bind operator>
-X<operators; binding>
-X<:=>
-The C<:=> I<bind> operator performs reference assignment in NQP. Reference
-assignment makes one variable into an alias for another. This means that
-the two variables are just different names for the same storage location,
-and changes to one will change both. It's important to remember that a bind
-is not a copy!
-
-NQP has hashes and arrays just like other flavors of Perl and various
-dynamic languages. NQP does not have a notion of hash and array context,
-but otherwise it works the way you would expect. Arrays have the C<@> sigil,
-and hashes have the C<%> sigil. Here are some examples:
-
-  @ary[0] := 1;
-  @ary[1] := "foo";
-  ...
-
-  %hsh{'bar'} := 2;
-  %hsh{'baz'} := "parrot";
-  ...
-
-There is also a nice shorthand way to index hashes, using angle brackets:
-
-  %hsh<bar> := "parrot";
-
-It's also possible to assign a list in I<scalar context>:
-
- $array_but_a_scalar := (1, 2, 3, 4)
-
-Or you could write a new function in PIR to create a new array from a variadic
-argument list:
-
- @my_array := create_new_array(1, 2, 3, 4)
-
-... which calls the PIR function:
-
-=begin PIR
-
- .namespace []
-
- .sub 'create_new_array'
-     .param pmc elems :slurpy
-     .return(elems)
- .end
-
-=end PIR
-
-=head3 Calling Actions From Rules
-
-=for editor
-
-Needs a link to that section.
-
-=end for
-
-As mentioned in the chapter on grammar rules, the funny little C<{*}> symbol
-calls an action. The action in question is an NQP method with the same name as
-the rule that calls it. NQP rules can have two different signatures:
-
- method name ($/)      { ... }
- method name($/, $key) { ... }
-
-Where does the key come from?  Consider this grammar:
-
- rule cavepeople {
-      'Fred'  {*}    #= Caveman
-    | 'Wilma' {*}    #= Cavewoman
-    | 'Dino'  {*}    #= Dinosaur
- }
-
-The C<cavepeople> rule demonstrates the result:
-
- method cavepeople($/, $key) {
-    if $key eq 'Caveman' {
-        say "We've found a caveman!";
-    } elsif $key eq 'Cavewoman' {
-        say "We've found a cavewoman!";
-    } elsif $key eq 'Dinosaur' {
-        say "A dinosaur isn't a caveperson at all!";
-    }
- }
-
-The key is a string that contains whatever text follows the C<#=> symbol.
-Without a C<#=> following the rule invocation, there's no C<$key> to use in the
-method.  If you attempt to use one without the other, the NQP compiler will die
-with error messages about mismatched argument/parameter numbers.
-
-=head3 The Match Object C<$/>
-
-X<match object>
-X<$/>
-The match object C<$/> is a data structure that's all business: it's both a
-hash and an array. Because it's a special variable used pervasively in PCT, it
-has a special shortcut syntax:
-
- $/{'Match_item'}   is the same as $<Match_item>
- $/[0]              is the same as $[0]
-
-Each key in the match object's hash is the name of a matched rule.  Given a
-file containing "C<x + 5>" and a rule:
-
- rule introductions {
-    <variable> <operator> <number>
- }
-
-The resulting match object will contain the key/value pairs:
-
- "variable" => "x"
- "operator" => "+"
- "number"   => "5"
-
-When the match contains multiple values with the same name, or when rules have
-quantifiers such as C<*> or C<+>, the values in the hash may be arrays.  Given
-the input "A A A B B" and the rule:
-
- rule letters {
-    <vowel>* <consonant>*
- }
-
-The match object will contain the pairs:
-
- "vowel"     => ["A", "A", "A"]
- "consonant" => ["B", "B"]
-
-X<$( ) operator>
-
-Use the C<$( )> operator to count the number of matches in each group (by
-casting it to a scalar):
-
- $($<vowel>) == 3
-
-=head3 Inline PIR
-
-=for author
-
-Needs expansion.
-
-=end for
-
-X<{{ }}>
-X<double curly brackets>
-Sometimes NQP isn't quite flexible enough to handle transforms appropriately.
-In a PGE rule, the C<{{ }}> double curly brackets demarcate inline-PIR mode.
-PGE will execute any PIR code in those brackets. You can access C<$/> directly
-in the grammar without having to jump into NQP.
-
-=head3 PAST Nodes
-
-X<PAST>
-X<PAST nodes>
-NQP's job is to make abstract syntax trees.  These trees are all objects -- and
-as such, instances of PAST nodes.  Each PAST class represents a unique program
-construct.  These constructs are common and simple, but combine to represent
-complicated programming structures.
-
-=head3 Making Trees
-
-Every action has the ability to create a PAST node that represents that action,
-as well as any children of that node. Calling C<make> on that node adds it into
-the growing PAST tree that PCT maintains. Once the C<TOP> rule matches
-successfully and returns, PCT optimizes and converts that tree into PIR and PBC
-for execution.
-
-=cut
-
-# Local variables:
-#   c-file-style: "parrot"
-# End:
-# vim: expandtab shiftwidth=4:

Copied: branches/auto_attrs/docs/book/pct/ch01_introduction.pod (from r40476, trunk/docs/book/pct/ch01_introduction.pod)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/auto_attrs/docs/book/pct/ch01_introduction.pod	Mon Aug 10 11:28:05 2009	(r40477, copy of r40476, trunk/docs/book/pct/ch01_introduction.pod)
@@ -0,0 +1,215 @@
+=pod
+
+=head1 Introduction
+
+Parrot is a language-neutral virtual machine for dynamic languages such as
+Ruby, Python, PHP, and Perl. It hosts a powerful suite of compiler tools
+tailored to dynamic languages and a next generation regular expression engine.
+Its architecture is fundamentally different than existing virtual machines such
+as the JVM or CLR, with optimizations for dynamic languages included, a
+register-based system rather than stack-based, and the use of continuations as
+the core means of flow control.
+
+The name "Parrot" was inspired by Monty Python's Parrot sketch. As an April
+Fools' Day joke in 2001, Simon Cozens published "Programming Parrot", a
+fictional interview between Guido van Rossum and Larry Wall detailing their
+plans to merge Python and Perl into a new language called Parrot
+(U<http://www.perl.com/pub/a/2001/04/01/parrot.htm>).
+
+=head2 Parrot Resources
+
+The starting point for all things related to Parrot is the main website
+U<http://www.parrot.org/>. The site lists additional resources, as well as
+recent news and information about the project and the Parrot Foundation, which
+holds the copyright over Parrot and helps guide development and the community.
+
+=head3 Documentation
+
+Parrot includes extensive documentation in the distribution. The full
+documentation for the latest release is available online at
+U<http://docs.parrot.org/>.
+
+=head3 Mailing Lists
+
+X<parrot-dev (Parrot mailing list)>
+X<mailing lists>
+
+The primary mailing list for Parrot is I<parrot-dev at lists.parrot.org>.  If
+you're interested in getting involved in development, you may also want to
+follow the I<parrot-commits> and I<parrot-tickets> lists.  Information on all
+the Parrot mailing lists and subscription forms for each is available at
+U<http://lists.parrot.org/mailman/listinfo>.
+
+The archives for I<parrot-dev> are also available on Google Groups at
+U<http://groups.google.com/group/parrot-dev> and via NNTP at
+U<nntp://news.gmane.org/gmane.comp.compilers.parrot.devel>.
+
+=head3 IRC
+
+X<#parrot (Parrot IRC channel)>
+X<IRC channel (#parrot)>
+
+Parrot developers and users congregate on IRC at C<#parrot> on the
+U<irc://irc.parrot.org> server. It's a good place to get real-time answers to
+questions or see how things are progressing.
+
+=head3 Issue Tracking & Wiki
+
+X<trac.parrot.org website>
+X<issue tracking (trac.parrot.org)>
+
+Parrot developers track issues with a Trac site at U<https://trac.parrot.org/>.
+Users can submit new tickets and track the status of existing tickets.  The
+site also includes a wiki used in project development, a source code browser,
+and the project roadmap.
+
+=head2 Parrot Development
+
+X<development cycles>
+
+Parrot's first release occurred in September 2001.  It reached 1.0 in March
+2009. The Parrot project makes releases on the third Tuesday of each month. Two
+releases a year E<mdash> occurring every January and July E<mdash> are
+"supported" releases intended for production use.  The other ten releases are
+development releases for language implementers and testers.
+
+Development proceeds in cycles around releases. Activity just before a release
+focuses on closing tickets, fixing bugs, reviewing documentation, and preparing
+for the release. Immediately after the release, larger changes occur, such as
+merging branches, adding large features, or removing deprecated features. This
+allows developers to ensure that changes have sufficient testing time before
+the next release.  Releases also encourage feedback as casual users and testers
+explore the newest versions.
+
+=head2 The Parrot Team
+
+Parrot developers fulfill several roles according to their skills and interests.
+
+=over 4
+
+=item Architect
+
+X<architect role>
+
+The architect has primary responsibility for setting the overall direction of
+the project, facilitating team communication, and explaining and evaluating
+architectural issues. The architect makes design decisions and documents them
+in Parrot Design Documents, and oversees design and documentation work
+delegated to other members of the team to provide a coherent vision across the
+project. The architect also works with the release managers to develop and
+maintain the release schedule. Allison Randal currently leads the Parrot
+project as architect.
+
+=item Release Managers
+
+X<release manager role>
+
+Release managers manage and produce monthly releases according to the release
+schedule. Parrot has multiple release managers who rotate the responsibility
+for each monthly release. The release managers develop and maintain the release
+schedule jointly with the project architect.
+
+=item Metacommitter
+
+X<metacommitter role>
+
+Metacommitters manage commit access to the Parrot repository. Once a
+contributor is selected for commit access, a metacommitter gives the new
+committer access to the SVN repository and the bugtracker. The architect is a
+metacommitter, but other team members also hold this role.
+
+=item Committer
+
+X<committer role>
+
+Contributors who submit numerous, high-quality patches may be considered to
+become a committer. Committers have commit access to the full Parrot
+repository, though they often specialize on particular parts of the project.
+Contributors may be considered for commit access either by being nominated by
+another committer, or by requesting it.
+
+=item Core Developer
+
+X<core developer role>
+
+Core developers develop and maintain core subsystems such as the I/O
+subsystem, the exceptions system, or the concurrency scheduler.
+
+=item Compiler Developer
+
+X<compiler developer role>
+
+Compiler developers develop and maintain one or more Parrot front-end
+compilers such as IMCC, PIRC, PGE and TGE.
+
+=item High-Level Language Developer
+
+X<HLL developer role>
+
+Developers who work on any of the high-level languages that target
+ParrotE<mdash>such as Lua, Perl, PHP, Python, Ruby, or TclE<mdash>are
+high-level language developers. The Parrot repository includes a few example
+languages. A full list of languages is available at
+U<https://trac.parrot.org/parrot/wiki/Languages>.
+
+=item Build Manager
+
+X<build manager role>
+
+Build managers maintain and extend configuration and build subsystems.
+They review smoke reports and attempt to extend platform support.
+
+=item Tester
+
+X<tester role>
+
+Testers develop, maintain, and extend the core test suite coverage and testing
+tools. Testers are also responsible for testing goals, including complete
+coverage of core components on targeted platforms.
+
+=item Patch Monsters
+
+X<patch monster role>
+
+Hackers and developers submit patches to Parrot every day, and it takes a keen
+eye and a steady hand to review and apply them all. Patch monsters check
+patches for conformance with coding standards and desirability of features,
+rework them as necessary, verify that the patches work as desired, and apply
+them.
+
+=item Cage Cleaners
+
+X<cage cleaner role>
+
+The cage cleaners ensure that development follows the project's coding
+standards, documentation is complete and accurate, all tests function properly,
+and new users have accurate and comprehensive coding examples. A special class
+of Trac tickets is available for these tasks.  Cage cleaning tasks run the
+gamut from entry-level to advanced; this is a good entry point for new users
+to work on Parrot.
+
+=item General Contributor
+
+X<contributor role>
+
+Contributors write code or documentation, report bugs, take part in email or
+online conversations, or contribute to the project in other ways. All volunteer
+contributions are appreciated.
+
+=back
+
+=head2 Licensing
+
+X<license>
+
+The Parrot Foundation supports the Parrot development community and holds
+trademarks and copyrights to Parrot.  The project is available under the
+Artistic License 2.0, allowing free use in commercial and open source/free
+software contexts.
+
+=cut
+
+# Local variables:
+#   c-file-style: "parrot"
+# End:
+# vim: expandtab shiftwidth=4:

Copied: branches/auto_attrs/docs/book/pct/ch02_getting_started.pod (from r40476, trunk/docs/book/pct/ch02_getting_started.pod)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/auto_attrs/docs/book/pct/ch02_getting_started.pod	Mon Aug 10 11:28:05 2009	(r40477, copy of r40476, trunk/docs/book/pct/ch02_getting_started.pod)
@@ -0,0 +1,182 @@
+=pod
+
+=head1 Getting Started
+
+Before you can use Parrot, you have to get it running on your machine.
+
+=head2 Installing Parrot
+
+The simplest way to install Parrot is to use a pre-compiled binary for your
+operating system or distribution. Packages are available for many packaging
+systems, including Debian, Ubuntu, Fedora, Mandriva, FreeBSD, Cygwin, and
+MacPorts. The Parrot website lists all known packages at
+U<http://www.parrot.org/download>. A binary installer for Windows is also
+available at U<http://parrotwin32.sourceforge.net/>.
+
+If packages aren't available on your system, download the latest supported
+release from U<http://www.parrot.org/release/supported>.
+
+You need a C compiler and a make utility to build Parrot from source code --
+usually C<gcc> and C<make>, but Parrot can build with standard compiler
+toolchains on different operating systems.  Perl 5.8 is also a prerequisite for
+configuring and building Parrot.
+
+If you have these dependencies installed, build the core virtual machine and
+compiler toolkit and run the standard test suite with the commands:
+
+  $ B<perl Configure.pl>
+  $ B<make>
+  $ B<make test>
+
+By default, Parrot installs to directories F<bin/>, F<lib/>, et cetera under
+the prefix F</usr/local>.  If you have privileges to write to these
+directories, install Parrot with:
+
+  $ B<make install>
+
+To install Parrot beneath a different prefix, use the C<--prefix> option to
+C<Configure.pl>:
+
+    $ B<perl Configure.pl --prefix=/home/me/parrot>
+
+If you intend to I<develop> -- not just I<use> -- a language on Parrot, install
+the Parrot developer tools as well:
+
+  $ B<make install-dev>
+
+=head2 Running Parrot
+
+Once you've installed Parrot, run it.  Create a test file called F<news.pasm>.
+C<.pasm> files contain Parrot Assembly Language (PASM) instructions; this is a
+low-level language native to the Parrot virtual machine.
+
+=begin PASM
+
+  say "Here is the news for Parrots."
+  end
+
+=end PASM
+
+Now run this file with:
+
+  $ B<parrot news.pasm>
+
+which will print:
+
+  Here is the news for Parrots.
+
+=head2 Running a Language on Parrot
+
+Next, try out one of Parrot's high-level languages. Create a test file
+called F<more_news.nqp>:
+
+  say "No parrots were involved in an accident on the M1 today..."
+
+Then run it as:
+
+  $ nqp more_news.nqp
+
+which will print:
+
+  No parrots were involved in an accident on the M1 today...
+
+=head2 What Next?
+
+This book describes Parrot in terms of tasks it supports.  You may pick and
+choose chapters based on your area of interest:
+
+=over 4
+
+=item Chapter 3, I<Parrot Intermediate Representation>
+
+Parrot Intermediate Representation (PIR) is a mid-level language native to the
+Parrot virtual machine that's commonly used for writing extensions and tools for
+Parrot.
+
+=item Chapter 4, I<Compiler Tools>
+
+The Parrot Compiler Toolkit (PCT) provides a common infrastructure and
+utilities for implementing languages on Parrot.
+
+=item Chapter 5, I<Grammar Engine>
+
+The Parrot Grammar Engine (PGE) is a powerful regular expression engine and
+recursive descent parser. PGE is part of the compiler tools; understanding PGE
+is essential to implementing a language on Parrot.
+
+=item Chapter 6, I<Grammar Actions>
+
+NQP (Not Quite Perl) is a lightweight language loosely inspired by Perl 6. NQP
+is part of the compiler tools used for transforming a Parrot-hosted language
+into instructions for Parrot to execute.
+
+=item Chapter 7, I<Dynamic PMCs>
+
+=for author
+
+This chapter suggests the need for a chapter on core PMCs.  Alternately, this
+chapter could cover PMCs in general and dynpmcs as a special case of PMCs.
+
+=end for
+
+Parrot allows language developers to extend Parrot's core data types to suit
+the needs of advanced languages.
+
+=item Chapter 8, I<Dynamic Opcodes>
+
+=for author
+
+The same point applies for ops and dynops.
+
+=end for
+
+Parrot allows language developers to extend Parrot's core instruction set --
+again to suit the needs of advanced languages.
+
+=item Chapter 10, I<Instruction Reference>
+
+Parrot's standard instruction set provides powerful behavior for primitive
+operations, control flow, object orientation, exception handling, and more.
+
+=item Chapter 11, I<Directive Reference>
+
+Parrot supports directives used within PIR and PASM code to change the behavior
+of code and to control what happens in bytecode.
+
+=item Chapter 13, I<Operator Reference>
+
+PIR provides several higher-level operators as a convenience to programmers and
+code generators.
+
+=item Appendix A, I<Glossary>
+
+Parrot and its environment have common jargon.
+
+=item Appendix B, I<Command-Line Options>
+
+Parrot supports several flags to control execution modes, debugging, library
+loading, and more.
+
+=item Appendix C, I<Build Options>
+
+Parrot's configuration process gives administrators and developers tremendous
+control over the build system.
+
+=item Appendix D, I<Source Code>
+
+Parrot's source code is organized along logical lines -- logical, once you know
+the layout.
+
+=item Appendix E, I<Patch Submission>
+
+Parrot depends on the combined efforts of numerous volunteers.  Your
+contributions are very welcome.
+
+=back
+
+=cut
+
+# Local variables:
+#   c-file-style: "parrot"
+# End:
+# vim: expandtab shiftwidth=4:

Copied: branches/auto_attrs/docs/book/pct/ch03_compiler_tools.pod (from r40476, trunk/docs/book/pct/ch03_compiler_tools.pod)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/auto_attrs/docs/book/pct/ch03_compiler_tools.pod	Mon Aug 10 11:28:05 2009	(r40477, copy of r40476, trunk/docs/book/pct/ch03_compiler_tools.pod)
@@ -0,0 +1,333 @@
+=pod
+
+=head1 Parrot Compiler Tools
+
+Z<CHP-4>
+
+Parrot is able to natively compile and execute code in two low-level
+languages, PASM and PIR. These two languages, which are very similar to
+one another, are very close to the machine and analogous to assembly
+languages from hardware processors and other virtual machines. While they
+do expose the power of the PVM in a very direct way, PASM and PIR are not
+designed to be used for writing large or maintainable programs. For these
+tasks, higher level languages such as Perl 6, Python 3, Tcl, Ruby, and PHP
+are preferred instead, and the ultimate goal of Parrot is to support these
+languages and more. The question is, how do we get programs written in these
+languages running on Parrot? The answer is PCT.
+
+PCT is a set of classes and tools that enable the easy creation of
+compilers for high level languages (HLLs) that will run on Parrot. PCT is
+itself written in PIR, and compiles HLL code down to PIR for compilation
+and execution on Parrot, but allows the compiler designer to do most work
+in a high-level dialect of the Perl 6 language. The result is a flexible,
+dynamic language that can be used for creating other flexible, dynamic
+languages.
+
+
+=head2 History
+
+The Parrot Virtual Machine was originally conceived of as the engine for
+executing the new Perl 6 language, when specifications for that were first
+starting to be drafted. However, as time went on it was decided that Parrot
+would benefit from having a clean abstraction layer between its internals
+and the Perl 6 language syntax. This clean abstraction layer brought with it
+the side effect that Parrot could be used to host a wide variety of dynamic
+languages, not just Perl 6. And beyond just hosting them, it could
+facilitate their advancement, interaction, and code sharing.
+
+The end result is that Parrot is both powerful enough to support one of the
+most modern and powerful dynamic languages, Perl 6, but well-encapsulated
+enough to host other languages as well. Parrot would be more powerful and
+feature-full than any single language, and would provide all that power and
+all those features to any languages that wanted them.
+
+Perl 6, under the project name Rakudo N<http://www.rakudo.org>, is still one
+of the primary users of Parrot and therefore one of the primary drivers in
+its development. However, compilers for other dynamic languages such as
+Python 3, Ruby, and Tcl are under active development. Several compilers exist
+which are not being as actively developed, and many compilers exist for
+new languages and toy languages which do not exist anywhere else.
+
+=head2 Capabilities and Benefits
+
+Parrot exposes a rich interface for high level languages to use, including
+several important features: a robust exceptions system, compilation into
+platform-independent bytecode, a clean extension and embedding interface,
+just-in-time compilation to machine code, native library interface mechanisms,
+garbage collection, support for objects and classes, and a robust concurrency
+model.  Designing a new language or implementing a new compiler for an old
+language is easier with all of these features designed, implemented, tested,
+and supported in a VM already. In fact, the only tasks required of compiler
+implementers who target the Parrot platform are the creation of the parser
+and the language runtime.
+
+Parrot also has a number of other benefits for compiler writers to tap into:
+
+=over 4
+
+=item * Write Once and Share
+
+All HLLs on Parrot ultimately compile down to Parrot's platform-independent
+bytecode which Parrot can execute natively. This means at the lowest level
+Parrot supports interoperability between programs written in multiple high
+level languages. Find a library you want to use in Perl's CPAN
+N<http://www.cpan.org>? Have a web framework you want to use that's written
+in Ruby? A mathematics library that only has C bindings? Want to plug all
+of these things into a web application you are writing in PHP? Parrot
+supports this and more.
+
+=item * Native Library Support
+
+Parrot has a robust system for interfacing with external native code
+libraries, such as those commonly written in C, C++, Fortran and other
+compiled languages. Where previously every interpreter would need to
+maintain its own bindings and interfaces to libraries, Parrot enables
+developers to write library bindings once and use them seamlessly from
+any language executing on Parrot. Want to use Tcl's Tk libraries, along with
+Python's image manipulation libraries in a program you are writing in Perl?
+Parrot supports that.
+
+=back
+
+=head2 Compilation and Hosting
+
+For language hosting and interoperability to work, language developers need
+to write compilers that convert source code written in high level languages
+to Parrot's bytecode.  This process is analogous to how a compiler such as
+GCC converts C or C++ into machine code -- though instead of targeting
+machine code for a specific hardware platform, compilers written in Parrot
+produce Parrot code which can run on any hardware platform that can run
+Parrot.
+
+Creating a compiler for Parrot written directly in PIR is possible. Creating
+a compiler in C using the common tools lex and yacc is also possible.
+Neither of these options is really as good, as fast, or as powerful as
+writing a compiler using PCT.
+
+PCT is a suite of compiler tools that helps to abstract and automate the
+process of writing a new compiler on Parrot. Lexical analysis, parsing,
+optimization, resource allocation, and code generation are all handled
+internally by PCT and the compiler designer does not need to be concerned
+with any of it.
+
+
+=head2 PCT Overview
+
+The X<Parrot Compiler Tools;PCT> Parrot Compiler Tools (PCT) enable the
+creation of high-level language compilers and runtimes.  Though the Perl 6
+development team originally created these tools to aid in the development
+of the Rakudo Perl 6 implementation, several other Parrot-hosted compilers
+also use PCT to great effect. Writing a compiler using Perl 6 syntax and
+dynamic language tools is much easier than writing a compiler in C,
+C<lex>, and C<yacc>.
+
+PCT is broken down into three separate tools:
+
+=over 4
+
+=item * Not Quite Perl (NQP)
+
+NQP is a subset of the Perl 6 language that requires no runtime library to
+execute.
+
+=item * Perl Grammar Engine (PGE)
+
+PGE is an implementation of Perl 6's powerful regular expression and grammar
+tools.
+
+=item * HLLCompiler
+
+The HLLCompiler compiler helps to manage and encapsulate the compilation
+process. An HLLCompiler object, once created, enables the user to use the
+compiler interactively from the commandline, in batch mode from code files,
+or at runtime using a runtime eval.
+
+=back
+
+=head2 Grammars and Action Files
+
+A PCT-based compiler requires three basic files: the main entry point file
+which is typically written in PIR, the grammar specification file which uses
+PGE, and the grammar actions file which is in NQP. These are just the three
+mandatory components; most languages are also going to require additional
+files for runtime libraries and other features as well.
+
+=over 4
+
+=item * The main file
+
+The main file is (often) a PIR program which contains the C<:main> function
+that creates and executes the compiler object. This program instantiates a
+C<PCT::HLLCompiler> subclass, loads any necessary support libraries, and
+initializes any compiler- or language-specific data.
+
+The main file tends to be short.  The guts of the compiler logic is in the
+grammar and actions files.  Runtime support and auxiliary functions often
+appear in other files, by convention.  This separation of concerns tends to
+make compilers easier to maintain.
+
+=item * A grammar file
+
+The high-level language's grammar appears in a F<.pg> file.  This file
+subclasses the C<PCT::Grammar> class and implements all of the necessary
+rules -- written using PGE -- to parse the language.
+
+=item * An actions file
+
+The actions file contains methods -- written in NQP -- on the C<PCT::Grammar::Actions>
+object which receive parse data from the grammar rules and construct an
+X<Abstract Syntax Tree;Parrot Abstract Syntax Tree;AST;PAST> Abstract Syntax
+Tree (AST).N<The Parrot version of an AST is, of course, the Parrot Abstract
+Syntax Tree, or PAST.>
+
+=back
+
+PCT's workflow is customizable, but simple.  The compiler passes the source
+code of the HLL into the grammar engine.  The grammar engine parses this code
+and returns a X<PGE;Match Object> special Match object which represents a
+parsed version of the code.  The compiler then passes this match object to the
+action methods, which convert it in stages into PAST.  The compiler finally
+converts this PAST into PIR code, which it can save to a file, convert to
+bytecode, or execute directly.
+
+=head3 C<mk_language_shell.pl>
+
+The only way creating a new language compiler could be easier is if these files
+created themselves. PCT includes a tool to do just that:
+C<mk_language_shell.pl>.  This program automatically creates a new directory in
+F<languages/> for your new language, the necessary three files, starter files
+for libraries, a F<Makefile> to automate the build process, and a basic test
+harness to demonstrate that your language works as expected.
+
+These generated files are all stubs which will require extensive editing to
+implement a full language, but they are a well-understood and working starting
+point.  With this single command you can create a working compiler.  It's up to
+you to fill in the details.
+
+C<mk_language_shell.pl> prefers to run from within a working Parrot repository.
+It requires a single argument, the name of the new project to create.  There
+are no hard-and-fast rules about names, but the Parrot developers recommend
+that Parrot-based implementations of existing languages use unique names.
+
+Consider the names of Perl 5 distributions: Active Perl and Strawberry Perl.
+Python implementations are IronPython (running on the CLR) and Jython (running
+on the JVM).  The Ruby-on-Parrot compiler isn't just "Ruby": it's Cardinal.
+The Tcl compiler on Parrot is Partcl.
+
+An entirely new language has no such constraints.
+
+From the Parrot directory, invoke C<mk_language_shell.pl> like:
+
+  $ B<cd languages/>
+  $ B<perl ../tools/build/mk_language_shell.pl <project name>>
+
+=head3 Parsing Fundamentals
+
+An important part of a compiler is the parser and lexical analyzer.  The
+lexical analyzer converts the HLL input file into individual tokens. A token
+may consist of an individual punctuation ("+"), an identifier ("myVar"), a
+keyword ("while"), or any other artifact that stands on its own as a single
+unit.  The parser attempts to match a stream of these input tokens against a
+given pattern, or grammar. The matching process orders the input tokens into an
+abstract syntax tree which the other portions of the compiler can process.
+
+X<top-down parser>
+X<bottom-up parser>
+X<parsers; top-down>
+X<parsers; bottom-up>
+Parsers come in top-down and bottom-up varieties. Top-down parsers start with a
+top-level rule which represents the entire input. They attempt to match various
+combinations of subrules until they have consumed the entire input.  Bottom-up
+parsers start with individual tokens from the lexical analyzer and attempt to
+combine them together into larger and larger patterns until they produce a
+top-level token.
+
+PGE is a top-down parser, although it also contains a bottom-up I<operator
+precedence> parser to make processing token clusters such as mathematical
+expressions more efficient.
+
+=head2 Driver Programs
+
+The driver program for the new compiler must create instances of the various
+necessary classes that run the parser. It must also include the standard
+function libraries, create global variables, and handle commandline options.
+PCT provides several useful command-line options, but driver programs may need
+to override several behaviors.
+
+PCT programs can run in two ways.  An interactive mode runs one statement at a
+time in the console.  A file mode loads and runs an entire file at once.  A
+driver program may specify information about the interactive prompt and
+environment, as well as help and error messages.
+
+=head3 C<HLLCompiler> class
+
+The C<HLLCompiler> class implements a compiler object. This object contains
+references to language-specific parser grammar and actions files, as well as
+the steps involved in the compilation process.  The stub compiler created by
+C<mk_language_shell.pl> might resemble:
+
+  .sub 'onload' :anon :load :init
+      load_bytecode 'PCT.pbc'
+      $P0 = get_hll_global ['PCT'], 'HLLCompiler'
+      $P1 = $P0.'new'()
+      $P1.'language'('MyCompiler')
+      $P1.'parsegrammar'('MyCompiler::Grammar')
+      $P1.'parseactions'('MyCompiler::Grammar::Actions')
+  .end
+
+  .sub 'main' :main
+      .param pmc args
+      $P0 = compreg 'MyCompiler'
+      $P1 = $P0.'command_line'(args)
+  .end
+
+The C<:onload> function creates the driver object as an instance of
+C<HLLCompiler>, sets the necessary options, and registers the compiler with
+Parrot. The C<:main> function drives parsing and execution. It calls the
+C<compreg> opcode to retrieve the registered compiler object for the language
+"MyCompiler" and invokes that compiler object using the options received from
+the commandline.
+
+The C<compreg> opcode hides some of Parrot's magic; you can use it multiple
+times in a program to compile and run different languages. You can create
+multiple instances of a compiler object for a single language (such as for
+runtime C<eval>) or you can create compiler objects for multiple languages for
+easy interoperability. The Rakudo Perl 6 C<eval> function uses this mechanism
+to allow runtime eval of code snippets in other languages:
+
+  eval("puts 'Konnichiwa'", :lang<Ruby>);
+
+=head3 C<HLLCompiler> methods
+
+The previous example showed the use of several HLLCompiler methods:
+C<language>, C<parsegrammar>, and C<parseactions>.  These three methods are the
+bare minimum interface any PCT-based compiler should provide.  The C<language>
+method takes a string argument that is the name of the compiler. The
+HLLCompiler object uses this name to register the compiler object with Parrot.
+The C<parsegrammar> method creates a reference to the grammar file that you
+write with PGE. The C<parseactions> method takes the class name of the NQP file
+used to create the AST-generator for the compiler.
+
+If your compiler needs additional features, there are several other available
+methods:
+
+=over 4
+
+=item * C<commandline_prompt>
+
+The C<commandline_prompt> method allows you to specify a custom prompt to
+display to users in interactive mode.
+
+=item * C<commandline_banner>
+
+The C<commandline_banner> method allows you to specify a banner message that
+displays at the beginning of interactive mode.
+
+=back
+
+=cut
+
+# Local variables:
+#   c-file-style: "parrot"
+# End:
+# vim: expandtab shiftwidth=4:

Copied: branches/auto_attrs/docs/book/pct/ch04_pge.pod (from r40476, trunk/docs/book/pct/ch04_pge.pod)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/auto_attrs/docs/book/pct/ch04_pge.pod	Mon Aug 10 11:28:05 2009	(r40477, copy of r40476, trunk/docs/book/pct/ch04_pge.pod)
@@ -0,0 +1,1558 @@
+=pod
+
+=head1 Grammar Engine
+
+X<Parrot Grammar Engine>
+X<PGE (Parrot Grammar Engine)>
+The Parrot Grammar Engine (PGE) is a parser generator, one of the key
+components of the Parrot Compiler Toolkit. It reads grammar files written in
+the PGE rules format and generates parser modules written in PIR code. PGE
+rules provide the full power of I<recursive descent parsing> and I<operator
+precedence parsing>. Fortunately, you don't need to know what those terms
+mean in order to make good use of PGE. We'll introduce the necessary
+concepts as we talk about various features in this chapter.
+
+=head2 Grammars
+
+The ultimate goal of a parser is to match patterns in a source language and
+convert them to an internal data structure for later manipulations. As a
+programmer, you're probably already familiar with some of these types of
+patterns: function declarations, function calls, statements, and assignments.
+Each of these different concepts has a particular form called a I<syntax>.
+In C for example, the syntax to define a function looks something like this:
+
+  <return_type> <function_name> ( <arguments> ) { <function_body> }
+
+Things that fit this pattern, so long as all the sub-patterns use the proper
+syntax also, are valid subroutines in C. Similarly, we can use a slightly
+different pattern to create a subroutine in Perl:
+
+  sub <function_name> { <function_body> }
+
+A grammar is a collection of rules like the ones above that specify all the
+acceptable patterns in a language. Grammars group together these rules in
+much the same way that a class groups together related data fields and methods
+N<In languages like Perl 6 for instance, a grammar is just a special kind
+of class and a rule is just a special kind of method.>. Each rule defines
+a pattern for matching one unit of text, and can be made up of various other
+rules which are called recursively to make a complete match.
+
+A rule can contain regular expressions to match patterns of characters:
+
+  rule id { \d+ }
+
+A rule can also contain patterns of references to other rules:
+
+  rule record { <id> <name> <phone> }
+
+A grammar contains a group of rules that work together to match the entire
+language:
+
+  grammar Contacts;
+
+  rule name { 'John' | 'Bob ' | 'Fred' }
+
+  rule id   { \d+ }
+
+  rule record { <id> <name> }
+
+  ...
+
+=head3 Rules and Tokens
+
+X<rule>
+X<token>
+There are two different kinds of rules: C<rule>, which we saw above, and
+C<token>. A C<rule> performs smart whitespace matching between the various
+pieces of the pattern. The C<record> rule given previously would match
+"6355 John" or "6355      John" but not "6355John".
+
+A C<token> matches whitespace only if you specifically request it. To get the
+same effect with a token, add the C<\s> (match a space character) and C<+>
+(match the preceding atom -- the space character, in this case -- one or more
+times) pattern to the rule:
+
+  token record { <id> \s+ <name> }
+
+=head3 The Start Rule
+
+X<top>
+X<top-down parser>
+A recursive descent parser is what's called a I<top-down parser>. It starts
+at the highest-level rule, called C<TOP>, and works its way down through
+individual rules to match an entire string or file. Real Perl 6 allows any
+name for the top-level rule, but PCT expects a rule called C<TOP>. If PCT
+was as fully-featured as Perl 6, people would use it instead! Here's an
+example of a TOP rule:
+
+  rule TOP { <record> }
+
+This rule matches a single C<record> pattern in a string or file. Once the
+parser has succeeded in matching the entire string or file passed to the
+start rule, it returns a parse tree. If it cannot match the entire input
+with the rules provided, it can either return a partial match, or it can
+throw a parse error.
+
+=head3 Testing a Grammar
+
+Let's do a small example grammar. Save this example to a file called
+F<Contacts.pg>:
+
+  grammar Contacts is PGE::Grammar;
+
+  rule  TOP    { <record> }
+  rule  record { <id> <name> }
+  token name   { 'John' | 'Bob ' | 'Fred' }
+  token id     { \d+ }
+
+Then compile the grammar:
+
+  $ B<parrot Perl6Grammar.pbc --output=Contacts.pir Contacts.pg>
+
+=for author
+
+Assume an installed Parrot for all examples?  Anyone working from the source
+tree should be able to mangle paths appropriately.
+
+=end for
+
+The path to F<parrot> and to the F<Perl6Grammar.pbc> file will vary on
+different systems. If you compiled Parrot from source, it will be:
+
+  $ B<./parrot runtime/parrot/library/PGE/Perl6Grammar.pbc \>
+        B<--output=Contacts.pir Contacts.pg>
+
+Next, create a small PIR script to run your grammar. Save it as
+F<grammar_test.pir>:
+
+=begin PIR
+
+  .sub main :main
+      load_bytecode 'PGE.pbc'        # load some required modules
+      load_bytecode 'dumper.pbc'
+      load_bytecode 'PGE/Dumper.pbc'
+
+      load_bytecode 'Contacts.pir'   # load your grammar
+
+      .local string source
+      source  = "3 John"
+
+      .local pmc top, grammar, match
+      top     = get_hll_global ['Contacts'], 'TOP'
+      grammar = get_class 'Contacts'
+      match   = top(source, 'grammar' => grammar)
+
+      _dumper(match, "match")
+  .end
+
+=end PIR
+
+Run the test script:
+
+  $ B<parrot grammar_test.pir>
+
+It will print out a text representation of the raw parse tree stored in the
+C<match> variable:
+
+  "match" => PMC 'Contacts' => "3 John" @ 0 {
+      <record> => PMC 'Contacts' => "3 John" @ 0 {
+          <id> => PMC 'Contacts' => "3" @ 0
+          <name> => PMC 'Contacts' => "John" @ 2
+      }
+  }
+
+Each node in the tree corresponds to a rule in the grammar.  The top-level
+match variable contains one child named C<record>, which contains two children
+named C<id> and C<name>.  C<id> contains the number 3, and C<name> contains the
+string "John". This is exactly what the simple grammar should have matched.
+
+=head2 Rule Syntax
+
+Every language has a set of basic components (words or parts of words) and
+syntax conventions for combining them. The "words" in rules are literal
+characters or symbols, some X<metacharacters> metacharacters (or metasymbols),
+and X<rules;escape sequences>X<escape sequences, rules> escape sequences, while
+the combining syntax includes other metacharacters, X<quantifiers, rules>
+X<rules;quantifiers> quantifiers, bracketing characters, and assertions.
+
+=head3 Metacharacters
+
+The C<.> metacharacter matches any single character, even a newline character.
+The C<^> and C<$> metacharacters are zero-width matches which represent the
+beginning and end of a string. They each have doubled alternates C<^^> and
+C<$$> that match at the beginning and end of every (newline-delimited) line
+within a string.
+
+The C<|>, C<&>, C<\>, C<#>, and C<:=> metacharacters are all syntax structure
+elements. C<|> alternates between two options. C<&> matches two patterns
+simultaneously (the patterns must be the same length). C<\> turns literal
+characters into metacharacters (producing escape sequences). C<#> starts a
+comment which proceeds until the end of the line. You can start a comment at
+any point on any line in a rule. C<:=> binds a hypothetical variable to the
+result of a subrule or grouped pattern (see L<Hypothetical Variables>).
+
+The metacharacters C<()>, C<[]>, C<{}> and C<E<lt>E<gt>> are bracketing pairs.
+Bracketing pairs must always be balanced within the rule; to use a literal
+character, escape it with a C<\>.  The C<()> and C<[]> pairs group patterns as
+a single atom. They often capture a result, mark the boundaries of an
+alternation, or mark a group of patterns with a quantifier. Parentheses C<()>
+capture, but square brackets C<[]> do not. The C<{}> brackets define a section
+of code (a closure) within a rule. These closures are always a successful
+zero-width match. The C<E<lt>...E<gt>> brackets mark assertions, which handle a
+variety of constructs including character classes and user-defined quantifiers
+(see L<Assertions>).
+
+Table 7-2 summarizes the basic metacharacters.
+
+=begin table picture Metacharacters
+
+Z<CHP-7-TABLE-2>
+
+=headrow
+
+=row
+
+=cell Symbol
+
+=cell Meaning
+
+=bodyrows
+
+=row
+
+=cell C<.>
+
+=cell Match any single character, including a newline.
+X<. (dot);. match single character (rules)>
+
+=row
+
+=cell C<^>
+
+=cell Match the beginning of a string.
+X<^ (caret);^ beginning of string (rules)>
+
+=row
+
+=cell C<$>
+
+=cell Match the end of a string.
+X<$ (dollar sign);$ end of string (rules)>
+
+=row
+
+=cell C<^^>
+
+=cell Match the beginning of a line within the string.
+X<^ (caret);^^ beginning of line (rules)>
+
+=row
+
+=cell C<$$>
+
+=cell Match the end of a line within the string.
+X<$ (dollar sign);$$ end of line (rules)>
+
+=row
+
+=cell C<|>
+
+=cell Match alternate patterns (OR).
+
+=row
+
+=cell C<&>
+
+=cell Match multiple patterns (AND).
+
+=row
+
+=cell C<\>
+
+=cell Escape a metacharacter to get a literal character, or escape a
+literal character to get a metacharacter.
+X<\ (backslash);\ escape sequences (rules)>
+X<\ (backslash);\ to escape metacharacters (rules)>
+
+=row
+
+=cell C<#>
+
+=cell Mark a comment (to the end of the line).
+
+=row
+
+=cell C<:=>
+
+=cell Bind the result of a match to a hypothetical variable.
+X<: (colon);:= (binding);in rules>
+
+=row
+
+=cell C<(...)>
+
+=cell Group patterns and capture the result.
+
+=row
+
+=cell C<[...]>
+
+=cell Group patterns without capturing.
+
+=row
+
+=cell C<{...}>
+
+=cell Execute a closure (Perl 6 code) within a rule.
+
+=row
+
+=cell C<E<lt>...E<gt>>
+
+=cell Match an assertion.
+
+=end table
+
+=head3 Escape Sequences
+
+Z<CHP-7-SECT-2.2>
+
+X<escape sequences, rules>
+X<rules;escape sequences>
+X<\ (backslash);\ escape sequences (rules)>
+
+Escape sequences are literal characters acting as metacharacters.  A preceding
+backslash (C<\>) identifies them as escapes. Some escape sequences represent
+single characters that are difficult to represent literally, such as C<\t> for
+tab, or C<\x[...]> to specify a character by its hexadecimal number.  Some
+represent limited character classes, such as C<\d> for digits or C<\w> for word
+characters. Some represent zero-width positions in a match, such as C<\b> for a
+word boundary.
+
+X<variable interpolation in rules>
+X<rules;variable interpolation>
+If you've used Perl 5 regexps, you may remember the C<\Q> escape sequence which
+treats everything until the following C<\E> sequence as literal text,
+containing no escape sequences.  Because ordinary variables now interpolate as
+literal strings by default, the C<\Q> escape sequence is rarely needed.
+
+A<CHP-7-TABLE-3>Table 7-3 shows the escape sequences for rules.
+
+=begin table picture Escape sequences
+
+Z<CHP-7-TABLE-3>
+
+=headrow
+
+=row
+
+=cell Escape
+
+=cell Meaning
+
+=bodyrows
+
+=row
+
+=cell C<\0[...]>
+
+=cell Match a character given in octal (brackets optional).
+
+=row
+
+=cell C<\b>
+
+=cell Match a word boundary.
+
+=row
+
+=cell C<\B>
+
+=cell Match when not on a word boundary.
+
+=row
+
+=cell C<\c[...]>
+
+=cell Match a named character or control character.
+
+=row
+
+=cell C<\C[...]>
+
+=cell Match any character except the bracketed named or control character.
+
+=row
+
+=cell C<\d>
+
+=cell Match a digit.
+
+=row
+
+=cell C<\D>
+
+=cell Match a non-digit.
+
+=row
+
+=cell C<\e>
+
+=cell Match an escape character.
+
+=row
+
+=cell C<\E>
+
+=cell Match anything but an escape character.
+
+=row
+
+=cell C<\f>
+
+=cell Match the form feed character.
+
+=row
+
+=cell C<\F>
+
+=cell Match anything but a form feed.
+
+=row
+
+=cell C<\n>
+
+=cell Match a (logical) newline.
+
+=row
+
+=cell C<\N>
+
+=cell Match anything but a (logical) newline.
+
+=row
+
+=cell C<\h>
+
+=cell Match horizontal whitespace.
+
+=row
+
+=cell C<\H>
+
+=cell Match anything but horizontal whitespace.
+
+=row
+
+=cell C<\L[...]>
+
+=cell Everything within the brackets is lowercase.
+
+=row
+
+=cell C<\Q[...]>
+
+=cell All metacharacters within the brackets match as literal characters.
+
+=row
+
+=cell C<\r>
+
+=cell Match a return.
+
+=row
+
+=cell C<\R>
+
+=cell Match anything but a return.
+
+=row
+
+=cell C<\s>
+
+=cell Match any whitespace character.
+
+=row
+
+=cell C<\S>
+
+=cell Match anything but whitespace.
+
+=row
+
+=cell C<\t>
+
+=cell Match a tab.
+
+=row
+
+=cell C<\T>
+
+=cell Match anything but a tab.
+
+=row
+
+=cell C<\U[...]>
+
+=cell Everything within the brackets is uppercase.
+
+=row
+
+=cell C<\v>
+
+=cell Match vertical whitespace.
+
+=row
+
+=cell C<\V>
+
+=cell Match anything but vertical whitespace.
+
+=row
+
+=cell C<\w>
+
+=cell Match a word character (Unicode alphanumeric characters plus the
+underscore C<_>).
+
+=row
+
+=cell C<\W>
+
+=cell Match anything but a word character.
+
+=row
+
+=cell C<\x[...]>
+
+=cell Match a character given in hexadecimal (brackets optional).
+
+=row
+
+=cell C<\X[...]>
+
+=cell Match anything but the character given in hexadecimal (brackets
+optional).
+
+=end table
+
+=head3 Quantifiers
+
+Z<CHP-7-SECT-2.3>
+
+Quantifiers specify the number of times an atom (a single character,
+metacharacter, escape sequence, grouped pattern, assertion, etc.) will match.
+
+X<. (dot);.. (range);quantifier (rules)>
+X<. (dot);... (infinite range);quantifier (rules)>
+The numeric quantifiers use assertion syntax. A single number (C<E<lt>3E<gt>>)
+requires exactly that many matches. A numeric range quantifier
+(C<E<lt>3C<..>5E<gt>>) succeeds if the number of matches is between the minimum
+and maximum numbers, inclusive. A range with three trailing dots
+(C<E<lt>2...E<gt>>) is shorthand for C<E<lt>2..InfE<gt>>; it matches as many
+times as possible.
+
+Each quantifier has a minimal alternate form -- marked with a trailing C<?> --
+which matches the shortest possible sequence first.  That is, given the string
+C<aaaaaa>, C<aE<lt>3C<..>5E<gt>> will match C<aaaaa> and C<aE<lt>3C<..>5E<gt>?>
+will match C<aaa>.
+
+A<CHP-7-TABLE-4>Table 7-4 shows the built-in
+X<quantifiers, rules> X<rules;quantifiers> quantifiers.
+
+=begin table picture Quantifiers
+
+Z<CHP-7-TABLE-4>
+
+=headrow
+
+=row
+
+=cell Maximal
+
+=cell Minimal
+
+=cell Meaning
+
+=bodyrows
+
+=row
+
+=cell C<*>
+
+=cell C<*?>
+
+=cell Match 0 or more times.
+
+=row
+
+=cell C<+>
+
+=cell C<+?>
+
+=cell Match 1 or more times.
+
+=row
+
+=cell C<?>
+
+=cell C<??>
+
+=cell Match 0 or 1 times.
+
+=row
+
+=cell C<E<lt>>R<n>C<E<gt>>
+
+=cell C<E<lt>>R<n>C<E<gt>?>
+
+=cell Match exactly R<n> times.
+
+=row
+
+=cell C<E<lt>>R<n>C<..>R<m>C<E<gt>>
+
+=cell C<E<lt>>R<n>C<..>R<m>C<E<gt>?>
+
+=cell Match at least R<n> and no more than R<m> times.
+
+=row
+
+=cell C<E<lt>>R<n>C<...E<gt>>
+
+=cell C<E<lt>>R<n>C<...E<gt>?>
+
+=cell Match at least R<n> times.
+
+=end table
+
+=head3 Assertions
+
+Z<CHP-7-SECT-2.4>
+
+X<assertions, rules>
+X<rules;assertions>
+An assertion states that some condition or state is true. The match fails when
+that assertion is false.
+
+X<variable interpolation in rules>
+X<rules;variable interpolation>
+
+Assertions match named and anonymous rules, arrays or hashes containing
+anonymous rules, and subroutines or closures that return anonymous rules.
+
+To interpolate a variable in assertion rules, enclose it in assertion
+delimiters.
+A bare scalar in a pattern
+interpolates as a literal string, while a scalar variable in assertion
+brackets interpolates as an anonymous rule. A bare array in a pattern
+matches as a series of alternate literal strings, while an array in
+assertion brackets interpolates as a series of alternate anonymous
+rules.
+
+A bare hash in a pattern matches a word (C<\w+>) if and only if that word is
+one of its keysN<The effect is similar to matching the keys as a series of
+alternates, but it prefers to match the longest possible key, instead of the
+first potential match.>, while a hash in assertion brackets also matches the
+associated value as an anonymous rule.
+
+X<fail keyword>
+A bare closure in a pattern always matches (unless it calls C<fail>), but a
+closure in assertion brackets C<E<lt>{...}E<gt>> must return an anonymous rule
+to match.
+
+An assertion with parentheses C<E<lt>(...)E<gt>> resembles a bare closure in a
+pattern in that it allows you to include Perl code within a rule.
+C<E<lt>(...)E<gt>> evaluates the return value of the closure in boolean
+context. The match succeeds or fails based on that return value.
+
+Assertions match character classes, both named and enumerated. A named rule
+character class is often more accurate than an enumerated character class. The
+common C<E<lt>[a-zA-Z]E<gt>> idiom matches ASCII alphabetic characters, but the
+more comprehensive built-in rule C<E<lt>alphaE<gt>> matches the full set of
+Unicode alphabetic characters.
+
+A<CHP-7-TABLE-5>Table 7-5 shows the syntax of assertions.
+
+=begin table picture Assertions
+
+Z<CHP-7-TABLE-5>
+
+=headrow
+
+=row
+
+=cell Syntax
+
+=cell Meaning
+
+=bodyrows
+
+=row
+
+=cell C<E<lt>...E<gt>>
+
+=cell Generic assertion delimiter.
+
+=row
+
+=cell C<E<lt>!...E<gt>>
+
+=cell Negate any assertion.
+
+=row
+
+=cell C<E<lt>>R<name>C<E<gt>>
+
+=cell Match a named rule or character class.
+
+=row
+
+=cell C<E<lt>[...]E<gt>>
+
+=cell Match an enumerated character class.
+
+=row
+
+=cell C<E<lt>-...E<gt>>
+
+=cell Complement a character class (named or enumerated).
+
+=row
+
+=cell C<E<lt>"..."E<gt>>
+
+=cell Match a literal string (interpolated at match time).
+
+=row
+
+=cell C<E<lt>'...'E<gt>>
+
+=cell Match a literal string (not interpolated).
+
+=row
+
+=cell C<E<lt>(...)E<gt>>
+
+=cell Boolean assertion. Execute a closure and match if it returns a true
+result.
+
+=row
+
+=cell C<E<lt>$scalarE<gt>>
+
+=cell Match an anonymous rule.
+
+=row
+
+=cell C<E<lt>@arrayE<gt>>
+
+=cell Match a series of anonymous rules as alternates.
+
+=row
+
+=cell C<E<lt>%hashE<gt>>
+
+=cell Match a key from the hash, then its value (as an anonymous rule).
+
+=row
+
+=cell C<E<lt>E<amp>sub()E<gt>>
+
+=cell Match an anonymous rule returned by a sub.
+
+=row
+
+=cell C<E<lt>{>R<code>C<}E<gt>>
+
+=cell Match an anonymous rule returned by a closure.
+
+=row
+
+=cell C<E<lt>.E<gt>>
+
+=cell Match any logical grapheme, including combining character sequences.
+
+=end table
+
+=head3 Modifiers
+
+Z<CHP-7-SECT-2.5>
+
+X<modifiers>
+X<: (colon);: modifier delimiter in rules>
+Modifiers alter the meaning of a pattern. The standard position for modifiers
+is at the beginning of the rule, right after the C<m>, C<s>, or C<rx>, or after
+the name in a named rule. Modifiers cannot attach to the outside of a bare
+C</.../>. For example:
+
+  m:i/marvin/ # case insensitive
+  rule names :i { marvin | ford | arthur }
+
+You may group single-character modifiers, but you must separate longer
+modifiers by colons:
+
+  m:wig/ zaphod /                        # OK
+  m:words:ignorecase:globally / zaphod / # OK
+  m:wordsignorecaseglobally / zaphod /   # Not OK
+
+Most modifiers can also appear inside the rule when attached to rule or
+grouping delimiters. Internal modifiers are lexically scoped to their enclosing
+delimiters, so can alter subpatterns:
+
+  m/:w I saw [:i zaphod] / # only 'zaphod' is case insensitive
+
+The repetition modifiers (C<:R<N>x>, C<:R<N>th>, C<:once>, C<:globally>, and
+C<:exhaustive>) and the continue modifier (C<:cont>) alter the return value of
+the rule as a whole, so you cannot use them lexically inside a rule.
+
+The C<:R<N>x> modifier matches the rule a specific number of times. If the
+modifier expects more matches than the string has, the match fails.  Its
+alternate form (C<:x(R<N>)>) can take a variable in place of the number.
+
+The C<:once> modifier on a rule only allows it to match once. The rule will not
+match again until you call the C<.reset> method on the rule object.
+
+The C<:globally> modifier matches as many times as possible. The C<:exhaustive>
+modifier also matches as many times as possible, in as many different ways as
+possible.
+
+The C<:R<N>th> modifier preserves one result from a particular counted match.
+If the rule matches fewer times than the modifier expects, the match fails. It
+has several alternate forms. One form, C<:th(R<N>)>, takes a variable in place
+of the number. The other forms -- C<:R<N>st>, C<:R<N>nd>, and C<:R<N>rd> --
+allow you to write more naturally C<:1st>, C<:2nd>, C<:3rd>.  The other way is
+valid as well; choose whichever is most comfortable.
+
+By default, rules ignore literal whitespace within the pattern.  The C<:w>
+modifier makes rules sensitive to literal whitespace, but in an intelligent
+way. Any cluster of literal whitespace acts like an explicit C<\s+> when it
+separates two identifiers and C<\s*> everywhere else.
+
+I<No> modifiers exist to treat the matched string as a single line or multiple
+lines.  Instead, use the "beginning of string" and "end of string" or
+"beginning of line" and "end of line" metacharacters.
+
+A<CHP-7-TABLE-6>Table 7-6 lists the available modifiers.
+
+=begin table picture Modifiers
+
+Z<CHP-7-TABLE-6>
+
+=headrow
+
+=row
+
+=cell Short
+
+=cell Long
+
+=cell Meaning
+
+=bodyrows
+
+=row
+
+=cell C<:i>
+
+=cell C<:ignorecase>
+
+=cell Case-insensitive match.
+
+=row
+
+=cell C<:I>
+
+=cell
+
+=cell Case-sensitive match (on by default).
+
+=row
+
+=cell C<:c>
+
+=cell C<:cont>
+
+=cell Continue where the previous match on the string left off.
+
+=row
+
+=cell C<:w>
+
+=cell C<:words>
+
+=cell Literal whitespace in the pattern matches as C<\s+>
+or C<\s*>.
+
+=row
+
+=cell C<:W>
+
+=cell
+
+=cell Turn off intelligent whitespace matching (return to default).
+
+=row
+
+=cell
+
+=cell C<:>R<N>C<x>/C<:x(>R<N>C<)>
+
+=cell Match the pattern R<N> times.
+
+=row
+
+=cell
+
+=cell C<:>R<N>C<th>/C<:nth(>R<N>C<)>
+
+=cell Match the R<N>th occurrence of a pattern.
+
+=row
+
+=cell
+
+=cell C<:once>
+
+=cell Match the pattern once and only once.
+
+=row
+
+=cell C<:g>
+
+=cell C<:globally>
+
+=cell Match the pattern as many times as possible without overlapping
+possibilities.
+
+=row
+
+=cell C<:e>
+
+=cell C<:exhaustive>
+
+=cell Match every possible occurrence of a pattern, including overlapping
+possibilities.
+
+=row
+
+=cell
+
+=cell C<:u0>
+
+=cell . is a byte.
+
+=row
+
+=cell
+
+=cell C<:u1>
+
+=cell . is a Unicode codepoint.
+
+=row
+
+=cell
+
+=cell C<:u2>
+
+=cell . is a Unicode grapheme.
+
+=row
+
+=cell
+
+=cell C<:u3>
+
+=cell . is language dependent.
+
+=row
+
+=cell
+
+=cell C<:p5>
+
+=cell The pattern uses Perl 5 regex syntax.
+
+=end table
+
+=head3 Built-in Rules
+
+Z<CHP-7-SECT-3>
+
+X<rules;built-in>
+PGE provides several named rules, including a complete set of X<POSIX-style
+classes> POSIX-style classes, and X<Unicode property classes> Unicode property
+classes. The list isn't fully defined yet, but A<CHP-7-TABLE-7>Table 7-7 shows
+a few you're likely to see.
+
+The C<E<lt>nullE<gt>> rule matches a zero-width string (it always matches) and
+C<E<lt>priorE<gt>> matches whatever the most recent successful rule matched.
+These replace the two behaviors of X</ (slash);// invalid null pattern>
+X<invalid null pattern //> the Perl 5 null pattern C<//>, which is no longer
+valid syntax for rules.
+
+=begin table picture Built-in rules
+
+Z<CHP-7-TABLE-7>
+
+=headrow
+
+=row
+
+=cell Rule
+
+=cell Meaning
+
+=bodyrows
+
+=row
+
+=cell C<E<lt>alphaE<gt>>
+
+=cell Match a Unicode alphabetic character.
+
+=row
+
+=cell C<E<lt>digitE<gt>>
+
+=cell Match a Unicode digit.
+
+=row
+
+=cell C<E<lt>spE<gt>>
+
+=cell Match a single space character (the same as C<\s>).
+
+=row
+
+=cell C<E<lt>wsE<gt>>
+
+=cell Match any whitespace (the same as C<\s+>).
+
+=row
+
+=cell C<E<lt>nullE<gt>>
+
+=cell Match the null string.
+
+=row
+
+=cell C<E<lt>priorE<gt>>
+
+=cell Match the same thing as the previous match.
+
+=row
+
+=cell C<E<lt>before ...E<gt>>
+
+=cell Zero-width lookahead. Assert that the current position I<precedes> a
+pattern.
+
+=row
+
+=cell C<E<lt>after ...E<gt>>
+
+=cell Zero-width lookbehind. Assert that the current position I<follows> a
+pattern.
+
+=row
+
+=cell C<E<lt>prop ...E<gt>>
+
+=cell Match any character with the named property.
+
+=row
+
+=cell C<E<lt>replace(...)E<gt>>
+
+=cell Replace everything matched so far in the rule or subrule with the
+given string (under consideration).
+
+=end table
+
+=head3 Backtracking Control
+
+Z<CHP-7-SECT-4>
+
+X<backtracking controls>
+X<fail keyword>
+Whenever part of the pattern fails to match, PGE performs backtracking --
+backing up to the previous point at which the match could succeed and trying
+again.  You can explicitly trigger backtracking by calling the C<fail> function
+within a closure. A<CHP-7-TABLE-8>Table 7-8 displays metacharacters and
+built-in rules relevant to backtracking.
+
+=for author
+
+This could use an example.
+
+=end for
+
+=begin table picture Backtracking controls
+
+Z<CHP-7-TABLE-8>
+
+=headrow
+
+=row
+
+=cell Operator
+
+=cell Meaning
+
+=bodyrows
+
+=row
+
+=cell C<:>
+
+=cell Don't retry the previous atom.  Instead, fail to the next earlier atom.
+X<: (colon);: fail to atom before last (rules)>
+X<backtracking controls;: fail to atom before last>
+
+=row
+
+=cell C<::>
+
+=cell Don't backtrack over this point. Instead fail out of the closest
+enclosing group (C<(...)>, C<[...]>, or the rule delimiters).
+X<: (colon);:: fail out of group (rules)>
+X<backtracking controls;: fail out of group>
+
+=row
+
+=cell C<:::>
+
+=cell Don't backtrack over this point.  Instead, fail out of the current rule
+or subrule.
+X<: (colon);::: fail out of rule (rules)>
+X<backtracking controls;: fail out of rule>
+
+=row
+
+=cell C<E<lt>commitE<gt>>
+
+=cell Don't backtrack over this point. Instead, fail out of the entire match
+(even from within a subrule).
+
+=row
+
+=cell C<E<lt>cutE<gt>>
+
+=cell Like C<E<lt>commitE<gt>>, but also cuts the string matched. The current
+matching position at this point becomes the new beginning of the string.
+
+=end table
+
+=head3 Calling Actions
+
+Once the parser has matched the entire input N<a source code file, or a line of
+input at the terminal in interactive mode> the parse has succeeded.  The
+generated AST is now available to the code generator for conversion into PIR.
+
+=for author
+
+Please review.  The forward declaration is awkward here, but a little bit of
+explanation might ameliorate this.
+
+=end for
+
+This AST gets built up by actions -- code snippets attached to rules and
+tokens.  To call an action, insert the C<{*}> token into the rule. When PGE
+encounters C<{*}>, it will call the associated action method with the current
+match object as an argument.
+
+The best way to demonstrate this is by example.  Sprinkle the C<persons_name>
+rule liberally with action calls:
+
+ rule persons_name {
+    {*} <first_name> {*} <last_name> {*}
+ }
+
+The first call to the action method contains an empty match object because the
+parser hasn't matched anything yet.  The second call contains only the first
+name of the match. The third and final call contains both the matched first and
+last name.
+
+If the match fails halfway through, PGE will still call the actions that have
+succeeded; it will not call the actions after the failure.  If you try to match
+the string "Leia", PGE will call the first two action methods.  When the rule
+tries to match the last name, it fails, and PGE will not call the third action
+method.
+
+=head3 Alternations and Keys
+
+In addition to sub-rules, groups, and quantifiers, you can also express
+either-or alternations between options. The vertical bar token (C<|>)
+distinguishes between options where only one may match:
+
+ rule hero {
+    ['Luke' | 'Leia'] 'Skywalker'
+ }
+
+This rule will match either "Luke Skywalker" or "Leia Skywalker" but won't
+match "Luke Leia Skywalker"N<nor anything else.>.  Given alternations and
+action methods, it's often important to distinguish which alternation matched:
+
+ rule hero {
+    [
+      'Luke' {*}    #= Luke
+    | 'Leia' {*}    #= Leia
+    ]
+    'Skywalker'
+ }
+
+This is the same rule, except now it passes two arguments to its action method:
+the match object and the name of the person who matched.
+
+=head3 Warning: Left Recursion
+
+If you've worked with parsers before, you may have seen this coming.  If not,
+don't fear.  Like functions in ordinary procedural or functional languages, the
+methods in the PGE parser grammar can call themselves recursively.  Consider
+some rules derived in part from the grammar for the C programming language:
+
+ rule if_statement {
+    'if' <condition> '{' <statement>* '}' <else_block>?
+ }
+
+ rule statement {
+    <if_statement> | <expression>
+ }
+
+ rule else_block {
+    'else' '{' <statement>* '}'
+ }
+
+An C<if_statement> can contain a list of C<statement>s, and each statement
+may itself be an C<if_statement>.  This is I<recursion> X<Recursion>; it's one
+of the reasons PGE is a "Recursive descent" parser.
+
+Consider the more direct example of a comma-separated list of integer digits
+which form a list.  A recursive definition might be:
+
+ rule list {
+     <list> ',' <digit> | <digit>
+ }
+
+If there is only one digit, the second option in the alternation matches.  If
+there are multiple digits, recursion will match them through the first
+alternation.
+
+That's the intention.  The results are insidious.
+
+The recursive descent parser enters the C<list> rule. Its first option is to
+enter the list rule again, so it does.  Recursive descent is a X<depth-first
+algorithm> depth-first algorithm; PGE will continue to descend down a
+particular path until it finds a successful match or a match failure. In this
+case, it matches C<list>, then it matches C<list> again, then it matches
+C<list> again, and so on.  This rule forms an infinite loop -- a pattern called
+X<left recursion> I<left recursion>.  The problem is that the left-most item of
+the left-most alternation is itself a recursion.
+
+The rule above does not recurse infinitely when rewritten as:
+
+ rule list {
+    <digit> | <list> ',' <digit>
+ }
+
+... or even:
+
+ rule list {
+    <digit> ',' <list> | <digit>
+ }
+
+Both options ensure that the left-most item in the rule is not recursive.
+
+Left recursion may be trickier.  It's not immediately obvious in this grammar:
+
+ rule term {
+    <expression> '*' <term> | <digit>
+ }
+
+ rule expression {
+    <term> '+' <expression> | <term>
+ }
+
+Even this common, limited subset of mathematical equations has the same
+problem.  To match a C<term>, the parser first tries to match an C<expression>,
+which in turn matches a C<term> and then an C<expression> ....
+
+Again, the solution is simple.  Rewrite at least one of the rules so that the
+first condition it tries to match is not itself a recursive situation.
+
+=head3 Operator Precedence Parser
+
+Recursive descent parsing can be inefficient where statements have lots of
+little tokens and many possible options to match.  For example, mathematical
+expressions are very open-ended, with many valid forms which are difficult to
+anticipate.  Consider the expression:
+
+ a + b * c + d
+
+A recursive descent parser will undergo significant trial and error to parse
+this statement.  Recursive descent parsing is not ideal for these situations.
+Instead, a type of bottom-up parser called an I<operator precedence> X<Parser,
+Operator precedence> parser is much better.
+
+=for author
+
+Is this a categorization of all opps or just PGE's opp?
+
+=end for
+
+Operator precedence parsers work similarly to more versatile bottom-up parsers
+such as Lex or Yacc, but are optimized for use with expressions and equations.
+Equations have two subtypes, I<terms> and I<operators>. Operators themselves
+have several subtypes, including prefix (C<-a>), postfix (C<i++>), infix (C<x +
+y>), circumfix (C<[z]>), postcircumfix (C<a[b]>), and list (C<1, 2, 3>). Each
+operator gets its own precedence number that specifies how closely it binds to
+the terms. The previous example should parse as:
+
+ a + (b * c) + d
+
+... because the C<*> operator has a higher precedence -- binding more tightly
+to its terms -- than the C<+> operator.
+
+Within a grammar, switch from the top-down recursive descent parser to the
+bottom-up operator precedence parser with an C<optable> X<Parser, optable>
+rule:
+
+ rule expression is optable { ... }
+
+The C<...> ellipsis isn't an editorial shortcut, it's the Perl 6 operator to
+define a function signature. The C<...> indicates that this is just a
+signature; the actual implementation is elsewhere.  In this case, that location
+is in the definition of the optable.
+
+=head3 Protofunction Definitions
+
+X<Protofunctions>
+
+Protofunctions define operators in the optable in the same way that rules and
+tokens make up the grammar. A proto declares a rule, defined elsewhere, which
+other code may override dynamically.  In this case, PCT takes information from
+the proto declaration and fills in the details. The "dynamic overriding"
+implies that a high-level language itself can modify its own grammar at
+run time, by overriding the proto definitions for its operator table. Some
+languages call this process X<operator overloading> I<operator overloading>.
+
+A proto definition resembles:
+
+ 'proto' <proto_name> [ 'is' <property> ] '{' '...' '}'
+
+The name of the operator, noted as C<< <proto_name> >>, contains both a
+location part and an identifier part. The location is the type of the operator,
+such as infix, postfix, prefix, circumfix, and postcircumfix. The name of the
+operator is the symbol used for the operator in any of the quotes that Perl 6
+understands:
+
+ proto infix:<+>                  # a + b
+ proto postfix:'--'               # i--
+ proto circumfix:«<>»             # <x>
+
+The C<is> X<Parser, is> keyword defines a property of the rule. Examples
+include:
+
+ is precedence(1)     # Specifies an exact precedence
+ is equiv('+')        # Has the same precedence as the "+" operator
+ is assoc('right')    # Right associative. May also be "left" or "list"
+ is pirop('add')      # Operands are passed to the PIR operator "add"
+ is subname('mySub')  # Operands are passed to the function "mySub"
+ is pasttype('if')    # Operands are passed as children to an "if" PAST node in
+                      # the parse tree
+ is parsed(&myRule)   # The token is parsed and identified using the rule
+                      # "myRule" from the top-down parser
+
+=for author
+
+Please review.
+
+=end for
+
+Protofunction definitions are function signatures; you can override them with
+multimethod dispatch. This means that you can write functions I<with the same
+name> as the rule to implement the behavior of the operator.  Here's a proto:
+
+ rule infix:"+" { ... }
+
+... and its corresponding PIR rule:
+
+=begin PIR
+
+ .sub 'infix:+'
+    .param pmc a
+    .param pmc b
+    .local pmc c
+    c = a + b
+    .return(c)
+ .end
+
+=end PIR
+
+You may ask "Why have an C<is subname()> property, if you can define all
+operators as subroutines?" Using the C<is subname()> property allows PCT to
+call a subroutine of a different name than the operator.  This is a good idea
+if there is already a built-in function in the language that duplicates the
+functionality of the operator.  There is no sense in duplicating behavior.
+
+The great thing about protos being overloadable is that you can specify
+different functions to call with different signatures:
+
+=begin PIR
+
+ .sub 'infix:+' :multi('Integer', 'Integer')
+    #...
+ .end
+
+ .sub 'infix:+' :multi('CLispRatio', 'Number')
+    #...
+ .end
+
+ .sub 'infix:+' :multi('Perl6Double', 'PythonInteger')
+    #...
+ .end
+
+=end PIR
+
+This list can be a bit intimidating, and it's hard to imagine that it would be
+necessary to write up a new function to handle addition between every
+conceivable pair of operands. Fortunately, this is rarely the case in Parrot,
+because all these data types support the common VTABLE interface. For most data
+types Parrot already has basic arithmetic operations built in, and it's only
+necessary to override for those data types with special needs.
+
+=head3 Hypothetical Variables
+
+Z<CHP-7-SECT-5>
+
+X<variables;hypothetical>
+X<hypothetical variables>
+X<rules;captures>
+Hypothetical variables are a powerful way of building up data structures from
+within a match. Ordinary captures with C<()> store the result of the captures
+in C<$1>, C<$2>, etc. PGE stores values in these variables if the match is
+successful, but throws them away if the match fails.  The numbered capture
+variables are accessible outside the match, but only within the immediate
+surrounding lexical scope:
+
+  "Zaphod Beeblebrox" ~~ m:w/ (\w+) (\w+) /;
+
+  print $1; # prints Zaphod
+
+You can also capture into any user-defined variable with the binding operator
+C<:=> -- I<if> you have declared these variables in a lexical scope enclosing
+the rule:
+
+  my $person;
+  "Zaphod's just this guy." ~~ / ^ $person := (\w+) /;
+  print $person; # prints Zaphod
+
+You may capture repeated matches into an array:
+
+  my @words;
+  "feefifofum" ~~ / @words := (f<-[f]>+)* /;
+  # @words contains ("fee", "fi", "fo", "fum")
+
+You may capture pairs of repeated matches into a hash:
+
+  my %customers;
+  $records ~~ m:w/ %customers := [ E<lt>idE<gt> = E<lt>nameE<gt> \n]* /;
+
+If you don't need the captured value outside the rule, use a C<$?> variable
+instead. These are only directly accessible within the rule:
+
+  "Zaphod saw Zaphod" ~~ m:w/ $?name := (\w+) \w+ $?name/;
+
+A match of a named rule stores the result in a C<$?> variable with the same
+name as the rule. These variables are also accessible only within the rule:
+
+  "Zaphod saw Zaphod" ~~ m:w/ E<lt>nameE<gt> \w+ $?name /;
+
+=for author
+
+This next paragraph feels out of place; is there more?
+
+=end for
+
+When a rule matches a sequence of input tokens, PCT calls an associated method
+within NQP to convert that match into an AST node, which it inserts into the
+I<parse tree>.
+
+=head3 Basic Rules
+
+Consider the simple example rule:
+
+ rule persons_name {
+    <first_name> <last_name>
+ }
+
+... and two example tokens:
+
+ token first_name { <alpha>+ }
+ token last_name  { <alpha>+ }
+
+The special token C<< <alpha> >> is a built-in construct that only accepts
+upper case and lower case letters. The C<+> after the C<< <alpha> >> tag is a
+short way of saying "one or more". The rule will match names like C<Darth
+Vader>N<It also matches many strings that I<aren't> real names>, but won't
+match something like C<C 3P0>.
+
+This rule I<will> match C<Jar Jar Binks>, but not the way you would expect:
+it would match the first "Jar" as C<< <first_name> >>, the second
+"Jar" as C<< <last_name> >>, and ignore "Binks"N<You should ignore the whole
+thing.>.
+
+=for author
+
+The rest seems vestigial.  An example like this should precede the rest of the
+chapter.  There are forward references, but it's a decent overview for people
+who haven't used similar systems before -- if you avoid going out in the weeds.
+
+=end for
+
+This example shows another new construct, the square brackets. Square
+brackets are ways to group things together. The star at the end means
+that we take all the things inside the brackets zero or more times.
+This is similar to the plus, except the plus matches one or more times.
+Notice, however, that the above rule always matches a comma at the end,
+so we would need to have something like:
+
+ Darth Vader, Luke Skywalker,
+
+Instead of something more natural like:
+
+ Darth Vader, Luke Skywalker
+
+We can modify the rule a little bit so that it always ends with a name
+instead of a comma:
+
+ rule TOP {
+    [ <persons_name> ',' ]* <persons_name>
+ }
+
+Now we don't need a trailing comma, but at the same time we can't match
+an empty file because it always expects to have at least one name at the
+end. If we still want to match empty files successfully, we need to make
+the whole rule optional:
+
+ rule TOP {
+    [ [ <persons_name> ',' ]* <persons_name> ]?
+ }
+
+We've grouped the whole rule together in another set of brackets, and
+put a "?" question mark at the end. The question mark means zero or
+one of the prior item.
+
+The symbols "*" (zero or more), "+" (one or more) and "?" are called
+I<quantifiers>, and allow an item in the rule to match a variable
+number of times. These aren't the only quantifiers, but they are the
+most common. We will talk about other quantifiers later on.
+
+=cut
+
+# Local variables:
+#   c-file-style: "parrot"
+# End:
+# vim: expandtab shiftwidth=4:

Copied: branches/auto_attrs/docs/book/pct/ch05_nqp.pod (from r40476, trunk/docs/book/pct/ch05_nqp.pod)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/auto_attrs/docs/book/pct/ch05_nqp.pod	Mon Aug 10 11:28:05 2009	(r40477, copy of r40476, trunk/docs/book/pct/ch05_nqp.pod)
@@ -0,0 +1,219 @@
+=pod
+
+=head1 Grammar Actions
+
+The job of the grammar is to match input patterns from the source language.
+These patterns then need to get converted to nodes in the abstract syntax
+tree for manipulation in other stages of the compiler. We've already seen
+one example of a subroutine structure that takes a match and produces a
+tree node: Protofunctions. Protofunction signatures aren't the only way to
+apply functions to rules matched by a parser.  They are limited and are
+slightly primitive, but effective for handling operators.  There is an
+easier and more powerful way to write subroutines to convert match objects
+into parse tree nodes, using a language that's almost, but Not Quite Perl.
+
+X<NQP>
+X<Not Quite Perl>
+NQP (Not Quite Perl) is a small language which offers a limited subset of
+Perl 6's syntax and semantics. Though it originated as a bootstrapping tool
+for the Rakudo Perl 6 compiler, several other Parrot-based compilers use it
+as well.  It has become a permanent member of PCT, and therefore a permanent
+part of the Parrot code base. 
+
+NQP represents almost the smallest subset of the Perl 6 language necessary to
+implement parser transformations, plus a few syntactic convenience features
+that developers have requested. NQP's Perl 6 subset shows its Perl 5 roots,
+so existing Perl 5 programmers should find much of it familiar and should be
+able to leverage their existing skills for writing compilers.
+
+In PGE, at the time of a match, the grammar can invoke an action using the
+special C<{*}> symbol. In general, these action methods are
+written in NQP, although it is possible for them to be written in PIR N<In
+fact, this is how the NQP compiler itself is written>. We won't discuss the
+PIR case here because it's uncommon and needlessly difficult. NQP is the
+standard and preferred choice for this.
+
+=head2 NQP Basics
+
+X<sigils>
+X<scalar>
+Like all flavors and versions of Perl, NQP uses special prefix symbols called
+I<sigils> to distinguish variable types. The C<$> sigil represents scalars,
+C<@> arrays, and C<%> hashes.  A scalar is any single value which can
+interchangeably contain a string value, an integer value, or an object
+reference. Simple NQP assignments are:
+
+ $scalar := "This is a string"
+ $x      := 123
+ $pi     := 3.1415      # rounding
+
+X<bind operator>
+X<operators; binding>
+X<:=>
+The C<:=> I<bind> operator performs reference assignment in NQP. Reference
+assignment makes one variable into an alias for another. This means that
+the two variables are just different names for the same storage location,
+and changes to one will change both. It's important to remember that a bind
+is not a copy!
+
+NQP has hashes and arrays just like other flavors of Perl and various
+dynamic languages. NQP does not have a notion of hash and array context,
+but otherwise it works the way you would expect. Arrays have the C<@> sigil,
+and hashes have the C<%> sigil. Here are some examples:
+
+  @ary[0] := 1;
+  @ary[1] := "foo";
+  ...
+
+  %hsh{'bar'} := 2;
+  %hsh{'baz'} := "parrot";
+  ...
+
+There is also a nice shorthand way to index hashes, using angle brackets:
+
+  %hsh<bar> := "parrot";
+
+It's also possible to assign a list in I<scalar context>:
+
+ $array_but_a_scalar := (1, 2, 3, 4)
+
+Or you could write a new function in PIR to create a new array from a variadic
+argument list:
+
+ @my_array := create_new_array(1, 2, 3, 4)
+
+... which calls the PIR function:
+
+=begin PIR
+
+ .namespace []
+
+ .sub 'create_new_array'
+     .param pmc elems :slurpy
+     .return(elems)
+ .end
+
+=end PIR
+
+=head3 Calling Actions From Rules
+
+=for editor
+
+Needs a link to that section.
+
+=end for
+
+As mentioned in the chapter on grammar rules, the funny little C<{*}> symbol
+calls an action. The action in question is an NQP method with the same name as
+the rule that calls it. NQP action methods can have two different signatures:
+
+ method name ($/)      { ... }
+ method name($/, $key) { ... }
+
+Where does the key come from?  Consider this grammar:
+
+ rule cavepeople {
+      'Fred'  {*}    #= Caveman
+    | 'Wilma' {*}    #= Cavewoman
+    | 'Dino'  {*}    #= Dinosaur
+ }
+
+The C<cavepeople> rule demonstrates the result:
+
+ method cavepeople($/, $key) {
+    if $key eq 'Caveman' {
+        say "We've found a caveman!";
+    } elsif $key eq 'Cavewoman' {
+        say "We've found a cavewoman!";
+    } elsif $key eq 'Dinosaur' {
+        say "A dinosaur isn't a caveperson at all!";
+    }
+ }
+
+The key is a string that contains whatever text follows the C<#=> symbol.
+Without a C<#=> following the rule invocation, there's no C<$key> to use in the
+method.  If you attempt to use one without the other, the NQP compiler will die
+with error messages about mismatched argument/parameter numbers.
+
+=head3 The Match Object C<$/>
+
+X<match object>
+X<$/>
+The match object C<$/> is a data structure that's all business: it's both a
+hash and an array. Because it's a special variable used pervasively in PCT, it
+has a special shortcut syntax:
+
+ $/{'Match_item'}   is the same as $<Match_item>
+ $/[0]              is the same as $[0]
+
+Each key in the match object's hash is the name of a matched rule.  Given a
+file containing "C<X + 5>" and a rule:
+
+ rule introductions {
+    <variable> <operator> <number>
+ }
+
+The resulting match object will contain the key/value pairs:
+
+ "variable" => "X"
+ "operator" => "+"
+ "number"   => "5"
+
+When the match contains multiple values with the same name, or when rules have
+quantifiers such as C<*> or C<+>, the values in the hash may be arrays.  Given
+the input "A A A B B" and the rule:
+
+ rule letters {
+    <vowel>* <consonant>*
+ }
+
+The match object will contain the pairs:
+
+ "vowel"     => ["A", "A", "A"]
+ "consonant" => ["B", "B"]
+
+X<$( ) operator>
+
+Use the C<$( )> operator to count the number of matches in each group (by
+casting it to a scalar):
+
+ $($<vowel>) == 3
+
+=head3 Inline PIR
+
+=for author
+
+Needs expansion.
+
+=end for
+
+X<{{ }}>
+X<double curly brackets>
+Sometimes NQP isn't quite flexible enough to handle transforms appropriately.
+In a PGE rule, the C<{{ }}> double curly brackets demarcate inline-PIR mode.
+PGE will execute any PIR code in those brackets. You can access C<$/> directly
+in the grammar without having to jump into NQP.
+
+=head3 PAST Nodes
+
+X<PAST>
+X<PAST nodes>
+NQP's job is to make abstract syntax trees.  These trees are all objects -- and
+as such, instances of PAST nodes.  Each PAST class represents a unique program
+construct.  These constructs are common and simple, but combine to represent
+complicated programming structures.
+
+=head3 Making Trees
+
+Every action has the ability to create a PAST node that represents that action,
+as well as any children of that node. Calling C<make> on that node adds it into
+the growing PAST tree that PCT maintains. Once the C<TOP> rule matches
+successfully and returns, PCT optimizes and converts that tree into PIR and PBC
+for execution.
+
+=cut
+
+# Local variables:
+#   c-file-style: "parrot"
+# End:
+# vim: expandtab shiftwidth=4:

Modified: branches/auto_attrs/docs/book/pir/ch04_variables.pod
==============================================================================
--- branches/auto_attrs/docs/book/pir/ch04_variables.pod	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/docs/book/pir/ch04_variables.pod	Mon Aug 10 11:28:05 2009	(r40477)
@@ -188,7 +188,7 @@
 =end PIR_FRAGMENT
 
 Both C<and> and C<or> are short-circuiting ops. If they can determine what
-value to return from the first argument, they'll never evaluate the third.
+value to return from the first argument, they'll never evaluate the second.
 This is significant only for PMCs, as they might have side effects on
 evaluation.
 
@@ -1785,7 +1785,7 @@
 
   $P1 = $P0.'find_namespace'("Duck")
 
-The C<add_namespace>C<add_namespace method> method adds a new namespace
+The C<add_namespace>X<add_namespace method> method adds a new namespace
 as a child of the namespace object:
 
   $P0.'add_namespace'($P1)

Modified: branches/auto_attrs/docs/book/pir/ch06_subroutines.pod
==============================================================================
--- branches/auto_attrs/docs/book/pir/ch06_subroutines.pod	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/docs/book/pir/ch06_subroutines.pod	Mon Aug 10 11:28:05 2009	(r40477)
@@ -76,7 +76,7 @@
 By default, Parrot stores all subroutines in the namespace currently
 active at the point of their declaration. The C<:anon>X<:anon subroutine
 modifier> modifier tells Parrot not to store the subroutine in the
-namespace. The C<:nsentry>X:nsentry subroutine modifier> modifier stores
+namespace. The C<:nsentry>X<:nsentry subroutine modifier> modifier stores
 the subroutine in the currently active namespace with a different name.
 For example, Parrot will store this subroutine in the current namespace
 as C<bar>, not C<foo>:

Modified: branches/auto_attrs/docs/debugger.pod
==============================================================================
--- branches/auto_attrs/docs/debugger.pod	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/docs/debugger.pod	Mon Aug 10 11:28:05 2009	(r40477)
@@ -1,4 +1,4 @@
-# Copyright (C) 2001-2006, Parrot Foundation.
+# Copyright (C) 2001-2009, Parrot Foundation.
 # $Id$
 
 =head1 NAME
@@ -38,8 +38,9 @@
 
   parrot_debugger file.pbc
 
-That is, F<parrot_debugger> takes exactly one argument, which is the Parrot bytecode that
-you're going to debug. F<parrot_debugger> will automatically load and disassemble the
+That is, F<parrot_debugger> takes exactly one argument, which is the Parrot file that
+you're going to debug. This file may be Parrot bytecode (*.pbc), PASM source code (*.pasm)
+or PIR (*.pir). F<parrot_debugger> will automatically load and disassemble the
 bytecode file for you.
 
 Note that you can't pass command line arguments to your program when you invoke
@@ -235,6 +236,8 @@
 
 =item eval (e)
 
+The eval command is currently unimplemented.
+
 Run an instruction. The syntax is:
 
   eval INSTRUCTION
@@ -297,54 +300,24 @@
 
 For PMC registers, the command will print the number, the class of the PMC (in
 square brackets) and its string representation (when available). It prints
-<null pmc> for uninitialized PMC registers.
+<PMCNULL> for uninitialized PMC registers.
 
-Example:
+Examples:
 
   # prints the content of I2
   (pdb) p i2
-  Integer Registers:
   I2 =              0
 
   # prints the content of P0
   (pdb) p P0
-  PMC Registers:
   P0 = [ResizablePMCArray]
 
   # prints the content of all string registers
   (pdb) p s
-  String Registers:
-   0 =
-          Buflen  =                  4
-          Flags   =                  0
-          Bufused =                  4
-          Strlen  =                  4
-          Offset  =                  0
-          String  =       Just
-   1 =
-          Buflen  =                  8
-          Flags   =                  0
-          Bufused =                  7
-          Strlen  =                  7
-          String  =       another
-   2 =
-          Buflen  =                  8
-          Flags   =                  0
-          Bufused =                  6
-          Strlen  =                  6
-          String  =       Parrot
-   3 =
-          Buflen  =                  8
-          Flags   =                  0
-          Bufused =                  6
-          Strlen  =                  6
-          String  =       hacker
-   4 =
-   5 =
-   6 =
-   7 =
-   8 =
-   # ... and so on
+   S0 = Just
+   S1 = Another
+   S2 = Parrot
+   S3 = Hacker
 
 =item info
 

Copied: branches/auto_attrs/examples/config/file/configcompiler (from r40476, trunk/examples/config/file/configcompiler)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/auto_attrs/examples/config/file/configcompiler	Mon Aug 10 11:28:05 2009	(r40477, copy of r40476, trunk/examples/config/file/configcompiler)
@@ -0,0 +1,87 @@
+# $Id$
+
+=variables
+
+CC=/usr/bin/gcc
+CX=/usr/bin/g++
+
+=general
+
+cc=$CC
+cxx=$CX
+link=$CX
+ld=/usr/bin/g++
+
+=steps
+
+init::manifest nomanicheck
+init::defaults
+init::install
+init::hints verbose-step
+init::headers
+inter::progs
+inter::make
+inter::lex
+inter::yacc
+auto::gcc
+auto::glibc
+auto::backtrace
+auto::fink
+auto::macports
+auto::msvc
+auto::attributes
+auto::warnings
+init::optimize
+inter::shlibs
+inter::libparrot
+inter::charset
+inter::encoding
+inter::types
+auto::ops
+auto::pmc
+auto::alignptrs
+auto::headers
+auto::sizes
+auto::byteorder
+auto::va_ptr
+auto::format
+auto::isreg
+auto::arch
+auto::jit
+auto::cpu
+auto::funcptr
+auto::cgoto
+auto::inline
+auto::gc
+auto::memalign
+auto::signal
+auto::socklen_t
+auto::neg_0
+auto::env
+auto::gmp
+auto::readline
+auto::gdbm
+auto::pcre
+auto::opengl
+auto::crypto
+auto::gettext
+auto::snprintf
+# auto::perldoc
+# auto::pod2man
+auto::ctags
+auto::revision
+auto::icu
+gen::config_h
+gen::core_pmcs
+gen::crypto
+gen::parrot_include
+gen::opengl
+gen::call_list
+gen::languages
+gen::makefiles
+gen::platform
+gen::config_pm
+
+=cut
+
+

Copied: branches/auto_attrs/examples/config/file/configwithfatalstep (from r40476, trunk/examples/config/file/configwithfatalstep)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ branches/auto_attrs/examples/config/file/configwithfatalstep	Mon Aug 10 11:28:05 2009	(r40477, copy of r40476, trunk/examples/config/file/configwithfatalstep)
@@ -0,0 +1,78 @@
+# $Id$
+
+=variables
+
+=general
+
+=steps
+
+init::manifest nomanicheck
+init::defaults
+init::install
+init::hints verbose-step fatal-step
+init::headers
+inter::progs
+inter::make
+inter::lex
+inter::yacc
+auto::gcc
+auto::glibc
+auto::backtrace
+auto::fink
+auto::macports
+auto::msvc
+auto::attributes
+auto::warnings
+init::optimize
+inter::shlibs
+inter::libparrot
+inter::charset
+inter::encoding
+inter::types
+auto::ops
+auto::pmc
+auto::alignptrs
+auto::headers
+auto::sizes
+auto::byteorder
+auto::va_ptr
+auto::format
+auto::isreg
+auto::arch
+auto::jit
+auto::cpu
+auto::funcptr
+auto::cgoto
+auto::inline
+auto::gc
+auto::memalign
+auto::signal
+auto::socklen_t
+auto::neg_0
+auto::env
+auto::gmp
+auto::readline
+auto::gdbm
+auto::pcre
+auto::opengl
+auto::crypto
+auto::gettext
+auto::snprintf
+# auto::perldoc
+# auto::pod2man
+auto::ctags
+auto::revision
+auto::icu
+gen::config_h
+gen::core_pmcs
+gen::crypto
+gen::parrot_include
+gen::opengl
+gen::call_list
+gen::languages
+gen::makefiles
+gen::platform
+gen::config_pm
+
+=cut
+

Modified: branches/auto_attrs/lib/Parrot/Docs/Section/Parrot.pm
==============================================================================
--- branches/auto_attrs/lib/Parrot/Docs/Section/Parrot.pm	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/lib/Parrot/Docs/Section/Parrot.pm	Mon Aug 10 11:28:05 2009	(r40477)
@@ -96,28 +96,9 @@
             Parrot::Docs::Section::Tools->new,
             $self->new_item( 'Syntax Highlighting for Vim & Emacs', 'editor/README.pod'),
         ),
-        $self->new_group(
-            'Book draft',
-            '',
-            $self->new_item( 'Chapter 1 Introduction', 'docs/book/draft/ch01_introduction.pod'),
-            $self->new_item( 'Chapter 2 Getting Started', 'docs/book/draft/ch02_getting_started.pod'),
-            $self->new_item( 'Chapter 4 Parrot Compiler Tools', 'docs/book/draft/ch04_compiler_tools.pod'),
-            $self->new_item( 'Chapter 5 Parrot Grammar Engine', 'docs/book/draft/ch05_pge.pod'),
-            $self->new_item( 'Chapter 6 Not Quite Perl', 'docs/book/draft/ch06_nqp.pod'),
-            $self->new_item( 'Chapter 7 Dynamic C-level Objects', 'docs/book/draft/ch07_dynpmcs.pod'),
-            $self->new_item( 'Chapter 8 Dynamic Opcodes', 'docs/book/draft/ch08_dynops.pod'),
-            $self->new_item( 'Chapter 10 Instruction Reference', 'docs/book/draft/ch10_opcode_reference.pod'),
-            $self->new_item( 'Chapter 11 Directive Reference', 'docs/book/draft/ch11_directive_reference.pod'),
-            $self->new_item( 'Chapter 12 PIR Operator Reference', 'docs/book/draft/ch12_operator_reference.pod'),
-            $self->new_item( 'Appendix A Glossary', 'docs/book/draft/appa_glossary.pod'),
-            $self->new_item( 'Appendix B Patch Submission', 'docs/book/draft/appb_patch_submission.pod'),
-            $self->new_item( 'Appendix C ', 'docs/book/draft/appc_command_line_options.pod'),
-            $self->new_item( 'Appendix D ', 'docs/book/draft/appd_build_options.pod'),
-            $self->new_item( 'Appendix E ', 'docs/book/draft/appe_source_code.pod'),
-        ),
 
         $self->new_group(
-            'Book PIR',
+            'PIR Book',
             '',
             $self->new_item( 'Chapter 1 Introduction', 'docs/book/pir/ch01_introduction.pod'),
             $self->new_item( 'Chapter 2 Getting Started', 'docs/book/pir/ch02_getting_started.pod'),

Modified: branches/auto_attrs/lib/Parrot/Test/Pod.pm
==============================================================================
--- branches/auto_attrs/lib/Parrot/Test/Pod.pm	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/lib/Parrot/Test/Pod.pm	Mon Aug 10 11:28:05 2009	(r40477)
@@ -45,13 +45,13 @@
                 if ($full_file =~ m{
                         t/tools/dev/searchops/samples\.pm
                         | languages/pod/test\.pod
-                        | xconf/samples/yourfoobar
+                        | examples/config/file/configcompiler
                         | t/configure/testlib/verbosefoobar
                         | t/configure/testlib/ddefectivefoobar
                         | t/configure/testlib/adefectivefoobar
                         | t/configure/testlib/cdefectivefoobar
                         | t/configure/testlib/bdefectivefoobar
-                        | xconf/samples/testfoobar
+                        | examples/config/file/configwithfatalstep
                     }x
                 ) {
                     delete $files_needing_analysis->{ $file };

Modified: branches/auto_attrs/src/debug.c
==============================================================================
--- branches/auto_attrs/src/debug.c	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/src/debug.c	Mon Aug 10 11:28:05 2009	(r40477)
@@ -1729,8 +1729,8 @@
     /* Add it to the head of the list */
     if (pdb->watchpoint)
         condition->next = pdb->watchpoint;
-
     pdb->watchpoint = condition;
+    fprintf(stderr, "Adding watchpoint\n");
 }
 
 /*

Modified: branches/auto_attrs/t/codingstd/svn_id.t
==============================================================================
--- branches/auto_attrs/t/codingstd/svn_id.t	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/t/codingstd/svn_id.t	Mon Aug 10 11:28:05 2009	(r40477)
@@ -5,7 +5,7 @@
 use strict;
 use warnings;
 use Cwd;
-use File::Spec ();
+use File::Spec::Functions;
 use lib qw( . lib ../lib ../../lib );
 use Parrot::Distribution;
 use Test::More            tests => 1;
@@ -35,18 +35,18 @@
 =cut
 
 my $DIST = Parrot::Distribution->new;
-my $cwd = cwd();
+my $cwd  = getcwd(); # cwd() has some bugs when parent directory is a symbolic link
 
 # Certain files, for various reasons, cannot have an
 # SVN Id tag.  We exclude them from examination by this test.
 
 my %known_exceptions = map {
         $_ => 1,
-        ( File::Spec->catdir( $cwd, $_ ) ) => 1,
-    } qw(
-        examples/pir/quine_ord.pir
-        examples/streams/FileLines.pir
-        examples/streams/ParrotIO.pir
+        ( catdir( $cwd, $_ ) ) => 1,
+    } (
+        catfile(qw/ examples pir quine_ord.pir/),
+        catfile(qw/ examples streams FileLines.pir/),
+        catfile(qw/ examples streams ParrotIO.pir/),
     );
 
 my @files = grep { ! $known_exceptions{$_} }
@@ -59,6 +59,7 @@
             $DIST->get_pir_language_files(),
         )
 );
+
 my @no_id_files;
 
 foreach my $file (@files) {

Modified: branches/auto_attrs/t/configure/008-file_based_configuration.t
==============================================================================
--- branches/auto_attrs/t/configure/008-file_based_configuration.t	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/t/configure/008-file_based_configuration.t	Mon Aug 10 11:28:05 2009	(r40477)
@@ -17,7 +17,7 @@
 use Parrot::Configure::Options qw| process_options |;
 
 {
-    my $configfile = q{xconf/samples/testfoobar};
+    my $configfile = q{examples/config/file/configwithfatalstep};
     my ($args, $steps_list_ref) = _test_good_config_file($configfile);
 
     ok(! defined $args->{maintainer}, 
@@ -40,7 +40,7 @@
 }
 
 {
-    my $configfile = q{xconf/samples/yourfoobar};
+    my $configfile = q{examples/config/file/configcompiler};
     my ($args, $steps_list_ref) = _test_good_config_file($configfile);
     
     my $c_compiler = '/usr/bin/gcc';

Modified: branches/auto_attrs/t/tools/parrot_debugger.t
==============================================================================
--- branches/auto_attrs/t/tools/parrot_debugger.t	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/t/tools/parrot_debugger.t	Mon Aug 10 11:28:05 2009	(r40477)
@@ -270,7 +270,32 @@
 .end
 PIR
 
-BEGIN { $tests += 41 }
+pdb_output_like( <<PIR, "pir", "r", qr/great job!/, 'run code');
+.sub main :main
+    print "great job!"
+.end
+PIR
+
+TODO: {
+
+local $TODO = 'arguments do not seem to populate $P0';
+pdb_output_like( <<PIR, "pir", "r gomer", qr/gomer/, 'run code with args');
+.sub main :main
+    print \$P0
+.end
+PIR
+
+}
+
+pdb_output_like( <<PIR, "pir", "t\nw I0 == 2\nt", qr/Adding watchpoint/, 'watchpoint');
+.sub main :main
+    \$I0 = 1
+    \$I0 = 2
+    \$I0 = 3
+.end
+PIR
+
+BEGIN { $tests += 44 }
 
 BEGIN { plan tests => $tests; }
 

Modified: branches/auto_attrs/tools/dev/mk_manifest_and_skip.pl
==============================================================================
--- branches/auto_attrs/tools/dev/mk_manifest_and_skip.pl	Mon Aug 10 03:09:37 2009	(r40476)
+++ branches/auto_attrs/tools/dev/mk_manifest_and_skip.pl	Mon Aug 10 11:28:05 2009	(r40477)
@@ -10,6 +10,12 @@
 
 my $script = $0;
 
+if (-e '.git') {
+    print "Sorry, this script is not compatible with git-svn\n";
+    print "Patches Welcome!\n";
+    exit 1;
+}
+
 my $mani = Parrot::Manifest->new( { script => $script, } );
 
 my $manifest_lines_ref = $mani->prepare_manifest();


More information about the parrot-commits mailing list