[svn:parrot] r37537 - in trunk: . tools/dev

cotto at svn.parrot.org cotto at svn.parrot.org
Tue Mar 17 23:57:55 UTC 2009


Author: cotto
Date: Tue Mar 17 23:57:53 2009
New Revision: 37537
URL: https://trac.parrot.org/parrot/changeset/37537

Log:
[tools] add parrot-fuzzer

Added:
   trunk/tools/dev/parrot-fuzzer   (contents, props changed)
Modified:
   trunk/MANIFEST

Modified: trunk/MANIFEST
==============================================================================
--- trunk/MANIFEST	Tue Mar 17 23:11:13 2009	(r37536)
+++ trunk/MANIFEST	Tue Mar 17 23:57:53 2009	(r37537)
@@ -1,7 +1,7 @@
 # ex: set ro:
 # $Id$
 #
-# generated by tools/dev/mk_manifest_and_skip.pl Tue Mar 17 20:11:32 2009 UT
+# generated by tools/dev/mk_manifest_and_skip.pl Tue Mar 17 23:49:00 2009 UT
 #
 # See tools/dev/install_files.pl for documentation on the
 # format of this file.
@@ -2086,6 +2086,7 @@
 tools/dev/nopaste.pl                                        []
 tools/dev/ops_not_tested.pl                                 []
 tools/dev/opsrenumber.pl                                    []
+tools/dev/parrot-fuzzer                                     []
 tools/dev/parrot.supp                                       []
 tools/dev/parrot_8.supp                                     []
 tools/dev/parrot_api.pl                                     []

Added: trunk/tools/dev/parrot-fuzzer
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ trunk/tools/dev/parrot-fuzzer	Tue Mar 17 23:57:53 2009	(r37537)
@@ -0,0 +1,474 @@
+#!/usr/bin/env python
+
+from fusil.application    import Application
+from fusil.process.watch  import WatchProcess
+from fusil.process.create import CreateProcess
+from fusil.process.stdout import WatchStdout
+from fusil.project_agent  import ProjectAgent
+from fusil.process.tools  import locateProgram
+from fusil.write_code     import WriteCode
+from optparse             import OptionGroup
+import re
+import string
+import random
+
+'''
+
+=head1 Name
+
+parrot-fuzzer
+
+=head1 Description
+
+This is a fuzzer for Parrot, written in Python using the fusil library.  It
+attempts to break Parrot by generating calls to random PIR opcodes.
+
+=head1 Dependencies
+
+This script requires Python 2.5+ to run.  The fusil
+L<http://fusil.hachoir.org/trac> and python-ptrace
+L<http://python-ptrace.hachoir.org/trac> libraries are also required.
+
+=head1 Running
+
+Short version: C<sudo ./tools/dev/parrot-fuzzer>
+
+C<parrot-fuzzer> is run like any other fusil-based fuzzer.  Fusil likes to be
+run as root.  This is so that the child process in which Parrot runs can be put in
+a more restricted environment, limiting potential damage.
+
+fusil assumes the existence of a C<fusil> user and group.  Parrot runs as this
+user/group as part of its restricted environment.  Passing C<--unsafe> allows
+it to run as the current user.  Although it is not likely that this will cause
+any damage to your system, it is possible.
+
+C<parrot-fuzzer> needs access to Parrot's source code in order to figure out
+which PMCs and ops are available.  It assumes that it's running in the root dir
+of Parrot's source code.  You can use a different dir via
+C<--parrot_root=/some/other/path>.
+
+=head1 Options
+
+=over 4
+
+=item C<--parrot_root=/path/to/parrot>
+
+Specify the path to the root of Parrot's source dir.  By default, this is the
+current dir.
+
+=item C<--runcore=--some-runcore>
+
+Specify which runcore to use when running Parrot.  By default, --slow-core is
+used.  This option is passed directly to the parrot executable.  Other runcores
+include C<--CGP-core>, C<--fast-core>, C<--computed-goto-core>, C<--jit-core>,
+C<--switched-core> and C<--profile>.
+
+=item C<--instructions=10>
+
+Generate this number of instructions during test run.  The default is 3.  Note
+that a larger number such as 20 does not necessarily result in more failures.
+
+=back 4
+
+=cut
+
+'''
+
+class ParrotFuzzer(Application):
+
+    #base name of the dir where temp files and successful results will be stored
+    NAME="parrot-fuzz"
+
+    def createFuzzerOptions(self, parser):
+        options = OptionGroup(parser, "Parrot fuzzer")
+        options.add_option("--parrot_root",
+                help="Parrot program path (default: .)",
+                type="str",
+                default=".")
+        options.add_option("--runcore",
+                help="Run Parrot with the specified runcore (default: --slow-core)",
+                type="str",
+                default="--slow-core")
+        options.add_option("--instructions",
+                help="Generate this many instructions per test run (default: 3)",
+                type="int",
+                default="3")
+        return options
+
+
+    def setupProject(self):
+        parrot_root  = self.options.parrot_root
+        runcore      = self.options.runcore
+        instructions = self.options.instructions
+        parrot       = locateProgram(parrot_root + "/parrot")
+        process      = ParrotProcess(self.project, [parrot, runcore, "<fuzzy.pir>"])
+        pirgen       = PirGenerator(self.project, parrot_root, instructions)
+        WatchProcess(process)
+        WatchStdout(process)
+
+class PirGenerator(ProjectAgent, WriteCode):
+    """Agent that writes a file of randomly chosen PIR opfunc calls.
+
+    The generated program is a fixed preamble (which installs a catch-all
+    exception handler), argument set-up code, the random opfunc calls and a
+    fixed postamble.
+    """
+
+    def __init__(self, project, parrot_root, instructions):
+        """Load the opfunc list and initialise both base classes.
+
+        project      -- passed through to ProjectAgent.__init__
+        parrot_root  -- path handed to ArgGenerator and populateOpfuncList
+        instructions -- number of opfunc calls generatePir() emits per file
+        """
+        self.parrot_root       = parrot_root
+        self.opfunc_gen        = OpfuncGenerator()
+        self.arg_gen           = ArgGenerator(parrot_root)
+        self.instruction_count = instructions
+
+        self.opfunc_gen.populateOpfuncList(parrot_root)
+
+        ProjectAgent.__init__(self, project, "pir_source")
+        WriteCode.__init__(self)
+
+    def generatePir(self, filename):
+        """Write a new random PIR program to `filename`.
+
+        Picks self.instruction_count opfuncs, counts how many arguments of
+        each type they need, asks the ArgGenerator for that many values and
+        then assigns a randomly chosen value of the right type to every
+        argument slot.
+        """
+
+        self.pir_body     = ''
+        self.pir_preamble = """
+.sub main
+    $P0 = new ['ExceptionHandler']
+    set_addr $P0, catchall
+    push_eh $P0   #pokemon: gotta catch 'em all
+"""
+        self.pir_postamble = """
+catchall:
+    #Don't do anything with exceptions: we're hoping for a segfault or similar.
+.end
+"""
+        #how many instructions to generate
+        #Strangely, a low number like 3 seems to generate slightly more faults
+        #than a high number like 20.
+        opfunc_count = self.instruction_count
+        self.pir_body += "    #generating "+str(opfunc_count)+" instructions\n"
+
+        #every argument type this generator knows how to produce a value for
+        arg_types = ['s', 'p', 'i', 'n', 'sc', 'ic', 'nc']
+        opfuncs      = []
+        arg_counts   = dict()
+        self.createFile(filename)
+        arg_gen = self.arg_gen
+
+        #pick some opfuncs
+        for i in range(opfunc_count):
+            opfuncs.append(OpfuncCall(*self.opfunc_gen.getOpfunc()))
+
+        #calculate how many of each type of arg will be needed
+        for arg_type in arg_types:
+            arg_counts[arg_type] = 0
+            for opfunc in opfuncs:
+                arg_counts[arg_type] += opfunc.getArgCount(arg_type)
+
+        for arg_type in arg_types:
+            #print "need "+str(arg_counts[arg_type])+" args of type "+arg_type
+            arg_gen.setArgCount(arg_type, arg_counts[arg_type])
+
+        #generate the args, adding any supporting code to the preamble
+        self.pir_preamble += arg_gen.generateStringArgs()
+        self.pir_preamble += arg_gen.generatePMCArgs()
+        self.pir_preamble += arg_gen.generateIntArgs()
+        self.pir_preamble += arg_gen.generateNumArgs()
+        self.pir_preamble += arg_gen.generateStringConstArgs()
+        self.pir_preamble += arg_gen.generateIntConstArgs()
+        self.pir_preamble += arg_gen.generateNumConstArgs()
+
+        #put the args into the opfunc calls
+        for opfunc in opfuncs:
+            #print "working on " + opfunc.getLongName()
+            for arg_num in range(opfunc.getTotalArgCount()):
+                arg_type = opfunc.getArgType(arg_num)
+                #print "arg type for #"+str(arg_num)+" is "+arg_type
+                #getArgVal picks at random, so one value may be used repeatedly
+                opfunc.setArgVal(arg_num, arg_gen.getArgVal(arg_type))
+            #append getOpfuncCall
+            self.pir_body += opfunc.getOpfuncCall()
+
+        #write the code
+        self.write(0, self.pir_preamble)
+        self.write(0, self.pir_body)
+        self.write(0, self.pir_postamble)
+        self.close()
+
+    def on_session_start(self):
+        """Generate 'fuzzy.pir' for this session and send its name as 'pir_source'."""
+        filename = self.session().createFilename('fuzzy.pir')
+        self.generatePir(filename)
+        #NOTE(review): presumably delivered to ParrotProcess.on_pir_source by
+        #fusil's event dispatch -- confirm against the fusil documentation
+        self.send('pir_source', filename)
+
+#Representation of a call to an opfunc, including values of arguments
+#Note that argumens are literal, e.g. '$P0', '"foo"', etc
+class OpfuncCall:
+    def __init__(self, name, sig):
+        self.arg_types = []
+        self.arg_vals = []
+        self.name = name
+        if sig == '':
+            self.long_name = name
+        else:
+            self.long_name = name + '_' + sig
+        self.total_arg_count = 0
+        #print "making an opfunc: " + self.long_name
+        if sig != '':
+            for arg in string.split(sig, "_"):
+                self.arg_types.append(arg)
+                self.arg_vals.append('')
+                self.total_arg_count += 1
+                #print "found an arg: " + arg
+
+    def getLongName(self):
+        return self.long_name
+
+    def getArgCount(self, arg):
+        return self.arg_types.count(arg)
+
+    def getTotalArgCount(self):
+        return self.total_arg_count
+
+    def getArgType(self, n):
+        return self.arg_types[n]
+
+    def getArgType(self, n):
+        return self.arg_types[n]
+
+    def setArgVal(self, n, arg_val):
+        self.arg_vals[n] = arg_val
+
+    def getOpfuncCall(self):
+        opfunc_call = '\n    #'+self.long_name+'\n    ' + self.name
+        for arg_val in self.arg_vals:
+            opfunc_call += ' ' + arg_val + ','
+        opfunc_call = string.rstrip(opfunc_call, ",")
+        opfunc_call += "\n"
+        return opfunc_call
+
+class ArgGenerator:
+    arg_counts = {}
+    args       = {}
+
+    def __init__(self, parrot_root):
+        self.pmc_gen = PMCTypeGenerator()
+        self.pmc_gen.populatePMCList(parrot_root)
+
+    def setArgCount(self, arg_type, count):
+        self.arg_counts[arg_type] = count
+
+    def getArgVal(self, arg_type):
+        return random.choice(self.args[arg_type])
+
+    def generateStringArgs(self):
+        pir_preamble = ""
+        self.args['s'] = []
+        for n in range(self.arg_counts['s']):
+            str_val = ''
+            chars = string.printable + string.punctuation + string.whitespace
+            str_len = random.randint(0,10)
+            for m in range(str_len):
+                char = chars[random.randint(0, len(chars)-1)]
+                if char == '"':
+                    char = '\\"'
+                if char == '\\':
+                    char = '\\\\'
+                if char == '\n' or char == '\r':
+                    char = ''
+                str_val += char
+            pir_preamble += "    $S" + str(n) + " = \"" + str_val + "\"\n"
+            self.args['s'].append('$S' + str(n))
+        return pir_preamble
+
+    def generatePMCArgs(self):
+        pir_preamble = ""
+        self.args['p'] = []
+        for n in range(self.arg_counts['p']):
+            pir_preamble += "    $P" + str(n) + " = new ['" + self.pmc_gen.getPMCType() + "']\n"
+            self.args['p'].append('$P' + str(n))
+        return pir_preamble
+
+    def generateIntArgs(self):
+        pir_preamble = ""
+        self.args['i'] = []
+        for n in range(self.arg_counts['i']):
+            num = random.choice(['neg_many','neg_one','zero','pos_one','pos_many'])
+
+            if num == 'neg_many':
+                num_val = random.randint(-999999,-2)
+            if num == 'neg_one':
+                num_val = -1
+            if num == 'zero':
+                num_val = 0
+            if num == 'pos_one':
+                num_val = 1
+            if num == 'pos_many':
+                num_val = random.randint(2, 999999)
+
+            pir_preamble += "    $I" + str(n) + " = "+str(num_val)+"\n"
+            self.args['i'].append('$I' + str(n))
+        return pir_preamble
+
+    def generateNumArgs(self):
+        pir_preamble = ""
+        self.args['n'] = []
+        for n in range(self.arg_counts['n']):
+            num = random.choice(['neg_many','neg_one','zero','pos_one','pos_many'])
+
+            if num == 'neg_many':
+                num_val = (random.random() * -999999) - 1
+            if num == 'neg_one':
+                num_val = -1.0
+            if num == 'zero':
+                num_val = 0.0
+            if num == 'pos_one':
+                num_val = 1.0
+            if num == 'pos_many':
+                num_val = (random.random() * 999999) + 1
+            pir_preamble += "    $N" + str(n) + " = "+str(num_val)+"\n"
+            self.args['n'].append('$N' + str(n))
+        return pir_preamble
+
+    def generateStringConstArgs(self):
+        pir_preamble = ""
+        self.args['sc'] = []
+        for n in range(self.arg_counts['sc']):
+            self.args['sc'].append('"Fooooo'+str(n)+'"')
+        return pir_preamble
+
+    def generateIntConstArgs(self):
+        pir_preamble = ""
+        self.args['ic'] = []
+        for n in range(self.arg_counts['ic']):
+            #negative numbers and zero mess up control flow-related ops
+            #num = random.choice(['neg_many','neg_one','zero','pos_one','pos_many'])
+            num = random.choice(['pos_one','pos_many'])
+
+            if num == 'neg_many':
+                num_val = random.randint(-999999,-2)
+            if num == 'neg_one':
+                num_val = -1
+            if num == 'zero':
+                num_val = 0
+            if num == 'pos_one':
+                num_val = 1
+            if num == 'pos_many':
+                num_val = random.randint(2, 999999)
+
+            self.args['ic'].append(str(num_val))
+        return pir_preamble
+
+    def generateNumConstArgs(self):
+        pir_preamble = ""
+        self.args['nc'] = []
+        for n in range(self.arg_counts['nc']):
+            num = random.choice(['neg_many','neg_one','zero','pos_one','pos_many'])
+
+            if num == 'neg_many':
+                num_val = (random.random() * -999999) - 1
+            if num == 'neg_one':
+                num_val = -1.0
+            if num == 'zero':
+                num_val = 0.0
+            if num == 'pos_one':
+                num_val = 1.0
+            if num == 'pos_many':
+                num_val = (random.random() * 999999) + 1
+            self.args['nc'].append(str(num_val))
+        return pir_preamble
+
+class PMCTypeGenerator:
+    pmc_list = []
+    pmc_blacklist = ['Slice']
+
+    def populatePMCList(self, parrot_root):
+        pmc_pm = parrot_root + "/lib/Parrot/PMC.pm"
+        pmc_f  = open(pmc_pm, 'r')
+        for line in pmc_f:
+            if re.search('\t[a-zA-Z]+ => [0-9]+,', line):
+                line = re.sub('\t',      '', line)
+                line = re.sub(' =>.*\n', '', line)
+                if line not in self.pmc_blacklist:
+                    self.pmc_list.append(line)
+
+    def getPMCType(self):
+        return random.choice(self.pmc_list)
+
+
+class OpfuncGenerator:
+    opfunc_list = []
+    opfunc_blacklist = [
+            'branch_cs', #known to segfault
+            'check_events', #only for testing
+            'check_events__', #not for direct use
+            'clears', #clearing all [SPIN] registers isn't useful
+            'clearp',
+            'cleari',
+            'clearn',
+            'cpu_ret',
+            'debug',
+            'debug_break',
+            'debug_init',
+            'debug_load',
+            'debug_print',
+            'die',
+            'enternative',
+            'exec',
+            'exit',
+            'gc_debug',
+            'if',
+            'pic_callr__',
+            'pic_get_params__',
+            'pic_infix__',
+            'pic_inline_sub__',
+            'pic_set_returns__',
+            'pin',
+            'pop_eh',
+            'prederef__',
+            'profile',
+            'push_eh',
+            'returncc',
+            'setn_ind',
+            'sets_ind',
+            'seti_ind',
+            'setp_ind',
+            'sleep',
+            'tailcall',
+            'trace',
+            'trap',
+            'unless',
+            'unpin',
+            'wrapper__',
+            'yield',
+            ]
+
+    def populateOpfuncList(self, parrot_root):
+        ops_h = parrot_root + "/src/ops/core_ops.c"
+        ops_f = open(ops_h, 'r')
+        #This is a moderately fragile hack that relies on the specific
+        #format of some generated code.  Expect breakage.
+        for line in ops_f:
+            if line.find('PARROT_INLINE_OP') > -1 or line.find('PARROT_FUNCTION_OP') > -1:
+                line = ops_f.next()
+                short_name = line
+                line = ops_f.next()
+                long_name = line
+                #strip leading space and opening double-quote
+                short_name = re.sub('[ ]+"', '', short_name)
+                long_name  = re.sub('[ ]+"', '', long_name)
+                #strip everything after closing double-quote
+                short_name = re.sub('".*\n', '', short_name)
+                long_name  = re.sub('".*\n', '', long_name)
+
+                if long_name == short_name:
+                    sig = ''
+                else:
+                    sig = string.replace(long_name, short_name + '_', '')
+
+                #XXX: don't know how to handle these args
+                if (not re.search('(pc|k|ki|kc|kic)', sig)):
+                    if short_name not in self.opfunc_blacklist:
+                        self.opfunc_list.append([short_name, sig])
+                #        print "accepted "+long_name+"("+sig+")"
+                #else:
+                #    print "REJECTED "+long_name+"("+sig+")"
+
+    def getOpfunc(self):
+        return random.choice(self.opfunc_list)
+
+class ParrotProcess(CreateProcess):
+    def on_pir_source(self, filename):
+        self.cmdline.arguments[1] = filename
+        self.createProcess()
+
+#Start the fuzzer only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    ParrotFuzzer().main()


More information about the parrot-commits mailing list