summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremy Hylton <jeremy@alum.mit.edu>2005-10-20 19:59:25 +0000
committerJeremy Hylton <jeremy@alum.mit.edu>2005-10-20 19:59:25 +0000
commit3e0055f8c65c407e74ce476b8e2b1fb889723514 (patch)
tree169cce8c87033e15364b57de947073e6e9c34d59 /Parser/asdl.py
parentEnhance the performance of two important Unicode character (diff)
downloadcpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.tar.gz
cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.tar.bz2
cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.zip
Merge ast-branch to head
This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests.
Diffstat (limited to 'Parser/asdl.py')
-rw-r--r--Parser/asdl.py393
1 files changed, 393 insertions, 0 deletions
diff --git a/Parser/asdl.py b/Parser/asdl.py
new file mode 100644
index 00000000000..0db4e3bc470
--- /dev/null
+++ b/Parser/asdl.py
@@ -0,0 +1,393 @@
+"""An implementation of the Zephyr Abstract Syntax Definition Language.
+
+See http://asdl.sourceforge.net/ and
+http://www.cs.princeton.edu/~danwang/Papers/dsl97/dsl97-abstract.html.
+
+Only supports top level module decl, not view. I'm guessing that view
+is intended to support the browser and I'm not interested in the
+browser.
+"""
+
+#__metaclass__ = type
+
+import os
+import traceback
+
+import spark
+
+class Token:
+ # spark seems to dispatch in the parser based on a token's
+ # type attribute
+ def __init__(self, type, lineno):
+ self.type = type
+ self.lineno = lineno
+
+ def __str__(self):
+ return self.type
+
+ def __repr__(self):
+ return str(self)
+
+class Id(Token):
+ def __init__(self, value, lineno):
+ self.type = 'Id'
+ self.value = value
+ self.lineno = lineno
+
+ def __str__(self):
+ return self.value
+
+class ASDLSyntaxError:
+
+ def __init__(self, lineno, token=None, msg=None):
+ self.lineno = lineno
+ self.token = token
+ self.msg = msg
+
+ def __str__(self):
+ if self.msg is None:
+ return "Error at '%s', line %d" % (self.token, self.lineno)
+ else:
+ return "%s, line %d" % (self.msg, self.lineno)
+
+class ASDLScanner(spark.GenericScanner, object):
+
+ def tokenize(self, input):
+ self.rv = []
+ self.lineno = 1
+ super(ASDLScanner, self).tokenize(input)
+ return self.rv
+
+ def t_id(self, s):
+ r"[\w\.]+"
+ # XXX doesn't distinguish upper vs. lower, which is
+ # significant for ASDL.
+ self.rv.append(Id(s, self.lineno))
+
+ def t_xxx(self, s): # not sure what this production means
+ r"<="
+ self.rv.append(Token(s, self.lineno))
+
+ def t_punctuation(self, s):
+ r"[\{\}\*\=\|\(\)\,\?\:]"
+ self.rv.append(Token(s, self.lineno))
+
+ def t_comment(self, s):
+ r"\-\-[^\n]*"
+ pass
+
+ def t_newline(self, s):
+ r"\n"
+ self.lineno += 1
+
+ def t_whitespace(self, s):
+ r"[ \t]+"
+ pass
+
+ def t_default(self, s):
+ r" . +"
+ raise ValueError, "unmatched input: %s" % `s`
+
+class ASDLParser(spark.GenericParser, object):
+ def __init__(self):
+ super(ASDLParser, self).__init__("module")
+
+ def typestring(self, tok):
+ return tok.type
+
+ def error(self, tok):
+ raise ASDLSyntaxError(tok.lineno, tok)
+
+ def p_module_0(self, (module, name, _0, _1)):
+ " module ::= Id Id { } "
+ if module.value != "module":
+ raise ASDLSyntaxError(module.lineno,
+ msg="expected 'module', found %s" % module)
+ return Module(name, None)
+
+ def p_module(self, (module, name, _0, definitions, _1)):
+ " module ::= Id Id { definitions } "
+ if module.value != "module":
+ raise ASDLSyntaxError(module.lineno,
+ msg="expected 'module', found %s" % module)
+ return Module(name, definitions)
+
+ def p_definition_0(self, (definition,)):
+ " definitions ::= definition "
+ return definition
+
+ def p_definition_1(self, (definitions, definition)):
+ " definitions ::= definition definitions "
+ return definitions + definition
+
+ def p_definition(self, (id, _, type)):
+ " definition ::= Id = type "
+ return [Type(id, type)]
+
+ def p_type_0(self, (product,)):
+ " type ::= product "
+ return product
+
+ def p_type_1(self, (sum,)):
+ " type ::= sum "
+ return Sum(sum)
+
+ def p_type_2(self, (sum, id, _0, attributes, _1)):
+ " type ::= sum Id ( fields ) "
+ if id.value != "attributes":
+ raise ASDLSyntaxError(id.lineno,
+ msg="expected attributes, found %s" % id)
+ return Sum(sum, attributes)
+
+ def p_product(self, (_0, fields, _1)):
+ " product ::= ( fields ) "
+ # XXX can't I just construct things in the right order?
+ fields.reverse()
+ return Product(fields)
+
+ def p_sum_0(self, (constructor,)):
+ " sum ::= constructor """
+ return [constructor]
+
+ def p_sum_1(self, (constructor, _, sum)):
+ " sum ::= constructor | sum "
+ return [constructor] + sum
+
+ def p_sum_2(self, (constructor, _, sum)):
+ " sum ::= constructor | sum "
+ return [constructor] + sum
+
+ def p_constructor_0(self, (id,)):
+ " constructor ::= Id "
+ return Constructor(id)
+
+ def p_constructor_1(self, (id, _0, fields, _1)):
+ " constructor ::= Id ( fields ) "
+ # XXX can't I just construct things in the right order?
+ fields.reverse()
+ return Constructor(id, fields)
+
+ def p_fields_0(self, (field,)):
+ " fields ::= field "
+ return [field]
+
+ def p_fields_1(self, (field, _, fields)):
+ " fields ::= field , fields "
+ return fields + [field]
+
+ def p_field_0(self, (type,)):
+ " field ::= Id "
+ return Field(type)
+
+ def p_field_1(self, (type, name)):
+ " field ::= Id Id "
+ return Field(type, name)
+
+ def p_field_2(self, (type, _, name)):
+ " field ::= Id * Id "
+ return Field(type, name, seq=1)
+
+ def p_field_3(self, (type, _, name)):
+ " field ::= Id ? Id "
+ return Field(type, name, opt=1)
+
+ def p_field_4(self, (type, _)):
+ " field ::= Id * "
+ return Field(type, seq=1)
+
+ def p_field_5(self, (type, _)):
+ " field ::= Id ? "
+ return Field(type, opt=1)
+
+builtin_types = ("identifier", "string", "int", "bool", "object")
+
+# below is a collection of classes to capture the AST of an AST :-)
+# not sure if any of the methods are useful yet, but I'm adding them
+# piecemeal as they seem helpful
+
+class AST:
+ pass # a marker class
+
+class Module(AST):
+ def __init__(self, name, dfns):
+ self.name = name
+ self.dfns = dfns
+ self.types = {} # maps type name to value (from dfns)
+ for type in dfns:
+ self.types[type.name.value] = type.value
+
+ def __repr__(self):
+ return "Module(%s, %s)" % (self.name, self.dfns)
+
+class Type(AST):
+ def __init__(self, name, value):
+ self.name = name
+ self.value = value
+
+ def __repr__(self):
+ return "Type(%s, %s)" % (self.name, self.value)
+
+class Constructor(AST):
+ def __init__(self, name, fields=None):
+ self.name = name
+ self.fields = fields or []
+
+ def __repr__(self):
+ return "Constructor(%s, %s)" % (self.name, self.fields)
+
+class Field(AST):
+ def __init__(self, type, name=None, seq=0, opt=0):
+ self.type = type
+ self.name = name
+ self.seq = seq
+ self.opt = opt
+
+ def __repr__(self):
+ if self.seq:
+ extra = ", seq=1"
+ elif self.opt:
+ extra = ", opt=1"
+ else:
+ extra = ""
+ if self.name is None:
+ return "Field(%s%s)" % (self.type, extra)
+ else:
+ return "Field(%s, %s%s)" % (self.type, self.name, extra)
+
+class Sum(AST):
+ def __init__(self, types, attributes=None):
+ self.types = types
+ self.attributes = attributes or []
+
+ def __repr__(self):
+ if self.attributes is None:
+ return "Sum(%s)" % self.types
+ else:
+ return "Sum(%s, %s)" % (self.types, self.attributes)
+
+class Product(AST):
+ def __init__(self, fields):
+ self.fields = fields
+
+ def __repr__(self):
+ return "Product(%s)" % self.fields
+
+class VisitorBase(object):
+
+ def __init__(self, skip=0):
+ self.cache = {}
+ self.skip = skip
+
+ def visit(self, object, *args):
+ meth = self._dispatch(object)
+ if meth is None:
+ return
+ try:
+ meth(object, *args)
+ except Exception, err:
+ print "Error visiting", repr(object)
+ print err
+ traceback.print_exc()
+ # XXX hack
+ if hasattr(self, 'file'):
+ self.file.flush()
+ os._exit(1)
+
+ def _dispatch(self, object):
+ assert isinstance(object, AST), repr(object)
+ klass = object.__class__
+ meth = self.cache.get(klass)
+ if meth is None:
+ methname = "visit" + klass.__name__
+ if self.skip:
+ meth = getattr(self, methname, None)
+ else:
+ meth = getattr(self, methname)
+ self.cache[klass] = meth
+ return meth
+
+class Check(VisitorBase):
+
+ def __init__(self):
+ super(Check, self).__init__(skip=1)
+ self.cons = {}
+ self.errors = 0
+ self.types = {}
+
+ def visitModule(self, mod):
+ for dfn in mod.dfns:
+ self.visit(dfn)
+
+ def visitType(self, type):
+ self.visit(type.value, str(type.name))
+
+ def visitSum(self, sum, name):
+ for t in sum.types:
+ self.visit(t, name)
+
+ def visitConstructor(self, cons, name):
+ key = str(cons.name)
+ conflict = self.cons.get(key)
+ if conflict is None:
+ self.cons[key] = name
+ else:
+ print "Redefinition of constructor %s" % key
+ print "Defined in %s and %s" % (conflict, name)
+ self.errors += 1
+ for f in cons.fields:
+ self.visit(f, key)
+
+ def visitField(self, field, name):
+ key = str(field.type)
+ l = self.types.setdefault(key, [])
+ l.append(name)
+
+ def visitProduct(self, prod, name):
+ for f in prod.fields:
+ self.visit(f, name)
+
+def check(mod):
+ v = Check()
+ v.visit(mod)
+
+ for t in v.types:
+ if not mod.types.has_key(t) and not t in builtin_types:
+ v.errors += 1
+ uses = ", ".join(v.types[t])
+ print "Undefined type %s, used in %s" % (t, uses)
+
+ return not v.errors
+
+def parse(file):
+ scanner = ASDLScanner()
+ parser = ASDLParser()
+
+ buf = open(file).read()
+ tokens = scanner.tokenize(buf)
+ try:
+ return parser.parse(tokens)
+ except ASDLSyntaxError, err:
+ print err
+ lines = buf.split("\n")
+ print lines[err.lineno - 1] # lines starts at 0, files at 1
+
+if __name__ == "__main__":
+ import glob
+ import sys
+
+ if len(sys.argv) > 1:
+ files = sys.argv[1:]
+ else:
+ testdir = "tests"
+ files = glob.glob(testdir + "/*.asdl")
+
+ for file in files:
+ print file
+ mod = parse(file)
+ print "module", mod.name
+ print len(mod.dfns), "definitions"
+ if not check(mod):
+ print "Check failed"
+ else:
+ for dfn in mod.dfns:
+ print dfn.type