Merge ast-branch to head

This change implements a new bytecode compiler, based on a transformation of the parse tree to an abstract syntax defined in Parser/Python.asdl. The compiler implementation is not complete, but it is in stable enough shape to run the entire test suite excepting two disabled tests.
author: Jeremy Hylton <jeremy@alum.mit.edu> 2005-10-20 19:59:25 +0000
committer: Jeremy Hylton <jeremy@alum.mit.edu> 2005-10-20 19:59:25 +0000
commit: 3e0055f8c65c407e74ce476b8e2b1fb889723514 (patch)
tree: 169cce8c87033e15364b57de947073e6e9c34d59 /Parser/asdl.py
parent: Enhance the performance of two important Unicode character (diff)
download: cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.tar.gz
cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.tar.bz2
cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.zip
1 files changed, 393 insertions, 0 deletions
diff --git a/Parser/asdl.py b/Parser/asdl.py
new file mode 100644
index 00000000000..0db4e3bc470
--- /dev/null
+++ b/Parser/asdl.py
@@ -0,0 +1,393 @@
+"""An implementation of the Zephyr Abstract Syntax Definition Language.
+
+See http://asdl.sourceforge.net/ and
+http://www.cs.princeton.edu/~danwang/Papers/dsl97/dsl97-abstract.html.
+
+Only supports top level module decl, not view.  I'm guessing that view
+is intended to support the browser and I'm not interested in the
+browser.
+"""
+
+#__metaclass__ = type
+
+import os
+import traceback
+
+import spark
+
+class Token:
+    # spark seems to dispatch in the parser based on a token's
+    # type attribute
+    def __init__(self, type, lineno):
+        self.type = type
+        self.lineno = lineno
+
+    def __str__(self):
+        return self.type
+
+    def __repr__(self):
+        return str(self)
+
+class Id(Token):
+    def __init__(self, value, lineno):
+        self.type = 'Id'
+        self.value = value
+        self.lineno = lineno
+
+    def __str__(self):
+        return self.value
+
+class ASDLSyntaxError:
+
+    def __init__(self, lineno, token=None, msg=None):
+        self.lineno = lineno
+        self.token = token
+        self.msg = msg
+
+    def __str__(self):
+        if self.msg is None:
+            return "Error at '%s', line %d" % (self.token, self.lineno)
+        else:
+            return "%s, line %d" % (self.msg, self.lineno)
+
+class ASDLScanner(spark.GenericScanner, object):
+
+    def tokenize(self, input):
+        self.rv = []
+        self.lineno = 1
+        super(ASDLScanner, self).tokenize(input)
+        return self.rv
+
+    def t_id(self, s):
+        r"[\w\.]+"
+        # XXX doesn't distinguish upper vs. lower, which is
+        # significant for ASDL.
+        self.rv.append(Id(s, self.lineno))
+
+    def t_xxx(self, s): # not sure what this production means
+        r"<="
+        self.rv.append(Token(s, self.lineno))
+
+    def t_punctuation(self, s):
+        r"[\{\}\*\=\|\(\)\,\?\:]"
+        self.rv.append(Token(s, self.lineno))
+
+    def t_comment(self, s):
+        r"\-\-[^\n]*"
+        pass
+
+    def t_newline(self, s):
+        r"\n"
+        self.lineno += 1
+
+    def t_whitespace(self, s):
+        r"[ \t]+"
+        pass
+
+    def t_default(self, s):
+        r" . +"
+        raise ValueError, "unmatched input: %s" % `s`
+
+class ASDLParser(spark.GenericParser, object):
+    def __init__(self):
+        super(ASDLParser, self).__init__("module")
+
+    def typestring(self, tok):
+        return tok.type
+
+    def error(self, tok):
+        raise ASDLSyntaxError(tok.lineno, tok)
+
+    def p_module_0(self, (module, name, _0, _1)):
+        " module ::= Id Id { } "
+        if module.value != "module":
+            raise ASDLSyntaxError(module.lineno,
+                                  msg="expected 'module', found %s" % module)
+        return Module(name, None)
+
+    def p_module(self, (module, name, _0, definitions, _1)):
+        " module ::= Id Id { definitions } "
+        if module.value != "module":
+            raise ASDLSyntaxError(module.lineno,
+                                  msg="expected 'module', found %s" % module)
+        return Module(name, definitions)
+
+    def p_definition_0(self, (definition,)):
+        " definitions ::= definition "
+        return definition
+
+    def p_definition_1(self, (definitions, definition)):
+        " definitions ::= definition definitions "
+        return definitions + definition
+
+    def p_definition(self, (id, _, type)):
+        " definition ::= Id = type "
+        return [Type(id, type)]
+
+    def p_type_0(self, (product,)):
+        " type ::= product "
+        return product
+
+    def p_type_1(self, (sum,)):
+        " type ::= sum "
+        return Sum(sum)
+
+    def p_type_2(self, (sum, id, _0, attributes, _1)):
+        " type ::= sum Id ( fields ) "
+        if id.value != "attributes":
+            raise ASDLSyntaxError(id.lineno,
+                                  msg="expected attributes, found %s" % id)
+        return Sum(sum, attributes)
+
+    def p_product(self, (_0, fields, _1)):
+        " product ::= ( fields ) "
+        # XXX can't I just construct things in the right order?
+        fields.reverse() 
+        return Product(fields)
+
+    def p_sum_0(self, (constructor,)):
+        " sum ::= constructor """
+        return [constructor]
+
+    def p_sum_1(self, (constructor, _, sum)):
+        " sum ::= constructor | sum "
+        return [constructor] + sum
+
+    def p_sum_2(self, (constructor, _, sum)):
+        " sum ::= constructor | sum "
+        return [constructor] + sum
+
+    def p_constructor_0(self, (id,)):
+        " constructor ::= Id "
+        return Constructor(id)
+
+    def p_constructor_1(self, (id, _0, fields, _1)):
+        " constructor ::= Id ( fields ) "
+        # XXX can't I just construct things in the right order?
+        fields.reverse() 
+        return Constructor(id, fields)
+
+    def p_fields_0(self, (field,)):
+        " fields ::= field "
+        return [field]
+
+    def p_fields_1(self, (field, _, fields)):
+        " fields ::= field , fields "
+        return fields + [field]
+
+    def p_field_0(self, (type,)):
+        " field ::= Id "
+        return Field(type)
+
+    def p_field_1(self, (type, name)):
+        " field ::= Id Id "
+        return Field(type, name)
+
+    def p_field_2(self, (type, _, name)):
+        " field ::= Id * Id "
+        return Field(type, name, seq=1)
+
+    def p_field_3(self, (type, _, name)):
+        " field ::= Id ? Id "
+        return Field(type, name, opt=1)
+
+    def p_field_4(self, (type, _)):
+        " field ::= Id * "
+        return Field(type, seq=1)
+
+    def p_field_5(self, (type, _)):
+        " field ::= Id ? "
+        return Field(type, opt=1)
+
+builtin_types = ("identifier", "string", "int", "bool", "object")
+
+# below is a collection of classes to capture the AST of an AST :-)
+# not sure if any of the methods are useful yet, but I'm adding them
+# piecemeal as they seem helpful
+
+class AST:
+    pass # a marker class
+
+class Module(AST):
+    def __init__(self, name, dfns):
+        self.name = name
+        self.dfns = dfns
+        self.types = {} # maps type name to value (from dfns)
+        for type in dfns:
+            self.types[type.name.value] = type.value
+
+    def __repr__(self):
+        return "Module(%s, %s)" % (self.name, self.dfns)
+
+class Type(AST):
+    def __init__(self, name, value):
+        self.name = name
+        self.value = value
+
+    def __repr__(self):
+        return "Type(%s, %s)" % (self.name, self.value)
+
+class Constructor(AST):
+    def __init__(self, name, fields=None):
+        self.name = name
+        self.fields = fields or []
+
+    def __repr__(self):
+        return "Constructor(%s, %s)" % (self.name, self.fields)
+
+class Field(AST):
+    def __init__(self, type, name=None, seq=0, opt=0):
+        self.type = type
+        self.name = name
+        self.seq = seq
+        self.opt = opt
+
+    def __repr__(self):
+        if self.seq:
+            extra = ", seq=1"
+        elif self.opt:
+            extra = ", opt=1"
+        else:
+            extra = ""
+        if self.name is None:
+            return "Field(%s%s)" % (self.type, extra)
+        else:
+            return "Field(%s, %s%s)" % (self.type, self.name, extra)
+
+class Sum(AST):
+    def __init__(self, types, attributes=None):
+        self.types = types
+        self.attributes = attributes or []
+
+    def __repr__(self):
+        if self.attributes is None:
+            return "Sum(%s)" % self.types
+        else:
+            return "Sum(%s, %s)" % (self.types, self.attributes)
+
+class Product(AST):
+    def __init__(self, fields):
+        self.fields = fields
+
+    def __repr__(self):
+        return "Product(%s)" % self.fields
+
+class VisitorBase(object):
+
+    def __init__(self, skip=0):
+        self.cache = {}
+        self.skip = skip
+
+    def visit(self, object, *args):
+        meth = self._dispatch(object)
+        if meth is None:
+            return
+        try:
+            meth(object, *args)
+        except Exception, err:
+            print "Error visiting", repr(object)
+            print err
+            traceback.print_exc()
+            # XXX hack
+            if hasattr(self, 'file'):
+                self.file.flush()
+            os._exit(1)
+
+    def _dispatch(self, object):
+        assert isinstance(object, AST), repr(object)
+        klass = object.__class__
+        meth = self.cache.get(klass)
+        if meth is None:
+            methname = "visit" + klass.__name__
+            if self.skip:
+                meth = getattr(self, methname, None)
+            else:
+                meth = getattr(self, methname)
+            self.cache[klass] = meth
+        return meth
+
+class Check(VisitorBase):
+
+    def __init__(self):
+        super(Check, self).__init__(skip=1)
+        self.cons = {}
+        self.errors = 0
+        self.types = {}
+
+    def visitModule(self, mod):
+        for dfn in mod.dfns:
+            self.visit(dfn)
+
+    def visitType(self, type):
+        self.visit(type.value, str(type.name))
+
+    def visitSum(self, sum, name):
+        for t in sum.types:
+            self.visit(t, name)
+
+    def visitConstructor(self, cons, name):
+        key = str(cons.name)
+        conflict = self.cons.get(key)
+        if conflict is None:
+            self.cons[key] = name
+        else:
+            print "Redefinition of constructor %s" % key
+            print "Defined in %s and %s" % (conflict, name)
+            self.errors += 1
+        for f in cons.fields:
+            self.visit(f, key)
+
+    def visitField(self, field, name):
+        key = str(field.type)
+        l = self.types.setdefault(key, [])
+        l.append(name)
+
+    def visitProduct(self, prod, name):
+        for f in prod.fields:
+            self.visit(f, name)
+
+def check(mod):
+    v = Check()
+    v.visit(mod)
+
+    for t in v.types:
+        if not mod.types.has_key(t) and not t in builtin_types:
+            v.errors += 1
+            uses = ", ".join(v.types[t])
+            print "Undefined type %s, used in %s" % (t, uses)
+    
+    return not v.errors
+
+def parse(file):
+    scanner = ASDLScanner()
+    parser = ASDLParser()
+
+    buf = open(file).read()
+    tokens = scanner.tokenize(buf)
+    try:
+        return parser.parse(tokens)
+    except ASDLSyntaxError, err:
+        print err
+        lines = buf.split("\n")
+        print lines[err.lineno - 1] # lines starts at 0, files at 1
+
+if __name__ == "__main__":
+    import glob
+    import sys
+
+    if len(sys.argv) > 1:
+        files = sys.argv[1:]
+    else:
+        testdir = "tests"
+        files = glob.glob(testdir + "/*.asdl")
+    
+    for file in files:
+        print file
+        mod = parse(file)
+        print "module", mod.name
+        print len(mod.dfns), "definitions"
+        if not check(mod):
+            print "Check failed"
+        else:
+            for dfn in mod.dfns:
+                print dfn.type
author	Jeremy Hylton <jeremy@alum.mit.edu>	2005-10-20 19:59:25 +0000
committer	Jeremy Hylton <jeremy@alum.mit.edu>	2005-10-20 19:59:25 +0000
commit	3e0055f8c65c407e74ce476b8e2b1fb889723514 (patch)
tree	169cce8c87033e15364b57de947073e6e9c34d59 /Parser/asdl.py
parent	Enhance the performance of two important Unicode character (diff)
download	cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.tar.gz cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.tar.bz2 cpython-3e0055f8c65c407e74ce476b8e2b1fb889723514.zip