diff --git a/antlr/antlr_python_runtime-3.1.3/AUTHORS b/antlr/antlr_python_runtime-3.1.3/AUTHORS new file mode 100644 index 0000000000000000000000000000000000000000..01e79eeafe68971faf0596063c4d74a6e2489a23 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/AUTHORS @@ -0,0 +1,2 @@ +Benjamin Niemann : Main developer of Python target. +Clinton Roy : AST templates and runtime. diff --git a/antlr/antlr_python_runtime-3.1.3/LICENSE b/antlr/antlr_python_runtime-3.1.3/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..1d1d5d644b687487ed86ad161ab880a65c589406 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/LICENSE @@ -0,0 +1,26 @@ +[The "BSD licence"] +Copyright (c) 2003-2006 Terence Parr +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/antlr/antlr_python_runtime-3.1.3/MANIFEST.in b/antlr/antlr_python_runtime-3.1.3/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..29c4ad6492a6a8d78c9303aff4fe9b209124d8ec --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/MANIFEST.in @@ -0,0 +1,2 @@ +include LICENSE AUTHORS ez_setup.py + diff --git a/antlr/antlr_python_runtime-3.1.3/PKG-INFO b/antlr/antlr_python_runtime-3.1.3/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..7827917438f4813cec941482677875d5a13b0926 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/PKG-INFO @@ -0,0 +1,13 @@ +Metadata-Version: 1.0 +Name: antlr_python_runtime +Version: 3.1.3 +Summary: Runtime package for ANTLR3 +Home-page: http://www.antlr.org/ +Author: Benjamin Niemann +Author-email: pink@odahoda.de +License: BSD +Download-URL: http://www.antlr.org/download.html +Description: This is the runtime package for ANTLR3, which is required to use parsers + generated by ANTLR3. + +Platform: UNKNOWN diff --git a/antlr/antlr_python_runtime-3.1.3/README b/antlr/antlr_python_runtime-3.1.3/README new file mode 100644 index 0000000000000000000000000000000000000000..98a50bb77bb2e3d315469c87f9cc8c4664427a92 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/README @@ -0,0 +1,90 @@ +1) ABOUT +======== + +This is the Python package 'antlr3', which is required to use parsers created +by the ANTLR3 tool. 
See <http://www.antlr.org/> for more information about
+ANTLR3.
+
+
+2) STATUS
+=========
+
+The Python target for ANTLR3 is still in beta. Documentation is lacking, some
+bits of the code are not yet done, and some functionality has not been tested
+yet. The API might also change a bit - it currently mimics the Java
+implementation, but it may be made a bit more pythonic here and there.
+
+WARNING: Currently the runtime library for V3.1 is not compatible with
+recognizers generated by ANTLR V3.0.x. If you are an application developer,
+then the suggested way to solve this is to package the correct runtime with
+your application. Installing the runtime in the global site-packages directory
+may not be a good idea.
+It is still undetermined whether a future release of the V3.1 runtime will be
+compatible with V3.0.x recognizers, or whether future runtimes (V3.2+) will be
+compatible with V3.1 recognizers.
+Sorry for the inconvenience.
+
+
+3) DOWNLOAD
+===========
+
+This runtime is part of the ANTLR distribution. The latest version can be found
+at <http://www.antlr.org/download.html>.
+
+If you are interested in the latest, most bleeding edge version, have a look at
+the perforce depot. There are
+tarballs ready to download, so you don't have to install the perforce client.
+
+
+4) INSTALLATION
+===============
+
+Just like any other Python package:
+$ python setup.py install
+
+See the Python documentation on installing modules for more information.
+
+
+5) DOCUMENTATION
+================
+
+Documentation (as far as it exists) can be found in the ANTLR wiki.
+
+
+6) REPORTING BUGS
+=================
+
+Please send bug reports to the ANTLR mailing list.
+
+Existing bugs may appear someday in the bugtracker.
+
+
+7) HACKING
+==========
+
+Only the runtime package can be found here. There are also some StringTemplate
+files in 'src/org/antlr/codegen/templates/Python/' and some Java code in
+'src/org/antlr/codegen/PythonTarget.java' (of the main ANTLR3 source
+distribution).
+
+If there are no directories 'tests' and 'unittests' in 'runtime/Python', you
+should fetch the latest ANTLR3 version from the perforce depot. See section
+DOWNLOAD.
+You'll need java and ant in order to compile and use the tool.
+Be sure to properly set up your CLASSPATH.
+(FIXME: is there some generic information on how to build it yourself? I
+should point to it to avoid duplication.)
+
+You can then use the commands
+$ python setup.py unittest
+$ python setup.py functest
+to ensure that changes do not break existing behaviour.
+
+Please send patches to the maintainer, <pink@odahoda.de>. For larger code
+contributions you'll have to sign the "Developer's Certificate of Origin",
+which can be found on the ANTLR website.
diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/__init__.py b/antlr/antlr_python_runtime-3.1.3/antlr3/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ed6f2fe2a60b56bab15fa56f1d73b2825890306
--- /dev/null
+++ b/antlr/antlr_python_runtime-3.1.3/antlr3/__init__.py
@@ -0,0 +1,171 @@
+""" @package antlr3
+@brief ANTLR3 runtime package
+
+This module contains all support classes, which are needed to use recognizers
+generated by ANTLR3.
+
+@mainpage
+
+\note Please be warned that the line numbers in the API documentation do not
+match the real locations in the source code of the package. This is an
+unintended artifact of doxygen, which I could only convince to use the
+correct module names by concatenating all files from the package into a single
+module file...
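+
+As a very first orientation, here is a minimal sketch of the typical
+pipeline described below. XLexer and XParser are placeholders for whatever
+classes ANTLR generated from your grammar; they are not part of this
+package:
+
+@code
+import antlr3
+
+char_stream = antlr3.ANTLRStringStream(u'...some input...')
+lexer = XLexer(char_stream)
+tokens = antlr3.CommonTokenStream(lexer)
+parser = XParser(tokens)
+@endcode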
+
+Here is a little overview over the most commonly used classes provided by
+this runtime:
+
+@section recognizers Recognizers
+
+These recognizers are base classes for the code which is generated by ANTLR3.
+
+- BaseRecognizer: Base class with common recognizer functionality.
+- Lexer: Base class for lexers.
+- Parser: Base class for parsers.
+- tree.TreeParser: Base class for %tree parsers.
+
+@section streams Streams
+
+Each recognizer pulls its input from one of the stream classes below. Streams
+handle stuff like buffering, look-ahead and seeking.
+
+A character stream is usually the first element in the pipeline of a typical
+ANTLR3 application. It is used as the input for a Lexer.
+
+- ANTLRStringStream: Reads from a string object. The input should be a unicode
+  object, or ANTLR3 will have trouble decoding non-ascii data.
+- ANTLRFileStream: Opens a file and reads the contents, with optional character
+  decoding.
+- ANTLRInputStream: Reads the data from a file-like object, with optional
+  character decoding.
+
+A Parser needs a TokenStream as input (which in turn is usually fed by a
+Lexer):
+
+- CommonTokenStream: A basic and most commonly used TokenStream
+  implementation.
+- TokenRewriteStream: A modification of CommonTokenStream that allows the
+  stream to be altered (by the Parser). See the 'tweak' example for a usecase.
+
+And tree.TreeParser finally fetches its input from a tree.TreeNodeStream:
+
+- tree.CommonTreeNodeStream: A basic and most commonly used tree.TreeNodeStream
+  implementation.
+
+
+@section tokenstrees Tokens and Trees
+
+A Lexer emits Token objects which are usually buffered by a TokenStream. A
+Parser can build a Tree, if the output=AST option has been set in the grammar.
+
+The runtime provides these Token implementations:
+
+- CommonToken: A basic and most commonly used Token implementation.
+- ClassicToken: A Token object as used in ANTLR 2.x, used for %tree
+  construction.
+
+Tree objects are wrappers for Token objects.
+
+- tree.CommonTree: A basic and most commonly used Tree implementation.
+
+A tree.TreeAdaptor is used by the parser to create tree.Tree objects for the
+input Token objects.
+
+- tree.CommonTreeAdaptor: A basic and most commonly used tree.TreeAdaptor
+implementation.
+
+
+@section Exceptions
+
+A RecognitionException is raised when a recognizer encounters incorrect
+or unexpected input.
+
+- RecognitionException
+  - MismatchedRangeException
+  - MismatchedSetException
+    - MismatchedNotSetException
+    .
+  - MismatchedTokenException
+  - MismatchedTreeNodeException
+  - NoViableAltException
+  - EarlyExitException
+  - FailedPredicateException
+  .
+.
+
+A tree.RewriteCardinalityException is raised when the parser hits a
+cardinality mismatch during AST construction. Although this is basically a
+bug in your grammar, it can only be detected at runtime.
+
+- tree.RewriteCardinalityException
+  - tree.RewriteEarlyExitException
+  - tree.RewriteEmptyStreamException
+  .
+.
+
+"""
+
+# tree.RewriteRuleElementStream
+# tree.RewriteRuleSubtreeStream
+# tree.RewriteRuleTokenStream
+# CharStream
+# DFA
+# TokenSource
+
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +__version__ = '3.1.3' + +def version_str_to_tuple(version_str): + import re + import sys + + if version_str == 'HEAD': + return (sys.maxint, sys.maxint, sys.maxint, sys.maxint) + + m = re.match(r'(\d+)\.(\d+)(\.(\d+))?(b(\d+))?', version_str) + if m is None: + raise ValueError("Bad version string %r" % version_str) + + major = int(m.group(1)) + minor = int(m.group(2)) + patch = int(m.group(4) or 0) + beta = int(m.group(6) or sys.maxint) + + return (major, minor, patch, beta) + + +runtime_version_str = __version__ +runtime_version = version_str_to_tuple(runtime_version_str) + + +from constants import * +from dfa import * +from exceptions import * +from recognizers import * +from streams import * +from tokens import * diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/compat.py b/antlr/antlr_python_runtime-3.1.3/antlr3/compat.py new file mode 100644 index 0000000000000000000000000000000000000000..b29afcaae8d5cde60c490d3b92367fec02b6e5c7 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/antlr3/compat.py @@ -0,0 +1,48 @@ +"""Compatibility stuff""" + +# begin[licence] +# +# [The "BSD licence"] +# Copyright (c) 2005-2008 Terence Parr +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# end[licence] + +try: + set = set + frozenset = frozenset +except NameError: + from sets import Set as set, ImmutableSet as frozenset + + +try: + reversed = reversed +except NameError: + def reversed(l): + l = l[:] + l.reverse() + return l + + diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/constants.py b/antlr/antlr_python_runtime-3.1.3/antlr3/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..bf4a47a02bc3d8d7fa9dca7c67941c3ce9013df3 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/antlr3/constants.py @@ -0,0 +1,57 @@ +"""ANTLR3 runtime package""" + +# begin[licence] +# +# [The "BSD licence"] +# Copyright (c) 2005-2008 Terence Parr +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# end[licence] + +EOF = -1 + +## All tokens go to the parser (unless skip() is called in that rule) +# on a particular "channel". The parser tunes to a particular channel +# so that whitespace etc... can go to the parser on a "hidden" channel. +DEFAULT_CHANNEL = 0 + +## Anything on different channel than DEFAULT_CHANNEL is not parsed +# by parser. 
+HIDDEN_CHANNEL = 99
+
+# Predefined token types
+EOR_TOKEN_TYPE = 1
+
+##
+# imaginary tree navigation type; traverse "get child" link
+DOWN = 2
+##
+# imaginary tree navigation type; finish with a child list
+UP = 3
+
+MIN_TOKEN_TYPE = UP+1
+
+INVALID_TOKEN_TYPE = 0
+
diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/debug.py b/antlr/antlr_python_runtime-3.1.3/antlr3/debug.py
new file mode 100644
index 0000000000000000000000000000000000000000..f20d2d41fab3f35029d64075ad1ca094b2a12da3
--- /dev/null
+++ b/antlr/antlr_python_runtime-3.1.3/antlr3/debug.py
@@ -0,0 +1,1137 @@
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2009 Terence Parr
+# All rights reserved.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+import socket
+# sys and traceback are used below (TraceDebugEventListener.record and
+# DebugParser.reportError) but were missing from the original imports.
+import sys
+import traceback
+
+from antlr3 import Parser, TokenStream, RecognitionException, Token
+from antlr3.tree import CommonTreeAdaptor, TreeAdaptor, Tree
+
+class DebugParser(Parser):
+    def __init__(self, stream, state=None, dbg=None, *args, **kwargs):
+        # wrap token stream in DebugTokenStream (unless user already did so).
+        if not isinstance(stream, DebugTokenStream):
+            stream = DebugTokenStream(stream, dbg)
+
+        super(DebugParser, self).__init__(stream, state, *args, **kwargs)
+
+        # Who to notify when events in the parser occur.
+        self._dbg = None
+
+        self.setDebugListener(dbg)
+
+
+    def setDebugListener(self, dbg):
+        """Provide a new debug event listener for this parser. Notify the
+        input stream too that it should send events to this listener.
+ """ + + if hasattr(self.input, 'dbg'): + self.input.dbg = dbg + + self._dbg = dbg + + def getDebugListener(self): + return self._dbg + + dbg = property(getDebugListener, setDebugListener) + + + def beginResync(self): + self._dbg.beginResync() + + + def endResync(self): + self._dbg.endResync() + + + def beginBacktrack(self, level): + self._dbg.beginBacktrack(level) + + + def endBacktrack(self, level, successful): + self._dbg.endBacktrack(level,successful) + + + def reportError(self, exc): + if isinstance(exc, RecognitionException): + self._dbg.recognitionException(exc) + + else: + traceback.print_exc(exc) + + +class DebugTokenStream(TokenStream): + def __init__(self, input, dbg=None): + self.input = input + self.initialStreamState = True + # Track the last mark() call result value for use in rewind(). + self.lastMarker = None + + self._dbg = None + self.setDebugListener(dbg) + + # force TokenStream to get at least first valid token + # so we know if there are any hidden tokens first in the stream + self.input.LT(1) + + + def getDebugListener(self): + return self._dbg + + def setDebugListener(self, dbg): + self._dbg = dbg + + dbg = property(getDebugListener, setDebugListener) + + + def consume(self): + if self.initialStreamState: + self.consumeInitialHiddenTokens() + + a = self.input.index() + t = self.input.LT(1) + self.input.consume() + b = self.input.index() + self._dbg.consumeToken(t) + + if b > a+1: + # then we consumed more than one token; must be off channel tokens + for idx in range(a+1, b): + self._dbg.consumeHiddenToken(self.input.get(idx)); + + + def consumeInitialHiddenTokens(self): + """consume all initial off-channel tokens""" + + firstOnChannelTokenIndex = self.input.index() + for idx in range(firstOnChannelTokenIndex): + self._dbg.consumeHiddenToken(self.input.get(idx)) + + self.initialStreamState = False + + + def LT(self, i): + if self.initialStreamState: + self.consumeInitialHiddenTokens() + + t = self.input.LT(i) + self._dbg.LT(i, t) + return t + + + def LA(self, i): + if self.initialStreamState: + self.consumeInitialHiddenTokens() + + t = self.input.LT(i) + self._dbg.LT(i, t) + return t.type + + + def get(self, i): + return self.input.get(i) + + + def index(self): + return self.input.index() + + + def mark(self): + self.lastMarker = self.input.mark() + self._dbg.mark(self.lastMarker) + return self.lastMarker + + + def rewind(self, marker=None): + self._dbg.rewind(marker) + self.input.rewind(marker) + + + def release(self, marker): + pass + + + def seek(self, index): + # TODO: implement seek in dbg interface + # self._dbg.seek(index); + self.input.seek(index) + + + def size(self): + return self.input.size() + + + def getTokenSource(self): + return self.input.getTokenSource() + + + def getSourceName(self): + return self.getTokenSource().getSourceName() + + + def toString(self, start=None, stop=None): + return self.input.toString(start, stop) + + +class DebugTreeAdaptor(TreeAdaptor): + """A TreeAdaptor proxy that fires debugging events to a DebugEventListener + delegate and uses the TreeAdaptor delegate to do the actual work. All + AST events are triggered by this adaptor; no code gen changes are needed + in generated rules. Debugging events are triggered *after* invoking + tree adaptor routines. + + Trees created with actions in rewrite actions like "-> ^(ADD {foo} {bar})" + cannot be tracked as they might not use the adaptor to create foo, bar. + The debug listener has to deal with tree node IDs for which it did + not see a createNode event. 
+    A single node is sufficient even if it represents a whole tree.
+    """
+
+    def __init__(self, dbg, adaptor):
+        self.dbg = dbg
+        self.adaptor = adaptor
+
+
+    def createWithPayload(self, payload):
+        if payload.getTokenIndex() < 0:
+            # could be token conjured up during error recovery
+            return self.createFromType(payload.getType(), payload.getText())
+
+        node = self.adaptor.createWithPayload(payload)
+        self.dbg.createNode(node, payload)
+        return node
+
+    def createFromToken(self, tokenType, fromToken, text=None):
+        node = self.adaptor.createFromToken(tokenType, fromToken, text)
+        self.dbg.createNode(node)
+        return node
+
+    def createFromType(self, tokenType, text):
+        node = self.adaptor.createFromType(tokenType, text)
+        self.dbg.createNode(node)
+        return node
+
+
+    def errorNode(self, input, start, stop, exc):
+        # fixed: the original read 'selfadaptor' and 'dbg', both undefined
+        # names that raised NameError at runtime.
+        node = self.adaptor.errorNode(input, start, stop, exc)
+        if node is not None:
+            self.dbg.errorNode(node)
+
+        return node
+
+
+    def dupTree(self, tree):
+        t = self.adaptor.dupTree(tree)
+        # walk the tree and emit create and add child events
+        # to simulate what dupTree has done. dupTree does not call this debug
+        # adapter so I must simulate.
+        self.simulateTreeConstruction(t)
+        return t
+
+
+    def simulateTreeConstruction(self, t):
+        """^(A B C): emit create A, create B, add child, ..."""
+        self.dbg.createNode(t)
+        for i in range(self.adaptor.getChildCount(t)):
+            child = self.adaptor.getChild(t, i)
+            self.simulateTreeConstruction(child)
+            self.dbg.addChild(t, child)
+
+
+    def dupNode(self, treeNode):
+        d = self.adaptor.dupNode(treeNode)
+        self.dbg.createNode(d)
+        return d
+
+
+    def nil(self):
+        node = self.adaptor.nil()
+        self.dbg.nilNode(node)
+        return node
+
+
+    def isNil(self, tree):
+        return self.adaptor.isNil(tree)
+
+
+    def addChild(self, t, child):
+        if isinstance(child, Token):
+            n = self.createWithPayload(child)
+            self.addChild(t, n)
+
+        else:
+            if t is None or child is None:
+                return
+
+            self.adaptor.addChild(t, child)
+            self.dbg.addChild(t, child)
+
+    def becomeRoot(self, newRoot, oldRoot):
+        if isinstance(newRoot, Token):
+            n = self.createWithPayload(newRoot)
+            self.adaptor.becomeRoot(n, oldRoot)
+        else:
+            n = self.adaptor.becomeRoot(newRoot, oldRoot)
+
+        self.dbg.becomeRoot(newRoot, oldRoot)
+        return n
+
+
+    def rulePostProcessing(self, root):
+        return self.adaptor.rulePostProcessing(root)
+
+
+    def getType(self, t):
+        return self.adaptor.getType(t)
+
+
+    def setType(self, t, type):
+        self.adaptor.setType(t, type)
+
+
+    def getText(self, t):
+        return self.adaptor.getText(t)
+
+
+    def setText(self, t, text):
+        self.adaptor.setText(t, text)
+
+
+    def getToken(self, t):
+        return self.adaptor.getToken(t)
+
+
+    def setTokenBoundaries(self, t, startToken, stopToken):
+        self.adaptor.setTokenBoundaries(t, startToken, stopToken)
+        if t is not None and startToken is not None and stopToken is not None:
+            self.dbg.setTokenBoundaries(
+                t, startToken.getTokenIndex(),
+                stopToken.getTokenIndex())
+
+
+    def getTokenStartIndex(self, t):
+        return self.adaptor.getTokenStartIndex(t)
+
+
+    def getTokenStopIndex(self, t):
+        return self.adaptor.getTokenStopIndex(t)
+
+
+    def getChild(self, t, i):
+        return self.adaptor.getChild(t, i)
+
+
+    def setChild(self, t, i, child):
+        self.adaptor.setChild(t, i, child)
+
+
+    def deleteChild(self, t, i):
+        return self.adaptor.deleteChild(t, i)
+
+
+    def getChildCount(self, t):
+        return self.adaptor.getChildCount(t)
+
+
+    def getUniqueID(self, node):
+        return self.adaptor.getUniqueID(node)
+
+
+    def getParent(self, t):
+        return self.adaptor.getParent(t)
+
+
+    def getChildIndex(self, t):
+        return self.adaptor.getChildIndex(t)
+
+
+    def setParent(self, t, parent):
+        self.adaptor.setParent(t, parent)
+
+
+    def setChildIndex(self, t, index):
+        self.adaptor.setChildIndex(t, index)
+
+
+    def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
+        self.adaptor.replaceChildren(parent, startChildIndex, stopChildIndex, t)
+
+
+    ## support
+
+    def getDebugListener(self):
+        # fixed: the original returned the undefined global name 'dbg'.
+        return self.dbg
+
+    def setDebugListener(self, dbg):
+        self.dbg = dbg
+
+
+    def getTreeAdaptor(self):
+        return self.adaptor
+
+
+
+class DebugEventListener(object):
+    """All debugging events that a recognizer can trigger.
+
+    I did not create a separate AST debugging interface as it would create
+    lots of extra classes and DebugParser has a dbg var defined, which makes
+    it hard to change to ASTDebugEventListener. I looked hard at this issue
+    and it is easier to understand as one monolithic event interface for all
+    possible events. Hopefully, adding ST debugging stuff won't be bad. Leave
+    for future. 4/26/2006.
+    """
+
+    # Moved to version 2 for v3.1: added grammar name to enter/exit Rule
+    PROTOCOL_VERSION = "2"
+
+    def enterRule(self, grammarFileName, ruleName):
+        """The parser has just entered a rule. No decision has been made about
+        which alt is predicted. This is fired AFTER init actions have been
+        executed. Attributes are defined and available etc...
+        The grammarFileName allows composite grammars to jump around among
+        multiple grammar files.
+        """
+
+        pass
+
+
+    def enterAlt(self, alt):
+        """Because rules can have lots of alternatives, it is very useful to
+        know which alt you are entering. This is 1..n for n alts.
+        """
+        pass
+
+
+    def exitRule(self, grammarFileName, ruleName):
+        """This is the last thing executed before leaving a rule. It is
+        executed even if an exception is thrown. This is triggered after
+        error reporting and recovery have occurred (unless the exception is
+        not caught in this rule). This implies an "exitAlt" event.
+        The grammarFileName allows composite grammars to jump around among
+        multiple grammar files.
+        """
+        pass
+
+
+    def enterSubRule(self, decisionNumber):
+        """Track entry into any (...) subrule or other EBNF construct"""
+        pass
+
+
+    def exitSubRule(self, decisionNumber):
+        pass
+
+
+    def enterDecision(self, decisionNumber):
+        """Every decision, fixed k or arbitrary, has an enter/exit event
+        so that a GUI can easily track what LT/consume events are
+        associated with prediction. You will see a single enter/exit
+        subrule but multiple enter/exit decision events, one for each
+        loop iteration.
+        """
+        pass
+
+
+    def exitDecision(self, decisionNumber):
+        pass
+
+
+    def consumeToken(self, t):
+        """An input token was consumed; matched by any kind of element.
+        Trigger after the token was matched by things like match(), matchAny().
+        """
+        pass
+
+
+    def consumeHiddenToken(self, t):
+        """An off-channel input token was consumed.
+        Trigger after the token was matched by things like match(), matchAny().
+        (unless of course the hidden token is first stuff in the input stream).
+        """
+        pass
+
+
+    def LT(self, i, t):
+        """Somebody (anybody) looked ahead. Note that this actually gets
+        triggered by both LA and LT calls. The debugger will want to know
+        which Token object was examined. Like consumeToken, this indicates
+        what token was seen at that depth. A remote debugger cannot look
+        ahead into a file it doesn't have, so LT events must pass the token
+        even if the info is redundant.
+ """ + pass + + + def mark(self, marker): + """The parser is going to look arbitrarily ahead; mark this location, + the token stream's marker is sent in case you need it. + """ + pass + + + def rewind(self, marker=None): + """After an arbitrairly long lookahead as with a cyclic DFA (or with + any backtrack), this informs the debugger that stream should be + rewound to the position associated with marker. + + """ + pass + + + def beginBacktrack(self, level): + pass + + + def endBacktrack(self, level, successful): + pass + + + def location(self, line, pos): + """To watch a parser move through the grammar, the parser needs to + inform the debugger what line/charPos it is passing in the grammar. + For now, this does not know how to switch from one grammar to the + other and back for island grammars etc... + + This should also allow breakpoints because the debugger can stop + the parser whenever it hits this line/pos. + """ + pass + + + def recognitionException(self, e): + """A recognition exception occurred such as NoViableAltException. I made + this a generic event so that I can alter the exception hierachy later + without having to alter all the debug objects. + + Upon error, the stack of enter rule/subrule must be properly unwound. + If no viable alt occurs it is within an enter/exit decision, which + also must be rewound. Even the rewind for each mark must be unwount. + In the Java target this is pretty easy using try/finally, if a bit + ugly in the generated code. The rewind is generated in DFA.predict() + actually so no code needs to be generated for that. For languages + w/o this "finally" feature (C++?), the target implementor will have + to build an event stack or something. + + Across a socket for remote debugging, only the RecognitionException + data fields are transmitted. The token object or whatever that + caused the problem was the last object referenced by LT. The + immediately preceding LT event should hold the unexpected Token or + char. + + Here is a sample event trace for grammar: + + b : C ({;}A|B) // {;} is there to prevent A|B becoming a set + | D + ; + + The sequence for this rule (with no viable alt in the subrule) for + input 'c c' (there are 3 tokens) is: + + commence + LT(1) + enterRule b + location 7 1 + enter decision 3 + LT(1) + exit decision 3 + enterAlt1 + location 7 5 + LT(1) + consumeToken [c/<4>,1:0] + location 7 7 + enterSubRule 2 + enter decision 2 + LT(1) + LT(1) + recognitionException NoViableAltException 2 1 2 + exit decision 2 + exitSubRule 2 + beginResync + LT(1) + consumeToken [c/<4>,1:1] + LT(1) + endResync + LT(-1) + exitRule b + terminate + """ + pass + + + def beginResync(self): + """Indicates the recognizer is about to consume tokens to resynchronize + the parser. Any consume events from here until the recovered event + are not part of the parse--they are dead tokens. + """ + pass + + + def endResync(self): + """Indicates that the recognizer has finished consuming tokens in order + to resychronize. There may be multiple beginResync/endResync pairs + before the recognizer comes out of errorRecovery mode (in which + multiple errors are suppressed). This will be useful + in a gui where you want to probably grey out tokens that are consumed + but not matched to anything in grammar. Anything between + a beginResync/endResync pair was tossed out by the parser. 
+ """ + pass + + + def semanticPredicate(self, result, predicate): + """A semantic predicate was evaluate with this result and action text""" + pass + + + def commence(self): + """Announce that parsing has begun. Not technically useful except for + sending events over a socket. A GUI for example will launch a thread + to connect and communicate with a remote parser. The thread will want + to notify the GUI when a connection is made. ANTLR parsers + trigger this upon entry to the first rule (the ruleLevel is used to + figure this out). + """ + pass + + + def terminate(self): + """Parsing is over; successfully or not. Mostly useful for telling + remote debugging listeners that it's time to quit. When the rule + invocation level goes to zero at the end of a rule, we are done + parsing. + """ + pass + + + ## T r e e P a r s i n g + + def consumeNode(self, t): + """Input for a tree parser is an AST, but we know nothing for sure + about a node except its type and text (obtained from the adaptor). + This is the analog of the consumeToken method. Again, the ID is + the hashCode usually of the node so it only works if hashCode is + not implemented. If the type is UP or DOWN, then + the ID is not really meaningful as it's fixed--there is + just one UP node and one DOWN navigation node. + """ + pass + + + def LT(self, i, t): + """The tree parser lookedahead. If the type is UP or DOWN, + then the ID is not really meaningful as it's fixed--there is + just one UP node and one DOWN navigation node. + """ + pass + + + + ## A S T E v e n t s + + def nilNode(self, t): + """A nil was created (even nil nodes have a unique ID... + they are not "null" per se). As of 4/28/2006, this + seems to be uniquely triggered when starting a new subtree + such as when entering a subrule in automatic mode and when + building a tree in rewrite mode. + + If you are receiving this event over a socket via + RemoteDebugEventSocketListener then only t.ID is set. + """ + pass + + + def errorNode(self, t): + """Upon syntax error, recognizers bracket the error with an error node + if they are building ASTs. + """ + pass + + + def createNode(self, node, token=None): + """Announce a new node built from token elements such as type etc... + + If you are receiving this event over a socket via + RemoteDebugEventSocketListener then only t.ID, type, text are + set. + """ + pass + + + def becomeRoot(self, newRoot, oldRoot): + """Make a node the new root of an existing root. + + Note: the newRootID parameter is possibly different + than the TreeAdaptor.becomeRoot() newRoot parameter. + In our case, it will always be the result of calling + TreeAdaptor.becomeRoot() and not root_n or whatever. + + The listener should assume that this event occurs + only when the current subrule (or rule) subtree is + being reset to newRootID. + + If you are receiving this event over a socket via + RemoteDebugEventSocketListener then only IDs are set. + + @see antlr3.tree.TreeAdaptor.becomeRoot() + """ + pass + + + def addChild(self, root, child): + """Make childID a child of rootID. + + If you are receiving this event over a socket via + RemoteDebugEventSocketListener then only IDs are set. + + @see antlr3.tree.TreeAdaptor.addChild() + """ + pass + + + def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): + """Set the token start/stop token index for a subtree root or node. + + If you are receiving this event over a socket via + RemoteDebugEventSocketListener then only t.ID is set. 
+ """ + pass + + +class BlankDebugEventListener(DebugEventListener): + """A blank listener that does nothing; useful for real classes so + they don't have to have lots of blank methods and are less + sensitive to updates to debug interface. + + Note: this class is identical to DebugEventListener and exists purely + for compatibility with Java. + """ + pass + + +class TraceDebugEventListener(DebugEventListener): + """A listener that simply records text representations of the events. + + Useful for debugging the debugging facility ;) + + Subclasses can override the record() method (which defaults to printing to + stdout) to record the events in a different way. + """ + + def __init__(self, adaptor=None): + super(TraceDebugEventListener, self).__init__() + + if adaptor is None: + adaptor = CommonTreeAdaptor() + self.adaptor = adaptor + + def record(self, event): + sys.stdout.write(event + '\n') + + def enterRule(self, grammarFileName, ruleName): + self.record("enterRule "+ruleName) + + def exitRule(self, grammarFileName, ruleName): + self.record("exitRule "+ruleName) + + def enterSubRule(self, decisionNumber): + self.record("enterSubRule") + + def exitSubRule(self, decisionNumber): + self.record("exitSubRule") + + def location(self, line, pos): + self.record("location %s:%s" % (line, pos)) + + ## Tree parsing stuff + + def consumeNode(self, t): + self.record("consumeNode %s %s %s" % ( + self.adaptor.getUniqueID(t), + self.adaptor.getText(t), + self.adaptor.getType(t))) + + def LT(self, i, t): + self.record("LT %s %s %s %s" % ( + i, + self.adaptor.getUniqueID(t), + self.adaptor.getText(t), + self.adaptor.getType(t))) + + + ## AST stuff + def nilNode(self, t): + self.record("nilNode %s" % self.adaptor.getUniqueID(t)) + + def createNode(self, t, token=None): + if token is None: + self.record("create %s: %s, %s" % ( + self.adaptor.getUniqueID(t), + self.adaptor.getText(t), + self.adaptor.getType(t))) + + else: + self.record("create %s: %s" % ( + self.adaptor.getUniqueID(t), + token.getTokenIndex())) + + def becomeRoot(self, newRoot, oldRoot): + self.record("becomeRoot %s, %s" % ( + self.adaptor.getUniqueID(newRoot), + self.adaptor.getUniqueID(oldRoot))) + + def addChild(self, root, child): + self.record("addChild %s, %s" % ( + self.adaptor.getUniqueID(root), + self.adaptor.getUniqueID(child))) + + def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): + self.record("setTokenBoundaries %s, %s, %s" % ( + self.adaptor.getUniqueID(t), + tokenStartIndex, tokenStopIndex)) + + +class RecordDebugEventListener(TraceDebugEventListener): + """A listener that records events as strings in an array.""" + + def __init__(self, adaptor=None): + super(RecordDebugEventListener, self).__init__(adaptor) + + self.events = [] + + def record(self, event): + self.events.append(event) + + +class DebugEventSocketProxy(DebugEventListener): + """A proxy debug event listener that forwards events over a socket to + a debugger (or any other listener) using a simple text-based protocol; + one event per line. ANTLRWorks listens on server socket with a + RemoteDebugEventSocketListener instance. These two objects must therefore + be kept in sync. New events must be handled on both sides of socket. 
+ """ + + DEFAULT_DEBUGGER_PORT = 49100 + + def __init__(self, recognizer, adaptor=None, port=None, + debug=None): + super(DebugEventSocketProxy, self).__init__() + + self.grammarFileName = recognizer.getGrammarFileName() + + # Almost certainly the recognizer will have adaptor set, but + # we don't know how to cast it (Parser or TreeParser) to get + # the adaptor field. Must be set with a constructor. :( + self.adaptor = adaptor + + self.port = port or self.DEFAULT_DEBUGGER_PORT + + self.debug = debug + + self.socket = None + self.connection = None + self.input = None + self.output = None + + + def log(self, msg): + if self.debug is not None: + self.debug.write(msg + '\n') + + + def handshake(self): + if self.socket is None: + # create listening socket + self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self.socket.bind(('', self.port)) + self.socket.listen(1) + self.log("Waiting for incoming connection on port %d" % self.port) + + # wait for an incoming connection + self.connection, addr = self.socket.accept() + self.log("Accepted connection from %s:%d" % addr) + + self.connection.setblocking(1) + self.connection.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1) + + # FIXME(pink): wrap into utf8 encoding stream + self.output = self.connection.makefile('w', 0) + self.input = self.connection.makefile('r', 0) + + self.write("ANTLR %s" % self.PROTOCOL_VERSION) + self.write("grammar \"%s" % self.grammarFileName) + self.ack() + + + def write(self, msg): + self.log("> %s" % msg) + self.output.write("%s\n" % msg) + self.output.flush() + + + def ack(self): + t = self.input.readline() + self.log("< %s" % t.rstrip()) + + + def transmit(self, event): + self.write(event); + self.ack(); + + + def commence(self): + # don't bother sending event; listener will trigger upon connection + pass + + + def terminate(self): + self.transmit("terminate") + self.output.close() + self.input.close() + self.connection.close() + self.socket.close() + + + def enterRule(self, grammarFileName, ruleName): + self.transmit("enterRule\t%s\t%s" % (grammarFileName, ruleName)) + + + def enterAlt(self, alt): + self.transmit("enterAlt\t%d" % alt) + + + def exitRule(self, grammarFileName, ruleName): + self.transmit("exitRule\t%s\t%s" % (grammarFileName, ruleName)) + + + def enterSubRule(self, decisionNumber): + self.transmit("enterSubRule\t%d" % decisionNumber) + + + def exitSubRule(self, decisionNumber): + self.transmit("exitSubRule\t%d" % decisionNumber) + + + def enterDecision(self, decisionNumber): + self.transmit("enterDecision\t%d" % decisionNumber) + + + def exitDecision(self, decisionNumber): + self.transmit("exitDecision\t%d" % decisionNumber) + + + def consumeToken(self, t): + self.transmit("consumeToken\t%s" % self.serializeToken(t)) + + + def consumeHiddenToken(self, t): + self.transmit("consumeHiddenToken\t%s" % self.serializeToken(t)) + + + def LT(self, i, o): + if isinstance(o, Tree): + return self.LT_tree(i, o) + return self.LT_token(i, o) + + + def LT_token(self, i, t): + if t is not None: + self.transmit("LT\t%d\t%s" % (i, self.serializeToken(t))) + + + def mark(self, i): + self.transmit("mark\t%d" % i) + + + def rewind(self, i=None): + if i is not None: + self.transmit("rewind\t%d" % i) + else: + self.transmit("rewind") + + + def beginBacktrack(self, level): + self.transmit("beginBacktrack\t%d" % level) + + + def endBacktrack(self, level, successful): + self.transmit("endBacktrack\t%d\t%s" % ( + level, ['0', 
'1'][bool(successful)])) + + + def location(self, line, pos): + self.transmit("location\t%d\t%d" % (line, pos)) + + + def recognitionException(self, exc): + self.transmit('\t'.join([ + "exception", + exc.__class__.__name__, + str(int(exc.index)), + str(int(exc.line)), + str(int(exc.charPositionInLine))])) + + + def beginResync(self): + self.transmit("beginResync") + + + def endResync(self): + self.transmit("endResync") + + + def semanticPredicate(self, result, predicate): + self.transmit('\t'.join([ + "semanticPredicate", + str(int(result)), + self.escapeNewlines(predicate)])) + + ## A S T P a r s i n g E v e n t s + + def consumeNode(self, t): + FIXME(31) +# StringBuffer buf = new StringBuffer(50); +# buf.append("consumeNode"); +# serializeNode(buf, t); +# transmit(buf.toString()); + + + def LT_tree(self, i, t): + FIXME(34) +# int ID = adaptor.getUniqueID(t); +# String text = adaptor.getText(t); +# int type = adaptor.getType(t); +# StringBuffer buf = new StringBuffer(50); +# buf.append("LN\t"); // lookahead node; distinguish from LT in protocol +# buf.append(i); +# serializeNode(buf, t); +# transmit(buf.toString()); + + + def serializeNode(self, buf, t): + FIXME(33) +# int ID = adaptor.getUniqueID(t); +# String text = adaptor.getText(t); +# int type = adaptor.getType(t); +# buf.append("\t"); +# buf.append(ID); +# buf.append("\t"); +# buf.append(type); +# Token token = adaptor.getToken(t); +# int line = -1; +# int pos = -1; +# if ( token!=null ) { +# line = token.getLine(); +# pos = token.getCharPositionInLine(); +# } +# buf.append("\t"); +# buf.append(line); +# buf.append("\t"); +# buf.append(pos); +# int tokenIndex = adaptor.getTokenStartIndex(t); +# buf.append("\t"); +# buf.append(tokenIndex); +# serializeText(buf, text); + + + ## A S T E v e n t s + + def nilNode(self, t): + self.transmit("nilNode\t%d" % self.adaptor.getUniqueID(t)) + + + def errorNode(self, t): + self.transmit("errorNode\t%d\t%d\t\"%s" % ( + self.adaptor.getUniqueID(t), + Token.INVALID_TOKEN_TYPE, + self.escapeNewlines(t.toString()))) + + + + def createNode(self, node, token=None): + if token is not None: + self.transmit("createNode\t%d\t%d" % ( + self.adaptor.getUniqueID(node), + token.getTokenIndex())) + + else: + self.transmit("createNodeFromTokenElements\t%d\t%d\t\"%s" % ( + self.adaptor.getUniqueID(node), + self.adaptor.getType(node), + self.adaptor.getText(node))) + + + def becomeRoot(self, newRoot, oldRoot): + self.transmit("becomeRoot\t%d\t%d" % ( + self.adaptor.getUniqueID(newRoot), + self.adaptor.getUniqueID(oldRoot))) + + + def addChild(self, root, child): + self.transmit("addChild\t%d\t%d" % ( + self.adaptor.getUniqueID(root), + self.adaptor.getUniqueID(child))) + + + def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): + self.transmit("setTokenBoundaries\t%d\t%d\t%d" % ( + self.adaptor.getUniqueID(t), + tokenStartIndex, tokenStopIndex)) + + + + ## support + + def setTreeAdaptor(self, adaptor): + self.adaptor = adaptor + + def getTreeAdaptor(self): + return self.adaptor + + + def serializeToken(self, t): + buf = [str(int(t.getTokenIndex())), + str(int(t.getType())), + str(int(t.getChannel())), + str(int(t.getLine() or 0)), + str(int(t.getCharPositionInLine() or 0)), + '\"' + self.escapeNewlines(t.getText())] + return '\t'.join(buf) + + + def escapeNewlines(self, txt): + if txt is None: + return '' + + txt = txt.replace("%","%25") # escape all escape char ;) + txt = txt.replace("\n","%0A") # escape \n + txt = txt.replace("\r","%0D") # escape \r + return txt diff --git 
a/antlr/antlr_python_runtime-3.1.3/antlr3/dfa.py b/antlr/antlr_python_runtime-3.1.3/antlr3/dfa.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff93761ad5547c59151c31abcbec5395b4df70a8
--- /dev/null
+++ b/antlr/antlr_python_runtime-3.1.3/antlr3/dfa.py
@@ -0,0 +1,213 @@
+"""ANTLR3 runtime package"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+from antlr3.constants import EOF
+from antlr3.exceptions import NoViableAltException, BacktrackingFailed
+
+
+class DFA(object):
+    """@brief A DFA implemented as a set of transition tables.
+
+    Any state that has a semantic predicate edge is special; those states
+    are generated with if-then-else structures in a specialStateTransition()
+    which is generated by cyclicDFA template.
+
+    """
+
+    def __init__(
+        self,
+        recognizer, decisionNumber,
+        eot, eof, min, max, accept, special, transition
+        ):
+        ## Which recognizer encloses this DFA? Needed to check backtracking
+        self.recognizer = recognizer
+
+        self.decisionNumber = decisionNumber
+        self.eot = eot
+        self.eof = eof
+        self.min = min
+        self.max = max
+        self.accept = accept
+        self.special = special
+        self.transition = transition
+
+
+    def predict(self, input):
+        """
+        From the input stream, predict what alternative will succeed
+        using this DFA (representing the covering regular approximation
+        to the underlying CFL). Return an alternative number 1..n. Throw
+        an exception upon error.
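+
+        (Implementation note, summarizing the loop below: 'special' routes
+        a state to specialStateTransition() for predicate states, 'accept'
+        marks accepting states, 'min'/'max' bound the 'transition' row for
+        the current input symbol, and 'eot'/'eof' handle the end-of-token
+        and end-of-file edges.)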
+ """ + mark = input.mark() + s = 0 # we always start at s0 + try: + for _ in xrange(50000): + #print "***Current state = %d" % s + + specialState = self.special[s] + if specialState >= 0: + #print "is special" + s = self.specialStateTransition(specialState, input) + if s == -1: + self.noViableAlt(s, input) + return 0 + input.consume() + continue + + if self.accept[s] >= 1: + #print "accept state for alt %d" % self.accept[s] + return self.accept[s] + + # look for a normal char transition + c = input.LA(1) + + #print "LA = %d (%r)" % (c, unichr(c) if c >= 0 else 'EOF') + #print "range = %d..%d" % (self.min[s], self.max[s]) + + if c >= self.min[s] and c <= self.max[s]: + # move to next state + snext = self.transition[s][c-self.min[s]] + #print "in range, next state = %d" % snext + + if snext < 0: + #print "not a normal transition" + # was in range but not a normal transition + # must check EOT, which is like the else clause. + # eot[s]>=0 indicates that an EOT edge goes to another + # state. + if self.eot[s] >= 0: # EOT Transition to accept state? + #print "EOT trans to accept state %d" % self.eot[s] + + s = self.eot[s] + input.consume() + # TODO: I had this as return accept[eot[s]] + # which assumed here that the EOT edge always + # went to an accept...faster to do this, but + # what about predicated edges coming from EOT + # target? + continue + + #print "no viable alt" + self.noViableAlt(s, input) + return 0 + + s = snext + input.consume() + continue + + if self.eot[s] >= 0: + #print "EOT to %d" % self.eot[s] + + s = self.eot[s] + input.consume() + continue + + # EOF Transition to accept state? + if c == EOF and self.eof[s] >= 0: + #print "EOF Transition to accept state %d" \ + # % self.accept[self.eof[s]] + return self.accept[self.eof[s]] + + # not in range and not EOF/EOT, must be invalid symbol + self.noViableAlt(s, input) + return 0 + + else: + raise RuntimeError("DFA bang!") + + finally: + input.rewind(mark) + + + def noViableAlt(self, s, input): + if self.recognizer._state.backtracking > 0: + raise BacktrackingFailed + + nvae = NoViableAltException( + self.getDescription(), + self.decisionNumber, + s, + input + ) + + self.error(nvae) + raise nvae + + + def error(self, nvae): + """A hook for debugging interface""" + pass + + + def specialStateTransition(self, s, input): + return -1 + + + def getDescription(self): + return "n/a" + + +## def specialTransition(self, state, symbol): +## return 0 + + + def unpack(cls, string): + """@brief Unpack the runlength encoded table data. + + Terence implemented packed table initializers, because Java has a + size restriction on .class files and the lookup tables can grow + pretty large. The generated JavaLexer.java of the Java.g example + would be about 15MB with uncompressed array initializers. + + Python does not have any size restrictions, but the compilation of + such large source files seems to be pretty memory hungry. The memory + consumption of the python process grew to >1.5GB when importing a + 15MB lexer, eating all my swap space and I was to impacient to see, + if it could finish at all. With packed initializers that are unpacked + at import time of the lexer module, everything works like a charm. + + """ + + ret = [] + for i in range(len(string) / 2): + (n, v) = ord(string[i*2]), ord(string[i*2+1]) + + # Is there a bitwise operation to do this? 
+            if v == 0xFFFF:
+                v = -1
+
+            ret += [v] * n
+
+        return ret
+
+    unpack = classmethod(unpack)
diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/dottreegen.py b/antlr/antlr_python_runtime-3.1.3/antlr3/dottreegen.py
new file mode 100644
index 0000000000000000000000000000000000000000..827d4ecc62d81a26601080a379ec7c130e71a1b5
--- /dev/null
+++ b/antlr/antlr_python_runtime-3.1.3/antlr3/dottreegen.py
@@ -0,0 +1,210 @@
+""" @package antlr3.dottreegen
+@brief ANTLR3 runtime package, DOT tree generator module
+
+This module contains support classes for generating DOT (graphviz) diagrams
+from ASTs.
+
+"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+# lots of docstrings are missing, don't complain for now...
+# pylint: disable-msg=C0111
+
+from antlr3.tree import CommonTreeAdaptor
+import stringtemplate3
+
+class DOTTreeGenerator(object):
+    """
+    A utility class to generate DOT diagrams (graphviz) from
+    arbitrary trees. You can pass in your own templates and
+    can pass in any kind of tree or use Tree interface method.
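+
+    A minimal usage sketch (assuming 'tree' is an AST produced by some
+    ANTLR-generated recognizer):
+
+        gen = DOTTreeGenerator()
+        st = gen.toDOT(tree)
+        print st
+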
+ """ + + _treeST = stringtemplate3.StringTemplate( + template=( + "digraph {\n" + + " ordering=out;\n" + + " ranksep=.4;\n" + + " node [shape=plaintext, fixedsize=true, fontsize=11, fontname=\"Courier\",\n" + + " width=.25, height=.25];\n" + + " edge [arrowsize=.5]\n" + + " $nodes$\n" + + " $edges$\n" + + "}\n") + ) + + _nodeST = stringtemplate3.StringTemplate( + template="$name$ [label=\"$text$\"];\n" + ) + + _edgeST = stringtemplate3.StringTemplate( + template="$parent$ -> $child$ // \"$parentText$\" -> \"$childText$\"\n" + ) + + def __init__(self): + ## Track node to number mapping so we can get proper node name back + self.nodeToNumberMap = {} + + ## Track node number so we can get unique node names + self.nodeNumber = 0 + + + def toDOT(self, tree, adaptor=None, treeST=_treeST, edgeST=_edgeST): + if adaptor is None: + adaptor = CommonTreeAdaptor() + + treeST = treeST.getInstanceOf() + + self.nodeNumber = 0 + self.toDOTDefineNodes(tree, adaptor, treeST) + + self.nodeNumber = 0 + self.toDOTDefineEdges(tree, adaptor, treeST, edgeST) + return treeST + + + def toDOTDefineNodes(self, tree, adaptor, treeST, knownNodes=None): + if knownNodes is None: + knownNodes = set() + + if tree is None: + return + + n = adaptor.getChildCount(tree) + if n == 0: + # must have already dumped as child from previous + # invocation; do nothing + return + + # define parent node + number = self.getNodeNumber(tree) + if number not in knownNodes: + parentNodeST = self.getNodeST(adaptor, tree) + treeST.setAttribute("nodes", parentNodeST) + knownNodes.add(number) + + # for each child, do a " [label=text]" node def + for i in range(n): + child = adaptor.getChild(tree, i) + + number = self.getNodeNumber(child) + if number not in knownNodes: + nodeST = self.getNodeST(adaptor, child) + treeST.setAttribute("nodes", nodeST) + knownNodes.add(number) + + self.toDOTDefineNodes(child, adaptor, treeST, knownNodes) + + + def toDOTDefineEdges(self, tree, adaptor, treeST, edgeST): + if tree is None: + return + + n = adaptor.getChildCount(tree) + if n == 0: + # must have already dumped as child from previous + # invocation; do nothing + return + + parentName = "n%d" % self.getNodeNumber(tree) + + # for each child, do a parent -> child edge using unique node names + parentText = adaptor.getText(tree) + for i in range(n): + child = adaptor.getChild(tree, i) + childText = adaptor.getText(child) + childName = "n%d" % self.getNodeNumber(child) + edgeST = edgeST.getInstanceOf() + edgeST.setAttribute("parent", parentName) + edgeST.setAttribute("child", childName) + edgeST.setAttribute("parentText", parentText) + edgeST.setAttribute("childText", childText) + treeST.setAttribute("edges", edgeST) + self.toDOTDefineEdges(child, adaptor, treeST, edgeST) + + + def getNodeST(self, adaptor, t): + text = adaptor.getText(t) + nodeST = self._nodeST.getInstanceOf() + uniqueName = "n%d" % self.getNodeNumber(t) + nodeST.setAttribute("name", uniqueName) + if text is not None: + text = text.replace('"', r'\\"') + nodeST.setAttribute("text", text) + return nodeST + + + def getNodeNumber(self, t): + try: + return self.nodeToNumberMap[t] + except KeyError: + self.nodeToNumberMap[t] = self.nodeNumber + self.nodeNumber += 1 + return self.nodeNumber - 1 + + +def toDOT(tree, adaptor=None, treeST=DOTTreeGenerator._treeST, edgeST=DOTTreeGenerator._edgeST): + """ + Generate DOT (graphviz) for a whole tree not just a node. 
+    For example, 3+4*5 should generate:
+
+    digraph {
+        node [shape=plaintext, fixedsize=true, fontsize=11, fontname="Courier",
+              width=.4, height=.2];
+        edge [arrowsize=.7]
+        "+"->3
+        "+"->"*"
+        "*"->4
+        "*"->5
+    }
+
+    Return the ST, not a string, in case people want to alter it.
+
+    Takes a Tree interface object.
+
+    Example of invocation:
+
+        import sys
+        import antlr3
+        import antlr3.extras
+
+        input = antlr3.ANTLRInputStream(sys.stdin)
+        lex = TLexer(input)
+        tokens = antlr3.CommonTokenStream(lex)
+        parser = TParser(tokens)
+        tree = parser.e().tree
+        print tree.toStringTree()
+        st = antlr3.extras.toDOT(tree)
+        print st
+
+    """
+
+    gen = DOTTreeGenerator()
+    return gen.toDOT(tree, adaptor, treeST, edgeST)
diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/exceptions.py b/antlr/antlr_python_runtime-3.1.3/antlr3/exceptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..97b10743bd9fcbb88d516d5e2a11e191bc286a3e
--- /dev/null
+++ b/antlr/antlr_python_runtime-3.1.3/antlr3/exceptions.py
@@ -0,0 +1,364 @@
+"""ANTLR3 exception hierarchy"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+from antlr3.constants import INVALID_TOKEN_TYPE
+
+
+class BacktrackingFailed(Exception):
+    """@brief Raised to signal failed backtrack attempt"""
+
+    pass
+
+
+class RecognitionException(Exception):
+    """@brief The root of the ANTLR exception hierarchy.
+
+    To avoid English-only error messages and to generally make things
+    as flexible as possible, these exceptions are not created with strings,
+    but rather the information necessary to generate an error. Then
+    the various reporting methods in Parser and Lexer can be overridden
+    to generate a localized error message. For example, MismatchedToken
+    exceptions are built with the expected token type.
+    So, don't expect getMessage() to return anything.
+
+    Note that as of Java 1.4, you can access the stack trace, which means
+    that you can compute the complete trace of rules from the start symbol.
+    This gives you considerable context information with which to generate
+    useful error messages.
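+
+    A sketch of catching it from application code (someRule stands for
+    any generated rule method):
+
+        try:
+            parser.someRule()
+        except RecognitionException, exc:
+            print "error at %s:%s" % (exc.line, exc.charPositionInLine)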
+
+    ANTLR generates code that throws exceptions upon recognition error and
+    also generates code to catch these exceptions in each rule. If you
+    want to quit upon first error, you can turn off the automatic error
+    handling mechanism using rulecatch action, but you still need to
+    override methods mismatch and recoverFromMismatchSet.
+
+    In general, the recognition exceptions can track where in a grammar a
+    problem occurred and/or what was the expected input. While the parser
+    knows its state (such as current input symbol and line info) that
+    state can change before the exception is reported so current token index
+    is computed and stored at exception time. From this info, you can
+    perhaps print an entire line of input not just a single token, for example.
+    Better to just say the recognizer had a problem and then let the parser
+    figure out a fancy report.
+
+    """
+
+    def __init__(self, input=None):
+        Exception.__init__(self)
+
+        # What input stream did the error occur in?
+        self.input = None
+
+        # What is the index of the token/char we were looking at when the
+        # error occurred?
+        self.index = None
+
+        # The current Token when an error occurred. Since not all streams
+        # can retrieve the ith Token, we have to track the Token object.
+        # For parsers. Even when it's a tree parser, token might be set.
+        self.token = None
+
+        # If this is a tree parser exception, node is set to the node with
+        # the problem.
+        self.node = None
+
+        # The current char when an error occurred. For lexers.
+        self.c = None
+
+        # Track the line at which the error occurred in case this is
+        # generated from a lexer. We need to track this since the
+        # unexpected char doesn't carry the line info.
+        self.line = None
+
+        self.charPositionInLine = None
+
+        # If you are parsing a tree node stream, you will encounter some
+        # imaginary nodes w/o line/col info. We now search backwards looking
+        # for most recent token with line/col info, but notify getErrorHeader()
+        # that info is approximate.
+ self.approximateLineInfo = False + + + if input is not None: + self.input = input + self.index = input.index() + + # late import to avoid cyclic dependencies + from antlr3.streams import TokenStream, CharStream + from antlr3.tree import TreeNodeStream + + if isinstance(self.input, TokenStream): + self.token = self.input.LT(1) + self.line = self.token.line + self.charPositionInLine = self.token.charPositionInLine + + if isinstance(self.input, TreeNodeStream): + self.extractInformationFromTreeNodeStream(self.input) + + else: + if isinstance(self.input, CharStream): + self.c = self.input.LT(1) + self.line = self.input.line + self.charPositionInLine = self.input.charPositionInLine + + else: + self.c = self.input.LA(1) + + def extractInformationFromTreeNodeStream(self, nodes): + from antlr3.tree import Tree, CommonTree + from antlr3.tokens import CommonToken + + self.node = nodes.LT(1) + adaptor = nodes.adaptor + payload = adaptor.getToken(self.node) + if payload is not None: + self.token = payload + if payload.line <= 0: + # imaginary node; no line/pos info; scan backwards + i = -1 + priorNode = nodes.LT(i) + while priorNode is not None: + priorPayload = adaptor.getToken(priorNode) + if priorPayload is not None and priorPayload.line > 0: + # we found the most recent real line / pos info + self.line = priorPayload.line + self.charPositionInLine = priorPayload.charPositionInLine + self.approximateLineInfo = True + break + + i -= 1 + priorNode = nodes.LT(i) + + else: # node created from real token + self.line = payload.line + self.charPositionInLine = payload.charPositionInLine + + elif isinstance(self.node, Tree): + self.line = self.node.line + self.charPositionInLine = self.node.charPositionInLine + if isinstance(self.node, CommonTree): + self.token = self.node.token + + else: + type = adaptor.getType(self.node) + text = adaptor.getText(self.node) + self.token = CommonToken(type=type, text=text) + + + def getUnexpectedType(self): + """Return the token type or char of the unexpected input element""" + + from antlr3.streams import TokenStream + from antlr3.tree import TreeNodeStream + + if isinstance(self.input, TokenStream): + return self.token.type + + elif isinstance(self.input, TreeNodeStream): + adaptor = self.input.treeAdaptor + return adaptor.getType(self.node) + + else: + return self.c + + unexpectedType = property(getUnexpectedType) + + +class MismatchedTokenException(RecognitionException): + """@brief A mismatched char or Token or tree node.""" + + def __init__(self, expecting, input): + RecognitionException.__init__(self, input) + self.expecting = expecting + + + def __str__(self): + #return "MismatchedTokenException("+self.expecting+")" + return "MismatchedTokenException(%r!=%r)" % ( + self.getUnexpectedType(), self.expecting + ) + __repr__ = __str__ + + +class UnwantedTokenException(MismatchedTokenException): + """An extra token while parsing a TokenStream""" + + def getUnexpectedToken(self): + return self.token + + + def __str__(self): + exp = ", expected %s" % self.expecting + if self.expecting == INVALID_TOKEN_TYPE: + exp = "" + + if self.token is None: + return "UnwantedTokenException(found=%s%s)" % (None, exp) + + return "UnwantedTokenException(found=%s%s)" % (self.token.text, exp) + __repr__ = __str__ + + +class MissingTokenException(MismatchedTokenException): + """ + We were expecting a token but it's not found. The current token + is actually what we wanted next. 
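+
+    For example (a sketch), for input "i=(3;" the parser reports a
+    MissingTokenException for the absent ')' and conjures the token up
+    via getMissingSymbol(); str() of the exception then looks roughly
+    like "MissingTokenException(inserted ... at u';')".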
+ """ + + def __init__(self, expecting, input, inserted): + MismatchedTokenException.__init__(self, expecting, input) + + self.inserted = inserted + + + def getMissingType(self): + return self.expecting + + + def __str__(self): + if self.inserted is not None and self.token is not None: + return "MissingTokenException(inserted %r at %r)" % ( + self.inserted, self.token.text) + + if self.token is not None: + return "MissingTokenException(at %r)" % self.token.text + + return "MissingTokenException" + __repr__ = __str__ + + +class MismatchedRangeException(RecognitionException): + """@brief The next token does not match a range of expected types.""" + + def __init__(self, a, b, input): + RecognitionException.__init__(self, input) + + self.a = a + self.b = b + + + def __str__(self): + return "MismatchedRangeException(%r not in [%r..%r])" % ( + self.getUnexpectedType(), self.a, self.b + ) + __repr__ = __str__ + + +class MismatchedSetException(RecognitionException): + """@brief The next token does not match a set of expected types.""" + + def __init__(self, expecting, input): + RecognitionException.__init__(self, input) + + self.expecting = expecting + + + def __str__(self): + return "MismatchedSetException(%r not in %r)" % ( + self.getUnexpectedType(), self.expecting + ) + __repr__ = __str__ + + +class MismatchedNotSetException(MismatchedSetException): + """@brief Used for remote debugger deserialization""" + + def __str__(self): + return "MismatchedNotSetException(%r!=%r)" % ( + self.getUnexpectedType(), self.expecting + ) + __repr__ = __str__ + + +class NoViableAltException(RecognitionException): + """@brief Unable to decide which alternative to choose.""" + + def __init__( + self, grammarDecisionDescription, decisionNumber, stateNumber, input + ): + RecognitionException.__init__(self, input) + + self.grammarDecisionDescription = grammarDecisionDescription + self.decisionNumber = decisionNumber + self.stateNumber = stateNumber + + + def __str__(self): + return "NoViableAltException(%r!=[%r])" % ( + self.unexpectedType, self.grammarDecisionDescription + ) + __repr__ = __str__ + + +class EarlyExitException(RecognitionException): + """@brief The recognizer did not match anything for a (..)+ loop.""" + + def __init__(self, decisionNumber, input): + RecognitionException.__init__(self, input) + + self.decisionNumber = decisionNumber + + +class FailedPredicateException(RecognitionException): + """@brief A semantic predicate failed during validation. + + Validation of predicates + occurs when normally parsing the alternative just like matching a token. + Disambiguating predicate evaluation occurs when we hoist a predicate into + a prediction decision. 
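+
+    A grammar sketch that can raise it (a validating predicate; the
+    allowTypes flag is a hypothetical member):
+
+        decl : {self.allowTypes}? TYPE ID ';' ;
+
+    If self.allowTypes evaluates to False while decl is being matched,
+    the parser raises FailedPredicateException(input, "decl",
+    "self.allowTypes").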
+    """
+
+    def __init__(self, input, ruleName, predicateText):
+        RecognitionException.__init__(self, input)
+
+        self.ruleName = ruleName
+        self.predicateText = predicateText
+
+
+    def __str__(self):
+        return "FailedPredicateException("+self.ruleName+",{"+self.predicateText+"}?)"
+    __repr__ = __str__
+
+
+class MismatchedTreeNodeException(RecognitionException):
+    """@brief The next tree node does not match the expected type."""
+
+    def __init__(self, expecting, input):
+        RecognitionException.__init__(self, input)
+
+        self.expecting = expecting
+
+    def __str__(self):
+        return "MismatchedTreeNodeException(%r!=%r)" % (
+            self.getUnexpectedType(), self.expecting
+            )
+    __repr__ = __str__
diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/extras.py b/antlr/antlr_python_runtime-3.1.3/antlr3/extras.py
new file mode 100644
index 0000000000000000000000000000000000000000..9155cda9e2eecac9c06492b2248dddd7f007ece1
--- /dev/null
+++ b/antlr/antlr_python_runtime-3.1.3/antlr3/extras.py
@@ -0,0 +1,47 @@
+""" @package antlr3.extras
+@brief ANTLR3 runtime package, extras module
+
+This module contains the optional extras: the TreeWizard and the toDOT()
+convenience function for generating DOT diagrams from trees.
+
+"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+#    derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# end[licence]
+
+# lots of docstrings are missing, don't complain for now...
+# pylint: disable-msg=C0111
+
+from treewizard import TreeWizard
+
+try:
+    from antlr3.dottreegen import toDOT
+except ImportError, exc:
+    def toDOT(*args, **kwargs):
+        raise exc
diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/main.py b/antlr/antlr_python_runtime-3.1.3/antlr3/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..19a08b504f034ed6686c1b7e6d69d86e74aa4096
--- /dev/null
+++ b/antlr/antlr_python_runtime-3.1.3/antlr3/main.py
@@ -0,0 +1,307 @@
+"""ANTLR3 runtime package"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1.
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# end[licence] + + +import sys +import optparse + +import antlr3 + + +class _Main(object): + def __init__(self): + self.stdin = sys.stdin + self.stdout = sys.stdout + self.stderr = sys.stderr + + + def parseOptions(self, argv): + optParser = optparse.OptionParser() + optParser.add_option( + "--encoding", + action="store", + type="string", + dest="encoding" + ) + optParser.add_option( + "--input", + action="store", + type="string", + dest="input" + ) + optParser.add_option( + "--interactive", "-i", + action="store_true", + dest="interactive" + ) + optParser.add_option( + "--no-output", + action="store_true", + dest="no_output" + ) + optParser.add_option( + "--profile", + action="store_true", + dest="profile" + ) + optParser.add_option( + "--hotshot", + action="store_true", + dest="hotshot" + ) + optParser.add_option( + "--port", + type="int", + dest="port", + default=None + ) + optParser.add_option( + "--debug-socket", + action='store_true', + dest="debug_socket", + default=None + ) + + self.setupOptions(optParser) + + return optParser.parse_args(argv[1:]) + + + def setupOptions(self, optParser): + pass + + + def execute(self, argv): + options, args = self.parseOptions(argv) + + self.setUp(options) + + if options.interactive: + while True: + try: + input = raw_input(">>> ") + except (EOFError, KeyboardInterrupt): + self.stdout.write("\nBye.\n") + break + + inStream = antlr3.ANTLRStringStream(input) + self.parseStream(options, inStream) + + else: + if options.input is not None: + inStream = antlr3.ANTLRStringStream(options.input) + + elif len(args) == 1 and args[0] != '-': + inStream = antlr3.ANTLRFileStream( + args[0], encoding=options.encoding + ) + + else: + inStream = antlr3.ANTLRInputStream( + self.stdin, encoding=options.encoding + ) + + if options.profile: + try: + import cProfile as profile + except ImportError: + import profile + + profile.runctx( + 'self.parseStream(options, inStream)', + globals(), + locals(), + 'profile.dat' + ) + + import pstats + stats = pstats.Stats('profile.dat') + stats.strip_dirs() + stats.sort_stats('time') + stats.print_stats(100) + + elif options.hotshot: + import hotshot + + profiler = hotshot.Profile('hotshot.dat') + profiler.runctx( + 'self.parseStream(options, inStream)', + globals(), + locals() + ) + + else: + self.parseStream(options, 
inStream) + + + def setUp(self, options): + pass + + + def parseStream(self, options, inStream): + raise NotImplementedError + + + def write(self, options, text): + if not options.no_output: + self.stdout.write(text) + + + def writeln(self, options, text): + self.write(options, text + '\n') + + +class LexerMain(_Main): + def __init__(self, lexerClass): + _Main.__init__(self) + + self.lexerClass = lexerClass + + + def parseStream(self, options, inStream): + lexer = self.lexerClass(inStream) + for token in lexer: + self.writeln(options, str(token)) + + +class ParserMain(_Main): + def __init__(self, lexerClassName, parserClass): + _Main.__init__(self) + + self.lexerClassName = lexerClassName + self.lexerClass = None + self.parserClass = parserClass + + + def setupOptions(self, optParser): + optParser.add_option( + "--lexer", + action="store", + type="string", + dest="lexerClass", + default=self.lexerClassName + ) + optParser.add_option( + "--rule", + action="store", + type="string", + dest="parserRule" + ) + + + def setUp(self, options): + lexerMod = __import__(options.lexerClass) + self.lexerClass = getattr(lexerMod, options.lexerClass) + + + def parseStream(self, options, inStream): + kwargs = {} + if options.port is not None: + kwargs['port'] = options.port + if options.debug_socket is not None: + kwargs['debug_socket'] = sys.stderr + + lexer = self.lexerClass(inStream) + tokenStream = antlr3.CommonTokenStream(lexer) + parser = self.parserClass(tokenStream, **kwargs) + result = getattr(parser, options.parserRule)() + if result is not None: + if hasattr(result, 'tree'): + if result.tree is not None: + self.writeln(options, result.tree.toStringTree()) + else: + self.writeln(options, repr(result)) + + +class WalkerMain(_Main): + def __init__(self, walkerClass): + _Main.__init__(self) + + self.lexerClass = None + self.parserClass = None + self.walkerClass = walkerClass + + + def setupOptions(self, optParser): + optParser.add_option( + "--lexer", + action="store", + type="string", + dest="lexerClass", + default=None + ) + optParser.add_option( + "--parser", + action="store", + type="string", + dest="parserClass", + default=None + ) + optParser.add_option( + "--parser-rule", + action="store", + type="string", + dest="parserRule", + default=None + ) + optParser.add_option( + "--rule", + action="store", + type="string", + dest="walkerRule" + ) + + + def setUp(self, options): + lexerMod = __import__(options.lexerClass) + self.lexerClass = getattr(lexerMod, options.lexerClass) + parserMod = __import__(options.parserClass) + self.parserClass = getattr(parserMod, options.parserClass) + + + def parseStream(self, options, inStream): + lexer = self.lexerClass(inStream) + tokenStream = antlr3.CommonTokenStream(lexer) + parser = self.parserClass(tokenStream) + result = getattr(parser, options.parserRule)() + if result is not None: + assert hasattr(result, 'tree'), "Parser did not return an AST" + nodeStream = antlr3.tree.CommonTreeNodeStream(result.tree) + nodeStream.setTokenStream(tokenStream) + walker = self.walkerClass(nodeStream) + result = getattr(walker, options.walkerRule)() + if result is not None: + if hasattr(result, 'tree'): + self.writeln(options, result.tree.toStringTree()) + else: + self.writeln(options, repr(result)) + diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/recognizers.py b/antlr/antlr_python_runtime-3.1.3/antlr3/recognizers.py new file mode 100644 index 0000000000000000000000000000000000000000..b6bfcc17ba80881d13c0be82c48cedcdc8f5d7f0 --- /dev/null +++ 
b/antlr/antlr_python_runtime-3.1.3/antlr3/recognizers.py @@ -0,0 +1,1487 @@ +"""ANTLR3 runtime package""" + +# begin[licence] +# +# [The "BSD licence"] +# Copyright (c) 2005-2008 Terence Parr +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# end[licence] + +import sys +import inspect + +from antlr3 import runtime_version, runtime_version_str +from antlr3.constants import DEFAULT_CHANNEL, HIDDEN_CHANNEL, EOF, \ + EOR_TOKEN_TYPE, INVALID_TOKEN_TYPE +from antlr3.exceptions import RecognitionException, MismatchedTokenException, \ + MismatchedRangeException, MismatchedTreeNodeException, \ + NoViableAltException, EarlyExitException, MismatchedSetException, \ + MismatchedNotSetException, FailedPredicateException, \ + BacktrackingFailed, UnwantedTokenException, MissingTokenException +from antlr3.tokens import CommonToken, EOF_TOKEN, SKIP_TOKEN +from antlr3.compat import set, frozenset, reversed + + +class RecognizerSharedState(object): + """ + The set of fields needed by an abstract recognizer to recognize input + and recover from errors etc... As a separate state object, it can be + shared among multiple grammars; e.g., when one grammar imports another. + + These fields are publically visible but the actual state pointer per + parser is protected. + """ + + def __init__(self): + # Track the set of token types that can follow any rule invocation. + # Stack grows upwards. + self.following = [] + + # This is true when we see an error and before having successfully + # matched a token. Prevents generation of more than one error message + # per error. + self.errorRecovery = False + + # The index into the input stream where the last error occurred. + # This is used to prevent infinite loops where an error is found + # but no token is consumed during recovery...another error is found, + # ad naseum. This is a failsafe mechanism to guarantee that at least + # one token/tree node is consumed for two errors. + self.lastErrorIndex = -1 + + # If 0, no backtracking is going on. Safe to exec actions etc... + # If >0 then it's the level of backtracking. + self.backtracking = 0 + + # An array[size num rules] of Map that tracks + # the stop token index for each rule. 
ruleMemo[ruleIndex] is + # the memoization table for ruleIndex. For key ruleStartIndex, you + # get back the stop token for associated rule or MEMO_RULE_FAILED. + # + # This is only used if rule memoization is on (which it is by default). + self.ruleMemo = None + + ## Did the recognizer encounter a syntax error? Track how many. + self.syntaxErrors = 0 + + + # LEXER FIELDS (must be in same state object to avoid casting + # constantly in generated code and Lexer object) :( + + + ## The goal of all lexer rules/methods is to create a token object. + # This is an instance variable as multiple rules may collaborate to + # create a single token. nextToken will return this object after + # matching lexer rule(s). If you subclass to allow multiple token + # emissions, then set this to the last token to be matched or + # something nonnull so that the auto token emit mechanism will not + # emit another token. + self.token = None + + ## What character index in the stream did the current token start at? + # Needed, for example, to get the text for current token. Set at + # the start of nextToken. + self.tokenStartCharIndex = -1 + + ## The line on which the first character of the token resides + self.tokenStartLine = None + + ## The character position of first character within the line + self.tokenStartCharPositionInLine = None + + ## The channel number for the current token + self.channel = None + + ## The token type for the current token + self.type = None + + ## You can set the text for the current token to override what is in + # the input char buffer. Use setText() or can set this instance var. + self.text = None + + +class BaseRecognizer(object): + """ + @brief Common recognizer functionality. + + A generic recognizer that can handle recognizers generated from + lexer, parser, and tree grammars. This is all the parsing + support code essentially; most of it is error recovery stuff and + backtracking. + """ + + MEMO_RULE_FAILED = -2 + MEMO_RULE_UNKNOWN = -1 + + # copies from Token object for convenience in actions + DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL + + # for convenience in actions + HIDDEN = HIDDEN_CHANNEL + + # overridden by generated subclasses + tokenNames = None + + # The antlr_version attribute has been introduced in 3.1. If it is not + # overwritten in the generated recognizer, we assume a default of 3.0.1. + antlr_version = (3, 0, 1, 0) + antlr_version_str = "3.0.1" + + def __init__(self, state=None): + # Input stream of the recognizer. Must be initialized by a subclass. + self.input = None + + ## State of a lexer, parser, or tree parser are collected into a state + # object so the state can be shared. This sharing is needed to + # have one grammar import others and share same error variables + # and other state variables. It's a kind of explicit multiple + # inheritance via delegation of methods and shared state. + if state is None: + state = RecognizerSharedState() + self._state = state + + if self.antlr_version > runtime_version: + raise RuntimeError( + "ANTLR version mismatch: " + "The recognizer has been generated by V%s, but this runtime " + "is V%s. Please use the V%s runtime or higher." + % (self.antlr_version_str, + runtime_version_str, + self.antlr_version_str)) + elif (self.antlr_version < (3, 1, 0, 0) and + self.antlr_version != runtime_version): + # FIXME: make the runtime compatible with 3.0.1 codegen + # and remove this block. + raise RuntimeError( + "ANTLR version mismatch: " + "The recognizer has been generated by V%s, but this runtime " + "is V%s. 
Please use the V%s runtime." + % (self.antlr_version_str, + runtime_version_str, + self.antlr_version_str)) + + # this one only exists to shut up pylint :( + def setInput(self, input): + self.input = input + + + def reset(self): + """ + reset the parser's state; subclasses must rewinds the input stream + """ + + # wack everything related to error recovery + if self._state is None: + # no shared state work to do + return + + self._state.following = [] + self._state.errorRecovery = False + self._state.lastErrorIndex = -1 + self._state.syntaxErrors = 0 + # wack everything related to backtracking and memoization + self._state.backtracking = 0 + if self._state.ruleMemo is not None: + self._state.ruleMemo = {} + + + def match(self, input, ttype, follow): + """ + Match current input symbol against ttype. Attempt + single token insertion or deletion error recovery. If + that fails, throw MismatchedTokenException. + + To turn off single token insertion or deletion error + recovery, override recoverFromMismatchedToken() and have it + throw an exception. See TreeParser.recoverFromMismatchedToken(). + This way any error in a rule will cause an exception and + immediate exit from rule. Rule would recover by resynchronizing + to the set of symbols that can follow rule ref. + """ + + matchedSymbol = self.getCurrentInputSymbol(input) + if self.input.LA(1) == ttype: + self.input.consume() + self._state.errorRecovery = False + return matchedSymbol + + if self._state.backtracking > 0: + # FIXME: need to return matchedSymbol here as well. damn!! + raise BacktrackingFailed + + matchedSymbol = self.recoverFromMismatchedToken(input, ttype, follow) + return matchedSymbol + + + def matchAny(self, input): + """Match the wildcard: in a symbol""" + + self._state.errorRecovery = False + self.input.consume() + + + def mismatchIsUnwantedToken(self, input, ttype): + return input.LA(2) == ttype + + + def mismatchIsMissingToken(self, input, follow): + if follow is None: + # we have no information about the follow; we can only consume + # a single token and hope for the best + return False + + # compute what can follow this grammar element reference + if EOR_TOKEN_TYPE in follow: + viableTokensFollowingThisRule = self.computeContextSensitiveRuleFOLLOW() + follow = follow | viableTokensFollowingThisRule + + if len(self._state.following) > 0: + # remove EOR if we're not the start symbol + follow = follow - set([EOR_TOKEN_TYPE]) + + # if current token is consistent with what could come after set + # then we know we're missing a token; error recovery is free to + # "insert" the missing token + if input.LA(1) in follow or EOR_TOKEN_TYPE in follow: + return True + + return False + + + def reportError(self, e): + """Report a recognition problem. + + This method sets errorRecovery to indicate the parser is recovering + not parsing. Once in recovery mode, no errors are generated. + To get out of recovery mode, the parser must successfully match + a token (after a resync). So it will go: + + 1. error occurs + 2. enter recovery mode, report error + 3. consume until token found in resynch set + 4. try to resume parsing + 5. next match() will reset errorRecovery mode + + If you override, make sure to update syntaxErrors if you care about + that. + + """ + + # if we've already reported an error and have not matched a token + # yet successfully, don't report any errors. 
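+        # (recovery mode is switched off again by the next successful
+        # match(); see step 5 in the list above.)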
+        if self._state.errorRecovery:
+            return
+
+        self._state.syntaxErrors += 1 # don't count spurious
+        self._state.errorRecovery = True
+
+        self.displayRecognitionError(self.tokenNames, e)
+
+
+    def displayRecognitionError(self, tokenNames, e):
+        hdr = self.getErrorHeader(e)
+        msg = self.getErrorMessage(e, tokenNames)
+        self.emitErrorMessage(hdr+" "+msg)
+
+
+    def getErrorMessage(self, e, tokenNames):
+        """
+        What error message should be generated for the various
+        exception types?
+
+        Not very object-oriented code, but I like having all error message
+        generation within one method rather than spread among all of the
+        exception classes. This also makes it much easier for the exception
+        handling because the exception classes do not have to have pointers back
+        to this object to access utility routines and so on. Also, changing
+        the message for an exception type would be difficult because you
+        would have to subclass the exception, but then somehow get ANTLR
+        to make those kinds of exception objects instead of the default.
+        This looks weird, but trust me--it makes the most sense in terms
+        of flexibility.
+
+        For grammar debugging, you will want to override this to add
+        more information such as the stack frame with
+        getRuleInvocationStack(e, this.getClass().getName()) and,
+        for no viable alts, the decision description and state etc...
+
+        Override this to change the message generated for one or more
+        exception types.
+        """
+
+        if isinstance(e, UnwantedTokenException):
+            tokenName = "<unknown>"
+            if e.expecting == EOF:
+                tokenName = "EOF"
+
+            else:
+                tokenName = self.tokenNames[e.expecting]
+
+            msg = "extraneous input %s expecting %s" % (
+                self.getTokenErrorDisplay(e.getUnexpectedToken()),
+                tokenName
+                )
+
+        elif isinstance(e, MissingTokenException):
+            tokenName = "<unknown>"
+            if e.expecting == EOF:
+                tokenName = "EOF"
+
+            else:
+                tokenName = self.tokenNames[e.expecting]
+
+            msg = "missing %s at %s" % (
+                tokenName, self.getTokenErrorDisplay(e.token)
+                )
+
+        elif isinstance(e, MismatchedTokenException):
+            tokenName = "<unknown>"
+            if e.expecting == EOF:
+                tokenName = "EOF"
+            else:
+                tokenName = self.tokenNames[e.expecting]
+
+            msg = "mismatched input " \
+                  + self.getTokenErrorDisplay(e.token) \
+                  + " expecting " \
+                  + tokenName
+
+        elif isinstance(e, MismatchedTreeNodeException):
+            tokenName = "<unknown>"
+            if e.expecting == EOF:
+                tokenName = "EOF"
+            else:
+                tokenName = self.tokenNames[e.expecting]
+
+            msg = "mismatched tree node: %s expecting %s" \
+                  % (e.node, tokenName)
+
+        elif isinstance(e, NoViableAltException):
+            msg = "no viable alternative at input " \
+                  + self.getTokenErrorDisplay(e.token)
+
+        elif isinstance(e, EarlyExitException):
+            msg = "required (...)+ loop did not match anything at input " \
+                  + self.getTokenErrorDisplay(e.token)
+
+        elif isinstance(e, MismatchedSetException):
+            msg = "mismatched input " \
+                  + self.getTokenErrorDisplay(e.token) \
+                  + " expecting set " \
+                  + repr(e.expecting)
+
+        elif isinstance(e, MismatchedNotSetException):
+            msg = "mismatched input " \
+                  + self.getTokenErrorDisplay(e.token) \
+                  + " expecting set " \
+                  + repr(e.expecting)
+
+        elif isinstance(e, FailedPredicateException):
+            msg = "rule " \
+                  + e.ruleName \
+                  + " failed predicate: {" \
+                  + e.predicateText \
+                  + "}?"
+
+        else:
+            msg = str(e)
+
+        return msg
+
+
+    def getNumberOfSyntaxErrors(self):
+        """
+        Get number of recognition errors (lexer, parser, tree parser). Each
+        recognizer tracks its own number. So parser and lexer each have
+        separate count.
Does not count the spurious errors found between
+        an error and next valid token match
+
+        See also reportError()
+        """
+        return self._state.syntaxErrors
+
+
+    def getErrorHeader(self, e):
+        """
+        What is the error header, normally line/character position information?
+        """
+
+        return "line %d:%d" % (e.line, e.charPositionInLine)
+
+
+    def getTokenErrorDisplay(self, t):
+        """
+        How should a token be displayed in an error message? The default
+        is to display just the text, but during development you might
+        want to have a lot of information spit out. Override in that case
+        to use t.toString() (which, for CommonToken, dumps everything about
+        the token). This is better than forcing you to override a method in
+        your token objects because you don't have to go modify your lexer
+        so that it creates a new Java type.
+        """
+
+        s = t.text
+        if s is None:
+            if t.type == EOF:
+                s = "<EOF>"
+            else:
+                s = "<"+str(t.type)+">"
+
+        return repr(s)
+
+
+    def emitErrorMessage(self, msg):
+        """Override this method to change where error messages go"""
+        sys.stderr.write(msg + '\n')
+
+
+    def recover(self, input, re):
+        """
+        Recover from an error found on the input stream. This is
+        for NoViableAlt and mismatched symbol exceptions. If you enable
+        single token insertion and deletion, this will usually not
+        handle mismatched symbol exceptions but there could be a mismatched
+        token that the match() routine could not recover from.
+        """
+
+        # PROBLEM? what if input stream is not the same as last time
+        # perhaps make lastErrorIndex a member of input
+        if self._state.lastErrorIndex == input.index():
+            # uh oh, another error at same token index; must be a case
+            # where LT(1) is in the recovery token set so nothing is
+            # consumed; consume a single token so at least to prevent
+            # an infinite loop; this is a failsafe.
+            input.consume()
+
+        self._state.lastErrorIndex = input.index()
+        followSet = self.computeErrorRecoverySet()
+
+        self.beginResync()
+        self.consumeUntil(input, followSet)
+        self.endResync()
+
+
+    def beginResync(self):
+        """
+        A hook to listen in on the token consumption during error recovery.
+        The DebugParser subclasses this to fire events to the listener.
+        """
+
+        pass
+
+
+    def endResync(self):
+        """
+        A hook to listen in on the token consumption during error recovery.
+        The DebugParser subclasses this to fire events to the listener.
+        """
+
+        pass
+
+
+    def computeErrorRecoverySet(self):
+        """
+        Compute the error recovery set for the current rule. During
+        rule invocation, the parser pushes the set of tokens that can
+        follow that rule reference on the stack; this amounts to
+        computing FIRST of what follows the rule reference in the
+        enclosing rule. This local follow set only includes tokens
+        from within the rule; i.e., the FIRST computation done by
+        ANTLR stops at the end of a rule.
+
+        EXAMPLE
+
+        When you find a "no viable alt exception", the input is not
+        consistent with any of the alternatives for rule r. The best
+        thing to do is to consume tokens until you see something that
+        can legally follow a call to r *or* any rule that called r.
+        You don't want the exact set of viable next tokens because the
+        input might just be missing a token--you might consume the
+        rest of the input looking for one of the missing tokens.
+
+        Consider grammar:
+
+        a : '[' b ']'
+          | '(' b ')'
+          ;
+        b : c '^' INT ;
+        c : ID
+          | INT
+          ;
+
+        At each rule invocation, the set of tokens that could follow
+        that rule is pushed on a stack. Here are the various "local"
Here are the various "local" + follow sets: + + FOLLOW(b1_in_a) = FIRST(']') = ']' + FOLLOW(b2_in_a) = FIRST(')') = ')' + FOLLOW(c_in_b) = FIRST('^') = '^' + + Upon erroneous input "[]", the call chain is + + a -> b -> c + + and, hence, the follow context stack is: + + depth local follow set after call to rule + 0 \ a (from main()) + 1 ']' b + 3 '^' c + + Notice that ')' is not included, because b would have to have + been called from a different context in rule a for ')' to be + included. + + For error recovery, we cannot consider FOLLOW(c) + (context-sensitive or otherwise). We need the combined set of + all context-sensitive FOLLOW sets--the set of all tokens that + could follow any reference in the call chain. We need to + resync to one of those tokens. Note that FOLLOW(c)='^' and if + we resync'd to that token, we'd consume until EOF. We need to + sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + In this case, for input "[]", LA(1) is in this set so we would + not consume anything and after printing an error rule c would + return normally. It would not find the required '^' though. + At this point, it gets a mismatched token error and throws an + exception (since LA(1) is not in the viable following token + set). The rule exception handler tries to recover, but finds + the same recovery set and doesn't consume anything. Rule b + exits normally returning to rule a. Now it finds the ']' (and + with the successful match exits errorRecovery mode). + + So, you cna see that the parser walks up call chain looking + for the token that was a member of the recovery set. + + Errors are not generated in errorRecovery mode. + + ANTLR's error recovery mechanism is based upon original ideas: + + "Algorithms + Data Structures = Programs" by Niklaus Wirth + + and + + "A note on error recovery in recursive descent parsers": + http://portal.acm.org/citation.cfm?id=947902.947905 + + Later, Josef Grosch had some good ideas: + + "Efficient and Comfortable Error Recovery in Recursive Descent + Parsers": + ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + + Like Grosch I implemented local FOLLOW sets that are combined + at run-time upon error to avoid overhead during parsing. + """ + + return self.combineFollows(False) + + + def computeContextSensitiveRuleFOLLOW(self): + """ + Compute the context-sensitive FOLLOW set for current rule. + This is set of token types that can follow a specific rule + reference given a specific call chain. You get the set of + viable tokens that can possibly come next (lookahead depth 1) + given the current call chain. Contrast this with the + definition of plain FOLLOW for rule r: + + FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} + + where x in T* and alpha, beta in V*; T is set of terminals and + V is the set of terminals and nonterminals. In other words, + FOLLOW(r) is the set of all tokens that can possibly follow + references to r in *any* sentential form (context). At + runtime, however, we know precisely which context applies as + we have the call chain. We may compute the exact (rather + than covering superset) set of following tokens. + + For example, consider grammar: + + stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} + | "return" expr '.' + ; + expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} + atom : INT // FOLLOW(atom)=={'+',')',';','.'} + | '(' expr ')' + ; + + The FOLLOW sets are all inclusive whereas context-sensitive + FOLLOW sets are precisely what could follow a rule reference. 
+ For input input "i=(3);", here is the derivation: + + stat => ID '=' expr ';' + => ID '=' atom ('+' atom)* ';' + => ID '=' '(' expr ')' ('+' atom)* ';' + => ID '=' '(' atom ')' ('+' atom)* ';' + => ID '=' '(' INT ')' ('+' atom)* ';' + => ID '=' '(' INT ')' ';' + + At the "3" token, you'd have a call chain of + + stat -> expr -> atom -> expr -> atom + + What can follow that specific nested ref to atom? Exactly ')' + as you can see by looking at the derivation of this specific + input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. + + You want the exact viable token set when recovering from a + token mismatch. Upon token mismatch, if LA(1) is member of + the viable next token set, then you know there is most likely + a missing token in the input stream. "Insert" one by just not + throwing an exception. + """ + + return self.combineFollows(True) + + + def combineFollows(self, exact): + followSet = set() + for idx, localFollowSet in reversed(list(enumerate(self._state.following))): + followSet |= localFollowSet + if exact: + # can we see end of rule? + if EOR_TOKEN_TYPE in localFollowSet: + # Only leave EOR in set if at top (start rule); this lets + # us know if have to include follow(start rule); i.e., EOF + if idx > 0: + followSet.remove(EOR_TOKEN_TYPE) + + else: + # can't see end of rule, quit + break + + return followSet + + + def recoverFromMismatchedToken(self, input, ttype, follow): + """Attempt to recover from a single missing or extra token. + + EXTRA TOKEN + + LA(1) is not what we are looking for. If LA(2) has the right token, + however, then assume LA(1) is some extra spurious token. Delete it + and LA(2) as if we were doing a normal match(), which advances the + input. + + MISSING TOKEN + + If current token is consistent with what could come after + ttype then it is ok to 'insert' the missing token, else throw + exception For example, Input 'i=(3;' is clearly missing the + ')'. When the parser returns from the nested call to expr, it + will have call chain: + + stat -> expr -> atom + + and it will be trying to match the ')' at this point in the + derivation: + + => ID '=' '(' INT ')' ('+' atom)* ';' + ^ + match() will see that ';' doesn't match ')' and report a + mismatched token error. To recover, it sees that LA(1)==';' + is in the set of tokens that can follow the ')' token + reference in rule atom. It can assume that you forgot the ')'. 
+ """ + + e = None + + # if next token is what we are looking for then "delete" this token + if self.mismatchIsUnwantedToken(input, ttype): + e = UnwantedTokenException(ttype, input) + + self.beginResync() + input.consume() # simply delete extra token + self.endResync() + + # report after consuming so AW sees the token in the exception + self.reportError(e) + + # we want to return the token we're actually matching + matchedSymbol = self.getCurrentInputSymbol(input) + + # move past ttype token as if all were ok + input.consume() + return matchedSymbol + + # can't recover with single token deletion, try insertion + if self.mismatchIsMissingToken(input, follow): + inserted = self.getMissingSymbol(input, e, ttype, follow) + e = MissingTokenException(ttype, input, inserted) + + # report after inserting so AW sees the token in the exception + self.reportError(e) + return inserted + + # even that didn't work; must throw the exception + e = MismatchedTokenException(ttype, input) + raise e + + + def recoverFromMismatchedSet(self, input, e, follow): + """Not currently used""" + + if self.mismatchIsMissingToken(input, follow): + self.reportError(e) + # we don't know how to conjure up a token for sets yet + return self.getMissingSymbol(input, e, INVALID_TOKEN_TYPE, follow) + + # TODO do single token deletion like above for Token mismatch + raise e + + + def getCurrentInputSymbol(self, input): + """ + Match needs to return the current input symbol, which gets put + into the label for the associated token ref; e.g., x=ID. Token + and tree parsers need to return different objects. Rather than test + for input stream type or change the IntStream interface, I use + a simple method to ask the recognizer to tell me what the current + input symbol is. + + This is ignored for lexers. + """ + + return None + + + def getMissingSymbol(self, input, e, expectedTokenType, follow): + """Conjure up a missing token during error recovery. + + The recognizer attempts to recover from single missing + symbols. But, actions might refer to that missing symbol. + For example, x=ID {f($x);}. The action clearly assumes + that there has been an identifier matched previously and that + $x points at that token. If that token is missing, but + the next token in the stream is what we want we assume that + this token is missing and we keep going. Because we + have to return some token to replace the missing token, + we have to conjure one up. This method gives the user control + over the tokens returned for missing tokens. Mostly, + you will want to create something special for identifier + tokens. For literals such as '{' and ',', the default + action in the parser or tree parser works. It simply creates + a CommonToken of the appropriate type. The text will be the token. + If you change what tokens must be created by the lexer, + override this method to create the appropriate tokens. + """ + + return None + + +## def recoverFromMissingElement(self, input, e, follow): +## """ +## This code is factored out from mismatched token and mismatched set +## recovery. It handles "single token insertion" error recovery for +## both. No tokens are consumed to recover from insertions. Return +## true if recovery was possible else return false. 
+##         """
+
+##         if self.mismatchIsMissingToken(input, follow):
+##             self.reportError(e)
+##             return True

+##         # nothing to do; throw exception
+##         return False
+
+
+    def consumeUntil(self, input, tokenTypes):
+        """
+        Consume tokens until one matches the given token or token set
+
+        tokenTypes can be a single token type or a set of token types
+
+        """
+
+        if not isinstance(tokenTypes, (set, frozenset)):
+            tokenTypes = frozenset([tokenTypes])
+
+        ttype = input.LA(1)
+        while ttype != EOF and ttype not in tokenTypes:
+            input.consume()
+            ttype = input.LA(1)
+
+
+    def getRuleInvocationStack(self):
+        """
+        Return List<String> of the rules in your parser instance
+        leading up to a call to this method. You could override if
+        you want more details such as the file/line info of where
+        in the parser java code a rule is invoked.
+
+        This is very useful for error messages and for context-sensitive
+        error recovery.
+
+        You must be careful if you subclass a generated recognizer.
+        The default implementation will only search the module of self
+        for rules, but the subclass will not contain any rules.
+        You probably want to override this method to look like
+
+        def getRuleInvocationStack(self):
+            return self._getRuleInvocationStack(<class>.__module__)
+
+        where <class> is the class of the generated recognizer, e.g.
+        the superclass of self.
+        """
+
+        return self._getRuleInvocationStack(self.__module__)
+
+
+    def _getRuleInvocationStack(cls, module):
+        """
+        A more general version of getRuleInvocationStack where you can
+        pass in, for example, a RecognitionException to get its rule
+        stack trace. This routine is shared with all recognizers, hence,
+        static.
+
+        TODO: move to a utility class or something; weird having lexer call
+        this
+        """
+
+        # mmmhhh,... perhaps look at the first argument
+        # (f_locals[co_varnames[0]]?) and test if it's a (sub)class of
+        # requested recognizer...
+
+        rules = []
+        for frame in reversed(inspect.stack()):
+            code = frame[0].f_code
+            codeMod = inspect.getmodule(code)
+            if codeMod is None:
+                continue
+
+            # skip frames not in requested module
+            if codeMod.__name__ != module:
+                continue
+
+            # skip some unwanted names
+            if code.co_name in ('nextToken', '<module>'):
+                continue
+
+            rules.append(code.co_name)
+
+        return rules
+
+    _getRuleInvocationStack = classmethod(_getRuleInvocationStack)
+
+
+    def getBacktrackingLevel(self):
+        return self._state.backtracking
+
+    def setBacktrackingLevel(self, n):
+        self._state.backtracking = n
+
+
+    def failed(self):
+        """Return whether or not a backtracking attempt failed."""
+
+        return self._state.failed
+
+
+    def getGrammarFileName(self):
+        """For debugging and other purposes, might want the grammar name.
+
+        Have ANTLR generate an implementation for this method.
+        """
+
+        return self.grammarFileName
+
+
+    def getSourceName(self):
+        raise NotImplementedError
+
+
+    def toStrings(self, tokens):
+        """A convenience method for use most often with template rewrites.
+
+        Convert a List<Token> to List<String>
+        """
+
+        if tokens is None:
+            return None
+
+        return [token.text for token in tokens]
+
+
+    def getRuleMemoization(self, ruleIndex, ruleStartIndex):
+        """
+        Given a rule number and a start token index number, return
+        MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
+        start index. If this rule has parsed input starting from the
+        start index before, then return where the rule stopped parsing.
+        It returns the index of the last token matched by the rule.
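+
+        Layout sketch of the table (as described for
+        RecognizerSharedState.ruleMemo):
+
+            ruleMemo = {
+                ruleIndex: {ruleStartIndex: stopTokenIndex, ...},
+                ...
+                }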
+        """
+
+        if ruleIndex not in self._state.ruleMemo:
+            self._state.ruleMemo[ruleIndex] = {}
+
+        return self._state.ruleMemo[ruleIndex].get(
+            ruleStartIndex, self.MEMO_RULE_UNKNOWN
+            )
+
+
+    def alreadyParsedRule(self, input, ruleIndex):
+        """
+        Has this rule already parsed input at the current index in the
+        input stream? Return the stop token index or MEMO_RULE_UNKNOWN.
+        If we attempted but failed to parse properly before, return
+        MEMO_RULE_FAILED.
+
+        This method has a side-effect: if we have seen this input for
+        this rule and successfully parsed before, then seek ahead to
+        1 past the stop token matched for this rule last time.
+        """
+
+        stopIndex = self.getRuleMemoization(ruleIndex, input.index())
+        if stopIndex == self.MEMO_RULE_UNKNOWN:
+            return False
+
+        if stopIndex == self.MEMO_RULE_FAILED:
+            raise BacktrackingFailed
+
+        else:
+            input.seek(stopIndex + 1)
+
+        return True
+
+
+    def memoize(self, input, ruleIndex, ruleStartIndex, success):
+        """
+        Record whether or not this rule parsed the input at this position
+        successfully.
+        """
+
+        if success:
+            stopTokenIndex = input.index() - 1
+        else:
+            stopTokenIndex = self.MEMO_RULE_FAILED
+
+        if ruleIndex in self._state.ruleMemo:
+            self._state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex
+
+
+    def traceIn(self, ruleName, ruleIndex, inputSymbol):
+        sys.stdout.write("enter %s %s" % (ruleName, inputSymbol))
+
+        if self._state.backtracking > 0:
+            sys.stdout.write(" backtracking=%s" % self._state.backtracking)
+
+        sys.stdout.write('\n')
+
+
+    def traceOut(self, ruleName, ruleIndex, inputSymbol):
+        sys.stdout.write("exit %s %s" % (ruleName, inputSymbol))
+
+        if self._state.backtracking > 0:
+            sys.stdout.write(" backtracking=%s" % self._state.backtracking)
+
+            if self._state.failed:
+                sys.stdout.write(" failed")
+            else:
+                sys.stdout.write(" succeeded")
+
+        sys.stdout.write('\n')
+
+
+class TokenSource(object):
+    """
+    @brief Abstract baseclass for token producers.
+
+    A source of tokens must provide a sequence of tokens via nextToken()
+    and also must reveal its source of characters; CommonToken's text is
+    computed from a CharStream; it only stores indices into the char stream.
+
+    Errors from the lexer are never passed to the parser. Either you want
+    to keep going or you do not upon token recognition error. If you do not
+    want to continue lexing then you do not want to continue parsing. Just
+    throw an exception not under RecognitionException and Java will naturally
+    toss you all the way out of the recognizers. If you want to continue
+    lexing then you should not throw an exception to the parser--it has already
+    requested a token. Keep lexing until you get a valid one. Just report
+    errors and keep going, looking for a valid token.
+    """
+
+    def nextToken(self):
+        """Return a Token object from your input stream (usually a CharStream).
+
+        Do not fail/return upon lexing error; keep chewing on the characters
+        until you get a good one; errors are not passed through to the parser.
+        """
+
+        raise NotImplementedError
+
+
+    def __iter__(self):
+        """The TokenSource is an iterator.
+
+        The iteration will not include the final EOF token, see also the note
+        for the next() method.
+
+        """
+
+        return self
+
+
+    def next(self):
+        """Return next token or raise StopIteration.
+
+        Note that this will raise StopIteration when hitting the EOF token,
+        so EOF will not be part of the iteration.
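+
+        Example (a sketch; TLexer stands for any generated lexer):
+
+            lexer = TLexer(antlr3.ANTLRStringStream("..."))
+            for token in lexer:
+                print token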
+ + """ + + token = self.nextToken() + if token is None or token.type == EOF: + raise StopIteration + return token + + +class Lexer(BaseRecognizer, TokenSource): + """ + @brief Baseclass for generated lexer classes. + + A lexer is recognizer that draws input symbols from a character stream. + lexer grammars result in a subclass of this object. A Lexer object + uses simplified match() and error recovery mechanisms in the interest + of speed. + """ + + def __init__(self, input, state=None): + BaseRecognizer.__init__(self, state) + TokenSource.__init__(self) + + # Where is the lexer drawing characters from? + self.input = input + + + def reset(self): + BaseRecognizer.reset(self) # reset all recognizer state variables + + if self.input is not None: + # rewind the input + self.input.seek(0) + + if self._state is None: + # no shared state work to do + return + + # wack Lexer state variables + self._state.token = None + self._state.type = INVALID_TOKEN_TYPE + self._state.channel = DEFAULT_CHANNEL + self._state.tokenStartCharIndex = -1 + self._state.tokenStartLine = -1 + self._state.tokenStartCharPositionInLine = -1 + self._state.text = None + + + def nextToken(self): + """ + Return a token from this source; i.e., match a token on the char + stream. + """ + + while 1: + self._state.token = None + self._state.channel = DEFAULT_CHANNEL + self._state.tokenStartCharIndex = self.input.index() + self._state.tokenStartCharPositionInLine = self.input.charPositionInLine + self._state.tokenStartLine = self.input.line + self._state.text = None + if self.input.LA(1) == EOF: + return EOF_TOKEN + + try: + self.mTokens() + + if self._state.token is None: + self.emit() + + elif self._state.token == SKIP_TOKEN: + continue + + return self._state.token + + except NoViableAltException, re: + self.reportError(re) + self.recover(re) # throw out current char and try again + + except RecognitionException, re: + self.reportError(re) + # match() routine has already called recover() + + + def skip(self): + """ + Instruct the lexer to skip creating a token for current lexer rule + and look for another token. nextToken() knows to keep looking when + a lexer rule finishes with token set to SKIP_TOKEN. Recall that + if token==null at end of any token rule, it creates one for you + and emits it. + """ + + self._state.token = SKIP_TOKEN + + + def mTokens(self): + """This is the lexer entry point that sets instance var 'token'""" + + # abstract method + raise NotImplementedError + + + def setCharStream(self, input): + """Set the char stream and reset the lexer""" + self.input = None + self.reset() + self.input = input + + + def getSourceName(self): + return self.input.getSourceName() + + + def emit(self, token=None): + """ + The standard method called to automatically emit a token at the + outermost lexical rule. The token object should point into the + char buffer start..stop. If there is a text override in 'text', + use that to set the token's text. Override this method to emit + custom Token objects. + + If you are building trees, then you should also override + Parser or TreeParser.getMissingSymbol(). 
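+
+        A sketched override (MyToken is a hypothetical Token subclass):
+
+            def emit(self, token=None):
+                if token is None:
+                    token = MyToken(
+                        input=self.input,
+                        type=self._state.type,
+                        channel=self._state.channel,
+                        start=self._state.tokenStartCharIndex,
+                        stop=self.getCharIndex()-1)
+                    # (line/column bookkeeping as in the default body)
+                return Lexer.emit(self, token)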
+ """ + + if token is None: + token = CommonToken( + input=self.input, + type=self._state.type, + channel=self._state.channel, + start=self._state.tokenStartCharIndex, + stop=self.getCharIndex()-1 + ) + token.line = self._state.tokenStartLine + token.text = self._state.text + token.charPositionInLine = self._state.tokenStartCharPositionInLine + + self._state.token = token + + return token + + + def match(self, s): + if isinstance(s, basestring): + for c in s: + if self.input.LA(1) != ord(c): + if self._state.backtracking > 0: + raise BacktrackingFailed + + mte = MismatchedTokenException(c, self.input) + self.recover(mte) + raise mte + + self.input.consume() + + else: + if self.input.LA(1) != s: + if self._state.backtracking > 0: + raise BacktrackingFailed + + mte = MismatchedTokenException(unichr(s), self.input) + self.recover(mte) # don't really recover; just consume in lexer + raise mte + + self.input.consume() + + + def matchAny(self): + self.input.consume() + + + def matchRange(self, a, b): + if self.input.LA(1) < a or self.input.LA(1) > b: + if self._state.backtracking > 0: + raise BacktrackingFailed + + mre = MismatchedRangeException(unichr(a), unichr(b), self.input) + self.recover(mre) + raise mre + + self.input.consume() + + + def getLine(self): + return self.input.line + + + def getCharPositionInLine(self): + return self.input.charPositionInLine + + + def getCharIndex(self): + """What is the index of the current character of lookahead?""" + + return self.input.index() + + + def getText(self): + """ + Return the text matched so far for the current token or any + text override. + """ + if self._state.text is not None: + return self._state.text + + return self.input.substring( + self._state.tokenStartCharIndex, + self.getCharIndex()-1 + ) + + + def setText(self, text): + """ + Set the complete text of this token; it wipes any previous + changes to the text. + """ + self._state.text = text + + + text = property(getText, setText) + + + def reportError(self, e): + ## TODO: not thought about recovery in lexer yet. + + ## # if we've already reported an error and have not matched a token + ## # yet successfully, don't report any errors. + ## if self.errorRecovery: + ## #System.err.print("[SPURIOUS] "); + ## return; + ## + ## self.errorRecovery = True + + self.displayRecognitionError(self.tokenNames, e) + + + def getErrorMessage(self, e, tokenNames): + msg = None + + if isinstance(e, MismatchedTokenException): + msg = "mismatched character " \ + + self.getCharErrorDisplay(e.c) \ + + " expecting " \ + + self.getCharErrorDisplay(e.expecting) + + elif isinstance(e, NoViableAltException): + msg = "no viable alternative at character " \ + + self.getCharErrorDisplay(e.c) + + elif isinstance(e, EarlyExitException): + msg = "required (...)+ loop did not match anything at character " \ + + self.getCharErrorDisplay(e.c) + + elif isinstance(e, MismatchedNotSetException): + msg = "mismatched character " \ + + self.getCharErrorDisplay(e.c) \ + + " expecting set " \ + + repr(e.expecting) + + elif isinstance(e, MismatchedSetException): + msg = "mismatched character " \ + + self.getCharErrorDisplay(e.c) \ + + " expecting set " \ + + repr(e.expecting) + + elif isinstance(e, MismatchedRangeException): + msg = "mismatched character " \ + + self.getCharErrorDisplay(e.c) \ + + " expecting set " \ + + self.getCharErrorDisplay(e.a) \ + + ".." 
\ + + self.getCharErrorDisplay(e.b) + + else: + msg = BaseRecognizer.getErrorMessage(self, e, tokenNames) + + return msg + + + def getCharErrorDisplay(self, c): + if c == EOF: + c = '' + return repr(c) + + + def recover(self, re): + """ + Lexers can normally match any char in it's vocabulary after matching + a token, so do the easy thing and just kill a character and hope + it all works out. You can instead use the rule invocation stack + to do sophisticated error recovery if you are in a fragment rule. + """ + + self.input.consume() + + + def traceIn(self, ruleName, ruleIndex): + inputSymbol = "%s line=%d:%s" % (self.input.LT(1), + self.getLine(), + self.getCharPositionInLine() + ) + + BaseRecognizer.traceIn(self, ruleName, ruleIndex, inputSymbol) + + + def traceOut(self, ruleName, ruleIndex): + inputSymbol = "%s line=%d:%s" % (self.input.LT(1), + self.getLine(), + self.getCharPositionInLine() + ) + + BaseRecognizer.traceOut(self, ruleName, ruleIndex, inputSymbol) + + + +class Parser(BaseRecognizer): + """ + @brief Baseclass for generated parser classes. + """ + + def __init__(self, lexer, state=None): + BaseRecognizer.__init__(self, state) + + self.setTokenStream(lexer) + + + def reset(self): + BaseRecognizer.reset(self) # reset all recognizer state variables + if self.input is not None: + self.input.seek(0) # rewind the input + + + def getCurrentInputSymbol(self, input): + return input.LT(1) + + + def getMissingSymbol(self, input, e, expectedTokenType, follow): + if expectedTokenType == EOF: + tokenText = "" + else: + tokenText = "" + t = CommonToken(type=expectedTokenType, text=tokenText) + current = input.LT(1) + if current.type == EOF: + current = input.LT(-1) + + if current is not None: + t.line = current.line + t.charPositionInLine = current.charPositionInLine + t.channel = DEFAULT_CHANNEL + return t + + + def setTokenStream(self, input): + """Set the token stream and reset the parser""" + + self.input = None + self.reset() + self.input = input + + + def getTokenStream(self): + return self.input + + + def getSourceName(self): + return self.input.getSourceName() + + + def traceIn(self, ruleName, ruleIndex): + BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1)) + + + def traceOut(self, ruleName, ruleIndex): + BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1)) + + +class RuleReturnScope(object): + """ + Rules can return start/stop info as well as possible trees and templates. + """ + + def getStart(self): + """Return the start token or tree.""" + return None + + + def getStop(self): + """Return the stop token or tree.""" + return None + + + def getTree(self): + """Has a value potentially if output=AST.""" + return None + + + def getTemplate(self): + """Has a value potentially if output=template.""" + return None + + +class ParserRuleReturnScope(RuleReturnScope): + """ + Rules that return more than a single value must return an object + containing all the values. Besides the properties defined in + RuleLabelScope.predefinedRulePropertiesScope there may be user-defined + return values. This class simply defines the minimum properties that + are always defined and methods to access the others that might be + available depending on output option such as template and tree. + + Note text is not an actual property of the return value, it is computed + from start and stop using the input stream's toString() method. I + could add a ctor to this so that we can pass in and store the input + stream, but I'm not sure we want to do that. 
It would seem to be undefined + to get the .text property anyway if the rule matches tokens from multiple + input streams. + + I do not use getters for fields of objects that are used simply to + group values such as this aggregate. The getters/setters are there to + satisfy the superclass interface. + """ + + def __init__(self): + self.start = None + self.stop = None + + + def getStart(self): + return self.start + + + def getStop(self): + return self.stop + diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/streams.py b/antlr/antlr_python_runtime-3.1.3/antlr3/streams.py new file mode 100644 index 0000000000000000000000000000000000000000..0dbe0f1f575773ee889e14d0bd6fe2f272ed9dfc --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/antlr3/streams.py @@ -0,0 +1,1452 @@ +"""ANTLR3 runtime package""" + +# begin[licence] +# +# [The "BSD licence"] +# Copyright (c) 2005-2008 Terence Parr +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# end[licence] + +import codecs +from StringIO import StringIO + +from antlr3.constants import DEFAULT_CHANNEL, EOF +from antlr3.tokens import Token, EOF_TOKEN + + +############################################################################ +# +# basic interfaces +# IntStream +# +- CharStream +# \- TokenStream +# +# subclasses must implemented all methods +# +############################################################################ + +class IntStream(object): + """ + @brief Base interface for streams of integer values. + + A simple stream of integers used when all I care about is the char + or token type sequence (such as interpretation). + """ + + def consume(self): + raise NotImplementedError + + + def LA(self, i): + """Get int at current input pointer + i ahead where i=1 is next int. + + Negative indexes are allowed. LA(-1) is previous token (token + just matched). LA(-i) where i is before first token should + yield -1, invalid char / EOF. + """ + + raise NotImplementedError + + + def mark(self): + """ + Tell the stream to start buffering if it hasn't already. Return + current input position, index(), or some other marker so that + when passed to rewind() you get back to the same spot. 
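+
+        A typical backtracking pattern built on this contract (sketch;
+        `stream` is any concrete IntStream):
+
+            marker = stream.mark()
+            # ... speculative consume()/LA() calls ...
+            stream.rewind(marker)   # back to the same spot
+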
+ rewind(mark()) should not affect the input cursor. The Lexer + track line/col info as well as input index so its markers are + not pure input indexes. Same for tree node streams. + """ + + raise NotImplementedError + + + def index(self): + """ + Return the current input symbol index 0..n where n indicates the + last symbol has been read. The index is the symbol about to be + read not the most recently read symbol. + """ + + raise NotImplementedError + + + def rewind(self, marker=None): + """ + Reset the stream so that next call to index would return marker. + The marker will usually be index() but it doesn't have to be. It's + just a marker to indicate what state the stream was in. This is + essentially calling release() and seek(). If there are markers + created after this marker argument, this routine must unroll them + like a stack. Assume the state the stream was in when this marker + was created. + + If marker is None: + Rewind to the input position of the last marker. + Used currently only after a cyclic DFA and just + before starting a sem/syn predicate to get the + input position back to the start of the decision. + Do not "pop" the marker off the state. mark(i) + and rewind(i) should balance still. It is + like invoking rewind(last marker) but it should not "pop" + the marker off. It's like seek(last marker's input position). + """ + + raise NotImplementedError + + + def release(self, marker=None): + """ + You may want to commit to a backtrack but don't want to force the + stream to keep bookkeeping objects around for a marker that is + no longer necessary. This will have the same behavior as + rewind() except it releases resources without the backward seek. + This must throw away resources for all markers back to the marker + argument. So if you're nested 5 levels of mark(), and then release(2) + you have to release resources for depths 2..5. + """ + + raise NotImplementedError + + + def seek(self, index): + """ + Set the input cursor to the position indicated by index. This is + normally used to seek ahead in the input stream. No buffering is + required to do this unless you know your stream will use seek to + move backwards such as when backtracking. + + This is different from rewind in its multi-directional + requirement and in that its argument is strictly an input cursor + (index). + + For char streams, seeking forward must update the stream state such + as line number. For seeking backwards, you will be presumably + backtracking using the mark/rewind mechanism that restores state and + so this method does not need to update state when seeking backwards. + + Currently, this method is only used for efficient backtracking using + memoization, but in the future it may be used for incremental parsing. + + The index is 0..n-1. A seek to position i means that LA(1) will + return the ith symbol. So, seeking to 0 means LA(1) will return the + first element in the stream. + """ + + raise NotImplementedError + + + def size(self): + """ + Only makes sense for streams that buffer everything up probably, but + might be useful to display the entire stream or for testing. This + value includes a single EOF. + """ + + raise NotImplementedError + + + def getSourceName(self): + """ + Where are you getting symbols from? Normally, implementations will + pass the buck all the way to the lexer who can ask its input stream + for the file name or whatever. + """ + + raise NotImplementedError + + +class CharStream(IntStream): + """ + @brief A source of characters for an ANTLR lexer. 
+ + This is an abstract class that must be implemented by a subclass. + + """ + + # pylint does not realize that this is an interface, too + #pylint: disable-msg=W0223 + + EOF = -1 + + + def substring(self, start, stop): + """ + For infinite streams, you don't need this; primarily I'm providing + a useful interface for action code. Just make sure actions don't + use this on streams that don't support it. + """ + + raise NotImplementedError + + + def LT(self, i): + """ + Get the ith character of lookahead. This is the same usually as + LA(i). This will be used for labels in the generated + lexer code. I'd prefer to return a char here type-wise, but it's + probably better to be 32-bit clean and be consistent with LA. + """ + + raise NotImplementedError + + + def getLine(self): + """ANTLR tracks the line information automatically""" + + raise NotImplementedError + + + def setLine(self, line): + """ + Because this stream can rewind, we need to be able to reset the line + """ + + raise NotImplementedError + + + def getCharPositionInLine(self): + """ + The index of the character relative to the beginning of the line 0..n-1 + """ + + raise NotImplementedError + + + def setCharPositionInLine(self, pos): + raise NotImplementedError + + +class TokenStream(IntStream): + """ + + @brief A stream of tokens accessing tokens from a TokenSource + + This is an abstract class that must be implemented by a subclass. + + """ + + # pylint does not realize that this is an interface, too + #pylint: disable-msg=W0223 + + def LT(self, k): + """ + Get Token at current input pointer + i ahead where i=1 is next Token. + i<0 indicates tokens in the past. So -1 is previous token and -2 is + two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. + Return null for LT(0) and any index that results in an absolute address + that is negative. + """ + + raise NotImplementedError + + + def get(self, i): + """ + Get a token at an absolute index i; 0..n-1. This is really only + needed for profiling and debugging and token stream rewriting. + If you don't want to buffer up tokens, then this method makes no + sense for you. Naturally you can't use the rewrite stream feature. + I believe DebugTokenStream can easily be altered to not use + this method, removing the dependency. + """ + + raise NotImplementedError + + + def getTokenSource(self): + """ + Where is this stream pulling tokens from? This is not the name, but + the object that provides Token objects. + """ + + raise NotImplementedError + + + def toString(self, start=None, stop=None): + """ + Return the text of all tokens from start to stop, inclusive. + If the stream does not buffer all the tokens then it can just + return "" or null; Users should not access $ruleLabel.text in + an action of course in that case. + + Because the user is not required to use a token with an index stored + in it, we must provide a means for two token objects themselves to + indicate the start/end location. Most often this will just delegate + to the other toString(int,int). This is also parallel with + the TreeNodeStream.toString(Object,Object). + """ + + raise NotImplementedError + + +############################################################################ +# +# character streams for use in lexers +# CharStream +# \- ANTLRStringStream +# +############################################################################ + + +class ANTLRStringStream(CharStream): + """ + @brief CharStream that pull data from a unicode string. 
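+
+    Typical use (sketch; XLexer stands in for a generated lexer class):
+
+        stream = ANTLRStringStream(u'1 + 2')
+        lexer = XLexer(stream)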
+ + A pretty quick CharStream that pulls all data from an array + directly. Every method call counts in the lexer. + + """ + + + def __init__(self, data): + """ + @param data This should be a unicode string holding the data you want + to parse. If you pass in a byte string, the Lexer will choke on + non-ascii data. + + """ + + CharStream.__init__(self) + + # The data being scanned + self.strdata = unicode(data) + self.data = [ord(c) for c in self.strdata] + + # How many characters are actually in the buffer + self.n = len(data) + + # 0..n-1 index into string of next char + self.p = 0 + + # line number 1..n within the input + self.line = 1 + + # The index of the character relative to the beginning of the + # line 0..n-1 + self.charPositionInLine = 0 + + # A list of CharStreamState objects that tracks the stream state + # values line, charPositionInLine, and p that can change as you + # move through the input stream. Indexed from 0..markDepth-1. + self._markers = [ ] + self.lastMarker = None + self.markDepth = 0 + + # What is name or source of this char stream? + self.name = None + + + def reset(self): + """ + Reset the stream so that it's in the same state it was + when the object was created *except* the data array is not + touched. + """ + + self.p = 0 + self.line = 1 + self.charPositionInLine = 0 + self._markers = [ ] + + + def consume(self): + try: + if self.data[self.p] == 10: # \n + self.line += 1 + self.charPositionInLine = 0 + else: + self.charPositionInLine += 1 + + self.p += 1 + + except IndexError: + # happend when we reached EOF and self.data[self.p] fails + # just do nothing + pass + + + + def LA(self, i): + if i == 0: + return 0 # undefined + + if i < 0: + i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1] + + try: + return self.data[self.p+i-1] + except IndexError: + return EOF + + + + def LT(self, i): + if i == 0: + return 0 # undefined + + if i < 0: + i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1] + + try: + return self.strdata[self.p+i-1] + except IndexError: + return EOF + + + def index(self): + """ + Return the current input symbol index 0..n where n indicates the + last symbol has been read. The index is the index of char to + be returned from LA(1). + """ + + return self.p + + + def size(self): + return self.n + + + def mark(self): + state = (self.p, self.line, self.charPositionInLine) + try: + self._markers[self.markDepth] = state + except IndexError: + self._markers.append(state) + self.markDepth += 1 + + self.lastMarker = self.markDepth + + return self.lastMarker + + + def rewind(self, marker=None): + if marker is None: + marker = self.lastMarker + + p, line, charPositionInLine = self._markers[marker-1] + + self.seek(p) + self.line = line + self.charPositionInLine = charPositionInLine + self.release(marker) + + + def release(self, marker=None): + if marker is None: + marker = self.lastMarker + + self.markDepth = marker-1 + + + def seek(self, index): + """ + consume() ahead until p==index; can't just set p=index as we must + update line and charPositionInLine. + """ + + if index <= self.p: + self.p = index # just jump; don't update stream state (line, ...) + return + + # seek forward, consume until p hits index + while self.p < index: + self.consume() + + + def substring(self, start, stop): + return self.strdata[start:stop+1] + + + def getLine(self): + """Using setter/getter methods is deprecated. Use o.line instead.""" + return self.line + + + def getCharPositionInLine(self): + """ + Using setter/getter methods is deprecated. 
Use o.charPositionInLine + instead. + """ + return self.charPositionInLine + + + def setLine(self, line): + """Using setter/getter methods is deprecated. Use o.line instead.""" + self.line = line + + + def setCharPositionInLine(self, pos): + """ + Using setter/getter methods is deprecated. Use o.charPositionInLine + instead. + """ + self.charPositionInLine = pos + + + def getSourceName(self): + return self.name + + +class ANTLRFileStream(ANTLRStringStream): + """ + @brief CharStream that opens a file to read the data. + + This is a char buffer stream that is loaded from a file + all at once when you construct the object. + """ + + def __init__(self, fileName, encoding=None): + """ + @param fileName The path to the file to be opened. The file will be + opened with mode 'rb'. + + @param encoding If you set the optional encoding argument, then the + data will be decoded on the fly. + + """ + + self.fileName = fileName + + fp = codecs.open(fileName, 'rb', encoding) + try: + data = fp.read() + finally: + fp.close() + + ANTLRStringStream.__init__(self, data) + + + def getSourceName(self): + """Deprecated, access o.fileName directly.""" + + return self.fileName + + +class ANTLRInputStream(ANTLRStringStream): + """ + @brief CharStream that reads data from a file-like object. + + This is a char buffer stream that is loaded from a file like object + all at once when you construct the object. + + All input is consumed from the file, but it is not closed. + """ + + def __init__(self, file, encoding=None): + """ + @param file A file-like object holding your input. Only the read() + method must be implemented. + + @param encoding If you set the optional encoding argument, then the + data will be decoded on the fly. + + """ + + if encoding is not None: + # wrap input in a decoding reader + reader = codecs.lookup(encoding)[2] + file = reader(file) + + data = file.read() + + ANTLRStringStream.__init__(self, data) + + +# I guess the ANTLR prefix exists only to avoid a name clash with some Java +# mumbojumbo. A plain "StringStream" looks better to me, which should be +# the preferred name in Python. +StringStream = ANTLRStringStream +FileStream = ANTLRFileStream +InputStream = ANTLRInputStream + + +############################################################################ +# +# Token streams +# TokenStream +# +- CommonTokenStream +# \- TokenRewriteStream +# +############################################################################ + + +class CommonTokenStream(TokenStream): + """ + @brief The most common stream of tokens + + The most common stream of tokens is one where every token is buffered up + and tokens are prefiltered for a certain channel (the parser will only + see these tokens and cannot change the filter channel number during the + parse). + """ + + def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): + """ + @param tokenSource A TokenSource instance (usually a Lexer) to pull + the tokens from. + + @param channel Skip tokens on any channel but this one; this is how we + skip whitespace... + + """ + + TokenStream.__init__(self) + + self.tokenSource = tokenSource + + # Record every single token pulled from the source so we can reproduce + # chunks of it later. + self.tokens = [] + + # Map to override some Tokens' channel numbers + self.channelOverrideMap = {} + + # Set; discard any tokens with this type + self.discardSet = set() + + # Skip tokens on any channel but this one; this is how we skip whitespace... 
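+        # (off-channel tokens -- e.g. whitespace emitted on a hidden
+        # channel -- stay buffered in self.tokens, but skipOffTokenChannels
+        # below hides them from LA/LT and consume)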
+ self.channel = channel + + # By default, track all incoming tokens + self.discardOffChannelTokens = False + + # The index into the tokens list of the current token (next token + # to consume). p==-1 indicates that the tokens list is empty + self.p = -1 + + # Remember last marked position + self.lastMarker = None + + + def setTokenSource(self, tokenSource): + """Reset this token stream by setting its token source.""" + + self.tokenSource = tokenSource + self.tokens = [] + self.p = -1 + self.channel = DEFAULT_CHANNEL + + + def reset(self): + self.p = 0 + self.lastMarker = None + + + def fillBuffer(self): + """ + Load all tokens from the token source and put in tokens. + This is done upon first LT request because you might want to + set some token type / channel overrides before filling buffer. + """ + + + index = 0 + t = self.tokenSource.nextToken() + while t is not None and t.type != EOF: + discard = False + + if self.discardSet is not None and t.type in self.discardSet: + discard = True + + elif self.discardOffChannelTokens and t.channel != self.channel: + discard = True + + # is there a channel override for token type? + try: + overrideChannel = self.channelOverrideMap[t.type] + + except KeyError: + # no override for this type + pass + + else: + if overrideChannel == self.channel: + t.channel = overrideChannel + else: + discard = True + + if not discard: + t.index = index + self.tokens.append(t) + index += 1 + + t = self.tokenSource.nextToken() + + # leave p pointing at first token on channel + self.p = 0 + self.p = self.skipOffTokenChannels(self.p) + + + def consume(self): + """ + Move the input pointer to the next incoming token. The stream + must become active with LT(1) available. consume() simply + moves the input pointer so that LT(1) points at the next + input symbol. Consume at least one token. + + Walk past any token not on the channel the parser is listening to. + """ + + if self.p < len(self.tokens): + self.p += 1 + + self.p = self.skipOffTokenChannels(self.p) # leave p on valid token + + + def skipOffTokenChannels(self, i): + """ + Given a starting index, return the index of the first on-channel + token. + """ + + try: + while self.tokens[i].channel != self.channel: + i += 1 + except IndexError: + # hit the end of token stream + pass + + return i + + + def skipOffTokenChannelsReverse(self, i): + while i >= 0 and self.tokens[i].channel != self.channel: + i -= 1 + + return i + + + def setTokenTypeChannel(self, ttype, channel): + """ + A simple filter mechanism whereby you can tell this token stream + to force all tokens of type ttype to be on channel. For example, + when interpreting, we cannot exec actions so we need to tell + the stream to force all WS and NEWLINE to be a different, ignored + channel. + """ + + self.channelOverrideMap[ttype] = channel + + + def discardTokenType(self, ttype): + self.discardSet.add(ttype) + + + def getTokens(self, start=None, stop=None, types=None): + """ + Given a start and stop index, return a list of all tokens in + the token type set. Return None if no tokens were found. This + method looks at both on and off channel tokens. 
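+
+        Example (sketch; ID is a hypothetical token type constant):
+
+            ids = stream.getTokens(types=ID)   # every buffered ID token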
+ """ + + if self.p == -1: + self.fillBuffer() + + if stop is None or stop >= len(self.tokens): + stop = len(self.tokens) - 1 + + if start is None or stop < 0: + start = 0 + + if start > stop: + return None + + if isinstance(types, (int, long)): + # called with a single type, wrap into set + types = set([types]) + + filteredTokens = [ + token for token in self.tokens[start:stop] + if types is None or token.type in types + ] + + if len(filteredTokens) == 0: + return None + + return filteredTokens + + + def LT(self, k): + """ + Get the ith token from the current position 1..n where k=1 is the + first symbol of lookahead. + """ + + if self.p == -1: + self.fillBuffer() + + if k == 0: + return None + + if k < 0: + return self.LB(-k) + + i = self.p + n = 1 + # find k good tokens + while n < k: + # skip off-channel tokens + i = self.skipOffTokenChannels(i+1) # leave p on valid token + n += 1 + + try: + return self.tokens[i] + except IndexError: + return EOF_TOKEN + + + def LB(self, k): + """Look backwards k tokens on-channel tokens""" + + if self.p == -1: + self.fillBuffer() + + if k == 0: + return None + + if self.p - k < 0: + return None + + i = self.p + n = 1 + # find k good tokens looking backwards + while n <= k: + # skip off-channel tokens + i = self.skipOffTokenChannelsReverse(i-1) # leave p on valid token + n += 1 + + if i < 0: + return None + + return self.tokens[i] + + + def get(self, i): + """ + Return absolute token i; ignore which channel the tokens are on; + that is, count all tokens not just on-channel tokens. + """ + + return self.tokens[i] + + + def LA(self, i): + return self.LT(i).type + + + def mark(self): + self.lastMarker = self.index() + return self.lastMarker + + + def release(self, marker=None): + # no resources to release + pass + + + def size(self): + return len(self.tokens) + + + def index(self): + return self.p + + + def rewind(self, marker=None): + if marker is None: + marker = self.lastMarker + + self.seek(marker) + + + def seek(self, index): + self.p = index + + + def getTokenSource(self): + return self.tokenSource + + + def getSourceName(self): + return self.tokenSource.getSourceName() + + + def toString(self, start=None, stop=None): + if self.p == -1: + self.fillBuffer() + + if start is None: + start = 0 + elif not isinstance(start, int): + start = start.index + + if stop is None: + stop = len(self.tokens) - 1 + elif not isinstance(stop, int): + stop = stop.index + + if stop >= len(self.tokens): + stop = len(self.tokens) - 1 + + return ''.join([t.text for t in self.tokens[start:stop+1]]) + + +class RewriteOperation(object): + """@brief Internal helper class.""" + + def __init__(self, stream, index, text): + self.stream = stream + self.index = index + self.text = text + + def execute(self, buf): + """Execute the rewrite operation by possibly adding to the buffer. + Return the index of the next token to operate on. + """ + + return self.index + + def toString(self): + opName = self.__class__.__name__ + return '<%s@%d:"%s">' % (opName, self.index, self.text) + + __str__ = toString + __repr__ = toString + + +class InsertBeforeOp(RewriteOperation): + """@brief Internal helper class.""" + + def execute(self, buf): + buf.write(self.text) + buf.write(self.stream.tokens[self.index].text) + return self.index + 1 + + +class ReplaceOp(RewriteOperation): + """ + @brief Internal helper class. + + I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp + instructions. 
+ """ + + def __init__(self, stream, first, last, text): + RewriteOperation.__init__(self, stream, first, text) + self.lastIndex = last + + + def execute(self, buf): + if self.text is not None: + buf.write(self.text) + + return self.lastIndex + 1 + + + def toString(self): + return '' % ( + self.index, self.lastIndex, self.text) + + __str__ = toString + __repr__ = toString + + +class DeleteOp(ReplaceOp): + """ + @brief Internal helper class. + """ + + def __init__(self, stream, first, last): + ReplaceOp.__init__(self, stream, first, last, None) + + + def toString(self): + return '' % (self.index, self.lastIndex) + + __str__ = toString + __repr__ = toString + + +class TokenRewriteStream(CommonTokenStream): + """@brief CommonTokenStream that can be modified. + + Useful for dumping out the input stream after doing some + augmentation or other manipulations. + + You can insert stuff, replace, and delete chunks. Note that the + operations are done lazily--only if you convert the buffer to a + String. This is very efficient because you are not moving data around + all the time. As the buffer of tokens is converted to strings, the + toString() method(s) check to see if there is an operation at the + current index. If so, the operation is done and then normal String + rendering continues on the buffer. This is like having multiple Turing + machine instruction streams (programs) operating on a single input tape. :) + + Since the operations are done lazily at toString-time, operations do not + screw up the token index values. That is, an insert operation at token + index i does not change the index values for tokens i+1..n-1. + + Because operations never actually alter the buffer, you may always get + the original token stream back without undoing anything. Since + the instructions are queued up, you can easily simulate transactions and + roll back any changes if there is an error just by removing instructions. + For example, + + CharStream input = new ANTLRFileStream("input"); + TLexer lex = new TLexer(input); + TokenRewriteStream tokens = new TokenRewriteStream(lex); + T parser = new T(tokens); + parser.startRule(); + + Then in the rules, you can execute + Token t,u; + ... + input.insertAfter(t, "text to put after t");} + input.insertAfter(u, "text after u");} + System.out.println(tokens.toString()); + + Actually, you have to cast the 'input' to a TokenRewriteStream. :( + + You can also have multiple "instruction streams" and get multiple + rewrites from a single pass over the input. Just name the instruction + streams and use that name again when printing the buffer. This could be + useful for generating a C file and also its header file--all from the + same buffer: + + tokens.insertAfter("pass1", t, "text to put after t");} + tokens.insertAfter("pass2", u, "text after u");} + System.out.println(tokens.toString("pass1")); + System.out.println(tokens.toString("pass2")); + + If you don't use named rewrite streams, a "default" stream is used as + the first example shows. + """ + + DEFAULT_PROGRAM_NAME = "default" + MIN_TOKEN_INDEX = 0 + + def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): + CommonTokenStream.__init__(self, tokenSource, channel) + + # You may have multiple, named streams of rewrite operations. + # I'm calling these things "programs." 
+ # Maps String (name) -> rewrite (List) + self.programs = {} + self.programs[self.DEFAULT_PROGRAM_NAME] = [] + + # Map String (program name) -> Integer index + self.lastRewriteTokenIndexes = {} + + + def rollback(self, *args): + """ + Rollback the instruction stream for a program so that + the indicated instruction (via instructionIndex) is no + longer in the stream. UNTESTED! + """ + + if len(args) == 2: + programName = args[0] + instructionIndex = args[1] + elif len(args) == 1: + programName = self.DEFAULT_PROGRAM_NAME + instructionIndex = args[0] + else: + raise TypeError("Invalid arguments") + + p = self.programs.get(programName, None) + if p is not None: + self.programs[programName] = ( + p[self.MIN_TOKEN_INDEX:instructionIndex]) + + + def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME): + """Reset the program so that no instructions exist""" + + self.rollback(programName, self.MIN_TOKEN_INDEX) + + + def insertAfter(self, *args): + if len(args) == 2: + programName = self.DEFAULT_PROGRAM_NAME + index = args[0] + text = args[1] + + elif len(args) == 3: + programName = args[0] + index = args[1] + text = args[2] + + else: + raise TypeError("Invalid arguments") + + if isinstance(index, Token): + # index is a Token, grap the stream index from it + index = index.index + + # to insert after, just insert before next index (even if past end) + self.insertBefore(programName, index+1, text) + + + def insertBefore(self, *args): + if len(args) == 2: + programName = self.DEFAULT_PROGRAM_NAME + index = args[0] + text = args[1] + + elif len(args) == 3: + programName = args[0] + index = args[1] + text = args[2] + + else: + raise TypeError("Invalid arguments") + + if isinstance(index, Token): + # index is a Token, grap the stream index from it + index = index.index + + op = InsertBeforeOp(self, index, text) + rewrites = self.getProgram(programName) + rewrites.append(op) + + + def replace(self, *args): + if len(args) == 2: + programName = self.DEFAULT_PROGRAM_NAME + first = args[0] + last = args[0] + text = args[1] + + elif len(args) == 3: + programName = self.DEFAULT_PROGRAM_NAME + first = args[0] + last = args[1] + text = args[2] + + elif len(args) == 4: + programName = args[0] + first = args[1] + last = args[2] + text = args[3] + + else: + raise TypeError("Invalid arguments") + + if isinstance(first, Token): + # first is a Token, grap the stream index from it + first = first.index + + if isinstance(last, Token): + # last is a Token, grap the stream index from it + last = last.index + + if first > last or first < 0 or last < 0 or last >= len(self.tokens): + raise ValueError( + "replace: range invalid: "+first+".."+last+ + "(size="+len(self.tokens)+")") + + op = ReplaceOp(self, first, last, text) + rewrites = self.getProgram(programName) + rewrites.append(op) + + + def delete(self, *args): + self.replace(*(list(args) + [None])) + + + def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME): + return self.lastRewriteTokenIndexes.get(programName, -1) + + + def setLastRewriteTokenIndex(self, programName, i): + self.lastRewriteTokenIndexes[programName] = i + + + def getProgram(self, name): + p = self.programs.get(name, None) + if p is None: + p = self.initializeProgram(name) + + return p + + + def initializeProgram(self, name): + p = [] + self.programs[name] = p + return p + + + def toOriginalString(self, start=None, end=None): + if start is None: + start = self.MIN_TOKEN_INDEX + if end is None: + end = self.size() - 1 + + buf = StringIO() + i = start + while i >= self.MIN_TOKEN_INDEX and 
i <= end and i < len(self.tokens): + buf.write(self.get(i).text) + i += 1 + + return buf.getvalue() + + + def toString(self, *args): + if len(args) == 0: + programName = self.DEFAULT_PROGRAM_NAME + start = self.MIN_TOKEN_INDEX + end = self.size() - 1 + + elif len(args) == 1: + programName = args[0] + start = self.MIN_TOKEN_INDEX + end = self.size() - 1 + + elif len(args) == 2: + programName = self.DEFAULT_PROGRAM_NAME + start = args[0] + end = args[1] + + if start is None: + start = self.MIN_TOKEN_INDEX + elif not isinstance(start, int): + start = start.index + + if end is None: + end = len(self.tokens) - 1 + elif not isinstance(end, int): + end = end.index + + # ensure start/end are in range + if end >= len(self.tokens): + end = len(self.tokens) - 1 + + if start < 0: + start = 0 + + rewrites = self.programs.get(programName) + if rewrites is None or len(rewrites) == 0: + # no instructions to execute + return self.toOriginalString(start, end) + + buf = StringIO() + + # First, optimize instruction stream + indexToOp = self.reduceToSingleOperationPerIndex(rewrites) + + # Walk buffer, executing instructions and emitting tokens + i = start + while i <= end and i < len(self.tokens): + op = indexToOp.get(i) + # remove so any left have index size-1 + try: + del indexToOp[i] + except KeyError: + pass + + t = self.tokens[i] + if op is None: + # no operation at that index, just dump token + buf.write(t.text) + i += 1 # move to next token + + else: + i = op.execute(buf) # execute operation and skip + + # include stuff after end if it's last index in buffer + # So, if they did an insertAfter(lastValidIndex, "foo"), include + # foo if end==lastValidIndex. + if end == len(self.tokens) - 1: + # Scan any remaining operations after last token + # should be included (they will be inserts). + for i in sorted(indexToOp.keys()): + op = indexToOp[i] + if op.index >= len(self.tokens)-1: + buf.write(op.text) + + return buf.getvalue() + + __str__ = toString + + + def reduceToSingleOperationPerIndex(self, rewrites): + """ + We need to combine operations and report invalid operations (like + overlapping replaces that are not completed nested). Inserts to + same index need to be combined etc... Here are the cases: + + I.i.u I.j.v leave alone, nonoverlapping + I.i.u I.i.v combine: Iivu + + R.i-j.u R.x-y.v | i-j in x-y delete first R + R.i-j.u R.i-j.v delete first R + R.i-j.u R.x-y.v | x-y in i-j ERROR + R.i-j.u R.x-y.v | boundaries overlap ERROR + + I.i.u R.x-y.v | i in x-y delete I + I.i.u R.x-y.v | i not in x-y leave alone, nonoverlapping + R.x-y.v I.i.u | i in x-y ERROR + R.x-y.v I.x.u R.x-y.uv (combine, delete I) + R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping + + I.i.u = insert u before op @ index i + R.x-y.u = replace x-y indexed tokens with u + + First we need to examine replaces. For any replace op: + + 1. wipe out any insertions before op within that range. + 2. Drop any replace op before that is contained completely within + that range. + 3. Throw exception upon boundary overlap with any previous replace. + + Then we can deal with inserts: + + 1. for any inserts to same index, combine even if not adjacent. + 2. for any prior replace with same left boundary, combine this + insert with replace and delete this replace. + 3. throw exception if index in same range as previous replace + + Don't actually delete; make op null in list. Easier to walk list. + Later we can throw as we add to index -> op map. 
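+
+        For example, two inserts at the same index, issued as
+
+            stream.insertBefore(2, "a")
+            stream.insertBefore(2, "b")
+
+        collapse into a single InsertBeforeOp at index 2 with text "ba";
+        the later instruction's text ends up first, matching
+        I.i.u I.i.v -> Iivu above.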
+ + Note that I.2 R.2-2 will wipe out I.2 even though, technically, the + inserted stuff would be before the replace range. But, if you + add tokens in front of a method body '{' and then delete the method + body, I think the stuff before the '{' you added should disappear too. + + Return a map from token index to operation. + """ + + # WALK REPLACES + for i, rop in enumerate(rewrites): + if rop is None: + continue + + if not isinstance(rop, ReplaceOp): + continue + + # Wipe prior inserts within range + for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i): + if iop.index >= rop.index and iop.index <= rop.lastIndex: + rewrites[j] = None # delete insert as it's a no-op. + + # Drop any prior replaces contained within + for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i): + if (prevRop.index >= rop.index + and prevRop.lastIndex <= rop.lastIndex): + rewrites[j] = None # delete replace as it's a no-op. + continue + + # throw exception unless disjoint or identical + disjoint = (prevRop.lastIndex < rop.index + or prevRop.index > rop.lastIndex) + same = (prevRop.index == rop.index + and prevRop.lastIndex == rop.lastIndex) + if not disjoint and not same: + raise ValueError( + "replace op boundaries of %s overlap with previous %s" + % (rop, prevRop)) + + # WALK INSERTS + for i, iop in enumerate(rewrites): + if iop is None: + continue + + if not isinstance(iop, InsertBeforeOp): + continue + + # combine current insert with prior if any at same index + for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i): + if prevIop.index == iop.index: # combine objects + # convert to strings...we're in process of toString'ing + # whole token buffer so no lazy eval issue with any + # templates + iop.text = self.catOpText(iop.text, prevIop.text) + rewrites[j] = None # delete redundant prior insert + + # look for replaces where iop.index is in range; error + for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i): + if iop.index == rop.index: + rop.text = self.catOpText(iop.text, rop.text) + rewrites[i] = None # delete current insert + continue + + if iop.index >= rop.index and iop.index <= rop.lastIndex: + raise ValueError( + "insert op %s within boundaries of previous %s" + % (iop, rop)) + + m = {} + for i, op in enumerate(rewrites): + if op is None: + continue # ignore deleted ops + + assert op.index not in m, "should only be one op per index" + m[op.index] = op + + return m + + + def catOpText(self, a, b): + x = "" + y = "" + if a is not None: + x = a + if b is not None: + y = b + return x + y + + + def getKindOfOps(self, rewrites, kind, before=None): + if before is None: + before = len(rewrites) + elif before > len(rewrites): + before = len(rewrites) + + for i, op in enumerate(rewrites[:before]): + if op is None: + # ignore deleted + continue + if op.__class__ == kind: + yield i, op + + + def toDebugString(self, start=None, end=None): + if start is None: + start = self.MIN_TOKEN_INDEX + if end is None: + end = self.size() - 1 + + buf = StringIO() + i = start + while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): + buf.write(self.get(i)) + i += 1 + + return buf.getvalue() diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/tokens.py b/antlr/antlr_python_runtime-3.1.3/antlr3/tokens.py new file mode 100644 index 0000000000000000000000000000000000000000..8ce835d92904d43fbf3ed83e05accc92a0caea83 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/antlr3/tokens.py @@ -0,0 +1,416 @@ +"""ANTLR3 runtime package""" + +# begin[licence] +# +# [The "BSD licence"] +# Copyright (c) 
2005-2008 Terence Parr +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# end[licence] + +from antlr3.constants import EOF, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE + +############################################################################ +# +# basic token interface +# +############################################################################ + +class Token(object): + """@brief Abstract token baseclass.""" + + def getText(self): + """@brief Get the text of the token. + + Using setter/getter methods is deprecated. Use o.text instead. + """ + raise NotImplementedError + + def setText(self, text): + """@brief Set the text of the token. + + Using setter/getter methods is deprecated. Use o.text instead. + """ + raise NotImplementedError + + + def getType(self): + """@brief Get the type of the token. + + Using setter/getter methods is deprecated. Use o.type instead.""" + + raise NotImplementedError + + def setType(self, ttype): + """@brief Get the type of the token. + + Using setter/getter methods is deprecated. Use o.type instead.""" + + raise NotImplementedError + + + def getLine(self): + """@brief Get the line number on which this token was matched + + Lines are numbered 1..n + + Using setter/getter methods is deprecated. Use o.line instead.""" + + raise NotImplementedError + + def setLine(self, line): + """@brief Set the line number on which this token was matched + + Using setter/getter methods is deprecated. Use o.line instead.""" + + raise NotImplementedError + + + def getCharPositionInLine(self): + """@brief Get the column of the tokens first character, + + Columns are numbered 0..n-1 + + Using setter/getter methods is deprecated. Use o.charPositionInLine instead.""" + + raise NotImplementedError + + def setCharPositionInLine(self, pos): + """@brief Set the column of the tokens first character, + + Using setter/getter methods is deprecated. Use o.charPositionInLine instead.""" + + raise NotImplementedError + + + def getChannel(self): + """@brief Get the channel of the token + + Using setter/getter methods is deprecated. 
Use o.channel instead.""" + + raise NotImplementedError + + def setChannel(self, channel): + """@brief Set the channel of the token + + Using setter/getter methods is deprecated. Use o.channel instead.""" + + raise NotImplementedError + + + def getTokenIndex(self): + """@brief Get the index in the input stream. + + An index from 0..n-1 of the token object in the input stream. + This must be valid in order to use the ANTLRWorks debugger. + + Using setter/getter methods is deprecated. Use o.index instead.""" + + raise NotImplementedError + + def setTokenIndex(self, index): + """@brief Set the index in the input stream. + + Using setter/getter methods is deprecated. Use o.index instead.""" + + raise NotImplementedError + + + def getInputStream(self): + """@brief From what character stream was this token created. + + You don't have to implement but it's nice to know where a Token + comes from if you have include files etc... on the input.""" + + raise NotImplementedError + + def setInputStream(self, input): + """@brief From what character stream was this token created. + + You don't have to implement but it's nice to know where a Token + comes from if you have include files etc... on the input.""" + + raise NotImplementedError + + +############################################################################ +# +# token implementations +# +# Token +# +- CommonToken +# \- ClassicToken +# +############################################################################ + +class CommonToken(Token): + """@brief Basic token implementation. + + This implementation does not copy the text from the input stream upon + creation, but keeps start/stop pointers into the stream to avoid + unnecessary copy operations. + + """ + + def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None, + input=None, start=None, stop=None, oldToken=None): + Token.__init__(self) + + if oldToken is not None: + self.type = oldToken.type + self.line = oldToken.line + self.charPositionInLine = oldToken.charPositionInLine + self.channel = oldToken.channel + self.index = oldToken.index + self._text = oldToken._text + if isinstance(oldToken, CommonToken): + self.input = oldToken.input + self.start = oldToken.start + self.stop = oldToken.stop + + else: + self.type = type + self.input = input + self.charPositionInLine = -1 # set to invalid position + self.line = 0 + self.channel = channel + + #What token number is this from 0..n-1 tokens; < 0 implies invalid index + self.index = -1 + + # We need to be able to change the text once in a while. If + # this is non-null, then getText should return this. Note that + # start/stop are not affected by changing this. + self._text = text + + # The char position into the input buffer where this token starts + self.start = start + + # The char position into the input buffer where this token stops + # This is the index of the last char, *not* the index after it! + self.stop = stop + + + def getText(self): + if self._text is not None: + return self._text + + if self.input is None: + return None + + return self.input.substring(self.start, self.stop) + + + def setText(self, text): + """ + Override the text for this token. getText() will return this text + rather than pulling from the buffer. Note that this does not mean + that start/stop indexes are not valid. It means that that input + was converted to a new string in the token object. 
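+
+        Sketch (tok is assumed to be a CommonToken):
+
+            tok.text = 'override'   # stored in _text via this setter
+            print tok.text          # returns the override, not buffer text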
+ """ + self._text = text + + text = property(getText, setText) + + + def getType(self): + return self.type + + def setType(self, ttype): + self.type = ttype + + + def getLine(self): + return self.line + + def setLine(self, line): + self.line = line + + + def getCharPositionInLine(self): + return self.charPositionInLine + + def setCharPositionInLine(self, pos): + self.charPositionInLine = pos + + + def getChannel(self): + return self.channel + + def setChannel(self, channel): + self.channel = channel + + + def getTokenIndex(self): + return self.index + + def setTokenIndex(self, index): + self.index = index + + + def getInputStream(self): + return self.input + + def setInputStream(self, input): + self.input = input + + + def __str__(self): + if self.type == EOF: + return "" + + channelStr = "" + if self.channel > 0: + channelStr = ",channel=" + str(self.channel) + + txt = self.text + if txt is not None: + txt = txt.replace("\n","\\\\n") + txt = txt.replace("\r","\\\\r") + txt = txt.replace("\t","\\\\t") + else: + txt = "" + + return "[@%d,%d:%d=%r,<%d>%s,%d:%d]" % ( + self.index, + self.start, self.stop, + txt, + self.type, channelStr, + self.line, self.charPositionInLine + ) + + +class ClassicToken(Token): + """@brief Alternative token implementation. + + A Token object like we'd use in ANTLR 2.x; has an actual string created + and associated with this object. These objects are needed for imaginary + tree nodes that have payload objects. We need to create a Token object + that has a string; the tree node will point at this token. CommonToken + has indexes into a char stream and hence cannot be used to introduce + new strings. + """ + + def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL, + oldToken=None + ): + Token.__init__(self) + + if oldToken is not None: + self.text = oldToken.text + self.type = oldToken.type + self.line = oldToken.line + self.charPositionInLine = oldToken.charPositionInLine + self.channel = oldToken.channel + + self.text = text + self.type = type + self.line = None + self.charPositionInLine = None + self.channel = channel + self.index = None + + + def getText(self): + return self.text + + def setText(self, text): + self.text = text + + + def getType(self): + return self.type + + def setType(self, ttype): + self.type = ttype + + + def getLine(self): + return self.line + + def setLine(self, line): + self.line = line + + + def getCharPositionInLine(self): + return self.charPositionInLine + + def setCharPositionInLine(self, pos): + self.charPositionInLine = pos + + + def getChannel(self): + return self.channel + + def setChannel(self, channel): + self.channel = channel + + + def getTokenIndex(self): + return self.index + + def setTokenIndex(self, index): + self.index = index + + + def getInputStream(self): + return None + + def setInputStream(self, input): + pass + + + def toString(self): + channelStr = "" + if self.channel > 0: + channelStr = ",channel=" + str(self.channel) + + txt = self.text + if txt is None: + txt = "" + + return "[@%r,%r,<%r>%s,%r:%r]" % (self.index, + txt, + self.type, + channelStr, + self.line, + self.charPositionInLine + ) + + + __str__ = toString + __repr__ = toString + + + +EOF_TOKEN = CommonToken(type=EOF) + +INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE) + +# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR +# will avoid creating a token for this symbol and try to fetch another. 
+SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE) + + diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/tree.py b/antlr/antlr_python_runtime-3.1.3/antlr3/tree.py new file mode 100644 index 0000000000000000000000000000000000000000..5fcf52661a9ed89b14bebad9791813495c2edd41 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/antlr3/tree.py @@ -0,0 +1,2702 @@ +""" @package antlr3.tree +@brief ANTLR3 runtime package, tree module + +This module contains all support classes for AST construction and tree parsers. + +""" + +# begin[licence] +# +# [The "BSD licence"] +# Copyright (c) 2005-2008 Terence Parr +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# end[licence] + +# lot's of docstrings are missing, don't complain for now... +# pylint: disable-msg=C0111 + +import re + +from antlr3.constants import UP, DOWN, EOF, INVALID_TOKEN_TYPE +from antlr3.recognizers import BaseRecognizer, RuleReturnScope +from antlr3.streams import IntStream +from antlr3.tokens import CommonToken, Token, INVALID_TOKEN +from antlr3.exceptions import MismatchedTreeNodeException, \ + MissingTokenException, UnwantedTokenException, MismatchedTokenException, \ + NoViableAltException + + +############################################################################ +# +# tree related exceptions +# +############################################################################ + + +class RewriteCardinalityException(RuntimeError): + """ + @brief Base class for all exceptions thrown during AST rewrite construction. 
+ + This signifies a case where the cardinality of two or more elements + in a subrule are different: (ID INT)+ where |ID|!=|INT| + """ + + def __init__(self, elementDescription): + RuntimeError.__init__(self, elementDescription) + + self.elementDescription = elementDescription + + + def getMessage(self): + return self.elementDescription + + +class RewriteEarlyExitException(RewriteCardinalityException): + """@brief No elements within a (...)+ in a rewrite rule""" + + def __init__(self, elementDescription=None): + RewriteCardinalityException.__init__(self, elementDescription) + + +class RewriteEmptyStreamException(RewriteCardinalityException): + """ + @brief Ref to ID or expr but no tokens in ID stream or subtrees in expr stream + """ + + pass + + +############################################################################ +# +# basic Tree and TreeAdaptor interfaces +# +############################################################################ + +class Tree(object): + """ + @brief Abstract baseclass for tree nodes. + + What does a tree look like? ANTLR has a number of support classes + such as CommonTreeNodeStream that work on these kinds of trees. You + don't have to make your trees implement this interface, but if you do, + you'll be able to use more support code. + + NOTE: When constructing trees, ANTLR can build any kind of tree; it can + even use Token objects as trees if you add a child list to your tokens. + + This is a tree node without any payload; just navigation and factory stuff. + """ + + + def getChild(self, i): + raise NotImplementedError + + + def getChildCount(self): + raise NotImplementedError + + + def getParent(self): + """Tree tracks parent and child index now > 3.0""" + + raise NotImplementedError + + def setParent(self, t): + """Tree tracks parent and child index now > 3.0""" + + raise NotImplementedError + + + def hasAncestor(self, ttype): + """Walk upwards looking for ancestor with this token type.""" + + raise NotImplementedError + + def getAncestor(self, ttype): + """Walk upwards and get first ancestor with this token type.""" + + raise NotImplementedError + + def getAncestors(self): + """Return a list of all ancestors of this node. + + The first node of list is the root and the last is the parent of + this node. + """ + + raise NotImplementedError + + + def getChildIndex(self): + """This node is what child index? 0..n-1""" + + raise NotImplementedError + + def setChildIndex(self, index): + """This node is what child index? 0..n-1""" + + raise NotImplementedError + + + def freshenParentAndChildIndexes(self): + """Set the parent and child index values for all children""" + + raise NotImplementedError + + + def addChild(self, t): + """ + Add t as a child to this node. If t is null, do nothing. If t + is nil, add all children of t to this' children. + """ + + raise NotImplementedError + + + def setChild(self, i, t): + """Set ith child (0..n-1) to t; t must be non-null and non-nil node""" + + raise NotImplementedError + + + def deleteChild(self, i): + raise NotImplementedError + + + def replaceChildren(self, startChildIndex, stopChildIndex, t): + """ + Delete children from start to stop and replace with t even if t is + a list (nil-root tree). num of children can increase or decrease. + For huge child lists, inserting children can force walking rest of + children to set their childindex; could be slow. + """ + + raise NotImplementedError + + + def isNil(self): + """ + Indicates the node is a nil node but may still have children, meaning + the tree is a flat list. 
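+
+        For example, a sketch of building such a flat list ('adaptor' and
+        the nodes a, b are assumed to exist):
+
+            t = adaptor.nil()
+            adaptor.addChild(t, a)
+            adaptor.addChild(t, b)
+            # t.isNil() is True; the "tree" is really the list [a, b]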
+ """ + + raise NotImplementedError + + + def getTokenStartIndex(self): + """ + What is the smallest token index (indexing from 0) for this node + and its children? + """ + + raise NotImplementedError + + + def setTokenStartIndex(self, index): + raise NotImplementedError + + + def getTokenStopIndex(self): + """ + What is the largest token index (indexing from 0) for this node + and its children? + """ + + raise NotImplementedError + + + def setTokenStopIndex(self, index): + raise NotImplementedError + + + def dupNode(self): + raise NotImplementedError + + + def getType(self): + """Return a token type; needed for tree parsing.""" + + raise NotImplementedError + + + def getText(self): + raise NotImplementedError + + + def getLine(self): + """ + In case we don't have a token payload, what is the line for errors? + """ + + raise NotImplementedError + + + def getCharPositionInLine(self): + raise NotImplementedError + + + def toStringTree(self): + raise NotImplementedError + + + def toString(self): + raise NotImplementedError + + + +class TreeAdaptor(object): + """ + @brief Abstract baseclass for tree adaptors. + + How to create and navigate trees. Rather than have a separate factory + and adaptor, I've merged them. Makes sense to encapsulate. + + This takes the place of the tree construction code generated in the + generated code in 2.x and the ASTFactory. + + I do not need to know the type of a tree at all so they are all + generic Objects. This may increase the amount of typecasting needed. :( + """ + + # C o n s t r u c t i o n + + def createWithPayload(self, payload): + """ + Create a tree node from Token object; for CommonTree type trees, + then the token just becomes the payload. This is the most + common create call. + + Override if you want another kind of node to be built. + """ + + raise NotImplementedError + + + def dupNode(self, treeNode): + """Duplicate a single tree node. + + Override if you want another kind of node to be built.""" + + raise NotImplementedError + + + def dupTree(self, tree): + """Duplicate tree recursively, using dupNode() for each node""" + + raise NotImplementedError + + + def nil(self): + """ + Return a nil node (an empty but non-null node) that can hold + a list of element as the children. If you want a flat tree (a list) + use "t=adaptor.nil(); t.addChild(x); t.addChild(y);" + """ + + raise NotImplementedError + + + def errorNode(self, input, start, stop, exc): + """ + Return a tree node representing an error. This node records the + tokens consumed during error recovery. The start token indicates the + input symbol at which the error was detected. The stop token indicates + the last symbol consumed during recovery. + + You must specify the input stream so that the erroneous text can + be packaged up in the error node. The exception could be useful + to some applications; default implementation stores ptr to it in + the CommonErrorNode. + + This only makes sense during token parsing, not tree parsing. + Tree parsing should happen only when parsing and tree construction + succeed. + """ + + raise NotImplementedError + + + def isNil(self, tree): + """Is tree considered a nil node used to make lists of child nodes?""" + + raise NotImplementedError + + + def addChild(self, t, child): + """ + Add a child to the tree t. If child is a flat tree (a list), make all + in list children of t. 
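+
+        For instance (a sketch; root, a and b are existing nodes):
+
+            flat = adaptor.nil()
+            adaptor.addChild(flat, a)
+            adaptor.addChild(flat, b)
+            adaptor.addChild(root, flat)   # a and b become children of root
+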
Warning: if t has no children, but child does + and child isNil then you can decide it is ok to move children to t via + t.children = child.children; i.e., without copying the array. Just + make sure that this is consistent with have the user will build + ASTs. Do nothing if t or child is null. + """ + + raise NotImplementedError + + + def becomeRoot(self, newRoot, oldRoot): + """ + If oldRoot is a nil root, just copy or move the children to newRoot. + If not a nil root, make oldRoot a child of newRoot. + + old=^(nil a b c), new=r yields ^(r a b c) + old=^(a b c), new=r yields ^(r ^(a b c)) + + If newRoot is a nil-rooted single child tree, use the single + child as the new root node. + + old=^(nil a b c), new=^(nil r) yields ^(r a b c) + old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) + + If oldRoot was null, it's ok, just return newRoot (even if isNil). + + old=null, new=r yields r + old=null, new=^(nil r) yields ^(nil r) + + Return newRoot. Throw an exception if newRoot is not a + simple node or nil root with a single child node--it must be a root + node. If newRoot is ^(nil x) return x as newRoot. + + Be advised that it's ok for newRoot to point at oldRoot's + children; i.e., you don't have to copy the list. We are + constructing these nodes so we should have this control for + efficiency. + """ + + raise NotImplementedError + + + def rulePostProcessing(self, root): + """ + Given the root of the subtree created for this rule, post process + it to do any simplifications or whatever you want. A required + behavior is to convert ^(nil singleSubtree) to singleSubtree + as the setting of start/stop indexes relies on a single non-nil root + for non-flat trees. + + Flat trees such as for lists like "idlist : ID+ ;" are left alone + unless there is only one ID. For a list, the start/stop indexes + are set in the nil node. + + This method is executed after all rule tree construction and right + before setTokenBoundaries(). + """ + + raise NotImplementedError + + + def getUniqueID(self, node): + """For identifying trees. + + How to identify nodes so we can say "add node to a prior node"? + Even becomeRoot is an issue. Use System.identityHashCode(node) + usually. + """ + + raise NotImplementedError + + + # R e w r i t e R u l e s + + def createFromToken(self, tokenType, fromToken, text=None): + """ + Create a new node derived from a token, with a new token type and + (optionally) new text. + + This is invoked from an imaginary node ref on right side of a + rewrite rule as IMAG[$tokenLabel] or IMAG[$tokenLabel "IMAG"]. + + This should invoke createToken(Token). + """ + + raise NotImplementedError + + + def createFromType(self, tokenType, text): + """Create a new node derived from a token, with a new token type. + + This is invoked from an imaginary node ref on right side of a + rewrite rule as IMAG["IMAG"]. + + This should invoke createToken(int,String). + """ + + raise NotImplementedError + + + # C o n t e n t + + def getType(self, t): + """For tree parsing, I need to know the token type of a node""" + + raise NotImplementedError + + + def setType(self, t, type): + """Node constructors can set the type of a node""" + + raise NotImplementedError + + + def getText(self, t): + raise NotImplementedError + + def setText(self, t, text): + """Node constructors can set the text of a node""" + + raise NotImplementedError + + + def getToken(self, t): + """Return the token object from which this node was created. + + Currently used only for printing an error message. 
+ The error display routine in BaseRecognizer needs to + display where the input the error occurred. If your + tree of limitation does not store information that can + lead you to the token, you can create a token filled with + the appropriate information and pass that back. See + BaseRecognizer.getErrorMessage(). + """ + + raise NotImplementedError + + + def setTokenBoundaries(self, t, startToken, stopToken): + """ + Where are the bounds in the input token stream for this node and + all children? Each rule that creates AST nodes will call this + method right before returning. Flat trees (i.e., lists) will + still usually have a nil root node just to hold the children list. + That node would contain the start/stop indexes then. + """ + + raise NotImplementedError + + + def getTokenStartIndex(self, t): + """ + Get the token start index for this subtree; return -1 if no such index + """ + + raise NotImplementedError + + + def getTokenStopIndex(self, t): + """ + Get the token stop index for this subtree; return -1 if no such index + """ + + raise NotImplementedError + + + # N a v i g a t i o n / T r e e P a r s i n g + + def getChild(self, t, i): + """Get a child 0..n-1 node""" + + raise NotImplementedError + + + def setChild(self, t, i, child): + """Set ith child (0..n-1) to t; t must be non-null and non-nil node""" + + raise NotImplementedError + + + def deleteChild(self, t, i): + """Remove ith child and shift children down from right.""" + + raise NotImplementedError + + + def getChildCount(self, t): + """How many children? If 0, then this is a leaf node""" + + raise NotImplementedError + + + def getParent(self, t): + """ + Who is the parent node of this node; if null, implies node is root. + If your node type doesn't handle this, it's ok but the tree rewrites + in tree parsers need this functionality. + """ + + raise NotImplementedError + + + def setParent(self, t, parent): + """ + Who is the parent node of this node; if null, implies node is root. + If your node type doesn't handle this, it's ok but the tree rewrites + in tree parsers need this functionality. + """ + + raise NotImplementedError + + + def getChildIndex(self, t): + """ + What index is this node in the child list? Range: 0..n-1 + If your node type doesn't handle this, it's ok but the tree rewrites + in tree parsers need this functionality. + """ + + raise NotImplementedError + + + def setChildIndex(self, t, index): + """ + What index is this node in the child list? Range: 0..n-1 + If your node type doesn't handle this, it's ok but the tree rewrites + in tree parsers need this functionality. + """ + + raise NotImplementedError + + + def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): + """ + Replace from start to stop child index of parent with t, which might + be a list. Number of children may be different + after this call. + + If parent is null, don't do anything; must be at root of overall tree. + Can't replace whatever points to the parent externally. Do nothing. + """ + + raise NotImplementedError + + + # Misc + + def create(self, *args): + """ + Deprecated, use createWithPayload, createFromToken or createFromType. + + This method only exists to mimic the Java interface of TreeAdaptor. 
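+
+        The four Java-style signatures dispatch roughly as follows (a
+        sketch; argument names are illustrative):
+
+            create(token)                  => createWithPayload(token)
+            create(tokenType, token)       => createFromToken(tokenType, token)
+            create(tokenType, token, text) => createFromToken(tokenType, token, text)
+            create(tokenType, text)        => createFromType(tokenType, text)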
+ + """ + + if len(args) == 1 and isinstance(args[0], Token): + # Object create(Token payload); +## warnings.warn( +## "Using create() is deprecated, use createWithPayload()", +## DeprecationWarning, +## stacklevel=2 +## ) + return self.createWithPayload(args[0]) + + if (len(args) == 2 + and isinstance(args[0], (int, long)) + and isinstance(args[1], Token) + ): + # Object create(int tokenType, Token fromToken); +## warnings.warn( +## "Using create() is deprecated, use createFromToken()", +## DeprecationWarning, +## stacklevel=2 +## ) + return self.createFromToken(args[0], args[1]) + + if (len(args) == 3 + and isinstance(args[0], (int, long)) + and isinstance(args[1], Token) + and isinstance(args[2], basestring) + ): + # Object create(int tokenType, Token fromToken, String text); +## warnings.warn( +## "Using create() is deprecated, use createFromToken()", +## DeprecationWarning, +## stacklevel=2 +## ) + return self.createFromToken(args[0], args[1], args[2]) + + if (len(args) == 2 + and isinstance(args[0], (int, long)) + and isinstance(args[1], basestring) + ): + # Object create(int tokenType, String text); +## warnings.warn( +## "Using create() is deprecated, use createFromType()", +## DeprecationWarning, +## stacklevel=2 +## ) + return self.createFromType(args[0], args[1]) + + raise TypeError( + "No create method with this signature found: %s" + % (', '.join(type(v).__name__ for v in args)) + ) + + +############################################################################ +# +# base implementation of Tree and TreeAdaptor +# +# Tree +# \- BaseTree +# +# TreeAdaptor +# \- BaseTreeAdaptor +# +############################################################################ + + +class BaseTree(Tree): + """ + @brief A generic tree implementation with no payload. + + You must subclass to + actually have any user data. ANTLR v3 uses a list of children approach + instead of the child-sibling approach in v2. A flat tree (a list) is + an empty node whose children represent the list. An empty, but + non-null node is called "nil". + """ + + # BaseTree is abstract, no need to complain about not implemented abstract + # methods + # pylint: disable-msg=W0223 + + def __init__(self, node=None): + """ + Create a new node from an existing node does nothing for BaseTree + as there are no fields other than the children list, which cannot + be copied as the children are not considered part of this node. + """ + + Tree.__init__(self) + self.children = [] + self.parent = None + self.childIndex = 0 + + + def getChild(self, i): + try: + return self.children[i] + except IndexError: + return None + + + def getChildren(self): + """@brief Get the children internal List + + Note that if you directly mess with + the list, do so at your own risk. + """ + + # FIXME: mark as deprecated + return self.children + + + def getFirstChildWithType(self, treeType): + for child in self.children: + if child.getType() == treeType: + return child + + return None + + + def getChildCount(self): + return len(self.children) + + + def addChild(self, childTree): + """Add t as child of this node. + + Warning: if t has no children, but child does + and child isNil then this routine moves children to t via + t.children = child.children; i.e., without copying the array. + """ + + # this implementation is much simpler and probably less efficient + # than the mumbo-jumbo that Ter did for the Java runtime. 
+ + if childTree is None: + return + + if childTree.isNil(): + # t is an empty node possibly with children + + if self.children is childTree.children: + raise ValueError("attempt to add child list to itself") + + # fix parent pointer and childIndex for new children + for idx, child in enumerate(childTree.children): + child.parent = self + child.childIndex = len(self.children) + idx + + self.children += childTree.children + + else: + # child is not nil (don't care about children) + self.children.append(childTree) + childTree.parent = self + childTree.childIndex = len(self.children) - 1 + + + def addChildren(self, children): + """Add all elements of kids list as children of this node""" + + self.children += children + + + def setChild(self, i, t): + if t is None: + return + + if t.isNil(): + raise ValueError("Can't set single child to a list") + + self.children[i] = t + t.parent = self + t.childIndex = i + + + def deleteChild(self, i): + killed = self.children[i] + + del self.children[i] + + # walk rest and decrement their child indexes + for idx, child in enumerate(self.children[i:]): + child.childIndex = i + idx + + return killed + + + def replaceChildren(self, startChildIndex, stopChildIndex, newTree): + """ + Delete children from start to stop and replace with t even if t is + a list (nil-root tree). num of children can increase or decrease. + For huge child lists, inserting children can force walking rest of + children to set their childindex; could be slow. + """ + + if (startChildIndex >= len(self.children) + or stopChildIndex >= len(self.children) + ): + raise IndexError("indexes invalid") + + replacingHowMany = stopChildIndex - startChildIndex + 1 + + # normalize to a list of children to add: newChildren + if newTree.isNil(): + newChildren = newTree.children + + else: + newChildren = [newTree] + + replacingWithHowMany = len(newChildren) + delta = replacingHowMany - replacingWithHowMany + + + if delta == 0: + # if same number of nodes, do direct replace + for idx, child in enumerate(newChildren): + self.children[idx + startChildIndex] = child + child.parent = self + child.childIndex = idx + startChildIndex + + else: + # length of children changes... + + # ...delete replaced segment... + del self.children[startChildIndex:stopChildIndex+1] + + # ...insert new segment... 
+            self.children[startChildIndex:startChildIndex] = newChildren
+
+            # ...and fix indices
+            self.freshenParentAndChildIndexes(startChildIndex)
+
+
+    def isNil(self):
+        return False
+
+
+    def freshenParentAndChildIndexes(self, offset=0):
+        for idx, child in enumerate(self.children[offset:]):
+            child.childIndex = idx + offset
+            child.parent = self
+
+
+    def sanityCheckParentAndChildIndexes(self, parent=None, i=-1):
+        if parent != self.parent:
+            raise ValueError(
+                "parents don't match; expected %r found %r"
+                % (parent, self.parent)
+                )
+
+        if i != self.childIndex:
+            raise ValueError(
+                "child indexes don't match; expected %d found %d"
+                % (i, self.childIndex)
+                )
+
+        for idx, child in enumerate(self.children):
+            child.sanityCheckParentAndChildIndexes(self, idx)
+
+
+    def getChildIndex(self):
+        """BaseTree doesn't track child indexes."""
+
+        return 0
+
+
+    def setChildIndex(self, index):
+        """BaseTree doesn't track child indexes."""
+
+        pass
+
+
+    def getParent(self):
+        """BaseTree doesn't track parent pointers."""
+
+        return None
+
+    def setParent(self, t):
+        """BaseTree doesn't track parent pointers."""
+
+        pass
+
+
+    def hasAncestor(self, ttype):
+        """Walk upwards looking for ancestor with this token type."""
+        return self.getAncestor(ttype) is not None
+
+    def getAncestor(self, ttype):
+        """Walk upwards and get first ancestor with this token type."""
+        t = self.getParent()
+        while t is not None:
+            if t.getType() == ttype:
+                return t
+            t = t.getParent()
+
+        return None
+
+    def getAncestors(self):
+        """Return a list of all ancestors of this node.
+
+        The first node of list is the root and the last is the parent of
+        this node.
+        """
+        if self.getParent() is None:
+            return None
+
+        ancestors = []
+        t = self.getParent()
+        while t is not None:
+            ancestors.insert(0, t) # insert at start
+            t = t.getParent()
+
+        return ancestors
+
+
+    def toStringTree(self):
+        """Print out a whole tree not just a node"""
+
+        if len(self.children) == 0:
+            return self.toString()
+
+        buf = []
+        if not self.isNil():
+            buf.append('(')
+            buf.append(self.toString())
+            buf.append(' ')
+
+        for i, child in enumerate(self.children):
+            if i > 0:
+                buf.append(' ')
+            buf.append(child.toStringTree())
+
+        if not self.isNil():
+            buf.append(')')
+
+        return ''.join(buf)
+
+
+    def getLine(self):
+        return 0
+
+
+    def getCharPositionInLine(self):
+        return 0
+
+
+    def toString(self):
+        """Override to say how a node (not a tree) should look as text"""
+
+        raise NotImplementedError
+
+
+
+class BaseTreeAdaptor(TreeAdaptor):
+    """
+    @brief A TreeAdaptor that works with any Tree implementation.
+    """
+
+    # BaseTreeAdaptor is abstract, no need to complain about not implemented
+    # abstract methods
+    # pylint: disable-msg=W0223
+
+    def nil(self):
+        return self.createWithPayload(None)
+
+
+    def errorNode(self, input, start, stop, exc):
+        """
+        Create a tree node that holds the start and stop tokens associated
+        with an error.
+
+        If you specify your own kind of tree nodes, you will likely have to
+        override this method. CommonTree returns Token.INVALID_TOKEN_TYPE
+        if no token payload but you might have to set token type for diff
+        node type.
+
+        You don't have to subclass CommonErrorNode; you will likely need to
+        subclass your own tree node class to avoid class cast exception.
+        """
+
+        return CommonErrorNode(input, start, stop, exc)
+
+
+    def isNil(self, tree):
+        return tree.isNil()
+
+
+    def dupTree(self, t, parent=None):
+        """
+        This is generic in the sense that it will work with any kind of
+        tree (not just Tree interface).
It invokes the adaptor routines + not the tree node routines to do the construction. + """ + + if t is None: + return None + + newTree = self.dupNode(t) + + # ensure new subtree root has parent/child index set + + # same index in new tree + self.setChildIndex(newTree, self.getChildIndex(t)) + + self.setParent(newTree, parent) + + for i in range(self.getChildCount(t)): + child = self.getChild(t, i) + newSubTree = self.dupTree(child, t) + self.addChild(newTree, newSubTree) + + return newTree + + + def addChild(self, tree, child): + """ + Add a child to the tree t. If child is a flat tree (a list), make all + in list children of t. Warning: if t has no children, but child does + and child isNil then you can decide it is ok to move children to t via + t.children = child.children; i.e., without copying the array. Just + make sure that this is consistent with have the user will build + ASTs. + """ + + #if isinstance(child, Token): + # child = self.createWithPayload(child) + + if tree is not None and child is not None: + tree.addChild(child) + + + def becomeRoot(self, newRoot, oldRoot): + """ + If oldRoot is a nil root, just copy or move the children to newRoot. + If not a nil root, make oldRoot a child of newRoot. + + old=^(nil a b c), new=r yields ^(r a b c) + old=^(a b c), new=r yields ^(r ^(a b c)) + + If newRoot is a nil-rooted single child tree, use the single + child as the new root node. + + old=^(nil a b c), new=^(nil r) yields ^(r a b c) + old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) + + If oldRoot was null, it's ok, just return newRoot (even if isNil). + + old=null, new=r yields r + old=null, new=^(nil r) yields ^(nil r) + + Return newRoot. Throw an exception if newRoot is not a + simple node or nil root with a single child node--it must be a root + node. If newRoot is ^(nil x) return x as newRoot. + + Be advised that it's ok for newRoot to point at oldRoot's + children; i.e., you don't have to copy the list. We are + constructing these nodes so we should have this control for + efficiency. + """ + + if isinstance(newRoot, Token): + newRoot = self.create(newRoot) + + if oldRoot is None: + return newRoot + + if not isinstance(newRoot, CommonTree): + newRoot = self.createWithPayload(newRoot) + + # handle ^(nil real-node) + if newRoot.isNil(): + nc = newRoot.getChildCount() + if nc == 1: + newRoot = newRoot.getChild(0) + + elif nc > 1: + # TODO: make tree run time exceptions hierarchy + raise RuntimeError("more than one node as root") + + # add oldRoot to newRoot; addChild takes care of case where oldRoot + # is a flat list (i.e., nil-rooted tree). All children of oldRoot + # are added to newRoot. 
+ newRoot.addChild(oldRoot) + return newRoot + + + def rulePostProcessing(self, root): + """Transform ^(nil x) to x and nil to null""" + + if root is not None and root.isNil(): + if root.getChildCount() == 0: + root = None + + elif root.getChildCount() == 1: + root = root.getChild(0) + # whoever invokes rule will set parent and child index + root.setParent(None) + root.setChildIndex(-1) + + return root + + + def createFromToken(self, tokenType, fromToken, text=None): + assert isinstance(tokenType, (int, long)), type(tokenType).__name__ + assert isinstance(fromToken, Token), type(fromToken).__name__ + assert text is None or isinstance(text, basestring), type(text).__name__ + + fromToken = self.createToken(fromToken) + fromToken.type = tokenType + if text is not None: + fromToken.text = text + t = self.createWithPayload(fromToken) + return t + + + def createFromType(self, tokenType, text): + assert isinstance(tokenType, (int, long)), type(tokenType).__name__ + assert isinstance(text, basestring) or text is None, type(text).__name__ + + fromToken = self.createToken(tokenType=tokenType, text=text) + t = self.createWithPayload(fromToken) + return t + + + def getType(self, t): + return t.getType() + + + def setType(self, t, type): + raise RuntimeError("don't know enough about Tree node") + + + def getText(self, t): + return t.getText() + + + def setText(self, t, text): + raise RuntimeError("don't know enough about Tree node") + + + def getChild(self, t, i): + return t.getChild(i) + + + def setChild(self, t, i, child): + t.setChild(i, child) + + + def deleteChild(self, t, i): + return t.deleteChild(i) + + + def getChildCount(self, t): + return t.getChildCount() + + + def getUniqueID(self, node): + return hash(node) + + + def createToken(self, fromToken=None, tokenType=None, text=None): + """ + Tell me how to create a token for use with imaginary token nodes. + For example, there is probably no input symbol associated with imaginary + token DECL, but you need to create it as a payload or whatever for + the DECL node as in ^(DECL type ID). + + If you care what the token payload objects' type is, you should + override this method and any other createToken variant. + """ + + raise NotImplementedError + + +############################################################################ +# +# common tree implementation +# +# Tree +# \- BaseTree +# \- CommonTree +# \- CommonErrorNode +# +# TreeAdaptor +# \- BaseTreeAdaptor +# \- CommonTreeAdaptor +# +############################################################################ + + +class CommonTree(BaseTree): + """@brief A tree node that is wrapper for a Token object. + + After 3.0 release + while building tree rewrite stuff, it became clear that computing + parent and child index is very difficult and cumbersome. Better to + spend the space in every tree node. If you don't want these extra + fields, it's easy to cut them out in your own BaseTree subclass. + + """ + + def __init__(self, payload): + BaseTree.__init__(self) + + # What token indexes bracket all tokens associated with this node + # and below? + self.startIndex = -1 + self.stopIndex = -1 + + # Who is the parent node of this node; if null, implies node is root + self.parent = None + + # What index is this node in the child list? 
Range: 0..n-1 + self.childIndex = -1 + + # A single token is the payload + if payload is None: + self.token = None + + elif isinstance(payload, CommonTree): + self.token = payload.token + self.startIndex = payload.startIndex + self.stopIndex = payload.stopIndex + + elif payload is None or isinstance(payload, Token): + self.token = payload + + else: + raise TypeError(type(payload).__name__) + + + + def getToken(self): + return self.token + + + def dupNode(self): + return CommonTree(self) + + + def isNil(self): + return self.token is None + + + def getType(self): + if self.token is None: + return INVALID_TOKEN_TYPE + + return self.token.getType() + + type = property(getType) + + + def getText(self): + if self.token is None: + return None + + return self.token.text + + text = property(getText) + + + def getLine(self): + if self.token is None or self.token.getLine() == 0: + if self.getChildCount(): + return self.getChild(0).getLine() + else: + return 0 + + return self.token.getLine() + + line = property(getLine) + + + def getCharPositionInLine(self): + if self.token is None or self.token.getCharPositionInLine() == -1: + if self.getChildCount(): + return self.getChild(0).getCharPositionInLine() + else: + return 0 + + else: + return self.token.getCharPositionInLine() + + charPositionInLine = property(getCharPositionInLine) + + + def getTokenStartIndex(self): + if self.startIndex == -1 and self.token is not None: + return self.token.getTokenIndex() + + return self.startIndex + + def setTokenStartIndex(self, index): + self.startIndex = index + + tokenStartIndex = property(getTokenStartIndex, setTokenStartIndex) + + + def getTokenStopIndex(self): + if self.stopIndex == -1 and self.token is not None: + return self.token.getTokenIndex() + + return self.stopIndex + + def setTokenStopIndex(self, index): + self.stopIndex = index + + tokenStopIndex = property(getTokenStopIndex, setTokenStopIndex) + + + def setUnknownTokenBoundaries(self): + """For every node in this subtree, make sure it's start/stop token's + are set. Walk depth first, visit bottom up. Only updates nodes + with at least one token index < 0. 
+        """
+
+        if self.children is None:
+            if self.startIndex < 0 or self.stopIndex < 0:
+                self.startIndex = self.stopIndex = self.token.getTokenIndex()
+
+            return
+
+        for child in self.children:
+            child.setUnknownTokenBoundaries()
+
+        if self.startIndex >= 0 and self.stopIndex >= 0:
+            # already set
+            return
+
+        if self.children:
+            firstChild = self.children[0]
+            lastChild = self.children[-1]
+            self.startIndex = firstChild.getTokenStartIndex()
+            self.stopIndex = lastChild.getTokenStopIndex()
+
+
+    def getChildIndex(self):
+        #FIXME: mark as deprecated
+        return self.childIndex
+
+
+    def setChildIndex(self, idx):
+        #FIXME: mark as deprecated
+        self.childIndex = idx
+
+
+    def getParent(self):
+        #FIXME: mark as deprecated
+        return self.parent
+
+
+    def setParent(self, t):
+        #FIXME: mark as deprecated
+        self.parent = t
+
+
+    def toString(self):
+        if self.isNil():
+            return "nil"
+
+        if self.getType() == INVALID_TOKEN_TYPE:
+            return "<errornode>"
+
+        return self.token.text
+
+    __str__ = toString
+
+
+
+    def toStringTree(self):
+        if not self.children:
+            return self.toString()
+
+        ret = ''
+        if not self.isNil():
+            ret += '(%s ' % (self.toString())
+
+        ret += ' '.join([child.toStringTree() for child in self.children])
+
+        if not self.isNil():
+            ret += ')'
+
+        return ret
+
+
+INVALID_NODE = CommonTree(INVALID_TOKEN)
+
+
+class CommonErrorNode(CommonTree):
+    """A node representing erroneous token range in token stream"""
+
+    def __init__(self, input, start, stop, exc):
+        CommonTree.__init__(self, None)
+
+        if (stop is None or
+            (stop.getTokenIndex() < start.getTokenIndex() and
+             stop.getType() != EOF
+             )
+            ):
+            # sometimes resync does not consume a token (when LT(1) is
+            # in follow set), so stop will be one to the left of start;
+            # adjust. Also handle the case where start is the first token
+            # and no token is consumed during recovery; LT(-1) will return
+            # null.
+            stop = start
+
+        self.input = input
+        self.start = start
+        self.stop = stop
+        self.trappedException = exc
+
+
+    def isNil(self):
+        return False
+
+
+    def getType(self):
+        return INVALID_TOKEN_TYPE
+
+
+    def getText(self):
+        if isinstance(self.start, Token):
+            i = self.start.getTokenIndex()
+            j = self.stop.getTokenIndex()
+            if self.stop.getType() == EOF:
+                j = self.input.size()
+
+            badText = self.input.toString(i, j)
+
+        elif isinstance(self.start, Tree):
+            badText = self.input.toString(self.start, self.stop)
+
+        else:
+            # people should subclass if they alter the tree type so this
+            # next one is for sure correct.
+            badText = "<unknown>"
+
+        return badText
+
+
+    def toString(self):
+        if isinstance(self.trappedException, MissingTokenException):
+            return ("<missing type: "
+                    + str(self.trappedException.getMissingType())
+                    + ">")
+
+        elif isinstance(self.trappedException, UnwantedTokenException):
+            return ("<extraneous: "
+                    + str(self.trappedException.getUnexpectedToken())
+                    + ", resync=" + self.getText() + ">")
+
+        elif isinstance(self.trappedException, MismatchedTokenException):
+            return ("<mismatched token: "
+                    + str(self.trappedException.token)
+                    + ", resync=" + self.getText() + ">")
+
+        elif isinstance(self.trappedException, NoViableAltException):
+            return ("<unexpected: "
+                    + str(self.trappedException.token)
+                    + ", resync=" + self.getText() + ">")
+
+        return "<error: " + self.getText() + ">"
+
+
+class CommonTreeAdaptor(BaseTreeAdaptor):
+    """
+    @brief A TreeAdaptor that works with any Tree implementation.
+
+    It provides
+    really just factory methods; all the work is done by BaseTreeAdaptor.
+    If you would like to have different tokens created than ClassicToken
+    objects, you need to override this and then set the parser tree adaptor to
+    use your subclass.
+
+    To get your parser to build nodes of a different type, override
+    create(Token), errorNode(), and to be safe, YourTreeClass.dupNode().
+    dupNode is called to duplicate nodes during rewrite operations.
+    """
+
+    def dupNode(self, treeNode):
+        """
+        Duplicate a node.
This is part of the factory; + override if you want another kind of node to be built. + + I could use reflection to prevent having to override this + but reflection is slow. + """ + + if treeNode is None: + return None + + return treeNode.dupNode() + + + def createWithPayload(self, payload): + return CommonTree(payload) + + + def createToken(self, fromToken=None, tokenType=None, text=None): + """ + Tell me how to create a token for use with imaginary token nodes. + For example, there is probably no input symbol associated with imaginary + token DECL, but you need to create it as a payload or whatever for + the DECL node as in ^(DECL type ID). + + If you care what the token payload objects' type is, you should + override this method and any other createToken variant. + """ + + if fromToken is not None: + return CommonToken(oldToken=fromToken) + + return CommonToken(type=tokenType, text=text) + + + def setTokenBoundaries(self, t, startToken, stopToken): + """ + Track start/stop token for subtree root created for a rule. + Only works with Tree nodes. For rules that match nothing, + seems like this will yield start=i and stop=i-1 in a nil node. + Might be useful info so I'll not force to be i..i. + """ + + if t is None: + return + + start = 0 + stop = 0 + + if startToken is not None: + start = startToken.index + + if stopToken is not None: + stop = stopToken.index + + t.setTokenStartIndex(start) + t.setTokenStopIndex(stop) + + + def getTokenStartIndex(self, t): + if t is None: + return -1 + return t.getTokenStartIndex() + + + def getTokenStopIndex(self, t): + if t is None: + return -1 + return t.getTokenStopIndex() + + + def getText(self, t): + if t is None: + return None + return t.getText() + + + def getType(self, t): + if t is None: + return INVALID_TOKEN_TYPE + + return t.getType() + + + def getToken(self, t): + """ + What is the Token associated with this node? If + you are not using CommonTree, then you must + override this in your own adaptor. + """ + + if isinstance(t, CommonTree): + return t.getToken() + + return None # no idea what to do + + + def getChild(self, t, i): + if t is None: + return None + return t.getChild(i) + + + def getChildCount(self, t): + if t is None: + return 0 + return t.getChildCount() + + + def getParent(self, t): + return t.getParent() + + + def setParent(self, t, parent): + t.setParent(parent) + + + def getChildIndex(self, t): + if t is None: + return 0 + return t.getChildIndex() + + + def setChildIndex(self, t, index): + t.setChildIndex(index) + + + def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): + if parent is not None: + parent.replaceChildren(startChildIndex, stopChildIndex, t) + + +############################################################################ +# +# streams +# +# TreeNodeStream +# \- BaseTree +# \- CommonTree +# +# TreeAdaptor +# \- BaseTreeAdaptor +# \- CommonTreeAdaptor +# +############################################################################ + + + +class TreeNodeStream(IntStream): + """@brief A stream of tree nodes + + It accessing nodes from a tree of some kind. + """ + + # TreeNodeStream is abstract, no need to complain about not implemented + # abstract methods + # pylint: disable-msg=W0223 + + def get(self, i): + """Get a tree node at an absolute index i; 0..n-1. + If you don't want to buffer up nodes, then this method makes no + sense for you. + """ + + raise NotImplementedError + + + def LT(self, k): + """ + Get tree node at current input pointer + i ahead where i=1 is next node. 
+ i<0 indicates nodes in the past. So LT(-1) is previous node, but + implementations are not required to provide results for k < -1. + LT(0) is undefined. For i>=n, return null. + Return null for LT(0) and any index that results in an absolute address + that is negative. + + This is analogus to the LT() method of the TokenStream, but this + returns a tree node instead of a token. Makes code gen identical + for both parser and tree grammars. :) + """ + + raise NotImplementedError + + + def getTreeSource(self): + """ + Where is this stream pulling nodes from? This is not the name, but + the object that provides node objects. + """ + + raise NotImplementedError + + + def getTokenStream(self): + """ + If the tree associated with this stream was created from a TokenStream, + you can specify it here. Used to do rule $text attribute in tree + parser. Optional unless you use tree parser rule text attribute + or output=template and rewrite=true options. + """ + + raise NotImplementedError + + + def getTreeAdaptor(self): + """ + What adaptor can tell me how to interpret/navigate nodes and + trees. E.g., get text of a node. + """ + + raise NotImplementedError + + + def setUniqueNavigationNodes(self, uniqueNavigationNodes): + """ + As we flatten the tree, we use UP, DOWN nodes to represent + the tree structure. When debugging we need unique nodes + so we have to instantiate new ones. When doing normal tree + parsing, it's slow and a waste of memory to create unique + navigation nodes. Default should be false; + """ + + raise NotImplementedError + + + def toString(self, start, stop): + """ + Return the text of all nodes from start to stop, inclusive. + If the stream does not buffer all the nodes then it can still + walk recursively from start until stop. You can always return + null or "" too, but users should not access $ruleLabel.text in + an action of course in that case. + """ + + raise NotImplementedError + + + # REWRITING TREES (used by tree parser) + def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): + """ + Replace from start to stop child index of parent with t, which might + be a list. Number of children may be different + after this call. The stream is notified because it is walking the + tree and might need to know you are monkeying with the underlying + tree. Also, it might be able to modify the node stream to avoid + restreaming for future phases. + + If parent is null, don't do anything; must be at root of overall tree. + Can't replace whatever points to the parent externally. Do nothing. + """ + + raise NotImplementedError + + +class CommonTreeNodeStream(TreeNodeStream): + """@brief A buffered stream of tree nodes. + + Nodes can be from a tree of ANY kind. + + This node stream sucks all nodes out of the tree specified in + the constructor during construction and makes pointers into + the tree using an array of Object pointers. The stream necessarily + includes pointers to DOWN and UP and EOF nodes. + + This stream knows how to mark/release for backtracking. + + This stream is most suitable for tree interpreters that need to + jump around a lot or for tree parsers requiring speed (at cost of memory). + There is some duplicated functionality here with UnBufferedTreeNodeStream + but just in bookkeeping, not tree walking etc... 
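+
+    A rough usage sketch, assuming 'tree' is an AST built elsewhere and
+    'tokens' is the TokenStream it was built from:
+
+        nodes = CommonTreeNodeStream(tree)
+        nodes.setTokenStream(tokens)
+        print nodes   # debugging aid: dumps the flattened token type stream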
+ + @see UnBufferedTreeNodeStream + """ + + def __init__(self, *args): + TreeNodeStream.__init__(self) + + if len(args) == 1: + adaptor = CommonTreeAdaptor() + tree = args[0] + + nodes = None + down = None + up = None + eof = None + + elif len(args) == 2: + adaptor = args[0] + tree = args[1] + + nodes = None + down = None + up = None + eof = None + + elif len(args) == 3: + parent = args[0] + start = args[1] + stop = args[2] + + adaptor = parent.adaptor + tree = parent.root + + nodes = parent.nodes[start:stop] + down = parent.down + up = parent.up + eof = parent.eof + + else: + raise TypeError("Invalid arguments") + + # all these navigation nodes are shared and hence they + # cannot contain any line/column info + if down is not None: + self.down = down + else: + self.down = adaptor.createFromType(DOWN, "DOWN") + + if up is not None: + self.up = up + else: + self.up = adaptor.createFromType(UP, "UP") + + if eof is not None: + self.eof = eof + else: + self.eof = adaptor.createFromType(EOF, "EOF") + + # The complete mapping from stream index to tree node. + # This buffer includes pointers to DOWN, UP, and EOF nodes. + # It is built upon ctor invocation. The elements are type + # Object as we don't what the trees look like. + + # Load upon first need of the buffer so we can set token types + # of interest for reverseIndexing. Slows us down a wee bit to + # do all of the if p==-1 testing everywhere though. + if nodes is not None: + self.nodes = nodes + else: + self.nodes = [] + + # Pull nodes from which tree? + self.root = tree + + # IF this tree (root) was created from a token stream, track it. + self.tokens = None + + # What tree adaptor was used to build these trees + self.adaptor = adaptor + + # Reuse same DOWN, UP navigation nodes unless this is true + self.uniqueNavigationNodes = False + + # The index into the nodes list of the current node (next node + # to consume). If -1, nodes array not filled yet. + self.p = -1 + + # Track the last mark() call result value for use in rewind(). + self.lastMarker = None + + # Stack of indexes used for push/pop calls + self.calls = [] + + + def fillBuffer(self): + """Walk tree with depth-first-search and fill nodes buffer. + Don't do DOWN, UP nodes if its a list (t is isNil). + """ + + self._fillBuffer(self.root) + self.p = 0 # buffer of nodes intialized now + + + def _fillBuffer(self, t): + nil = self.adaptor.isNil(t) + + if not nil: + self.nodes.append(t) # add this node + + # add DOWN node if t has children + n = self.adaptor.getChildCount(t) + if not nil and n > 0: + self.addNavigationNode(DOWN) + + # and now add all its children + for c in range(n): + self._fillBuffer(self.adaptor.getChild(t, c)) + + # add UP node if t has children + if not nil and n > 0: + self.addNavigationNode(UP) + + + def getNodeIndex(self, node): + """What is the stream index for node? 0..n-1 + Return -1 if node not found. + """ + + if self.p == -1: + self.fillBuffer() + + for i, t in enumerate(self.nodes): + if t == node: + return i + + return -1 + + + def addNavigationNode(self, ttype): + """ + As we flatten the tree, we use UP, DOWN nodes to represent + the tree structure. When debugging we need unique nodes + so instantiate new ones when uniqueNavigationNodes is true. 
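+
+        For example, the tree ^(A B ^(C D)) is buffered as the flat node
+        sequence
+
+            A DOWN B C DOWN D UP UP
+
+        (EOF is returned by LT() once the buffer is exhausted).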
+ """ + + navNode = None + + if ttype == DOWN: + if self.hasUniqueNavigationNodes(): + navNode = self.adaptor.createFromType(DOWN, "DOWN") + + else: + navNode = self.down + + else: + if self.hasUniqueNavigationNodes(): + navNode = self.adaptor.createFromType(UP, "UP") + + else: + navNode = self.up + + self.nodes.append(navNode) + + + def get(self, i): + if self.p == -1: + self.fillBuffer() + + return self.nodes[i] + + + def LT(self, k): + if self.p == -1: + self.fillBuffer() + + if k == 0: + return None + + if k < 0: + return self.LB(-k) + + if self.p + k - 1 >= len(self.nodes): + return self.eof + + return self.nodes[self.p + k - 1] + + + def getCurrentSymbol(self): + return self.LT(1) + + + def LB(self, k): + """Look backwards k nodes""" + + if k == 0: + return None + + if self.p - k < 0: + return None + + return self.nodes[self.p - k] + + + def getTreeSource(self): + return self.root + + + def getSourceName(self): + return self.getTokenStream().getSourceName() + + + def getTokenStream(self): + return self.tokens + + + def setTokenStream(self, tokens): + self.tokens = tokens + + + def getTreeAdaptor(self): + return self.adaptor + + + def hasUniqueNavigationNodes(self): + return self.uniqueNavigationNodes + + + def setUniqueNavigationNodes(self, uniqueNavigationNodes): + self.uniqueNavigationNodes = uniqueNavigationNodes + + + def consume(self): + if self.p == -1: + self.fillBuffer() + + self.p += 1 + + + def LA(self, i): + return self.adaptor.getType(self.LT(i)) + + + def mark(self): + if self.p == -1: + self.fillBuffer() + + + self.lastMarker = self.index() + return self.lastMarker + + + def release(self, marker=None): + # no resources to release + + pass + + + def index(self): + return self.p + + + def rewind(self, marker=None): + if marker is None: + marker = self.lastMarker + + self.seek(marker) + + + def seek(self, index): + if self.p == -1: + self.fillBuffer() + + self.p = index + + + def push(self, index): + """ + Make stream jump to a new location, saving old location. + Switch back with pop(). + """ + + self.calls.append(self.p) # save current index + self.seek(index) + + + def pop(self): + """ + Seek back to previous index saved during last push() call. + Return top of stack (return index). 
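+
+        Sketch (the index 30 is illustrative):
+
+            stream.push(30)    # save current position, jump to node 30
+            # ... walk that subtree ...
+            stream.pop()       # resume at the saved position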
+        """
+
+        ret = self.calls.pop(-1)
+        self.seek(ret)
+        return ret
+
+
+    def reset(self):
+        self.p = 0
+        self.lastMarker = 0
+        self.calls = []
+
+
+    def size(self):
+        if self.p == -1:
+            self.fillBuffer()
+
+        return len(self.nodes)
+
+
+    # TREE REWRITE INTERFACE
+
+    def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
+        if parent is not None:
+            self.adaptor.replaceChildren(
+                parent, startChildIndex, stopChildIndex, t
+                )
+
+
+    def __str__(self):
+        """Used for testing, just return the token type stream"""
+
+        if self.p == -1:
+            self.fillBuffer()
+
+        return ' '.join([str(self.adaptor.getType(node))
+                         for node in self.nodes
+                         ])
+
+
+    def toString(self, start, stop):
+        if start is None or stop is None:
+            return None
+
+        if self.p == -1:
+            self.fillBuffer()
+
+        #System.out.println("stop: "+stop);
+        #if ( start instanceof CommonTree )
+        #    System.out.print("toString: "+((CommonTree)start).getToken()+", ");
+        #else
+        #    System.out.println(start);
+        #if ( stop instanceof CommonTree )
+        #    System.out.println(((CommonTree)stop).getToken());
+        #else
+        #    System.out.println(stop);
+
+        # if we have the token stream, use that to dump text in order
+        if self.tokens is not None:
+            beginTokenIndex = self.adaptor.getTokenStartIndex(start)
+            endTokenIndex = self.adaptor.getTokenStopIndex(stop)
+
+            # if it's a tree, use start/stop index from start node
+            # else use token range from start/stop nodes
+            if self.adaptor.getType(stop) == UP:
+                endTokenIndex = self.adaptor.getTokenStopIndex(start)
+
+            elif self.adaptor.getType(stop) == EOF:
+                endTokenIndex = self.size() - 2  # don't use EOF
+
+            return self.tokens.toString(beginTokenIndex, endTokenIndex)
+
+        # walk nodes looking for start
+        i, t = 0, None
+        for i, t in enumerate(self.nodes):
+            if t == start:
+                break
+
+        # now walk until we see stop, filling string buffer with text
+        buf = []
+        t = self.nodes[i]
+        while t != stop:
+            text = self.adaptor.getText(t)
+            if text is None:
+                text = " " + str(self.adaptor.getType(t))
+
+            buf.append(text)
+            i += 1
+            t = self.nodes[i]
+
+        # include stop node too
+        text = self.adaptor.getText(stop)
+        if text is None:
+            text = " " + str(self.adaptor.getType(stop))
+
+        buf.append(text)
+
+        return ''.join(buf)
+
+
+    ## iterator interface
+    def __iter__(self):
+        if self.p == -1:
+            self.fillBuffer()
+
+        for node in self.nodes:
+            yield node
+
+
+#############################################################################
+#
+# tree parser
+#
+#############################################################################
+
+class TreeParser(BaseRecognizer):
+    """@brief Baseclass for generated tree parsers.
+
+    A parser for a stream of tree nodes. "tree grammars" result in a subclass
+    of this. All the error reporting and recovery is shared with Parser via
+    the BaseRecognizer superclass.
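+
+    A typical pipeline sketch; MyLexer, MyParser and MyWalker stand in for
+    classes generated from a grammar with output=AST, and 'prog' for a
+    start rule:
+
+        tokens = CommonTokenStream(MyLexer(ANTLRFileStream(fname)))
+        r = MyParser(tokens).prog()
+        nodes = CommonTreeNodeStream(r.tree)
+        nodes.setTokenStream(tokens)
+        MyWalker(nodes).prog()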
+    """
+
+    def __init__(self, input, state=None):
+        BaseRecognizer.__init__(self, state)
+
+        self.input = None
+        self.setTreeNodeStream(input)
+
+
+    def reset(self):
+        BaseRecognizer.reset(self) # reset all recognizer state variables
+        if self.input is not None:
+            self.input.seek(0) # rewind the input
+
+
+    def setTreeNodeStream(self, input):
+        """Set the input stream"""
+
+        self.input = input
+
+
+    def getTreeNodeStream(self):
+        return self.input
+
+
+    def getSourceName(self):
+        return self.input.getSourceName()
+
+
+    def getCurrentInputSymbol(self, input):
+        return input.LT(1)
+
+
+    def getMissingSymbol(self, input, e, expectedTokenType, follow):
+        tokenText = "<missing " + self.tokenNames[expectedTokenType] + ">"
+        return CommonTree(CommonToken(type=expectedTokenType, text=tokenText))
+
+
+    # precompiled regex used by inContext
+    dotdot = ".*[^.]\\.\\.[^.].*"
+    doubleEtc = ".*\\.\\.\\.\\s+\\.\\.\\..*"
+    dotdotPattern = re.compile(dotdot)
+    doubleEtcPattern = re.compile(doubleEtc)
+
+    def inContext(self, context, adaptor=None, tokenName=None, t=None):
+        """Check if current node in input has a context.
+
+        Context means sequence of nodes towards root of tree. For example,
+        you might say context is "MULT" which means my parent must be MULT.
+        "CLASS VARDEF" says current node must be child of a VARDEF and whose
+        parent is a CLASS node. You can use "..." to mean zero-or-more nodes.
+        "METHOD ... VARDEF" means my parent is VARDEF and somewhere above
+        that is a METHOD node. The first node in the context is not
+        necessarily the root. The context matcher stops matching and returns
+        true when it runs out of context. There is no way to force the first
+        node to be the root.
+        """
+
+        return self._inContext(
+            self.input.getTreeAdaptor(), self.getTokenNames(),
+            self.input.LT(1), context)
+
+    @classmethod
+    def _inContext(cls, adaptor, tokenNames, t, context):
+        """The worker for inContext.
+
+        It's static and full of parameters for testing purposes.
+        """
+
+        if cls.dotdotPattern.match(context):
+            # don't allow "..", must be "..."
+            raise ValueError("invalid syntax: ..")
+
+        if cls.doubleEtcPattern.match(context):
+            # don't allow double "..."
+            raise ValueError("invalid syntax: ... ...")
+
+        # ensure spaces around ...
+        context = context.replace("...", " ... ")
+        context = context.strip()
+        nodes = context.split()
+
+        ni = len(nodes) - 1
+        t = adaptor.getParent(t)
+        while ni >= 0 and t is not None:
+            if nodes[ni] == "...":
+                # walk upwards until we see nodes[ni-1] then continue walking
+                if ni == 0:
+                    # ... at start is no-op
+                    return True
+                goal = nodes[ni-1]
+                ancestor = cls._getAncestor(adaptor, tokenNames, t, goal)
+                if ancestor is None:
+                    return False
+                t = ancestor
+                ni -= 1
+
+            name = tokenNames[adaptor.getType(t)]
+            if name != nodes[ni]:
+                return False
+
+            # advance to parent and to previous element in context node list
+            ni -= 1
+            t = adaptor.getParent(t)
+
+        # at root but more nodes to match
+        if t is None and ni >= 0:
+            return False
+
+        return True
+
+    @staticmethod
+    def _getAncestor(adaptor, tokenNames, t, goal):
+        """Helper for static inContext."""
+        while t is not None:
+            name = tokenNames[adaptor.getType(t)]
+            if name == goal:
+                return t
+            t = adaptor.getParent(t)
+
+        return None
+
+
+    def matchAny(self, ignore): # ignore stream, copy of this.input
+        """
+        Match '.' in tree parser has special meaning. Skip node or
+        entire tree if node has children. If children, scan until
+        corresponding UP node.
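+
+        For example, in a tree grammar rule such as (illustrative)
+
+            stat : ^(ASSIGN ID .) ;
+
+        the '.' matches either a single node or a whole subtree sitting
+        in the value position.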
+        """
+
+        self._state.errorRecovery = False
+
+        look = self.input.LT(1)
+        if self.input.getTreeAdaptor().getChildCount(look) == 0:
+            self.input.consume() # not subtree, consume 1 node and return
+            return
+
+        # current node is a subtree, skip to corresponding UP.
+        # must count nesting level to get right UP
+        level = 0
+        tokenType = self.input.getTreeAdaptor().getType(look)
+        while tokenType != EOF and not (tokenType == UP and level == 0):
+            self.input.consume()
+            look = self.input.LT(1)
+            tokenType = self.input.getTreeAdaptor().getType(look)
+            if tokenType == DOWN:
+                level += 1
+
+            elif tokenType == UP:
+                level -= 1
+
+        self.input.consume() # consume UP
+
+
+    def mismatch(self, input, ttype, follow):
+        """
+        We have DOWN/UP nodes in the stream that have no line info; override,
+        plus we want to alter the exception type. Don't try to recover
+        from tree parser errors inline...
+        """
+
+        raise MismatchedTreeNodeException(ttype, input)
+
+
+    def getErrorHeader(self, e):
+        """
+        Prefix error message with the grammar name because message is
+        always intended for the programmer because the parser built
+        the input tree not the user.
+        """
+
+        return (self.getGrammarFileName()
+                + ": node from %sline %s:%s"
+                % (['', "after "][e.approximateLineInfo],
+                   e.line,
+                   e.charPositionInLine
+                   )
+                )
+
+    def getErrorMessage(self, e, tokenNames):
+        """
+        Tree parsers parse nodes; they usually have a token object as
+        payload. Set the exception token and do the default behavior.
+        """
+
+        if isinstance(self, TreeParser):
+            adaptor = e.input.getTreeAdaptor()
+            e.token = adaptor.getToken(e.node)
+            if e.token is None: # could be an UP/DOWN node
+                e.token = CommonToken(
+                    type=adaptor.getType(e.node),
+                    text=adaptor.getText(e.node)
+                    )
+
+        return BaseRecognizer.getErrorMessage(self, e, tokenNames)
+
+
+    def traceIn(self, ruleName, ruleIndex):
+        BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1))
+
+
+    def traceOut(self, ruleName, ruleIndex):
+        BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1))
+
+
+#############################################################################
+#
+# tree visitor
+#
+#############################################################################
+
+class TreeVisitor(object):
+    """Do a depth first walk of a tree, applying pre() and post() actions
+    as we go.
+    """
+
+    def __init__(self, adaptor=None):
+        if adaptor is not None:
+            self.adaptor = adaptor
+        else:
+            self.adaptor = CommonTreeAdaptor()
+
+    def visit(self, t, pre_action=None, post_action=None):
+        """Visit every node in tree t and trigger an action for each node
+        before/after having visited all of its children. Bottom up walk.
+        Execute both actions even if t has no children. Ignore return
+        results from transforming children since they will have altered
+        the child list of this node (their parent). Return result of
+        applying post action to this node.
+
+        The Python version differs from the Java version by taking two
+        callables 'pre_action' and 'post_action' instead of a class instance
+        that wraps those methods. Those callables must accept a TreeNode as
+        their single argument and return the (potentially transformed or
+        replaced) TreeNode.
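+
+        A sketch (Python 2, like the rest of this runtime; 'tree' and
+        'adaptor' are assumed to exist):
+
+            def show(t):
+                print t
+                return t
+
+            TreeVisitor(adaptor).visit(tree, pre_action=show)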
+ """ + + isNil = self.adaptor.isNil(t) + if pre_action is not None and not isNil: + # if rewritten, walk children of new t + t = pre_action(t) + + for idx in xrange(self.adaptor.getChildCount(t)): + child = self.adaptor.getChild(t, idx) + self.visit(child, pre_action, post_action) + + if post_action is not None and not isNil: + t = post_action(t) + + return t + + +############################################################################# +# +# streams for rule rewriting +# +############################################################################# + +class RewriteRuleElementStream(object): + """@brief Internal helper class. + + A generic list of elements tracked in an alternative to be used in + a -> rewrite rule. We need to subclass to fill in the next() method, + which returns either an AST node wrapped around a token payload or + an existing subtree. + + Once you start next()ing, do not try to add more elements. It will + break the cursor tracking I believe. + + @see org.antlr.runtime.tree.RewriteRuleSubtreeStream + @see org.antlr.runtime.tree.RewriteRuleTokenStream + + TODO: add mechanism to detect/puke on modification after reading from + stream + """ + + def __init__(self, adaptor, elementDescription, elements=None): + # Cursor 0..n-1. If singleElement!=null, cursor is 0 until you next(), + # which bumps it to 1 meaning no more elements. + self.cursor = 0 + + # Track single elements w/o creating a list. Upon 2nd add, alloc list + self.singleElement = None + + # The list of tokens or subtrees we are tracking + self.elements = None + + # Once a node / subtree has been used in a stream, it must be dup'd + # from then on. Streams are reset after subrules so that the streams + # can be reused in future subrules. So, reset must set a dirty bit. + # If dirty, then next() always returns a dup. + self.dirty = False + + # The element or stream description; usually has name of the token or + # rule reference that this list tracks. Can include rulename too, but + # the exception would track that info. + self.elementDescription = elementDescription + + self.adaptor = adaptor + + if isinstance(elements, (list, tuple)): + # Create a stream, but feed off an existing list + self.singleElement = None + self.elements = elements + + else: + # Create a stream with one element + self.add(elements) + + + def reset(self): + """ + Reset the condition of this stream so that it appears we have + not consumed any of its elements. Elements themselves are untouched. + Once we reset the stream, any future use will need duplicates. Set + the dirty bit. + """ + + self.cursor = 0 + self.dirty = True + + + def add(self, el): + if el is None: + return + + if self.elements is not None: # if in list, just add + self.elements.append(el) + return + + if self.singleElement is None: # no elements yet, track w/o list + self.singleElement = el + return + + # adding 2nd element, move to list + self.elements = [] + self.elements.append(self.singleElement) + self.singleElement = None + self.elements.append(el) + + + def nextTree(self): + """ + Return the next element in the stream. If out of elements, throw + an exception unless size()==1. If size is 1, then return elements[0]. + + Return a duplicate node/subtree if stream is out of elements and + size==1. If we've already used the element, dup (dirty bit set). 
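+
+        In generated rewrite code the pattern is roughly (a sketch;
+        stream_ID collects ID tokens while the rule is matched):
+
+            root = adaptor.nil()
+            while stream_ID.hasNext():
+                adaptor.addChild(root, stream_ID.nextTree())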
+ """ + + if (self.dirty + or (self.cursor >= len(self) and len(self) == 1) + ): + # if out of elements and size is 1, dup + el = self._next() + return self.dup(el) + + # test size above then fetch + el = self._next() + return el + + + def _next(self): + """ + do the work of getting the next element, making sure that it's + a tree node or subtree. Deal with the optimization of single- + element list versus list of size > 1. Throw an exception + if the stream is empty or we're out of elements and size>1. + protected so you can override in a subclass if necessary. + """ + + if len(self) == 0: + raise RewriteEmptyStreamException(self.elementDescription) + + if self.cursor >= len(self): # out of elements? + if len(self) == 1: # if size is 1, it's ok; return and we'll dup + return self.toTree(self.singleElement) + + # out of elements and size was not 1, so we can't dup + raise RewriteCardinalityException(self.elementDescription) + + # we have elements + if self.singleElement is not None: + self.cursor += 1 # move cursor even for single element list + return self.toTree(self.singleElement) + + # must have more than one in list, pull from elements + o = self.toTree(self.elements[self.cursor]) + self.cursor += 1 + return o + + + def dup(self, el): + """ + When constructing trees, sometimes we need to dup a token or AST + subtree. Dup'ing a token means just creating another AST node + around it. For trees, you must call the adaptor.dupTree() unless + the element is for a tree root; then it must be a node dup. + """ + + raise NotImplementedError + + + def toTree(self, el): + """ + Ensure stream emits trees; tokens must be converted to AST nodes. + AST nodes can be passed through unmolested. + """ + + return el + + + def hasNext(self): + return ( (self.singleElement is not None and self.cursor < 1) + or (self.elements is not None + and self.cursor < len(self.elements) + ) + ) + + + def size(self): + if self.singleElement is not None: + return 1 + + if self.elements is not None: + return len(self.elements) + + return 0 + + __len__ = size + + + def getDescription(self): + """Deprecated. Directly access elementDescription attribute""" + + return self.elementDescription + + +class RewriteRuleTokenStream(RewriteRuleElementStream): + """@brief Internal helper class.""" + + def toTree(self, el): + # Don't convert to a tree unless they explicitly call nextTree. + # This way we can do hetero tree nodes in rewrite. + return el + + + def nextNode(self): + t = self._next() + return self.adaptor.createWithPayload(t) + + + def nextToken(self): + return self._next() + + + def dup(self, el): + raise TypeError("dup can't be called for a token stream.") + + +class RewriteRuleSubtreeStream(RewriteRuleElementStream): + """@brief Internal helper class.""" + + def nextNode(self): + """ + Treat next element as a single node even if it's a subtree. + This is used instead of next() when the result has to be a + tree root node. Also prevents us from duplicating recently-added + children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration + must dup the type node, but ID has been added. + + Referencing a rule result twice is ok; dup entire tree as + we can't be adding trees as root; e.g., expr expr. + + Hideous code duplication here with super.next(). Can't think of + a proper way to refactor. This needs to always call dup node + and super.next() doesn't know which to call: dup node or dup tree. 
+ """
+
+ if (self.dirty
+ or (self.cursor >= len(self) and len(self) == 1)
+ ):
+ # if out of elements and size is 1, dup (at most a single node
+ # since this is for making root nodes).
+ el = self._next()
+ return self.adaptor.dupNode(el)
+
+ # test size above then fetch
+ el = self._next()
+ return el
+
+
+ def dup(self, el):
+ return self.adaptor.dupTree(el)
+
+
+
+class RewriteRuleNodeStream(RewriteRuleElementStream):
+ """
+ Queues up nodes matched on left side of -> in a tree parser. This is
+ the analog of RewriteRuleTokenStream for normal parsers.
+ """
+
+ def nextNode(self):
+ return self._next()
+
+
+ def toTree(self, el):
+ return self.adaptor.dupNode(el)
+
+
+ def dup(self, el):
+ # we dup every node, so don't have to worry about calling dup; short-
+ # circuited next() so it doesn't call.
+ raise TypeError("dup can't be called for a node stream.")
+
+
+class TreeRuleReturnScope(RuleReturnScope):
+ """
+ This is identical to the ParserRuleReturnScope except that
+ the start property is a tree node, not a Token object,
+ when you are parsing trees. To be generic the tree node types
+ have to be Object.
+ """
+
+ def __init__(self):
+ self.start = None
+ self.tree = None
+
+
+ def getStart(self):
+ return self.start
+
+
+ def getTree(self):
+ return self.tree
+
diff --git a/antlr/antlr_python_runtime-3.1.3/antlr3/treewizard.py b/antlr/antlr_python_runtime-3.1.3/antlr3/treewizard.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d8f4eb7f66b409c8790c3d6445ec050a067b372
--- /dev/null
+++ b/antlr/antlr_python_runtime-3.1.3/antlr3/treewizard.py
@@ -0,0 +1,614 @@
+""" @package antlr3.treewizard
+@brief ANTLR3 runtime package, treewizard module
+
+A utility module to create ASTs at runtime.
+See for an overview. Note that the API of the Python implementation is slightly different.
+
+"""
+
+# begin[licence]
+#
+# [The "BSD licence"]
+# Copyright (c) 2005-2008 Terence Parr
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# +# end[licence] + +from antlr3.constants import INVALID_TOKEN_TYPE +from antlr3.tokens import CommonToken +from antlr3.tree import CommonTree, CommonTreeAdaptor + + +def computeTokenTypes(tokenNames): + """ + Compute a dict that is an inverted index of + tokenNames (which maps int token types to names). + """ + + if tokenNames is None: + return {} + + return dict((name, type) for type, name in enumerate(tokenNames)) + + +## token types for pattern parser +EOF = -1 +BEGIN = 1 +END = 2 +ID = 3 +ARG = 4 +PERCENT = 5 +COLON = 6 +DOT = 7 + +class TreePatternLexer(object): + def __init__(self, pattern): + ## The tree pattern to lex like "(A B C)" + self.pattern = pattern + + ## Index into input string + self.p = -1 + + ## Current char + self.c = None + + ## How long is the pattern in char? + self.n = len(pattern) + + ## Set when token type is ID or ARG + self.sval = None + + self.error = False + + self.consume() + + + __idStartChar = frozenset( + 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' + ) + __idChar = __idStartChar | frozenset('0123456789') + + def nextToken(self): + self.sval = "" + while self.c != EOF: + if self.c in (' ', '\n', '\r', '\t'): + self.consume() + continue + + if self.c in self.__idStartChar: + self.sval += self.c + self.consume() + while self.c in self.__idChar: + self.sval += self.c + self.consume() + + return ID + + if self.c == '(': + self.consume() + return BEGIN + + if self.c == ')': + self.consume() + return END + + if self.c == '%': + self.consume() + return PERCENT + + if self.c == ':': + self.consume() + return COLON + + if self.c == '.': + self.consume() + return DOT + + if self.c == '[': # grab [x] as a string, returning x + self.consume() + while self.c != ']': + if self.c == '\\': + self.consume() + if self.c != ']': + self.sval += '\\' + + self.sval += self.c + + else: + self.sval += self.c + + self.consume() + + self.consume() + return ARG + + self.consume() + self.error = True + return EOF + + return EOF + + + def consume(self): + self.p += 1 + if self.p >= self.n: + self.c = EOF + + else: + self.c = self.pattern[self.p] + + +class TreePatternParser(object): + def __init__(self, tokenizer, wizard, adaptor): + self.tokenizer = tokenizer + self.wizard = wizard + self.adaptor = adaptor + self.ttype = tokenizer.nextToken() # kickstart + + + def pattern(self): + if self.ttype == BEGIN: + return self.parseTree() + + elif self.ttype == ID: + node = self.parseNode() + if self.ttype == EOF: + return node + + return None # extra junk on end + + return None + + + def parseTree(self): + if self.ttype != BEGIN: + return None + + self.ttype = self.tokenizer.nextToken() + root = self.parseNode() + if root is None: + return None + + while self.ttype in (BEGIN, ID, PERCENT, DOT): + if self.ttype == BEGIN: + subtree = self.parseTree() + self.adaptor.addChild(root, subtree) + + else: + child = self.parseNode() + if child is None: + return None + + self.adaptor.addChild(root, child) + + if self.ttype != END: + return None + + self.ttype = self.tokenizer.nextToken() + return root + + + def parseNode(self): + # "%label:" prefix + label = None + + if self.ttype == PERCENT: + self.ttype = self.tokenizer.nextToken() + if self.ttype != ID: + return None + + label = self.tokenizer.sval + self.ttype = self.tokenizer.nextToken() + if self.ttype != COLON: + return None + + self.ttype = self.tokenizer.nextToken() # move to ID following colon + + # Wildcard? 
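+ # A lone '.' matches any single node. The token type 0 on the
+ # payload token is only a placeholder; wildcard matching is keyed
+ # off the WildcardTreePattern class, not off the token type.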
+ if self.ttype == DOT: + self.ttype = self.tokenizer.nextToken() + wildcardPayload = CommonToken(0, ".") + node = WildcardTreePattern(wildcardPayload) + if label is not None: + node.label = label + return node + + # "ID" or "ID[arg]" + if self.ttype != ID: + return None + + tokenName = self.tokenizer.sval + self.ttype = self.tokenizer.nextToken() + + if tokenName == "nil": + return self.adaptor.nil() + + text = tokenName + # check for arg + arg = None + if self.ttype == ARG: + arg = self.tokenizer.sval + text = arg + self.ttype = self.tokenizer.nextToken() + + # create node + treeNodeType = self.wizard.getTokenType(tokenName) + if treeNodeType == INVALID_TOKEN_TYPE: + return None + + node = self.adaptor.createFromType(treeNodeType, text) + if label is not None and isinstance(node, TreePattern): + node.label = label + + if arg is not None and isinstance(node, TreePattern): + node.hasTextArg = True + + return node + + +class TreePattern(CommonTree): + """ + When using %label:TOKENNAME in a tree for parse(), we must + track the label. + """ + + def __init__(self, payload): + CommonTree.__init__(self, payload) + + self.label = None + self.hasTextArg = None + + + def toString(self): + if self.label is not None: + return '%' + self.label + ':' + CommonTree.toString(self) + + else: + return CommonTree.toString(self) + + +class WildcardTreePattern(TreePattern): + pass + + +class TreePatternTreeAdaptor(CommonTreeAdaptor): + """This adaptor creates TreePattern objects for use during scan()""" + + def createWithPayload(self, payload): + return TreePattern(payload) + + +class TreeWizard(object): + """ + Build and navigate trees with this object. Must know about the names + of tokens so you have to pass in a map or array of token names (from which + this class can build the map). I.e., Token DECL means nothing unless the + class can translate it to a token type. + + In order to create nodes and navigate, this class needs a TreeAdaptor. + + This class can build a token type -> node index for repeated use or for + iterating over the various nodes with a particular type. + + This class works in conjunction with the TreeAdaptor rather than moving + all this functionality into the adaptor. An adaptor helps build and + navigate trees using methods. This class helps you do it with string + patterns like "(A B C)". You can create a tree from that pattern or + match subtrees against it. + """ + + def __init__(self, adaptor=None, tokenNames=None, typeMap=None): + self.adaptor = adaptor + if typeMap is None: + self.tokenNameToTypeMap = computeTokenTypes(tokenNames) + + else: + if tokenNames is not None: + raise ValueError("Can't have both tokenNames and typeMap") + + self.tokenNameToTypeMap = typeMap + + + def getTokenType(self, tokenName): + """Using the map of token names to token types, return the type.""" + + try: + return self.tokenNameToTypeMap[tokenName] + except KeyError: + return INVALID_TOKEN_TYPE + + + def create(self, pattern): + """ + Create a tree or node from the indicated tree pattern that closely + follows ANTLR tree grammar tree element syntax: + + (root child1 ... child2). + + You can also just pass in a node: ID + + Any node can have a text argument: ID[foo] + (notice there are no quotes around foo--it's clear it's a string). + + nil is a special name meaning "give me a nil node". Useful for + making lists: (nil A B C) is a list of A B C. 
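+
+ A hypothetical example (the adaptor and token names here are
+ assumptions for illustration, not part of the runtime API):
+
+ wizard = TreeWizard(CommonTreeAdaptor(),
+ ['<invalid>', '<EOR>', '<DOWN>', '<UP>', 'A', 'B', 'C'])
+ t = wizard.create("(A B C)") # tree with root A and children B, C
+ n = wizard.create("B[foo]") # single B node with text "foo"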
+ """
+
+ tokenizer = TreePatternLexer(pattern)
+ parser = TreePatternParser(tokenizer, self, self.adaptor)
+ return parser.pattern()
+
+
+ def index(self, tree):
+ """Walk the entire tree and make a token type to nodes mapping.
+
+ For now, use recursion, but a nonrecursive version may later be
+ more efficient. Returns a dict int -> list where the list is
+ of your AST node type. The int is the token type of the node.
+ """
+
+ m = {}
+ self._index(tree, m)
+ return m
+
+
+ def _index(self, t, m):
+ """Do the work for index"""
+
+ if t is None:
+ return
+
+ ttype = self.adaptor.getType(t)
+ elements = m.get(ttype)
+ if elements is None:
+ m[ttype] = elements = []
+
+ elements.append(t)
+ for i in range(self.adaptor.getChildCount(t)):
+ child = self.adaptor.getChild(t, i)
+ self._index(child, m)
+
+
+ def find(self, tree, what):
+ """Return a list of matching tokens.
+
+ what may be either an integer specifying the token type to find or
+ a string with a pattern that must be matched.
+
+ """
+
+ if isinstance(what, (int, long)):
+ return self._findTokenType(tree, what)
+
+ elif isinstance(what, basestring):
+ return self._findPattern(tree, what)
+
+ else:
+ raise TypeError("'what' must be string or integer")
+
+
+ def _findTokenType(self, t, ttype):
+ """Return a list of tree nodes with token type ttype"""
+
+ nodes = []
+
+ def visitor(tree, parent, childIndex, labels):
+ nodes.append(tree)
+
+ self.visit(t, ttype, visitor)
+
+ return nodes
+
+
+ def _findPattern(self, t, pattern):
+ """Return a list of subtrees matching pattern."""
+
+ subtrees = []
+
+ # Create a TreePattern from the pattern
+ tokenizer = TreePatternLexer(pattern)
+ parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor())
+ tpattern = parser.pattern()
+
+ # don't allow invalid patterns
+ if (tpattern is None or tpattern.isNil()
+ or isinstance(tpattern, WildcardTreePattern)):
+ return None
+
+ rootTokenType = tpattern.getType()
+
+ def visitor(tree, parent, childIndex, label):
+ if self._parse(tree, tpattern, None):
+ subtrees.append(tree)
+
+ self.visit(t, rootTokenType, visitor)
+
+ return subtrees
+
+
+ def visit(self, tree, what, visitor):
+ """Visit every node in tree matching what, invoking the visitor.
+
+ If what is a string, it is parsed as a pattern and only matching
+ subtrees will be visited.
+ The implementation uses the root node of the pattern in combination
+ with visit(t, ttype, visitor) so nil-rooted patterns are not allowed.
+ Patterns with wildcard roots are also not allowed.
+
+ If what is an integer, it is used as a token type and visit will match
+ all nodes of that type (this is faster than the pattern match).
+ The labels arg of the visitor action method is never set (it's None)
+ since using a token type rather than a pattern doesn't let us set a
+ label.
+ """
+
+ if isinstance(what, (int, long)):
+ self._visitType(tree, None, 0, what, visitor)
+
+ elif isinstance(what, basestring):
+ self._visitPattern(tree, what, visitor)
+
+ else:
+ raise TypeError("'what' must be string or integer")
+
+
+ def _visitType(self, t, parent, childIndex, ttype, visitor):
+ """Do the recursive work for visit"""
+
+ if t is None:
+ return
+
+ if self.adaptor.getType(t) == ttype:
+ visitor(t, parent, childIndex, None)
+
+ for i in range(self.adaptor.getChildCount(t)):
+ child = self.adaptor.getChild(t, i)
+ self._visitType(child, t, i, ttype, visitor)
+
+
+ def _visitPattern(self, tree, pattern, visitor):
+ """
+ For all subtrees that match the pattern, execute the visit action.
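+
+ For illustration (the pattern and token names are assumptions):
+ visiting with '(ASSIGN %lhs:ID %rhs:.)' invokes the action once per
+ matching ASSIGN subtree and passes a labels dict that maps 'lhs' and
+ 'rhs' to the matched child nodes.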
+ """ + + # Create a TreePattern from the pattern + tokenizer = TreePatternLexer(pattern) + parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) + tpattern = parser.pattern() + + # don't allow invalid patterns + if (tpattern is None or tpattern.isNil() + or isinstance(tpattern, WildcardTreePattern)): + return + + rootTokenType = tpattern.getType() + + def rootvisitor(tree, parent, childIndex, labels): + labels = {} + if self._parse(tree, tpattern, labels): + visitor(tree, parent, childIndex, labels) + + self.visit(tree, rootTokenType, rootvisitor) + + + def parse(self, t, pattern, labels=None): + """ + Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels + on the various nodes and '.' (dot) as the node/subtree wildcard, + return true if the pattern matches and fill the labels Map with + the labels pointing at the appropriate nodes. Return false if + the pattern is malformed or the tree does not match. + + If a node specifies a text arg in pattern, then that must match + for that node in t. + """ + + tokenizer = TreePatternLexer(pattern) + parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) + tpattern = parser.pattern() + + return self._parse(t, tpattern, labels) + + + def _parse(self, t1, tpattern, labels): + """ + Do the work for parse. Check to see if the tpattern fits the + structure and token types in t1. Check text if the pattern has + text arguments on nodes. Fill labels map with pointers to nodes + in tree matched against nodes in pattern with labels. + """ + + # make sure both are non-null + if t1 is None or tpattern is None: + return False + + # check roots (wildcard matches anything) + if not isinstance(tpattern, WildcardTreePattern): + if self.adaptor.getType(t1) != tpattern.getType(): + return False + + # if pattern has text, check node text + if (tpattern.hasTextArg + and self.adaptor.getText(t1) != tpattern.getText()): + return False + + if tpattern.label is not None and labels is not None: + # map label in pattern to node in t1 + labels[tpattern.label] = t1 + + # check children + n1 = self.adaptor.getChildCount(t1) + n2 = tpattern.getChildCount() + if n1 != n2: + return False + + for i in range(n1): + child1 = self.adaptor.getChild(t1, i) + child2 = tpattern.getChild(i) + if not self._parse(child1, child2, labels): + return False + + return True + + + def equals(self, t1, t2, adaptor=None): + """ + Compare t1 and t2; return true if token types/text, structure match + exactly. + The trees are examined in their entirety so that (A B) does not match + (A B C) nor (A (B C)). 
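+
+ For example, assuming a wizard whose token names include A, B and C:
+
+ wizard.equals(wizard.create("(A B C)"),
+ wizard.create("(A B C)")) # -> True
+ wizard.equals(wizard.create("(A B C)"),
+ wizard.create("(A (B C))")) # -> False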
+ """ + + if adaptor is None: + adaptor = self.adaptor + + return self._equals(t1, t2, adaptor) + + + def _equals(self, t1, t2, adaptor): + # make sure both are non-null + if t1 is None or t2 is None: + return False + + # check roots + if adaptor.getType(t1) != adaptor.getType(t2): + return False + + if adaptor.getText(t1) != adaptor.getText(t2): + return False + + # check children + n1 = adaptor.getChildCount(t1) + n2 = adaptor.getChildCount(t2) + if n1 != n2: + return False + + for i in range(n1): + child1 = adaptor.getChild(t1, i) + child2 = adaptor.getChild(t2, i) + if not self._equals(child1, child2, adaptor): + return False + + return True diff --git a/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/PKG-INFO b/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..32bcc2e886981e73a3b0f5a707636bfa82778ec2 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/PKG-INFO @@ -0,0 +1,13 @@ +Metadata-Version: 1.0 +Name: antlr-python-runtime +Version: 3.1.3 +Summary: Runtime package for ANTLR3 +Home-page: http://www.antlr.org/ +Author: Benjamin Niemann +Author-email: pink@odahoda.de +License: BSD +Download-URL: http://www.antlr.org/download.html +Description: This is the runtime package for ANTLR3, which is required to use parsers + generated by ANTLR3. + +Platform: UNKNOWN diff --git a/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/SOURCES.txt b/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..a95956337048d7c2a9ddaa06b92b9eebe4eee6f0 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/SOURCES.txt @@ -0,0 +1,24 @@ +AUTHORS +LICENSE +MANIFEST.in +README +ez_setup.py +setup.py +antlr3/__init__.py +antlr3/compat.py +antlr3/constants.py +antlr3/debug.py +antlr3/dfa.py +antlr3/dottreegen.py +antlr3/exceptions.py +antlr3/extras.py +antlr3/main.py +antlr3/recognizers.py +antlr3/streams.py +antlr3/tokens.py +antlr3/tree.py +antlr3/treewizard.py +antlr_python_runtime.egg-info/PKG-INFO +antlr_python_runtime.egg-info/SOURCES.txt +antlr_python_runtime.egg-info/dependency_links.txt +antlr_python_runtime.egg-info/top_level.txt diff --git a/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/dependency_links.txt b/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/top_level.txt b/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6ea0008bce8bb1d407e666c29e3faf5dfa278e4 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/antlr_python_runtime.egg-info/top_level.txt @@ -0,0 +1 @@ +antlr3 diff --git a/antlr/antlr_python_runtime-3.1.3/ez_setup.py b/antlr/antlr_python_runtime-3.1.3/ez_setup.py new file mode 100644 index 0000000000000000000000000000000000000000..38c09c624d3b8c7af799934408cd46dc6cb96147 --- /dev/null +++ b/antlr/antlr_python_runtime-3.1.3/ez_setup.py @@ -0,0 +1,228 @@ +#!python +"""Bootstrap setuptools installation + +If you want to use setuptools in your package's setup.py, just include this 
+file in the same directory with it, and add this to the top of your setup.py:: + + from ez_setup import use_setuptools + use_setuptools() + +If you want to require a specific version of setuptools, set a download +mirror, or use an alternate download directory, you can do so by supplying +the appropriate options to ``use_setuptools()``. + +This file can also be run as a script to install or upgrade setuptools. +""" +import sys +DEFAULT_VERSION = "0.6c5" +DEFAULT_URL = "http://cheeseshop.python.org/packages/%s/s/setuptools/" % sys.version[:3] + +md5_data = { + 'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca', + 'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb', + 'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b', + 'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a', + 'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618', + 'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac', + 'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5', + 'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4', + 'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c', + 'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b', + 'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27', + 'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277', + 'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa', + 'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e', + 'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e', + 'setuptools-0.6c4-py2.3.egg': 'b0b9131acab32022bfac7f44c5d7971f', + 'setuptools-0.6c4-py2.4.egg': '2a1f9656d4fbf3c97bf946c0a124e6e2', + 'setuptools-0.6c4-py2.5.egg': '8f5a052e32cdb9c72bcf4b5526f28afc', + 'setuptools-0.6c5-py2.3.egg': 'ee9fd80965da04f2f3e6b3576e9d8167', + 'setuptools-0.6c5-py2.4.egg': 'afe2adf1c01701ee841761f5bcd8aa64', + 'setuptools-0.6c5-py2.5.egg': 'a8d3f61494ccaa8714dfed37bccd3d5d', +} + +import sys, os + +def _validate_md5(egg_name, data): + if egg_name in md5_data: + from md5 import md5 + digest = md5(data).hexdigest() + if digest != md5_data[egg_name]: + print >>sys.stderr, ( + "md5 validation of %s failed! (Possible download problem?)" + % egg_name + ) + sys.exit(2) + return data + + +def use_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + download_delay=15 +): + """Automatically find/download setuptools and make it available on sys.path + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end with + a '/'). `to_dir` is the directory where setuptools will be downloaded, if + it is not already available. If `download_delay` is specified, it should + be the number of seconds that will be paused before initiating a download, + should one be required. If an older version of setuptools is installed, + this routine will print a message to ``sys.stderr`` and raise SystemExit in + an attempt to abort the calling script. + """ + try: + import setuptools + if setuptools.__version__ == '0.0.1': + print >>sys.stderr, ( + "You have an obsolete version of setuptools installed. Please\n" + "remove it from your system entirely before rerunning this script." 
+ ) + sys.exit(2) + except ImportError: + egg = download_setuptools(version, download_base, to_dir, download_delay) + sys.path.insert(0, egg) + import setuptools; setuptools.bootstrap_install_from = egg + + import pkg_resources + try: + pkg_resources.require("setuptools>="+version) + + except pkg_resources.VersionConflict, e: + # XXX could we install in a subprocess here? + print >>sys.stderr, ( + "The required version of setuptools (>=%s) is not available, and\n" + "can't be installed while this script is running. Please install\n" + " a more recent version first.\n\n(Currently using %r)" + ) % (version, e.args[0]) + sys.exit(2) + +def download_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + delay = 15 +): + """Download setuptools from a specified location and return its filename + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end + with a '/'). `to_dir` is the directory where the egg will be downloaded. + `delay` is the number of seconds to pause before an actual download attempt. + """ + import urllib2, shutil + egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3]) + url = download_base + egg_name + saveto = os.path.join(to_dir, egg_name) + src = dst = None + if not os.path.exists(saveto): # Avoid repeated downloads + try: + from distutils import log + if delay: + log.warn(""" +--------------------------------------------------------------------------- +This script requires setuptools version %s to run (even to display +help). I will attempt to download it for you (from +%s), but +you may need to enable firewall access for this script first. +I will start the download in %d seconds. + +(Note: if this machine does not have network access, please obtain the file + + %s + +and place it in this directory before rerunning this script.) +---------------------------------------------------------------------------""", + version, download_base, delay, url + ); from time import sleep; sleep(delay) + log.warn("Downloading %s", url) + src = urllib2.urlopen(url) + # Read/write all in one block, so we don't create a corrupt file + # if the download is interrupted. + data = _validate_md5(egg_name, src.read()) + dst = open(saveto,"wb"); dst.write(data) + finally: + if src: src.close() + if dst: dst.close() + return os.path.realpath(saveto) + +def main(argv, version=DEFAULT_VERSION): + """Install or upgrade setuptools and EasyInstall""" + + try: + import setuptools + except ImportError: + egg = None + try: + egg = download_setuptools(version, delay=0) + sys.path.insert(0,egg) + from setuptools.command.easy_install import main + return main(list(argv)+[egg]) # we're done here + finally: + if egg and os.path.exists(egg): + os.unlink(egg) + else: + if setuptools.__version__ == '0.0.1': + # tell the user to uninstall obsolete version + use_setuptools(version) + + req = "setuptools>="+version + import pkg_resources + try: + pkg_resources.require(req) + except pkg_resources.VersionConflict: + try: + from setuptools.command.easy_install import main + except ImportError: + from easy_install import main + main(list(argv)+[download_setuptools(delay=0)]) + sys.exit(0) # try to force an exit + else: + if argv: + from setuptools.command.easy_install import main + main(argv) + else: + print "Setuptools version",version,"or greater has been installed." 
+ print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)'
+
+
+
+def update_md5(filenames):
+ """Update our built-in md5 registry"""
+
+ import re
+ from md5 import md5
+
+ for name in filenames:
+ base = os.path.basename(name)
+ f = open(name,'rb')
+ md5_data[base] = md5(f.read()).hexdigest()
+ f.close()
+
+ data = [" %r: %r,\n" % it for it in md5_data.items()]
+ data.sort()
+ repl = "".join(data)
+
+ import inspect
+ srcfile = inspect.getsourcefile(sys.modules[__name__])
+ f = open(srcfile, 'rb'); src = f.read(); f.close()
+
+ match = re.search("\nmd5_data = {\n([^}]+)}", src)
+ if not match:
+ print >>sys.stderr, "Internal error!"
+ sys.exit(2)
+
+ src = src[:match.start(1)] + repl + src[match.end(1):]
+ f = open(srcfile,'w')
+ f.write(src)
+ f.close()
+
+
+if __name__=='__main__':
+ if len(sys.argv)>2 and sys.argv[1]=='--md5update':
+ update_md5(sys.argv[2:])
+ else:
+ main(sys.argv[1:])
+
+
+
+
+
diff --git a/antlr/antlr_python_runtime-3.1.3/setup.cfg b/antlr/antlr_python_runtime-3.1.3/setup.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..861a9f554263efb088d8636c4f17a30696e495ad
--- /dev/null
+++ b/antlr/antlr_python_runtime-3.1.3/setup.cfg
@@ -0,0 +1,5 @@
+[egg_info]
+tag_build = 
+tag_date = 0
+tag_svn_revision = 0
+
diff --git a/antlr/antlr_python_runtime-3.1.3/setup.py b/antlr/antlr_python_runtime-3.1.3/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ebf579b1d78abda5d6da11b9e4a17c75184c61f
--- /dev/null
+++ b/antlr/antlr_python_runtime-3.1.3/setup.py
@@ -0,0 +1,308 @@
+# bootstrapping setuptools
+import ez_setup
+ez_setup.use_setuptools()
+
+import os
+import sys
+import textwrap
+from distutils.errors import *
+from distutils.command.clean import clean as _clean
+from distutils.cmd import Command
+from setuptools import setup
+from distutils import log
+
+from distutils.core import setup
+
+
+class clean(_clean):
+ """Also cleanup local temp files."""
+
+ def run(self):
+ _clean.run(self)
+
+ import fnmatch
+
+ # kill temporary files
+ patterns = [
+ # generic tempfiles
+ '*~', '*.bak', '*.pyc',
+
+ # tempfiles generated by ANTLR runs
+ 't[0-9]*Lexer.py', 't[0-9]*Parser.py',
+ '*.tokens', '*__.g',
+ ]
+
+ for path in ('antlr3', 'unittests', 'tests'):
+ path = os.path.join(os.path.dirname(__file__), path)
+ if os.path.isdir(path):
+ for root, dirs, files in os.walk(path, topdown=True):
+ graveyard = []
+ for pat in patterns:
+ graveyard.extend(fnmatch.filter(files, pat))
+
+ for name in graveyard:
+ filePath = os.path.join(root, name)
+
+ try:
+ log.info("removing '%s'", filePath)
+ os.unlink(filePath)
+ except OSError, exc:
+ log.warn(
+ "Failed to delete '%s': %s",
+ filePath, exc
+ )
+
+
+class TestError(DistutilsError):
+ pass
+
+
+# grml.. the class name appears in the --help output:
+# ...
+# Options for 'CmdUnitTest' command
+# ...
+# so I have to use a rather ugly name...
+class unittest(Command):
+ """Run unit tests for package"""
+
+ description = "run unit tests for package"
+
+ user_options = [
+ ('xml-output=', None,
+ "Directory for JUnit compatible XML files."),
+ ]
+ boolean_options = []
+
+ def initialize_options(self):
+ self.xml_output = None
+
+ def finalize_options(self):
+ pass
+
+ def run(self):
+ testDir = os.path.join(os.path.dirname(__file__), 'unittests')
+ if not os.path.isdir(testDir):
+ raise DistutilsFileError(
+ "There is no 'unittests' directory. Did you fetch the "
+ "development version?",
+ )
+
+ import glob
+ import imp
+ import unittest
+ import traceback
+ import StringIO
+
+ suite = unittest.TestSuite()
+ loadFailures = []
+
+ # collect tests from all unittests/test*.py files
+ testFiles = []
+ for testPath in glob.glob(os.path.join(testDir, 'test*.py')):
+ testFiles.append(testPath)
+
+ testFiles.sort()
+ for testPath in testFiles:
+ testID = os.path.basename(testPath)[:-3]
+
+ try:
+ modFile, modPathname, modDescription \
+ = imp.find_module(testID, [testDir])
+
+ testMod = imp.load_module(
+ testID, modFile, modPathname, modDescription
+ )
+
+ suite.addTests(
+ unittest.defaultTestLoader.loadTestsFromModule(testMod)
+ )
+
+ except Exception:
+ buf = StringIO.StringIO()
+ traceback.print_exc(file=buf)
+
+ loadFailures.append(
+ (os.path.basename(testPath), buf.getvalue())
+ )
+
+ if self.xml_output:
+ import xmlrunner
+ runner = xmlrunner.XMLTestRunner(
+ stream=open(os.path.join(self.xml_output, 'unittest.xml'), 'w'))
+ else:
+ runner = unittest.TextTestRunner(verbosity=2)
+ result = runner.run(suite)
+
+ for testName, error in loadFailures:
+ sys.stderr.write('\n' + '='*70 + '\n')
+ sys.stderr.write(
+ "Failed to load test module %s\n" % testName
+ )
+ sys.stderr.write(error)
+ sys.stderr.write('\n')
+
+ if not result.wasSuccessful() or loadFailures:
+ raise TestError(
+ "Unit test suite failed!",
+ )
+
+
+class functest(Command):
+ """Run functional tests for package"""
+
+ description = "run functional tests for package"
+
+ user_options = [
+ ('testcase=', None,
+ "testcase to run [default: run all]"),
+ ('antlr-version=', None,
+ "ANTLR version to use [default: HEAD (in ../../build)]"),
+ ('antlr-jar=', None,
+ "Explicit path to an antlr jar (overrides --antlr-version)"),
+ ('xml-output=', None,
+ "Directory for JUnit compatible XML files."),
+ ]
+
+ boolean_options = []
+
+ def initialize_options(self):
+ self.testcase = None
+ self.antlr_version = 'HEAD'
+ self.antlr_jar = None
+ self.xml_output = None
+
+ def finalize_options(self):
+ pass
+
+
+ def run(self):
+ import glob
+ import imp
+ import unittest
+ import traceback
+ import StringIO
+
+ testDir = os.path.join(os.path.dirname(__file__), 'tests')
+ if not os.path.isdir(testDir):
+ raise DistutilsFileError(
+ "There is no 'tests' directory. Did you fetch the "
+ "development version?",
+ )
+
+ # make sure relative imports from testcases work
+ sys.path.insert(0, testDir)
+
+ rootDir = os.path.abspath(
+ os.path.join(os.path.dirname(__file__), '..', '..'))
+
+ if self.antlr_jar is not None:
+ classpath = [self.antlr_jar]
+ elif self.antlr_version == 'HEAD':
+ classpath = [
+ os.path.join(rootDir, 'tool', 'target', 'classes'),
+ os.path.join(rootDir, 'runtime', 'Java', 'target', 'classes')
+ ]
+ else:
+ classpath = [
+ os.path.join(rootDir, 'archive',
+ 'antlr-%s.jar' % self.antlr_version)
+ ]
+
+ classpath.extend([
+ os.path.join(rootDir, 'lib', 'antlr-2.7.7.jar'),
+ os.path.join(rootDir, 'lib', 'stringtemplate-3.2.jar'),
+ os.path.join(rootDir, 'lib', 'junit-4.2.jar')
+ ])
+ os.environ['CLASSPATH'] = ':'.join(classpath)
+
+ os.environ['ANTLRVERSION'] = self.antlr_version
+
+ suite = unittest.TestSuite()
+ loadFailures = []
+
+ # collect tests from all tests/t*.py files
+ testFiles = []
+ for testPath in glob.glob(os.path.join(testDir, 't*.py')):
+ if (testPath.endswith('Lexer.py')
+ or testPath.endswith('Parser.py')
+ ):
+ continue
+
+ # if a single testcase has been selected, filter out all other
+ # tests
+ if (self.testcase is not None
+ and os.path.basename(testPath)[:-3] != self.testcase
+ ):
+ continue
+
+ testFiles.append(testPath)
+
+ testFiles.sort()
+ for testPath in testFiles:
+ testID = os.path.basename(testPath)[:-3]
+
+ try:
+ modFile, modPathname, modDescription \
+ = imp.find_module(testID, [testDir])
+
+ testMod = imp.load_module(
+ testID, modFile, modPathname, modDescription
+ )
+
+ suite.addTests(
+ unittest.defaultTestLoader.loadTestsFromModule(testMod)
+ )
+
+ except Exception:
+ buf = StringIO.StringIO()
+ traceback.print_exc(file=buf)
+
+ loadFailures.append(
+ (os.path.basename(testPath), buf.getvalue())
+ )
+
+
+ if self.xml_output:
+ import xmlrunner
+ runner = xmlrunner.XMLTestRunner(
+ stream=open(os.path.join(self.xml_output, 'functest.xml'), 'w'))
+ else:
+ runner = unittest.TextTestRunner(verbosity=2)
+
+ result = runner.run(suite)
+
+ for testName, error in loadFailures:
+ sys.stderr.write('\n' + '='*70 + '\n')
+ sys.stderr.write(
+ "Failed to load test module %s\n" % testName
+ )
+ sys.stderr.write(error)
+ sys.stderr.write('\n')
+
+ if not result.wasSuccessful() or loadFailures:
+ raise TestError(
+ "Functional test suite failed!",
+ )
+
+
+setup(name='antlr_python_runtime',
+ version='3.1.3',
+ packages=['antlr3'],
+
+ author="Benjamin Niemann",
+ author_email="pink@odahoda.de",
+ url="http://www.antlr.org/",
+ download_url="http://www.antlr.org/download.html",
+ license="BSD",
+ description="Runtime package for ANTLR3",
+ long_description=textwrap.dedent('''\
+ This is the runtime package for ANTLR3, which is required to use parsers
+ generated by ANTLR3.
+ '''),
+
+
+ cmdclass={'unittest': unittest,
+ 'functest': functest,
+ 'clean': clean
+ },
+ )