mirror of
https://gitee.com/kekingcn/file-online-preview.git
synced 2026-06-19 04:17:07 +00:00
集成OpenOffice替换为LibreOffice
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""The pgen2 package."""
|
||||
@@ -0,0 +1,257 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""Convert graminit.[ch] spit out by pgen to Python code.
|
||||
|
||||
Pgen is the Python parser generator. It is useful to quickly create a
|
||||
parser from a grammar file in Python's grammar notation. But I don't
|
||||
want my parsers to be written in C (yet), so I'm translating the
|
||||
parsing tables to Python data structures and writing a Python parse
|
||||
engine.
|
||||
|
||||
Note that the token numbers are constants determined by the standard
|
||||
Python tokenizer. The standard token module defines these numbers and
|
||||
their names (the names are not used much). The token numbers are
|
||||
hardcoded into the Python tokenizer and into pgen. A Python
|
||||
implementation of the Python tokenizer is also available, in the
|
||||
standard tokenize module.
|
||||
|
||||
On the other hand, symbol numbers (representing the grammar's
|
||||
non-terminals) are assigned by pgen based on the actual grammar
|
||||
input.
|
||||
|
||||
Note: this module is pretty much obsolete; the pgen module generates
|
||||
equivalent grammar tables directly from the Grammar.txt input file
|
||||
without having to invoke the Python pgen C program.
|
||||
|
||||
"""
|
||||
|
||||
# Python imports
|
||||
import re
|
||||
|
||||
# Local imports
|
||||
from pgen2 import grammar, token
|
||||
|
||||
|
||||
class Converter(grammar.Grammar):
|
||||
"""Grammar subclass that reads classic pgen output files.
|
||||
|
||||
The run() method reads the tables as produced by the pgen parser
|
||||
generator, typically contained in two C files, graminit.h and
|
||||
graminit.c. The other methods are for internal use only.
|
||||
|
||||
See the base class for more documentation.
|
||||
|
||||
"""
|
||||
|
||||
def run(self, graminit_h, graminit_c):
|
||||
"""Load the grammar tables from the text files written by pgen."""
|
||||
self.parse_graminit_h(graminit_h)
|
||||
self.parse_graminit_c(graminit_c)
|
||||
self.finish_off()
|
||||
|
||||
def parse_graminit_h(self, filename):
|
||||
"""Parse the .h file written by pgen. (Internal)
|
||||
|
||||
This file is a sequence of #define statements defining the
|
||||
nonterminals of the grammar as numbers. We build two tables
|
||||
mapping the numbers to names and back.
|
||||
|
||||
"""
|
||||
try:
|
||||
f = open(filename)
|
||||
except OSError as err:
|
||||
print("Can't open %s: %s" % (filename, err))
|
||||
return False
|
||||
self.symbol2number = {}
|
||||
self.number2symbol = {}
|
||||
lineno = 0
|
||||
for line in f:
|
||||
lineno += 1
|
||||
mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
|
||||
if not mo and line.strip():
|
||||
print("%s(%s): can't parse %s" % (filename, lineno,
|
||||
line.strip()))
|
||||
else:
|
||||
symbol, number = mo.groups()
|
||||
number = int(number)
|
||||
assert symbol not in self.symbol2number
|
||||
assert number not in self.number2symbol
|
||||
self.symbol2number[symbol] = number
|
||||
self.number2symbol[number] = symbol
|
||||
return True
|
||||
|
||||
def parse_graminit_c(self, filename):
|
||||
"""Parse the .c file written by pgen. (Internal)
|
||||
|
||||
The file looks as follows. The first two lines are always this:
|
||||
|
||||
#include "pgenheaders.h"
|
||||
#include "grammar.h"
|
||||
|
||||
After that come four blocks:
|
||||
|
||||
1) one or more state definitions
|
||||
2) a table defining dfas
|
||||
3) a table defining labels
|
||||
4) a struct defining the grammar
|
||||
|
||||
A state definition has the following form:
|
||||
- one or more arc arrays, each of the form:
|
||||
static arc arcs_<n>_<m>[<k>] = {
|
||||
{<i>, <j>},
|
||||
...
|
||||
};
|
||||
- followed by a state array, of the form:
|
||||
static state states_<s>[<t>] = {
|
||||
{<k>, arcs_<n>_<m>},
|
||||
...
|
||||
};
|
||||
|
||||
"""
|
||||
try:
|
||||
f = open(filename)
|
||||
except OSError as err:
|
||||
print("Can't open %s: %s" % (filename, err))
|
||||
return False
|
||||
# The code below essentially uses f's iterator-ness!
|
||||
lineno = 0
|
||||
|
||||
# Expect the two #include lines
|
||||
lineno, line = lineno+1, next(f)
|
||||
assert line == '#include "pgenheaders.h"\n', (lineno, line)
|
||||
lineno, line = lineno+1, next(f)
|
||||
assert line == '#include "grammar.h"\n', (lineno, line)
|
||||
|
||||
# Parse the state definitions
|
||||
lineno, line = lineno+1, next(f)
|
||||
allarcs = {}
|
||||
states = []
|
||||
while line.startswith("static arc "):
|
||||
while line.startswith("static arc "):
|
||||
mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
|
||||
line)
|
||||
assert mo, (lineno, line)
|
||||
n, m, k = list(map(int, mo.groups()))
|
||||
arcs = []
|
||||
for _ in range(k):
|
||||
lineno, line = lineno+1, next(f)
|
||||
mo = re.match(r"\s+{(\d+), (\d+)},$", line)
|
||||
assert mo, (lineno, line)
|
||||
i, j = list(map(int, mo.groups()))
|
||||
arcs.append((i, j))
|
||||
lineno, line = lineno+1, next(f)
|
||||
assert line == "};\n", (lineno, line)
|
||||
allarcs[(n, m)] = arcs
|
||||
lineno, line = lineno+1, next(f)
|
||||
mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
|
||||
assert mo, (lineno, line)
|
||||
s, t = list(map(int, mo.groups()))
|
||||
assert s == len(states), (lineno, line)
|
||||
state = []
|
||||
for _ in range(t):
|
||||
lineno, line = lineno+1, next(f)
|
||||
mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
|
||||
assert mo, (lineno, line)
|
||||
k, n, m = list(map(int, mo.groups()))
|
||||
arcs = allarcs[n, m]
|
||||
assert k == len(arcs), (lineno, line)
|
||||
state.append(arcs)
|
||||
states.append(state)
|
||||
lineno, line = lineno+1, next(f)
|
||||
assert line == "};\n", (lineno, line)
|
||||
lineno, line = lineno+1, next(f)
|
||||
self.states = states
|
||||
|
||||
# Parse the dfas
|
||||
dfas = {}
|
||||
mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
|
||||
assert mo, (lineno, line)
|
||||
ndfas = int(mo.group(1))
|
||||
for i in range(ndfas):
|
||||
lineno, line = lineno+1, next(f)
|
||||
mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
|
||||
line)
|
||||
assert mo, (lineno, line)
|
||||
symbol = mo.group(2)
|
||||
number, x, y, z = list(map(int, mo.group(1, 3, 4, 5)))
|
||||
assert self.symbol2number[symbol] == number, (lineno, line)
|
||||
assert self.number2symbol[number] == symbol, (lineno, line)
|
||||
assert x == 0, (lineno, line)
|
||||
state = states[z]
|
||||
assert y == len(state), (lineno, line)
|
||||
lineno, line = lineno+1, next(f)
|
||||
mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
|
||||
assert mo, (lineno, line)
|
||||
first = {}
|
||||
rawbitset = eval(mo.group(1))
|
||||
for i, c in enumerate(rawbitset):
|
||||
byte = ord(c)
|
||||
for j in range(8):
|
||||
if byte & (1<<j):
|
||||
first[i*8 + j] = 1
|
||||
dfas[number] = (state, first)
|
||||
lineno, line = lineno+1, next(f)
|
||||
assert line == "};\n", (lineno, line)
|
||||
self.dfas = dfas
|
||||
|
||||
# Parse the labels
|
||||
labels = []
|
||||
lineno, line = lineno+1, next(f)
|
||||
mo = re.match(r"static label labels\[(\d+)\] = {$", line)
|
||||
assert mo, (lineno, line)
|
||||
nlabels = int(mo.group(1))
|
||||
for i in range(nlabels):
|
||||
lineno, line = lineno+1, next(f)
|
||||
mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
|
||||
assert mo, (lineno, line)
|
||||
x, y = mo.groups()
|
||||
x = int(x)
|
||||
if y == "0":
|
||||
y = None
|
||||
else:
|
||||
y = eval(y)
|
||||
labels.append((x, y))
|
||||
lineno, line = lineno+1, next(f)
|
||||
assert line == "};\n", (lineno, line)
|
||||
self.labels = labels
|
||||
|
||||
# Parse the grammar struct
|
||||
lineno, line = lineno+1, next(f)
|
||||
assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
|
||||
lineno, line = lineno+1, next(f)
|
||||
mo = re.match(r"\s+(\d+),$", line)
|
||||
assert mo, (lineno, line)
|
||||
ndfas = int(mo.group(1))
|
||||
assert ndfas == len(self.dfas)
|
||||
lineno, line = lineno+1, next(f)
|
||||
assert line == "\tdfas,\n", (lineno, line)
|
||||
lineno, line = lineno+1, next(f)
|
||||
mo = re.match(r"\s+{(\d+), labels},$", line)
|
||||
assert mo, (lineno, line)
|
||||
nlabels = int(mo.group(1))
|
||||
assert nlabels == len(self.labels), (lineno, line)
|
||||
lineno, line = lineno+1, next(f)
|
||||
mo = re.match(r"\s+(\d+)$", line)
|
||||
assert mo, (lineno, line)
|
||||
start = int(mo.group(1))
|
||||
assert start in self.number2symbol, (lineno, line)
|
||||
self.start = start
|
||||
lineno, line = lineno+1, next(f)
|
||||
assert line == "};\n", (lineno, line)
|
||||
try:
|
||||
lineno, line = lineno+1, next(f)
|
||||
except StopIteration:
|
||||
pass
|
||||
else:
|
||||
assert 0, (lineno, line)
|
||||
|
||||
def finish_off(self):
|
||||
"""Create additional useful structures. (Internal)."""
|
||||
self.keywords = {} # map from keyword strings to arc labels
|
||||
self.tokens = {} # map from numeric token values to arc labels
|
||||
for ilabel, (type, value) in enumerate(self.labels):
|
||||
if type == token.NAME and value is not None:
|
||||
self.keywords[value] = ilabel
|
||||
elif value is None:
|
||||
self.tokens[type] = ilabel
|
||||
@@ -0,0 +1,177 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Modifications:
|
||||
# Copyright 2006 Google, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""Parser driver.
|
||||
|
||||
This provides a high-level interface to parse a file into a syntax tree.
|
||||
|
||||
"""
|
||||
|
||||
__author__ = "Guido van Rossum <guido@python.org>"
|
||||
|
||||
__all__ = ["Driver", "load_grammar"]
|
||||
|
||||
# Python imports
|
||||
import io
|
||||
import os
|
||||
import logging
|
||||
import pkgutil
|
||||
import sys
|
||||
|
||||
# Pgen imports
|
||||
from . import grammar, parse, token, tokenize, pgen
|
||||
|
||||
|
||||
class Driver(object):
|
||||
|
||||
def __init__(self, grammar, convert=None, logger=None):
|
||||
self.grammar = grammar
|
||||
if logger is None:
|
||||
logger = logging.getLogger()
|
||||
self.logger = logger
|
||||
self.convert = convert
|
||||
|
||||
def parse_tokens(self, tokens, debug=False):
|
||||
"""Parse a series of tokens and return the syntax tree."""
|
||||
# XXX Move the prefix computation into a wrapper around tokenize.
|
||||
p = parse.Parser(self.grammar, self.convert)
|
||||
p.setup()
|
||||
lineno = 1
|
||||
column = 0
|
||||
type = value = start = end = line_text = None
|
||||
prefix = ""
|
||||
for quintuple in tokens:
|
||||
type, value, start, end, line_text = quintuple
|
||||
if start != (lineno, column):
|
||||
assert (lineno, column) <= start, ((lineno, column), start)
|
||||
s_lineno, s_column = start
|
||||
if lineno < s_lineno:
|
||||
prefix += "\n" * (s_lineno - lineno)
|
||||
lineno = s_lineno
|
||||
column = 0
|
||||
if column < s_column:
|
||||
prefix += line_text[column:s_column]
|
||||
column = s_column
|
||||
if type in (tokenize.COMMENT, tokenize.NL):
|
||||
prefix += value
|
||||
lineno, column = end
|
||||
if value.endswith("\n"):
|
||||
lineno += 1
|
||||
column = 0
|
||||
continue
|
||||
if type == token.OP:
|
||||
type = grammar.opmap[value]
|
||||
if debug:
|
||||
self.logger.debug("%s %r (prefix=%r)",
|
||||
token.tok_name[type], value, prefix)
|
||||
if p.addtoken(type, value, (prefix, start)):
|
||||
if debug:
|
||||
self.logger.debug("Stop.")
|
||||
break
|
||||
prefix = ""
|
||||
lineno, column = end
|
||||
if value.endswith("\n"):
|
||||
lineno += 1
|
||||
column = 0
|
||||
else:
|
||||
# We never broke out -- EOF is too soon (how can this happen???)
|
||||
raise parse.ParseError("incomplete input",
|
||||
type, value, (prefix, start))
|
||||
return p.rootnode
|
||||
|
||||
def parse_stream_raw(self, stream, debug=False):
|
||||
"""Parse a stream and return the syntax tree."""
|
||||
tokens = tokenize.generate_tokens(stream.readline)
|
||||
return self.parse_tokens(tokens, debug)
|
||||
|
||||
def parse_stream(self, stream, debug=False):
|
||||
"""Parse a stream and return the syntax tree."""
|
||||
return self.parse_stream_raw(stream, debug)
|
||||
|
||||
def parse_file(self, filename, encoding=None, debug=False):
|
||||
"""Parse a file and return the syntax tree."""
|
||||
with io.open(filename, "r", encoding=encoding) as stream:
|
||||
return self.parse_stream(stream, debug)
|
||||
|
||||
def parse_string(self, text, debug=False):
|
||||
"""Parse a string and return the syntax tree."""
|
||||
tokens = tokenize.generate_tokens(io.StringIO(text).readline)
|
||||
return self.parse_tokens(tokens, debug)
|
||||
|
||||
|
||||
def _generate_pickle_name(gt):
|
||||
head, tail = os.path.splitext(gt)
|
||||
if tail == ".txt":
|
||||
tail = ""
|
||||
return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
|
||||
|
||||
|
||||
def load_grammar(gt="Grammar.txt", gp=None,
|
||||
save=True, force=False, logger=None):
|
||||
"""Load the grammar (maybe from a pickle)."""
|
||||
if logger is None:
|
||||
logger = logging.getLogger()
|
||||
gp = _generate_pickle_name(gt) if gp is None else gp
|
||||
if force or not _newer(gp, gt):
|
||||
logger.info("Generating grammar tables from %s", gt)
|
||||
g = pgen.generate_grammar(gt)
|
||||
if save:
|
||||
logger.info("Writing grammar tables to %s", gp)
|
||||
try:
|
||||
g.dump(gp)
|
||||
except OSError as e:
|
||||
logger.info("Writing failed: %s", e)
|
||||
else:
|
||||
g = grammar.Grammar()
|
||||
g.load(gp)
|
||||
return g
|
||||
|
||||
|
||||
def _newer(a, b):
|
||||
"""Inquire whether file a was written since file b."""
|
||||
if not os.path.exists(a):
|
||||
return False
|
||||
if not os.path.exists(b):
|
||||
return True
|
||||
return os.path.getmtime(a) >= os.path.getmtime(b)
|
||||
|
||||
|
||||
def load_packaged_grammar(package, grammar_source):
|
||||
"""Normally, loads a pickled grammar by doing
|
||||
pkgutil.get_data(package, pickled_grammar)
|
||||
where *pickled_grammar* is computed from *grammar_source* by adding the
|
||||
Python version and using a ``.pickle`` extension.
|
||||
|
||||
However, if *grammar_source* is an extant file, load_grammar(grammar_source)
|
||||
is called instead. This facilitates using a packaged grammar file when needed
|
||||
but preserves load_grammar's automatic regeneration behavior when possible.
|
||||
|
||||
"""
|
||||
if os.path.isfile(grammar_source):
|
||||
return load_grammar(grammar_source)
|
||||
pickled_name = _generate_pickle_name(os.path.basename(grammar_source))
|
||||
data = pkgutil.get_data(package, pickled_name)
|
||||
g = grammar.Grammar()
|
||||
g.loads(data)
|
||||
return g
|
||||
|
||||
|
||||
def main(*args):
|
||||
"""Main program, when run as a script: produce grammar pickle files.
|
||||
|
||||
Calls load_grammar for each argument, a path to a grammar text file.
|
||||
"""
|
||||
if not args:
|
||||
args = sys.argv[1:]
|
||||
logging.basicConfig(level=logging.INFO, stream=sys.stdout,
|
||||
format='%(message)s')
|
||||
for gt in args:
|
||||
load_grammar(gt, save=True, force=True)
|
||||
return True
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(int(not main()))
|
||||
@@ -0,0 +1,188 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""This module defines the data structures used to represent a grammar.
|
||||
|
||||
These are a bit arcane because they are derived from the data
|
||||
structures used by Python's 'pgen' parser generator.
|
||||
|
||||
There's also a table here mapping operators to their names in the
|
||||
token module; the Python tokenize module reports all operators as the
|
||||
fallback token code OP, but the parser needs the actual token code.
|
||||
|
||||
"""
|
||||
|
||||
# Python imports
|
||||
import pickle
|
||||
|
||||
# Local imports
|
||||
from . import token
|
||||
|
||||
|
||||
class Grammar(object):
|
||||
"""Pgen parsing tables conversion class.
|
||||
|
||||
Once initialized, this class supplies the grammar tables for the
|
||||
parsing engine implemented by parse.py. The parsing engine
|
||||
accesses the instance variables directly. The class here does not
|
||||
provide initialization of the tables; several subclasses exist to
|
||||
do this (see the conv and pgen modules).
|
||||
|
||||
The load() method reads the tables from a pickle file, which is
|
||||
much faster than the other ways offered by subclasses. The pickle
|
||||
file is written by calling dump() (after loading the grammar
|
||||
tables using a subclass). The report() method prints a readable
|
||||
representation of the tables to stdout, for debugging.
|
||||
|
||||
The instance variables are as follows:
|
||||
|
||||
symbol2number -- a dict mapping symbol names to numbers. Symbol
|
||||
numbers are always 256 or higher, to distinguish
|
||||
them from token numbers, which are between 0 and
|
||||
255 (inclusive).
|
||||
|
||||
number2symbol -- a dict mapping numbers to symbol names;
|
||||
these two are each other's inverse.
|
||||
|
||||
states -- a list of DFAs, where each DFA is a list of
|
||||
states, each state is a list of arcs, and each
|
||||
arc is a (i, j) pair where i is a label and j is
|
||||
a state number. The DFA number is the index into
|
||||
this list. (This name is slightly confusing.)
|
||||
Final states are represented by a special arc of
|
||||
the form (0, j) where j is its own state number.
|
||||
|
||||
dfas -- a dict mapping symbol numbers to (DFA, first)
|
||||
pairs, where DFA is an item from the states list
|
||||
above, and first is a set of tokens that can
|
||||
begin this grammar rule (represented by a dict
|
||||
whose values are always 1).
|
||||
|
||||
labels -- a list of (x, y) pairs where x is either a token
|
||||
number or a symbol number, and y is either None
|
||||
or a string; the strings are keywords. The label
|
||||
number is the index in this list; label numbers
|
||||
are used to mark state transitions (arcs) in the
|
||||
DFAs.
|
||||
|
||||
start -- the number of the grammar's start symbol.
|
||||
|
||||
keywords -- a dict mapping keyword strings to arc labels.
|
||||
|
||||
tokens -- a dict mapping token numbers to arc labels.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.symbol2number = {}
|
||||
self.number2symbol = {}
|
||||
self.states = []
|
||||
self.dfas = {}
|
||||
self.labels = [(0, "EMPTY")]
|
||||
self.keywords = {}
|
||||
self.tokens = {}
|
||||
self.symbol2label = {}
|
||||
self.start = 256
|
||||
|
||||
def dump(self, filename):
|
||||
"""Dump the grammar tables to a pickle file."""
|
||||
with open(filename, "wb") as f:
|
||||
pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
def load(self, filename):
|
||||
"""Load the grammar tables from a pickle file."""
|
||||
with open(filename, "rb") as f:
|
||||
d = pickle.load(f)
|
||||
self.__dict__.update(d)
|
||||
|
||||
def loads(self, pkl):
|
||||
"""Load the grammar tables from a pickle bytes object."""
|
||||
self.__dict__.update(pickle.loads(pkl))
|
||||
|
||||
def copy(self):
|
||||
"""
|
||||
Copy the grammar.
|
||||
"""
|
||||
new = self.__class__()
|
||||
for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
|
||||
"tokens", "symbol2label"):
|
||||
setattr(new, dict_attr, getattr(self, dict_attr).copy())
|
||||
new.labels = self.labels[:]
|
||||
new.states = self.states[:]
|
||||
new.start = self.start
|
||||
return new
|
||||
|
||||
def report(self):
|
||||
"""Dump the grammar tables to standard output, for debugging."""
|
||||
from pprint import pprint
|
||||
print("s2n")
|
||||
pprint(self.symbol2number)
|
||||
print("n2s")
|
||||
pprint(self.number2symbol)
|
||||
print("states")
|
||||
pprint(self.states)
|
||||
print("dfas")
|
||||
pprint(self.dfas)
|
||||
print("labels")
|
||||
pprint(self.labels)
|
||||
print("start", self.start)
|
||||
|
||||
|
||||
# Map from operator to number (since tokenize doesn't do this)
|
||||
|
||||
opmap_raw = """
|
||||
( LPAR
|
||||
) RPAR
|
||||
[ LSQB
|
||||
] RSQB
|
||||
: COLON
|
||||
, COMMA
|
||||
; SEMI
|
||||
+ PLUS
|
||||
- MINUS
|
||||
* STAR
|
||||
/ SLASH
|
||||
| VBAR
|
||||
& AMPER
|
||||
< LESS
|
||||
> GREATER
|
||||
= EQUAL
|
||||
. DOT
|
||||
% PERCENT
|
||||
` BACKQUOTE
|
||||
{ LBRACE
|
||||
} RBRACE
|
||||
@ AT
|
||||
@= ATEQUAL
|
||||
== EQEQUAL
|
||||
!= NOTEQUAL
|
||||
<> NOTEQUAL
|
||||
<= LESSEQUAL
|
||||
>= GREATEREQUAL
|
||||
~ TILDE
|
||||
^ CIRCUMFLEX
|
||||
<< LEFTSHIFT
|
||||
>> RIGHTSHIFT
|
||||
** DOUBLESTAR
|
||||
+= PLUSEQUAL
|
||||
-= MINEQUAL
|
||||
*= STAREQUAL
|
||||
/= SLASHEQUAL
|
||||
%= PERCENTEQUAL
|
||||
&= AMPEREQUAL
|
||||
|= VBAREQUAL
|
||||
^= CIRCUMFLEXEQUAL
|
||||
<<= LEFTSHIFTEQUAL
|
||||
>>= RIGHTSHIFTEQUAL
|
||||
**= DOUBLESTAREQUAL
|
||||
// DOUBLESLASH
|
||||
//= DOUBLESLASHEQUAL
|
||||
-> RARROW
|
||||
:= COLONEQUAL
|
||||
"""
|
||||
|
||||
opmap = {}
|
||||
for line in opmap_raw.splitlines():
|
||||
if line:
|
||||
op, name = line.split()
|
||||
opmap[op] = getattr(token, name)
|
||||
@@ -0,0 +1,60 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""Safely evaluate Python string literals without using eval()."""
|
||||
|
||||
import re
|
||||
|
||||
simple_escapes = {"a": "\a",
|
||||
"b": "\b",
|
||||
"f": "\f",
|
||||
"n": "\n",
|
||||
"r": "\r",
|
||||
"t": "\t",
|
||||
"v": "\v",
|
||||
"'": "'",
|
||||
'"': '"',
|
||||
"\\": "\\"}
|
||||
|
||||
def escape(m):
|
||||
all, tail = m.group(0, 1)
|
||||
assert all.startswith("\\")
|
||||
esc = simple_escapes.get(tail)
|
||||
if esc is not None:
|
||||
return esc
|
||||
if tail.startswith("x"):
|
||||
hexes = tail[1:]
|
||||
if len(hexes) < 2:
|
||||
raise ValueError("invalid hex string escape ('\\%s')" % tail)
|
||||
try:
|
||||
i = int(hexes, 16)
|
||||
except ValueError:
|
||||
raise ValueError("invalid hex string escape ('\\%s')" % tail) from None
|
||||
else:
|
||||
try:
|
||||
i = int(tail, 8)
|
||||
except ValueError:
|
||||
raise ValueError("invalid octal string escape ('\\%s')" % tail) from None
|
||||
return chr(i)
|
||||
|
||||
def evalString(s):
|
||||
assert s.startswith("'") or s.startswith('"'), repr(s[:1])
|
||||
q = s[0]
|
||||
if s[:3] == q*3:
|
||||
q = q*3
|
||||
assert s.endswith(q), repr(s[-len(q):])
|
||||
assert len(s) >= 2*len(q)
|
||||
s = s[len(q):-len(q)]
|
||||
return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)
|
||||
|
||||
def test():
|
||||
for i in range(256):
|
||||
c = chr(i)
|
||||
s = repr(c)
|
||||
e = evalString(s)
|
||||
if e != c:
|
||||
print(i, c, s, e)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test()
|
||||
@@ -0,0 +1,204 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
"""Parser engine for the grammar tables generated by pgen.
|
||||
|
||||
The grammar table must be loaded first.
|
||||
|
||||
See Parser/parser.c in the Python distribution for additional info on
|
||||
how this parsing engine works.
|
||||
|
||||
"""
|
||||
|
||||
# Local imports
|
||||
from . import token
|
||||
|
||||
class ParseError(Exception):
|
||||
"""Exception to signal the parser is stuck."""
|
||||
|
||||
def __init__(self, msg, type, value, context):
|
||||
Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
|
||||
(msg, type, value, context))
|
||||
self.msg = msg
|
||||
self.type = type
|
||||
self.value = value
|
||||
self.context = context
|
||||
|
||||
def __reduce__(self):
|
||||
return type(self), (self.msg, self.type, self.value, self.context)
|
||||
|
||||
class Parser(object):
|
||||
"""Parser engine.
|
||||
|
||||
The proper usage sequence is:
|
||||
|
||||
p = Parser(grammar, [converter]) # create instance
|
||||
p.setup([start]) # prepare for parsing
|
||||
<for each input token>:
|
||||
if p.addtoken(...): # parse a token; may raise ParseError
|
||||
break
|
||||
root = p.rootnode # root of abstract syntax tree
|
||||
|
||||
A Parser instance may be reused by calling setup() repeatedly.
|
||||
|
||||
A Parser instance contains state pertaining to the current token
|
||||
sequence, and should not be used concurrently by different threads
|
||||
to parse separate token sequences.
|
||||
|
||||
See driver.py for how to get input tokens by tokenizing a file or
|
||||
string.
|
||||
|
||||
Parsing is complete when addtoken() returns True; the root of the
|
||||
abstract syntax tree can then be retrieved from the rootnode
|
||||
instance variable. When a syntax error occurs, addtoken() raises
|
||||
the ParseError exception. There is no error recovery; the parser
|
||||
cannot be used after a syntax error was reported (but it can be
|
||||
reinitialized by calling setup()).
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, convert=None):
|
||||
"""Constructor.
|
||||
|
||||
The grammar argument is a grammar.Grammar instance; see the
|
||||
grammar module for more information.
|
||||
|
||||
The parser is not ready yet for parsing; you must call the
|
||||
setup() method to get it started.
|
||||
|
||||
The optional convert argument is a function mapping concrete
|
||||
syntax tree nodes to abstract syntax tree nodes. If not
|
||||
given, no conversion is done and the syntax tree produced is
|
||||
the concrete syntax tree. If given, it must be a function of
|
||||
two arguments, the first being the grammar (a grammar.Grammar
|
||||
instance), and the second being the concrete syntax tree node
|
||||
to be converted. The syntax tree is converted from the bottom
|
||||
up.
|
||||
|
||||
A concrete syntax tree node is a (type, value, context, nodes)
|
||||
tuple, where type is the node type (a token or symbol number),
|
||||
value is None for symbols and a string for tokens, context is
|
||||
None or an opaque value used for error reporting (typically a
|
||||
(lineno, offset) pair), and nodes is a list of children for
|
||||
symbols, and None for tokens.
|
||||
|
||||
An abstract syntax tree node may be anything; this is entirely
|
||||
up to the converter function.
|
||||
|
||||
"""
|
||||
self.grammar = grammar
|
||||
self.convert = convert or (lambda grammar, node: node)
|
||||
|
||||
def setup(self, start=None):
|
||||
"""Prepare for parsing.
|
||||
|
||||
This *must* be called before starting to parse.
|
||||
|
||||
The optional argument is an alternative start symbol; it
|
||||
defaults to the grammar's start symbol.
|
||||
|
||||
You can use a Parser instance to parse any number of programs;
|
||||
each time you call setup() the parser is reset to an initial
|
||||
state determined by the (implicit or explicit) start symbol.
|
||||
|
||||
"""
|
||||
if start is None:
|
||||
start = self.grammar.start
|
||||
# Each stack entry is a tuple: (dfa, state, node).
|
||||
# A node is a tuple: (type, value, context, children),
|
||||
# where children is a list of nodes or None, and context may be None.
|
||||
newnode = (start, None, None, [])
|
||||
stackentry = (self.grammar.dfas[start], 0, newnode)
|
||||
self.stack = [stackentry]
|
||||
self.rootnode = None
|
||||
self.used_names = set() # Aliased to self.rootnode.used_names in pop()
|
||||
|
||||
def addtoken(self, type, value, context):
|
||||
"""Add a token; return True iff this is the end of the program."""
|
||||
# Map from token to label
|
||||
ilabel = self.classify(type, value, context)
|
||||
# Loop until the token is shifted; may raise exceptions
|
||||
while True:
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
arcs = states[state]
|
||||
# Look for a state with this label
|
||||
for i, newstate in arcs:
|
||||
t, v = self.grammar.labels[i]
|
||||
if ilabel == i:
|
||||
# Look it up in the list of labels
|
||||
assert t < 256
|
||||
# Shift a token; we're done with it
|
||||
self.shift(type, value, newstate, context)
|
||||
# Pop while we are in an accept-only state
|
||||
state = newstate
|
||||
while states[state] == [(0, state)]:
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing!
|
||||
return True
|
||||
dfa, state, node = self.stack[-1]
|
||||
states, first = dfa
|
||||
# Done with this token
|
||||
return False
|
||||
elif t >= 256:
|
||||
# See if it's a symbol and if we're in its first set
|
||||
itsdfa = self.grammar.dfas[t]
|
||||
itsstates, itsfirst = itsdfa
|
||||
if ilabel in itsfirst:
|
||||
# Push a symbol
|
||||
self.push(t, self.grammar.dfas[t], newstate, context)
|
||||
break # To continue the outer while loop
|
||||
else:
|
||||
if (0, state) in arcs:
|
||||
# An accepting state, pop it and try something else
|
||||
self.pop()
|
||||
if not self.stack:
|
||||
# Done parsing, but another token is input
|
||||
raise ParseError("too much input",
|
||||
type, value, context)
|
||||
else:
|
||||
# No success finding a transition
|
||||
raise ParseError("bad input", type, value, context)
|
||||
|
||||
def classify(self, type, value, context):
|
||||
"""Turn a token into a label. (Internal)"""
|
||||
if type == token.NAME:
|
||||
# Keep a listing of all used names
|
||||
self.used_names.add(value)
|
||||
# Check for reserved words
|
||||
ilabel = self.grammar.keywords.get(value)
|
||||
if ilabel is not None:
|
||||
return ilabel
|
||||
ilabel = self.grammar.tokens.get(type)
|
||||
if ilabel is None:
|
||||
raise ParseError("bad token", type, value, context)
|
||||
return ilabel
|
||||
|
||||
def shift(self, type, value, newstate, context):
|
||||
"""Shift a token. (Internal)"""
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, value, context, None)
|
||||
newnode = self.convert(self.grammar, newnode)
|
||||
if newnode is not None:
|
||||
node[-1].append(newnode)
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
|
||||
def push(self, type, newdfa, newstate, context):
|
||||
"""Push a nonterminal. (Internal)"""
|
||||
dfa, state, node = self.stack[-1]
|
||||
newnode = (type, None, context, [])
|
||||
self.stack[-1] = (dfa, newstate, node)
|
||||
self.stack.append((newdfa, 0, newnode))
|
||||
|
||||
def pop(self):
|
||||
"""Pop a nonterminal. (Internal)"""
|
||||
popdfa, popstate, popnode = self.stack.pop()
|
||||
newnode = self.convert(self.grammar, popnode)
|
||||
if newnode is not None:
|
||||
if self.stack:
|
||||
dfa, state, node = self.stack[-1]
|
||||
node[-1].append(newnode)
|
||||
else:
|
||||
self.rootnode = newnode
|
||||
self.rootnode.used_names = self.used_names
|
||||
@@ -0,0 +1,386 @@
|
||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||
# Licensed to PSF under a Contributor Agreement.
|
||||
|
||||
# Pgen imports
|
||||
from . import grammar, token, tokenize
|
||||
|
||||
class PgenGrammar(grammar.Grammar):
|
||||
pass
|
||||
|
||||
class ParserGenerator(object):
|
||||
|
||||
def __init__(self, filename, stream=None):
|
||||
close_stream = None
|
||||
if stream is None:
|
||||
stream = open(filename)
|
||||
close_stream = stream.close
|
||||
self.filename = filename
|
||||
self.stream = stream
|
||||
self.generator = tokenize.generate_tokens(stream.readline)
|
||||
self.gettoken() # Initialize lookahead
|
||||
self.dfas, self.startsymbol = self.parse()
|
||||
if close_stream is not None:
|
||||
close_stream()
|
||||
self.first = {} # map from symbol name to set of tokens
|
||||
self.addfirstsets()
|
||||
|
||||
def make_grammar(self):
|
||||
c = PgenGrammar()
|
||||
names = list(self.dfas.keys())
|
||||
names.sort()
|
||||
names.remove(self.startsymbol)
|
||||
names.insert(0, self.startsymbol)
|
||||
for name in names:
|
||||
i = 256 + len(c.symbol2number)
|
||||
c.symbol2number[name] = i
|
||||
c.number2symbol[i] = name
|
||||
for name in names:
|
||||
dfa = self.dfas[name]
|
||||
states = []
|
||||
for state in dfa:
|
||||
arcs = []
|
||||
for label, next in sorted(state.arcs.items()):
|
||||
arcs.append((self.make_label(c, label), dfa.index(next)))
|
||||
if state.isfinal:
|
||||
arcs.append((0, dfa.index(state)))
|
||||
states.append(arcs)
|
||||
c.states.append(states)
|
||||
c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
|
||||
c.start = c.symbol2number[self.startsymbol]
|
||||
return c
|
||||
|
||||
def make_first(self, c, name):
|
||||
rawfirst = self.first[name]
|
||||
first = {}
|
||||
for label in sorted(rawfirst):
|
||||
ilabel = self.make_label(c, label)
|
||||
##assert ilabel not in first # XXX failed on <> ... !=
|
||||
first[ilabel] = 1
|
||||
return first
|
||||
|
||||
def make_label(self, c, label):
|
||||
# XXX Maybe this should be a method on a subclass of converter?
|
||||
ilabel = len(c.labels)
|
||||
if label[0].isalpha():
|
||||
# Either a symbol name or a named token
|
||||
if label in c.symbol2number:
|
||||
# A symbol name (a non-terminal)
|
||||
if label in c.symbol2label:
|
||||
return c.symbol2label[label]
|
||||
else:
|
||||
c.labels.append((c.symbol2number[label], None))
|
||||
c.symbol2label[label] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# A named token (NAME, NUMBER, STRING)
|
||||
itoken = getattr(token, label, None)
|
||||
assert isinstance(itoken, int), label
|
||||
assert itoken in token.tok_name, label
|
||||
if itoken in c.tokens:
|
||||
return c.tokens[itoken]
|
||||
else:
|
||||
c.labels.append((itoken, None))
|
||||
c.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# Either a keyword or an operator
|
||||
assert label[0] in ('"', "'"), label
|
||||
value = eval(label)
|
||||
if value[0].isalpha():
|
||||
# A keyword
|
||||
if value in c.keywords:
|
||||
return c.keywords[value]
|
||||
else:
|
||||
c.labels.append((token.NAME, value))
|
||||
c.keywords[value] = ilabel
|
||||
return ilabel
|
||||
else:
|
||||
# An operator (any non-numeric token)
|
||||
itoken = grammar.opmap[value] # Fails if unknown token
|
||||
if itoken in c.tokens:
|
||||
return c.tokens[itoken]
|
||||
else:
|
||||
c.labels.append((itoken, None))
|
||||
c.tokens[itoken] = ilabel
|
||||
return ilabel
|
||||
|
||||
def addfirstsets(self):
|
||||
names = list(self.dfas.keys())
|
||||
names.sort()
|
||||
for name in names:
|
||||
if name not in self.first:
|
||||
self.calcfirst(name)
|
||||
#print name, self.first[name].keys()
|
||||
|
||||
def calcfirst(self, name):
|
||||
dfa = self.dfas[name]
|
||||
self.first[name] = None # dummy to detect left recursion
|
||||
state = dfa[0]
|
||||
totalset = {}
|
||||
overlapcheck = {}
|
||||
for label, next in state.arcs.items():
|
||||
if label in self.dfas:
|
||||
if label in self.first:
|
||||
fset = self.first[label]
|
||||
if fset is None:
|
||||
raise ValueError("recursion for rule %r" % name)
|
||||
else:
|
||||
self.calcfirst(label)
|
||||
fset = self.first[label]
|
||||
totalset.update(fset)
|
||||
overlapcheck[label] = fset
|
||||
else:
|
||||
totalset[label] = 1
|
||||
overlapcheck[label] = {label: 1}
|
||||
inverse = {}
|
||||
for label, itsfirst in overlapcheck.items():
|
||||
for symbol in itsfirst:
|
||||
if symbol in inverse:
|
||||
raise ValueError("rule %s is ambiguous; %s is in the"
|
||||
" first sets of %s as well as %s" %
|
||||
(name, symbol, label, inverse[symbol]))
|
||||
inverse[symbol] = label
|
||||
self.first[name] = totalset
|
||||
|
||||
def parse(self):
|
||||
dfas = {}
|
||||
startsymbol = None
|
||||
# MSTART: (NEWLINE | RULE)* ENDMARKER
|
||||
while self.type != token.ENDMARKER:
|
||||
while self.type == token.NEWLINE:
|
||||
self.gettoken()
|
||||
# RULE: NAME ':' RHS NEWLINE
|
||||
name = self.expect(token.NAME)
|
||||
self.expect(token.OP, ":")
|
||||
a, z = self.parse_rhs()
|
||||
self.expect(token.NEWLINE)
|
||||
#self.dump_nfa(name, a, z)
|
||||
dfa = self.make_dfa(a, z)
|
||||
#self.dump_dfa(name, dfa)
|
||||
oldlen = len(dfa)
|
||||
self.simplify_dfa(dfa)
|
||||
newlen = len(dfa)
|
||||
dfas[name] = dfa
|
||||
#print name, oldlen, newlen
|
||||
if startsymbol is None:
|
||||
startsymbol = name
|
||||
return dfas, startsymbol
|
||||
|
||||
def make_dfa(self, start, finish):
|
||||
# To turn an NFA into a DFA, we define the states of the DFA
|
||||
# to correspond to *sets* of states of the NFA. Then do some
|
||||
# state reduction. Let's represent sets as dicts with 1 for
|
||||
# values.
|
||||
assert isinstance(start, NFAState)
|
||||
assert isinstance(finish, NFAState)
|
||||
def closure(state):
|
||||
base = {}
|
||||
addclosure(state, base)
|
||||
return base
|
||||
def addclosure(state, base):
|
||||
assert isinstance(state, NFAState)
|
||||
if state in base:
|
||||
return
|
||||
base[state] = 1
|
||||
for label, next in state.arcs:
|
||||
if label is None:
|
||||
addclosure(next, base)
|
||||
states = [DFAState(closure(start), finish)]
|
||||
for state in states: # NB states grows while we're iterating
|
||||
arcs = {}
|
||||
for nfastate in state.nfaset:
|
||||
for label, next in nfastate.arcs:
|
||||
if label is not None:
|
||||
addclosure(next, arcs.setdefault(label, {}))
|
||||
for label, nfaset in sorted(arcs.items()):
|
||||
for st in states:
|
||||
if st.nfaset == nfaset:
|
||||
break
|
||||
else:
|
||||
st = DFAState(nfaset, finish)
|
||||
states.append(st)
|
||||
state.addarc(st, label)
|
||||
return states # List of DFAState instances; first one is start
|
||||
|
||||
def dump_nfa(self, name, start, finish):
|
||||
print("Dump of NFA for", name)
|
||||
todo = [start]
|
||||
for i, state in enumerate(todo):
|
||||
print(" State", i, state is finish and "(final)" or "")
|
||||
for label, next in state.arcs:
|
||||
if next in todo:
|
||||
j = todo.index(next)
|
||||
else:
|
||||
j = len(todo)
|
||||
todo.append(next)
|
||||
if label is None:
|
||||
print(" -> %d" % j)
|
||||
else:
|
||||
print(" %s -> %d" % (label, j))
|
||||
|
||||
def dump_dfa(self, name, dfa):
|
||||
print("Dump of DFA for", name)
|
||||
for i, state in enumerate(dfa):
|
||||
print(" State", i, state.isfinal and "(final)" or "")
|
||||
for label, next in sorted(state.arcs.items()):
|
||||
print(" %s -> %d" % (label, dfa.index(next)))
|
||||
|
||||
def simplify_dfa(self, dfa):
|
||||
# This is not theoretically optimal, but works well enough.
|
||||
# Algorithm: repeatedly look for two states that have the same
|
||||
# set of arcs (same labels pointing to the same nodes) and
|
||||
# unify them, until things stop changing.
|
||||
|
||||
# dfa is a list of DFAState instances
|
||||
changes = True
|
||||
while changes:
|
||||
changes = False
|
||||
for i, state_i in enumerate(dfa):
|
||||
for j in range(i+1, len(dfa)):
|
||||
state_j = dfa[j]
|
||||
if state_i == state_j:
|
||||
#print " unify", i, j
|
||||
del dfa[j]
|
||||
for state in dfa:
|
||||
state.unifystate(state_j, state_i)
|
||||
changes = True
|
||||
break
|
||||
|
||||
def parse_rhs(self):
|
||||
# RHS: ALT ('|' ALT)*
|
||||
a, z = self.parse_alt()
|
||||
if self.value != "|":
|
||||
return a, z
|
||||
else:
|
||||
aa = NFAState()
|
||||
zz = NFAState()
|
||||
aa.addarc(a)
|
||||
z.addarc(zz)
|
||||
while self.value == "|":
|
||||
self.gettoken()
|
||||
a, z = self.parse_alt()
|
||||
aa.addarc(a)
|
||||
z.addarc(zz)
|
||||
return aa, zz
|
||||
|
||||
def parse_alt(self):
|
||||
# ALT: ITEM+
|
||||
a, b = self.parse_item()
|
||||
while (self.value in ("(", "[") or
|
||||
self.type in (token.NAME, token.STRING)):
|
||||
c, d = self.parse_item()
|
||||
b.addarc(c)
|
||||
b = d
|
||||
return a, b
|
||||
|
||||
def parse_item(self):
|
||||
# ITEM: '[' RHS ']' | ATOM ['+' | '*']
|
||||
if self.value == "[":
|
||||
self.gettoken()
|
||||
a, z = self.parse_rhs()
|
||||
self.expect(token.OP, "]")
|
||||
a.addarc(z)
|
||||
return a, z
|
||||
else:
|
||||
a, z = self.parse_atom()
|
||||
value = self.value
|
||||
if value not in ("+", "*"):
|
||||
return a, z
|
||||
self.gettoken()
|
||||
z.addarc(a)
|
||||
if value == "+":
|
||||
return a, z
|
||||
else:
|
||||
return a, a
|
||||
|
||||
def parse_atom(self):
|
||||
# ATOM: '(' RHS ')' | NAME | STRING
|
||||
if self.value == "(":
|
||||
self.gettoken()
|
||||
a, z = self.parse_rhs()
|
||||
self.expect(token.OP, ")")
|
||||
return a, z
|
||||
elif self.type in (token.NAME, token.STRING):
|
||||
a = NFAState()
|
||||
z = NFAState()
|
||||
a.addarc(z, self.value)
|
||||
self.gettoken()
|
||||
return a, z
|
||||
else:
|
||||
self.raise_error("expected (...) or NAME or STRING, got %s/%s",
|
||||
self.type, self.value)
|
||||
|
||||
def expect(self, type, value=None):
|
||||
if self.type != type or (value is not None and self.value != value):
|
||||
self.raise_error("expected %s/%s, got %s/%s",
|
||||
type, value, self.type, self.value)
|
||||
value = self.value
|
||||
self.gettoken()
|
||||
return value
|
||||
|
||||
def gettoken(self):
|
||||
tup = next(self.generator)
|
||||
while tup[0] in (tokenize.COMMENT, tokenize.NL):
|
||||
tup = next(self.generator)
|
||||
self.type, self.value, self.begin, self.end, self.line = tup
|
||||
#print token.tok_name[self.type], repr(self.value)
|
||||
|
||||
def raise_error(self, msg, *args):
|
||||
if args:
|
||||
try:
|
||||
msg = msg % args
|
||||
except:
|
||||
msg = " ".join([msg] + list(map(str, args)))
|
||||
raise SyntaxError(msg, (self.filename, self.end[0],
|
||||
self.end[1], self.line))
|
||||
|
||||
class NFAState(object):
|
||||
|
||||
def __init__(self):
|
||||
self.arcs = [] # list of (label, NFAState) pairs
|
||||
|
||||
def addarc(self, next, label=None):
|
||||
assert label is None or isinstance(label, str)
|
||||
assert isinstance(next, NFAState)
|
||||
self.arcs.append((label, next))
|
||||
|
||||
class DFAState(object):
|
||||
|
||||
def __init__(self, nfaset, final):
|
||||
assert isinstance(nfaset, dict)
|
||||
assert isinstance(next(iter(nfaset)), NFAState)
|
||||
assert isinstance(final, NFAState)
|
||||
self.nfaset = nfaset
|
||||
self.isfinal = final in nfaset
|
||||
self.arcs = {} # map from label to DFAState
|
||||
|
||||
def addarc(self, next, label):
|
||||
assert isinstance(label, str)
|
||||
assert label not in self.arcs
|
||||
assert isinstance(next, DFAState)
|
||||
self.arcs[label] = next
|
||||
|
||||
def unifystate(self, old, new):
|
||||
for label, next in self.arcs.items():
|
||||
if next is old:
|
||||
self.arcs[label] = new
|
||||
|
||||
def __eq__(self, other):
|
||||
# Equality test -- ignore the nfaset instance variable
|
||||
assert isinstance(other, DFAState)
|
||||
if self.isfinal != other.isfinal:
|
||||
return False
|
||||
# Can't just return self.arcs == other.arcs, because that
|
||||
# would invoke this method recursively, with cycles...
|
||||
if len(self.arcs) != len(other.arcs):
|
||||
return False
|
||||
for label, next in self.arcs.items():
|
||||
if next is not other.arcs.get(label):
|
||||
return False
|
||||
return True
|
||||
|
||||
__hash__ = None # For Py3 compatibility.
|
||||
|
||||
def generate_grammar(filename="Grammar.txt"):
|
||||
p = ParserGenerator(filename)
|
||||
return p.make_grammar()
|
||||
@@ -0,0 +1,86 @@
|
||||
#! /usr/bin/env python3
|
||||
|
||||
"""Token constants (from "token.h")."""
|
||||
|
||||
# Taken from Python (r53757) and modified to include some tokens
|
||||
# originally monkeypatched in by pgen2.tokenize
|
||||
|
||||
#--start constants--
|
||||
ENDMARKER = 0
|
||||
NAME = 1
|
||||
NUMBER = 2
|
||||
STRING = 3
|
||||
NEWLINE = 4
|
||||
INDENT = 5
|
||||
DEDENT = 6
|
||||
LPAR = 7
|
||||
RPAR = 8
|
||||
LSQB = 9
|
||||
RSQB = 10
|
||||
COLON = 11
|
||||
COMMA = 12
|
||||
SEMI = 13
|
||||
PLUS = 14
|
||||
MINUS = 15
|
||||
STAR = 16
|
||||
SLASH = 17
|
||||
VBAR = 18
|
||||
AMPER = 19
|
||||
LESS = 20
|
||||
GREATER = 21
|
||||
EQUAL = 22
|
||||
DOT = 23
|
||||
PERCENT = 24
|
||||
BACKQUOTE = 25
|
||||
LBRACE = 26
|
||||
RBRACE = 27
|
||||
EQEQUAL = 28
|
||||
NOTEQUAL = 29
|
||||
LESSEQUAL = 30
|
||||
GREATEREQUAL = 31
|
||||
TILDE = 32
|
||||
CIRCUMFLEX = 33
|
||||
LEFTSHIFT = 34
|
||||
RIGHTSHIFT = 35
|
||||
DOUBLESTAR = 36
|
||||
PLUSEQUAL = 37
|
||||
MINEQUAL = 38
|
||||
STAREQUAL = 39
|
||||
SLASHEQUAL = 40
|
||||
PERCENTEQUAL = 41
|
||||
AMPEREQUAL = 42
|
||||
VBAREQUAL = 43
|
||||
CIRCUMFLEXEQUAL = 44
|
||||
LEFTSHIFTEQUAL = 45
|
||||
RIGHTSHIFTEQUAL = 46
|
||||
DOUBLESTAREQUAL = 47
|
||||
DOUBLESLASH = 48
|
||||
DOUBLESLASHEQUAL = 49
|
||||
AT = 50
|
||||
ATEQUAL = 51
|
||||
OP = 52
|
||||
COMMENT = 53
|
||||
NL = 54
|
||||
RARROW = 55
|
||||
AWAIT = 56
|
||||
ASYNC = 57
|
||||
ERRORTOKEN = 58
|
||||
COLONEQUAL = 59
|
||||
N_TOKENS = 60
|
||||
NT_OFFSET = 256
|
||||
#--end constants--
|
||||
|
||||
tok_name = {}
|
||||
for _name, _value in list(globals().items()):
|
||||
if type(_value) is type(0):
|
||||
tok_name[_value] = _name
|
||||
|
||||
|
||||
def ISTERMINAL(x):
|
||||
return x < NT_OFFSET
|
||||
|
||||
def ISNONTERMINAL(x):
|
||||
return x >= NT_OFFSET
|
||||
|
||||
def ISEOF(x):
|
||||
return x == ENDMARKER
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user