pgen2/parse.py

OILS / pgen2 / parse.py View on Github | oilshell.org

220 lines, 91 significant

1	# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2	# Licensed to PSF under a Contributor Agreement.
3
4	"""Parser engine for the grammar tables generated by pgen.
5
6	The grammar table must be loaded first.
7
8	See Parser/parser.c in the Python distribution for additional info on
9	how this parsing engine works.
10	"""
11
12	from mycpp.mylib import log
13	_ = log
14
15	from typing import TYPE_CHECKING, Optional, Any, List
16	from pgen2.pnode import PNode, PNodeAllocator
17
18	if TYPE_CHECKING:
19	from _devbuild.gen.syntax_asdl import Token
20	from pgen2.grammar import Grammar, dfa_t
21
22
23	class ParseError(Exception):
24	"""Exception to signal the parser is stuck."""
25
26	def __init__(self, msg, type_, tok):
27	# type: (str, int, Token) -> None
28	self.msg = msg
29	self.type = type_
30	self.tok = tok
31
32	def __repr__(self):
33	# type: () -> str
34	return "%s: type=%d, tok=%r" % (self.msg, self.type, self.tok)
35
36
37	class _StackItem(object):
38	def __init__(self, dfa, state, node):
39	# type: (dfa_t, int, PNode) -> None
40	self.dfa = dfa
41	self.state = state
42	self.node = node
43
44
45	class Parser(object):
46	"""Parser engine.
47
48	The proper usage sequence is:
49
50	p = Parser(grammar, [converter]) # create instance
51	p.setup(start) # prepare for parsing
52	<for each input token>:
53	if p.addtoken(...): # parse a token; may raise ParseError
54	break
55	root = p.rootnode # root of abstract syntax tree
56
57	A Parser instance may be reused by calling setup() repeatedly.
58
59	A Parser instance contains state pertaining to the current token
60	sequence, and should not be used concurrently by different threads
61	to parse separate token sequences.
62
63	See driver.py for how to get input tokens by tokenizing a file or
64	string.
65
66	Parsing is complete when addtoken() returns True; the root of the
67	abstract syntax tree can then be retrieved from the rootnode
68	instance variable. When a syntax error occurs, addtoken() raises
69	the ParseError exception. There is no error recovery; the parser
70	cannot be used after a syntax error was reported (but it can be
71	reinitialized by calling setup()).
72	"""
73
74	def __init__(self, grammar):
75	# type: (Grammar) -> None
76	"""Constructor.
77
78	The grammar argument is a grammar.Grammar instance; see the
79	grammar module for more information.
80
81	The parser is not ready yet for parsing; you must call the
82	setup() method to get it started.
83
84	A concrete syntax tree node is a (type, value, context, nodes)
85	tuple, where type is the node type (a token or symbol number),
86	value is None for symbols and a string for tokens, context is
87	None or an opaque value used for error reporting (typically a
88	(lineno, offset) pair), and nodes is a list of children for
89	symbols, and None for tokens.
90
91	An abstract syntax tree node may be anything; this is entirely
92	up to the converter function.
93	"""
94	self.grammar = grammar
95	self.rootnode = None # type: Optional[PNode]
96	self.stack = [] # type: List[_StackItem]
97	self.pnode_alloc = None # type: Optional[PNodeAllocator]
98
99	def setup(self, start, pnode_alloc):
100	# type: (int, PNodeAllocator) -> None
101	"""Prepare for parsing.
102
103	This must be called before starting to parse.
104
105	The optional argument is an alternative start symbol; it
106	defaults to the grammar's start symbol.
107
108	You can use a Parser instance to parse any number of programs;
109	each time you call setup() the parser is reset to an initial
110	state determined by the (implicit or explicit) start symbol.
111	"""
112	self.pnode_alloc = pnode_alloc
113	newnode = self.pnode_alloc.NewPNode(start, None)
114	# Each stack entry is a tuple: (dfa, state, node).
115	self.stack = [_StackItem(self.grammar.dfas[start], 0, newnode)]
116	self.rootnode = None
117
118	def addtoken(self, typ, opaque, ilabel):
119	# type: (int, Token, int) -> bool
120	"""Add a token; return True iff this is the end of the program."""
121	# Loop until the token is shifted; may raise exceptions
122
123	# Andy NOTE: This is not linear time, i.e. a constant amount of work
124	# for each token? Is it O(n^2) as the ANTLR paper says?
125	# Do the "accelerators" in pgen.c have anything to do with it?
126
127	while True:
128	top = self.stack[-1]
129	states, _ = top.dfa
130	state = top.state
131
132	arcs = states[state]
133
134	# Look for a state with this label
135	found = False
136	for ilab, newstate in arcs:
137	t = self.grammar.labels[ilab]
138	if ilabel == ilab:
139	# Look it up in the list of labels
140	assert t < 256, t
141	# Shift a token; we're done with it
142	self.shift(typ, opaque, newstate)
143	# Pop while we are in an accept-only state
144	state = newstate
145
146	while True:
147	# mycpp: rewrite of tuple-in-list comparison
148	if len(states[state]) != 1:
149	break
150
151	s0, s1 = states[state][0]
152	if s0 != 0 or s1 != state:
153	break
154
155	self.pop()
156	if len(self.stack) == 0:
157	# Done parsing!
158	return True
159	top = self.stack[-1]
160	states, _ = top.dfa
161	state = top.state
162
163	# Done with this token
164	return False
165	elif t >= 256:
166	# See if it's a symbol and if we're in its first set
167	itsdfa = self.grammar.dfas[t]
168	_, itsfirst = itsdfa
169	if ilabel in itsfirst:
170	# Push a symbol
171	self.push(t, opaque, self.grammar.dfas[t], newstate)
172	found = True
173	break # To continue the outer while loop
174
175	if not found:
176	# Note: this condition was rewritten for mycpp tarnslation.
177	# if (0, state) in arcs:
178	# ...
179	# else:
180	# ...
181	found2 = False
182	for left, right in arcs:
183	if left == 0 and right == state:
184	# An accepting state, pop it and try something else
185	self.pop()
186	if len(self.stack) == 0:
187	# Done parsing, but another token is input
188	raise ParseError("too much input", typ, opaque)
189	found2 = True
190
191	if not found2:
192	# No success finding a transition
193	raise ParseError("bad input", typ, opaque)
194
195	def shift(self, typ, opaque, newstate):
196	# type: (int, Token, int) -> None
197	"""Shift a token. (Internal)"""
198	top = self.stack[-1]
199	newnode = self.pnode_alloc.NewPNode(typ, opaque)
200	top.node.AddChild(newnode)
201	self.stack[-1].state = newstate
202
203	def push(self, typ, opaque, newdfa, newstate):
204	# type: (int, Token, dfa_t, int) -> None
205	"""Push a nonterminal. (Internal)"""
206	top = self.stack[-1]
207	newnode = self.pnode_alloc.NewPNode(typ, opaque)
208	self.stack[-1].state = newstate
209	self.stack.append(_StackItem(newdfa, 0, newnode))
210
211	def pop(self):
212	# type: () -> None
213	"""Pop a nonterminal. (Internal)"""
214	top = self.stack.pop()
215	newnode = top.node
216	if len(self.stack):
217	top2 = self.stack[-1]
218	top2.node.AddChild(newnode)
219	else:
220	self.rootnode = newnode