osh/tdop.py

OILS / osh / tdop.py View on Github | oilshell.org

340 lines, 143 significant

1	"""
2	tdop.py - Library for expression parsing.
3	"""
4
5	from _devbuild.gen.id_kind_asdl import Id, Id_t
6	from _devbuild.gen.syntax_asdl import (loc, arith_expr, arith_expr_e,
7	arith_expr_t, word_t, CompoundWord,
8	NameTok)
9	from core.error import p_die
10	from core import ui
11	from frontend import lexer
12	from mycpp import mylib
13	from mycpp.mylib import tagswitch
14	from osh import word_
15
16	from typing import (Callable, List, Dict, Tuple, Any, cast, TYPE_CHECKING)
17
18	if TYPE_CHECKING: # break circular dep
19	from osh.word_parse import WordParser
20	from core import optview
21	LeftFunc = Callable[['TdopParser', word_t, arith_expr_t, int],
22	arith_expr_t]
23	NullFunc = Callable[['TdopParser', word_t, int], arith_expr_t]
24
25
26	def IsIndexable(node):
27	# type: (arith_expr_t) -> bool
28	"""In POSIX shell arith, a[1] is allowed but a[1][1] isn't.
29
30	We also allow $a[i] and foo$x[i] (formerly parse_dynamic_arith)
31	"""
32	with tagswitch(node) as case:
33	if case(arith_expr_e.VarSub, arith_expr_e.Word):
34	return True
35	return False
36
37
38	def CheckLhsExpr(node, blame_word):
39	# type: (arith_expr_t, word_t) -> None
40	"""Determine if a node is a valid L-value by whitelisting tags.
41
42	Valid:
43	x = y
44	a[1] = y
45	Invalid:
46	a[0][0] = y
47	"""
48	UP_node = node
49	if node.tag() == arith_expr_e.Binary:
50	node = cast(arith_expr.Binary, UP_node)
51	if node.op_id == Id.Arith_LBracket and IsIndexable(node.left):
52	return
53	# But a[0][0] = 1 is NOT valid.
54
55	if IsIndexable(node):
56	return
57
58	p_die("Left-hand side of this assignment is invalid", loc.Word(blame_word))
59
60
61	#
62	# Null Denotation
63	#
64
65
66	def NullError(p, t, bp):
67	# type: (TdopParser, word_t, int) -> arith_expr_t
68	# TODO: I need position information
69	p_die("Token can't be used in prefix position", loc.Word(t))
70	return None # never reached
71
72
73	def NullConstant(p, w, bp):
74	# type: (TdopParser, word_t, int) -> arith_expr_t
75	name_tok = word_.LooksLikeArithVar(w)
76	if name_tok:
77	return NameTok(name_tok, lexer.TokenVal(name_tok))
78
79	# Id.Word_Compound in the spec ensures this cast is valid
80	return cast(CompoundWord, w)
81
82
83	def NullParen(p, t, bp):
84	# type: (TdopParser, word_t, int) -> arith_expr_t
85	"""Arithmetic grouping."""
86	r = p.ParseUntil(bp)
87	p.Eat(Id.Arith_RParen)
88	return r
89
90
91	def NullPrefixOp(p, w, bp):
92	# type: (TdopParser, word_t, int) -> arith_expr_t
93	"""Prefix operator.
94
95	Low precedence: return, raise, etc.
96	return x+y is return (x+y), not (return x) + y
97
98	High precedence: logical negation, bitwise complement, etc.
99	!x && y is (!x) && y, not !(x && y)
100	"""
101	right = p.ParseUntil(bp)
102	return arith_expr.Unary(word_.ArithId(w), right)
103
104
105	#
106	# Left Denotation
107	#
108
109
110	def LeftError(p, t, left, rbp):
111	# type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
112	# Hm is this not called because of binding power?
113	p_die("Token can't be used in infix position", loc.Word(t))
114	return None # never reached
115
116
117	def LeftBinaryOp(p, w, left, rbp):
118	# type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
119	"""Normal binary operator like 1+2 or 2*3, etc."""
120	# TODO: w should be a Token, and we should extract the token from it.
121	return arith_expr.Binary(word_.ArithId(w), left, p.ParseUntil(rbp))
122
123
124	def LeftAssign(p, w, left, rbp):
125	# type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
126	"""Normal binary operator like 1+2 or 2*3, etc."""
127	# x += 1, or a[i] += 1
128
129	CheckLhsExpr(left, w)
130	return arith_expr.BinaryAssign(word_.ArithId(w), left, p.ParseUntil(rbp))
131
132
133	#
134	# Parser definition
135	# TODO: To be consistent, move this to osh/tdop_def.py.
136	#
137
138	if mylib.PYTHON:
139
140	def _ModuleAndFuncName(f):
141	# type: (Any) -> Tuple[str, str]
142	namespace = f.__module__.split('.')[-1]
143	return namespace, f.__name__
144
145	def _CppFuncName(f):
146	# type: (Any) -> str
147	return '%s::%s' % _ModuleAndFuncName(f)
148
149	class LeftInfo(object):
150	"""Row for operator.
151
152	In C++ this should be a big array.
153	"""
154
155	def __init__(self, led=None, lbp=0, rbp=0):
156	# type: (LeftFunc, int, int) -> None
157	self.led = led or LeftError
158	self.lbp = lbp
159	self.rbp = rbp
160
161	def __str__(self):
162	# type: () -> str
163	"""Used by C++ code generation."""
164	return '{ %s, %d, %d },' % (_CppFuncName(
165	self.led), self.lbp, self.rbp)
166
167	def ModuleAndFuncName(self):
168	# type: () -> Tuple[str, str]
169	"""Used by C++ code generation."""
170	return _ModuleAndFuncName(self.led)
171
172	class NullInfo(object):
173	"""Row for operator.
174
175	In C++ this should be a big array.
176	"""
177
178	def __init__(self, nud=None, bp=0):
179	# type: (NullFunc, int) -> None
180	self.nud = nud or LeftError
181	self.bp = bp
182
183	def __str__(self):
184	# type: () -> str
185	"""Used by C++ code generation."""
186	return '{ %s, %d },' % (_CppFuncName(self.nud), self.bp)
187
188	def ModuleAndFuncName(self):
189	# type: () -> Tuple[str, str]
190	"""Used by C++ code generation."""
191	return _ModuleAndFuncName(self.nud)
192
193	class ParserSpec(object):
194	"""Specification for a TDOP parser.
195
196	This can be compiled to a table in C++.
197	"""
198
199	def __init__(self):
200	# type: () -> None
201	self.nud_lookup = {} # type: Dict[Id_t, NullInfo]
202	self.led_lookup = {} # type: Dict[Id_t, LeftInfo]
203
204	def Null(self, bp, nud, tokens):
205	# type: (int, NullFunc, List[Id_t]) -> None
206	"""Register a token that doesn't take anything on the left.
207
208	Examples: constant, prefix operator, error.
209	"""
210	for token in tokens:
211	self.nud_lookup[token] = NullInfo(nud=nud, bp=bp)
212	if token not in self.led_lookup:
213	self.led_lookup[token] = LeftInfo() # error
214
215	def _RegisterLed(self, lbp, rbp, led, tokens):
216	# type: (int, int, LeftFunc, List[Id_t]) -> None
217	for token in tokens:
218	if token not in self.nud_lookup:
219	self.nud_lookup[token] = NullInfo(NullError)
220	self.led_lookup[token] = LeftInfo(lbp=lbp, rbp=rbp, led=led)
221
222	def Left(self, bp, led, tokens):
223	# type: (int, LeftFunc, List[Id_t]) -> None
224	"""Register a token that takes an expression on the left."""
225	self._RegisterLed(bp, bp, led, tokens)
226
227	def LeftRightAssoc(self, bp, led, tokens):
228	# type: (int, LeftFunc, List[Id_t]) -> None
229	"""Register a right associative operator."""
230	self._RegisterLed(bp, bp - 1, led, tokens)
231
232	def LookupNud(self, token):
233	# type: (Id_t) -> NullInfo
234
235	# As long as the table is complete, this shouldn't fail
236	return self.nud_lookup[token]
237
238	def LookupLed(self, token):
239	# type: (Id_t) -> LeftInfo
240	"""Get a left_info for the token."""
241
242	# As long as the table is complete, this shouldn't fail
243	return self.led_lookup[token]
244
245
246	class TdopParser(object):
247
248	def __init__(self, spec, w_parser, parse_opts):
249	# type: (ParserSpec, WordParser, optview.Parse) -> None
250	self.spec = spec
251	self.w_parser = w_parser
252	self.parse_opts = parse_opts
253
254	# NOTE: Next() overwrites this state, so we don't need a Reset() method in
255	# between reuses of this TdopParser instance.
256	self.cur_word = None # type: word_t # current token
257	self.op_id = Id.Undefined_Tok
258
259	def CurrentId(self):
260	# type: () -> Id_t
261	"""Glue used by the WordParser to check for extra tokens."""
262	return word_.CommandId(self.cur_word)
263
264	def AtToken(self, token_type):
265	# type: (Id_t) -> bool
266	return self.op_id == token_type
267
268	def Eat(self, token_type):
269	# type: (Id_t) -> None
270	"""Assert that we're at the current token and advance."""
271	if not self.AtToken(token_type):
272	p_die(
273	'Parser expected %s, got %s' %
274	(ui.PrettyId(token_type), ui.PrettyId(self.op_id)),
275	loc.Word(self.cur_word))
276	self.Next()
277
278	def Next(self):
279	# type: () -> bool
280	self.cur_word = self.w_parser.ReadArithWord()
281	self.op_id = word_.ArithId(self.cur_word)
282	return True
283
284	def ParseUntil(self, rbp):
285	# type: (int) -> arith_expr_t
286	"""Parse to the right, eating tokens until we encounter a token with
287	binding power LESS THAN OR EQUAL TO rbp."""
288	# TODO: use Kind.Eof
289	if self.op_id in (Id.Eof_Real, Id.Eof_RParen, Id.Eof_Backtick):
290	p_die('Unexpected end of input', loc.Word(self.cur_word))
291
292	t = self.cur_word
293	null_info = self.spec.LookupNud(self.op_id)
294
295	self.Next() # skip over the token, e.g. ! ~ + -
296	node = null_info.nud(self, t, null_info.bp)
297
298	while True:
299	t = self.cur_word
300	left_info = self.spec.LookupLed(self.op_id)
301
302	# Examples:
303	# If we see 1*2+ , rbp = 27 and lbp = 25, so stop.
304	# If we see 1+2+ , rbp = 25 and lbp = 25, so stop.
305	# If we see 12, rbp = 26 and lbp = 27, so keep going.
306	if rbp >= left_info.lbp:
307	break
308	self.Next() # skip over the token, e.g. / *
309
310	node = left_info.led(self, t, node, left_info.rbp)
311
312	return node
313
314	def Parse(self):
315	# type: () -> arith_expr_t
316
317	self.Next() # may raise ParseError
318
319	if not self.parse_opts.parse_sh_arith():
320	# Affects:
321	# echo $(( x ))
322	# ${a[i]} which should be $[a[i]] -- could have better error
323	#
324	# Note: sh_expr_eval.UnsafeArith has a dynamic e_die() check
325	#
326	# Doesn't affect:
327	# printf -v x # unsafe_arith.ParseLValue
328	# unset x # unsafe_arith.ParseLValue
329	# ${!ref} # unsafe_arith.ParseVarRef
330	# declare -n # not fully implemented yet
331	#
332	# a[i+1]= # parse_sh_assign
333	#
334	# (( a = 1 )) # parse_dparen
335	# for (( i = 0; ... # parse_dparen
336
337	p_die("POSIX shell arithmetic isn't allowed (parse_sh_arith)",
338	loc.Word(self.cur_word))
339
340	return self.ParseUntil(0)