OILS / osh / tdop.py View on Github | oilshell.org

340 lines, 143 significant
1"""
2tdop.py - Library for expression parsing.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Id_t
6from _devbuild.gen.syntax_asdl import (loc, arith_expr, arith_expr_e,
7 arith_expr_t, word_t, CompoundWord,
8 NameTok)
9from core.error import p_die
10from core import ui
11from frontend import lexer
12from mycpp import mylib
13from mycpp.mylib import tagswitch
14from osh import word_
15
16from typing import (Callable, List, Dict, Tuple, Any, cast, TYPE_CHECKING)
17
18if TYPE_CHECKING: # break circular dep
19 from osh.word_parse import WordParser
20 from core import optview
21 LeftFunc = Callable[['TdopParser', word_t, arith_expr_t, int],
22 arith_expr_t]
23 NullFunc = Callable[['TdopParser', word_t, int], arith_expr_t]
24
25
26def IsIndexable(node):
27 # type: (arith_expr_t) -> bool
28 """In POSIX shell arith, a[1] is allowed but a[1][1] isn't.
29
30 We also allow $a[i] and foo$x[i] (formerly parse_dynamic_arith)
31 """
32 with tagswitch(node) as case:
33 if case(arith_expr_e.VarSub, arith_expr_e.Word):
34 return True
35 return False
36
37
38def CheckLhsExpr(node, blame_word):
39 # type: (arith_expr_t, word_t) -> None
40 """Determine if a node is a valid L-value by whitelisting tags.
41
42 Valid:
43 x = y
44 a[1] = y
45 Invalid:
46 a[0][0] = y
47 """
48 UP_node = node
49 if node.tag() == arith_expr_e.Binary:
50 node = cast(arith_expr.Binary, UP_node)
51 if node.op_id == Id.Arith_LBracket and IsIndexable(node.left):
52 return
53 # But a[0][0] = 1 is NOT valid.
54
55 if IsIndexable(node):
56 return
57
58 p_die("Left-hand side of this assignment is invalid", loc.Word(blame_word))
59
60
61#
62# Null Denotation
63#
64
65
66def NullError(p, t, bp):
67 # type: (TdopParser, word_t, int) -> arith_expr_t
68 # TODO: I need position information
69 p_die("Token can't be used in prefix position", loc.Word(t))
70 return None # never reached
71
72
73def NullConstant(p, w, bp):
74 # type: (TdopParser, word_t, int) -> arith_expr_t
75 name_tok = word_.LooksLikeArithVar(w)
76 if name_tok:
77 return NameTok(name_tok, lexer.TokenVal(name_tok))
78
79 # Id.Word_Compound in the spec ensures this cast is valid
80 return cast(CompoundWord, w)
81
82
83def NullParen(p, t, bp):
84 # type: (TdopParser, word_t, int) -> arith_expr_t
85 """Arithmetic grouping."""
86 r = p.ParseUntil(bp)
87 p.Eat(Id.Arith_RParen)
88 return r
89
90
91def NullPrefixOp(p, w, bp):
92 # type: (TdopParser, word_t, int) -> arith_expr_t
93 """Prefix operator.
94
95 Low precedence: return, raise, etc.
96 return x+y is return (x+y), not (return x) + y
97
98 High precedence: logical negation, bitwise complement, etc.
99 !x && y is (!x) && y, not !(x && y)
100 """
101 right = p.ParseUntil(bp)
102 return arith_expr.Unary(word_.ArithId(w), right)
103
104
105#
106# Left Denotation
107#
108
109
110def LeftError(p, t, left, rbp):
111 # type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
112 # Hm is this not called because of binding power?
113 p_die("Token can't be used in infix position", loc.Word(t))
114 return None # never reached
115
116
117def LeftBinaryOp(p, w, left, rbp):
118 # type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
119 """Normal binary operator like 1+2 or 2*3, etc."""
120 # TODO: w should be a Token, and we should extract the token from it.
121 return arith_expr.Binary(word_.ArithId(w), left, p.ParseUntil(rbp))
122
123
124def LeftAssign(p, w, left, rbp):
125 # type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
126 """Normal binary operator like 1+2 or 2*3, etc."""
127 # x += 1, or a[i] += 1
128
129 CheckLhsExpr(left, w)
130 return arith_expr.BinaryAssign(word_.ArithId(w), left, p.ParseUntil(rbp))
131
132
133#
134# Parser definition
135# TODO: To be consistent, move this to osh/tdop_def.py.
136#
137
138if mylib.PYTHON:
139
140 def _ModuleAndFuncName(f):
141 # type: (Any) -> Tuple[str, str]
142 namespace = f.__module__.split('.')[-1]
143 return namespace, f.__name__
144
145 def _CppFuncName(f):
146 # type: (Any) -> str
147 return '%s::%s' % _ModuleAndFuncName(f)
148
149 class LeftInfo(object):
150 """Row for operator.
151
152 In C++ this should be a big array.
153 """
154
155 def __init__(self, led=None, lbp=0, rbp=0):
156 # type: (LeftFunc, int, int) -> None
157 self.led = led or LeftError
158 self.lbp = lbp
159 self.rbp = rbp
160
161 def __str__(self):
162 # type: () -> str
163 """Used by C++ code generation."""
164 return '{ %s, %d, %d },' % (_CppFuncName(
165 self.led), self.lbp, self.rbp)
166
167 def ModuleAndFuncName(self):
168 # type: () -> Tuple[str, str]
169 """Used by C++ code generation."""
170 return _ModuleAndFuncName(self.led)
171
172 class NullInfo(object):
173 """Row for operator.
174
175 In C++ this should be a big array.
176 """
177
178 def __init__(self, nud=None, bp=0):
179 # type: (NullFunc, int) -> None
180 self.nud = nud or LeftError
181 self.bp = bp
182
183 def __str__(self):
184 # type: () -> str
185 """Used by C++ code generation."""
186 return '{ %s, %d },' % (_CppFuncName(self.nud), self.bp)
187
188 def ModuleAndFuncName(self):
189 # type: () -> Tuple[str, str]
190 """Used by C++ code generation."""
191 return _ModuleAndFuncName(self.nud)
192
193 class ParserSpec(object):
194 """Specification for a TDOP parser.
195
196 This can be compiled to a table in C++.
197 """
198
199 def __init__(self):
200 # type: () -> None
201 self.nud_lookup = {} # type: Dict[Id_t, NullInfo]
202 self.led_lookup = {} # type: Dict[Id_t, LeftInfo]
203
204 def Null(self, bp, nud, tokens):
205 # type: (int, NullFunc, List[Id_t]) -> None
206 """Register a token that doesn't take anything on the left.
207
208 Examples: constant, prefix operator, error.
209 """
210 for token in tokens:
211 self.nud_lookup[token] = NullInfo(nud=nud, bp=bp)
212 if token not in self.led_lookup:
213 self.led_lookup[token] = LeftInfo() # error
214
215 def _RegisterLed(self, lbp, rbp, led, tokens):
216 # type: (int, int, LeftFunc, List[Id_t]) -> None
217 for token in tokens:
218 if token not in self.nud_lookup:
219 self.nud_lookup[token] = NullInfo(NullError)
220 self.led_lookup[token] = LeftInfo(lbp=lbp, rbp=rbp, led=led)
221
222 def Left(self, bp, led, tokens):
223 # type: (int, LeftFunc, List[Id_t]) -> None
224 """Register a token that takes an expression on the left."""
225 self._RegisterLed(bp, bp, led, tokens)
226
227 def LeftRightAssoc(self, bp, led, tokens):
228 # type: (int, LeftFunc, List[Id_t]) -> None
229 """Register a right associative operator."""
230 self._RegisterLed(bp, bp - 1, led, tokens)
231
232 def LookupNud(self, token):
233 # type: (Id_t) -> NullInfo
234
235 # As long as the table is complete, this shouldn't fail
236 return self.nud_lookup[token]
237
238 def LookupLed(self, token):
239 # type: (Id_t) -> LeftInfo
240 """Get a left_info for the token."""
241
242 # As long as the table is complete, this shouldn't fail
243 return self.led_lookup[token]
244
245
246class TdopParser(object):
247
248 def __init__(self, spec, w_parser, parse_opts):
249 # type: (ParserSpec, WordParser, optview.Parse) -> None
250 self.spec = spec
251 self.w_parser = w_parser
252 self.parse_opts = parse_opts
253
254 # NOTE: Next() overwrites this state, so we don't need a Reset() method in
255 # between reuses of this TdopParser instance.
256 self.cur_word = None # type: word_t # current token
257 self.op_id = Id.Undefined_Tok
258
259 def CurrentId(self):
260 # type: () -> Id_t
261 """Glue used by the WordParser to check for extra tokens."""
262 return word_.CommandId(self.cur_word)
263
264 def AtToken(self, token_type):
265 # type: (Id_t) -> bool
266 return self.op_id == token_type
267
268 def Eat(self, token_type):
269 # type: (Id_t) -> None
270 """Assert that we're at the current token and advance."""
271 if not self.AtToken(token_type):
272 p_die(
273 'Parser expected %s, got %s' %
274 (ui.PrettyId(token_type), ui.PrettyId(self.op_id)),
275 loc.Word(self.cur_word))
276 self.Next()
277
278 def Next(self):
279 # type: () -> bool
280 self.cur_word = self.w_parser.ReadArithWord()
281 self.op_id = word_.ArithId(self.cur_word)
282 return True
283
284 def ParseUntil(self, rbp):
285 # type: (int) -> arith_expr_t
286 """Parse to the right, eating tokens until we encounter a token with
287 binding power LESS THAN OR EQUAL TO rbp."""
288 # TODO: use Kind.Eof
289 if self.op_id in (Id.Eof_Real, Id.Eof_RParen, Id.Eof_Backtick):
290 p_die('Unexpected end of input', loc.Word(self.cur_word))
291
292 t = self.cur_word
293 null_info = self.spec.LookupNud(self.op_id)
294
295 self.Next() # skip over the token, e.g. ! ~ + -
296 node = null_info.nud(self, t, null_info.bp)
297
298 while True:
299 t = self.cur_word
300 left_info = self.spec.LookupLed(self.op_id)
301
302 # Examples:
303 # If we see 1*2+ , rbp = 27 and lbp = 25, so stop.
304 # If we see 1+2+ , rbp = 25 and lbp = 25, so stop.
305 # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
306 if rbp >= left_info.lbp:
307 break
308 self.Next() # skip over the token, e.g. / *
309
310 node = left_info.led(self, t, node, left_info.rbp)
311
312 return node
313
314 def Parse(self):
315 # type: () -> arith_expr_t
316
317 self.Next() # may raise ParseError
318
319 if not self.parse_opts.parse_sh_arith():
320 # Affects:
321 # echo $(( x ))
322 # ${a[i]} which should be $[a[i]] -- could have better error
323 #
324 # Note: sh_expr_eval.UnsafeArith has a dynamic e_die() check
325 #
326 # Doesn't affect:
327 # printf -v x # unsafe_arith.ParseLValue
328 # unset x # unsafe_arith.ParseLValue
329 # ${!ref} # unsafe_arith.ParseVarRef
330 # declare -n # not fully implemented yet
331 #
332 # a[i+1]= # parse_sh_assign
333 #
334 # (( a = 1 )) # parse_dparen
335 # for (( i = 0; ... # parse_dparen
336
337 p_die("POSIX shell arithmetic isn't allowed (parse_sh_arith)",
338 loc.Word(self.cur_word))
339
340 return self.ParseUntil(0)