| 1 | """
|
| 2 | parse_lib.py - Consolidate various parser instantiations here.
|
| 3 | """
|
| 4 |
|
| 5 | from _devbuild.gen.id_kind_asdl import Id_t
|
| 6 | from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
|
| 7 | ArgList, Proc, Func, command, pat_t)
|
| 8 | from _devbuild.gen.types_asdl import lex_mode_e
|
| 9 | from _devbuild.gen import grammar_nt
|
| 10 |
|
| 11 | from asdl import format as fmt
|
| 12 | from core import state
|
| 13 | from frontend import lexer
|
| 14 | from frontend import reader
|
| 15 | from osh import tdop
|
| 16 | from osh import arith_parse
|
| 17 | from osh import cmd_parse
|
| 18 | from osh import word_parse
|
| 19 | from mycpp import mylib
|
| 20 | from mycpp.mylib import log
|
| 21 | from ysh import expr_parse
|
| 22 | from ysh import expr_to_ast
|
| 23 | from ysh.expr_parse import ctx_PNodeAllocator
|
| 24 |
|
| 25 | _ = log
|
| 26 |
|
| 27 | from typing import Any, List, Tuple, Dict, TYPE_CHECKING
|
| 28 | if TYPE_CHECKING:
|
| 29 | from core.alloc import Arena
|
| 30 | from core.util import _DebugFile
|
| 31 | from core import optview
|
| 32 | from frontend.lexer import Lexer
|
| 33 | from frontend.reader import _Reader
|
| 34 | from osh.tdop import TdopParser
|
| 35 | from osh.word_parse import WordParser
|
| 36 | from osh.cmd_parse import CommandParser
|
| 37 | from pgen2.grammar import Grammar
|
| 38 |
|
| 39 |
|
class _BaseTrail(object):
    """Base class has members, but no-ops for methods.

    ParseContext holds one of these by default; a real Trail can be
    installed via ParseContext.Init_Trail().
    """

    def __init__(self):
        # type: () -> None
        # word from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = [] # type: List[CompoundWord]
        self.redirects = [] # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list,
        # but I ran into the "case classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation. Used for variable name
        # completion. Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        # $ echo $\
        # f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more than
        # one line!
        self.tokens = [] # type: List[Token]

        # words INSIDE an alias expansion, kept separate from self.words
        self.alias_words = [] # type: List[CompoundWord]
        # Set by ctx_Alias while words from an alias expansion are parsed.
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None
            """Dump collected words/redirects/tokens/alias_words for debugging."""

            # note: could cast DebugFile to IO[str] instead of ignoring?
            debug_f.writeln(' words:')
            for w in self.words:
                fmt.PrettyPrint(w, f=debug_f) # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' redirects:')
            for r in self.redirects:
                fmt.PrettyPrint(r, f=debug_f) # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' tokens:')
            for p in self.tokens:
                fmt.PrettyPrint(p, f=debug_f) # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' alias_words:')
            for w in self.alias_words:
                fmt.PrettyPrint(w, f=debug_f) # type: ignore
            debug_f.writeln('')

    def __repr__(self):
        # type: () -> str
        return '<Trail %s %s %s %s>' % (self.words, self.redirects,
                                        self.tokens, self.alias_words)
|
| 117 |
|
| 118 |
|
class ctx_Alias(object):
    """Marks the Trail while words from an alias expansion are parsed.

    CommandParser enters this context so the trail captures only the FIRST
    word of the expansion, e.g. 'ls' for

        alias ll='ls -l'

    While the flag is set, SetLatestWords and AppendToken stay inactive: we
    don't need the other tokens from 'ls -l', and keeping them would also
    probably cause bugs in history expansion — 'echo !1' should refer to the
    first word the user typed, not the first word after alias expansion.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        self.trail = trail
        trail._expanding_alias = True

    def __enter__(self):
        # type: () -> None
        return None

    def __exit__(self, exc_type, exc_value, exc_tb):
        # type: (Any, Any, Any) -> None
        # Restore normal trail recording.
        self.trail._expanding_alias = False
|
| 147 |
|
| 148 |
|
class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        # Empty all four lists in place (list identity is preserved).
        del self.tokens[:]
        del self.alias_words[:]
        del self.words[:]
        del self.redirects[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if self._expanding_alias:
            # Words inside an alias expansion are saved separately.
            self.alias_words = words
        else:
            self.words = words
            self.redirects = redirects

    def AppendToken(self, token):
        # type: (Token) -> None
        # Tokens inside an alias expansion are deliberately dropped.
        if not self._expanding_alias:
            self.tokens.append(token)
|
| 181 |
|
| 182 |
|
if TYPE_CHECKING:
    # Aliases currently being expanded, as (str, int) pairs.
    # NOTE(review): the exact meaning of the int isn't visible in this file;
    # presumably an expansion position — confirm against osh/cmd_parse.py.
    AliasesInFlight = List[Tuple[str, int]]
|
| 185 |
|
| 186 |
|
class ParseContext(object):
    """Context shared between the mutually recursive Command and Word parsers.

    In contrast, STATE is stored in the CommandParser and WordParser
    instances.

    Owns the arena, parse options, alias table, and the YSH expression
    grammar, and acts as a factory for the various parser objects below.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena  # used when creating StringLineReader instances
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.
        if ysh_grammar:
            self.tr = expr_to_ast.Transformer(ysh_grammar)
        else: # hack for unit tests, which pass None
            self.tr = None

        if mylib.PYTHON:
            # Parse tree printer for the 'if 0:' debug blocks below.
            if self.tr:
                self.p_printer = self.tr.p_printer
            else:
                self.p_printer = None

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail() # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        """Install a Trail that records parser state (completion/history)."""
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper function.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't perform
        better.
        """
        # Take Arena from LineReader
        line_lexer = lexer.LineLexer(line_reader.arena)
        return lexer.Lexer(line_lexer, line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        """Make a CommandParser using this context's parse options.

        emit_comp_dummy: emit a special sentinel token before EOF, for the
        completion machinery.
        """
        lx = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lx.EmitCompDummy() # A special token before EOF!

        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, self.parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        """Make a CommandParser with Oil/YSH option defaults.

        Unlike MakeOshParser, this ignores self.parse_opts and uses
        state.MakeOilOpts() instead.
        """
        lx = self.MakeLexer(line_reader)
        parse_opts = state.MakeOilOpts()
        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        """Make a fresh WordParser (with its own Lexer) for here doc bodies."""
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        """Make a WordParser that shares an existing Lexer."""
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str):
        # type: (str) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        w_parser = word_parse.WordParser(self, lx, line_reader)
        w_parser.Init(lex_mode_e.Arith) # Special initialization
        a_parser = tdop.TdopParser(arith_parse.Spec(), w_parser,
                                   self.parse_opts)
        return a_parser

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        # NOTE(review): the 'lexer' param shadows the module import of the same
        # name; renaming it would change the keyword-argument interface.
        w_parser = word_parse.WordParser(self, lexer, line_reader)
        c_parser = cmd_parse.CommandParser(self,
                                           self.parse_opts,
                                           w_parser,
                                           lexer,
                                           line_reader,
                                           eof_id=eof_id)
        return c_parser

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        """Make a fresh YSH expression parser using self.ysh_grammar."""
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.VarDecl, Token]
        """ var mylist = [1, 2, 3] """
        e_parser = self._YshParser()
        # PNodes are only valid inside this context.
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_var_decl)

            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
            ast_node.keyword = kw_token # VarDecl didn't fill this in

        return ast_node, last_token

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.Mutation, Token]
        """ setvar d['a'] += 1 """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_mutation)
            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)
            ast_node = self.tr.MakeMutation(pnode)
            ast_node.keyword = kw_token # Mutation didn't fill this in

        return ast_node, last_token

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """ json write (x, foo=1) and assert [42 === x] """

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)

            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)

            # Mutates 'out' in place; the right paren/bracket is recorded too.
            self.tr.ProcCallArgs(pnode, out)
            out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """if (x > 0) { ...

        }, while, etc.
        """

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)
            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

        return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern and
        the LBrace token at the start of the case arm body.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

        return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """proc f(x, y, @args) {"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_proc)

            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)

            # Fills in the signature; the caller owns the rest of 'out'.
            out.sig = self.tr.Proc(pnode)

        return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """ func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_func)

            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)

            # Mutates 'out' in place.
            self.tr.YshFunc(pnode, out)
        return last_token
|
| 401 |
|
| 402 |
|
| 403 | # Another parser instantiation:
|
| 404 | # - For Array Literal in word_parse.py WordParser:
|
| 405 | # w_parser = WordParser(self.lexer, self.line_reader)
|