OILS / osh / cmd_parse.py View on Github | oilshell.org

2710 lines, 1399 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
_ = Kind_str  # silence "unused" warnings; Kind_str is kept for debug prints

# Byte values used to check the character BEFORE '(' in typed-arg calls.
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect here doc body lines, up to (but not including) the terminator.

    We slurp all the lines at once, instead of parsing line-by-line, because
    of cases like this:
        cat <<EOF
        1 $(echo 2
        echo 3) 4
        EOF

    Returns:
      (body lines, terminator line), each paired with the offset of stripped
      leading tabs (always 0 unless the operator is <<-).
    """
    body_lines = []  # type: List[Tuple[SourceLine, int]]
    terminator_line = None  # type: Tuple[SourceLine, int]
    strip_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF before we saw the delimiter
            # An unterminated here doc is just a warning in bash. We make it
            # fatal because we want to be strict, and because it causes
            # problems reporting other errors.  Blame the << operator.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        text = src_line.content

        # For <<-, strip ALL leading tabs -- not spaces, and not just the
        # first tab.
        col = 0
        if strip_tabs:
            n = len(text)
            while col < n and text[col] == '\t':
                col += 1

        if text[col:].rstrip() == delimiter:
            terminator_line = (src_line, col)
            break

        body_lines.append((src_line, col))

    return body_lines, terminator_line
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Create one Id.Lit_Chars Token per here doc line.

    Used for the single-quoted rule: <<'EOF' and <<-'EOF'.

    <<- lines have non-zero start_offset (the stripped tabs).
    """
    # Declared as the less precise List[word_part_t], because List[T] is an
    # invariant type.
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:

        if do_lossless:  # avoid garbage, doesn't affect correctness
            # Keep the lossless invariant for STRIPPED tabs: the Token is
            # added to the arena, but nothing refers to it.
            arena.NewToken(Id.Ignored_HereTabs, 0, start_offset, src_line,
                           None)

        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line,
                             src_line.content[start_offset:])
        parts.append(tok)
    return parts
165
166
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Reads body lines until the delimiter, then fills in h.stdin_parts (either
    literal tokens or parsed word parts, depending on whether the delimiter
    was quoted) and h.here_end_tok.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # POSIX: "If any character in word is quoted, the delimiter shall be
    # formed by performing quote removal on word, and the here-document lines
    # shall not be expanded. Otherwise, the delimiter shall be the word
    # itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - Literal for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - Parse as word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain lossless invariant for STRIPPED tabs: add a Token to the
    # arena invariant, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Ignored_HereTabs, 0, start_offset, end_line, None)

    # Create a Token with the end terminator. Maintains the invariant that the
    # tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line, '')
203
204
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (from DetectShAssignment).

    Handles three LHS shapes: plain s=1, and a[i]=1 with the index either
    kept as an unparsed string (lossless mode) or parsed as arithmetic.
    """

    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        # slice off the trailing '+=' or '='
        if lexer.IsPlusEquals(left_token):
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        # a[i]=1 in lossless mode: keep the index text UNPARSED so it can be
        # round-tripped (sh_lhs.UnparsedIndex)
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # extract the text between 'a[' and ']='
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()  # DetectShAssignment only produces the above

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        # nothing after the '=', e.g. s=
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        w = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(w)
        rhs = w

    return AssignPair(left_token, lhs, op, rhs)
278
279
280def _AppendMoreEnv(preparsed_list, more_env):
281 # type: (List[ParsedAssignment], List[EnvPair]) -> None
282 """Helper to modify a SimpleCommand node.
283
284 Args:
285 preparsed: a list of 4-tuples from DetectShAssignment
286 more_env: a list to append env_pairs to
287 """
288 for preparsed in preparsed_list:
289 left_token = preparsed.left
290
291 if left_token.id != Id.Lit_VarLike: # can't be a[x]=1
292 p_die(
293 "Environment binding shouldn't look like an array assignment",
294 left_token)
295
296 if lexer.IsPlusEquals(left_token):
297 p_die('Expected = in environment binding, got +=', left_token)
298
299 var_name = lexer.TokenSliceRight(left_token, -1)
300
301 parts = preparsed.w.parts
302 n = len(parts)
303 offset = preparsed.part_offset
304 if offset == n:
305 rhs = rhs_word.Empty # type: rhs_word_t
306 else:
307 w = CompoundWord(parts[offset:])
308 word_.TildeDetectAssign(w)
309 rhs = w
310
311 more_env.append(EnvPair(left_token, var_name, rhs))
312
313
314def _SplitSimpleCommandPrefix(words):
315 # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
316 """Second pass of SimpleCommand parsing: look for assignment words."""
317 preparsed_list = [] # type: List[ParsedAssignment]
318 suffix_words = [] # type: List[CompoundWord]
319
320 done_prefix = False
321 for w in words:
322 if done_prefix:
323 suffix_words.append(w)
324 continue
325
326 left_token, close_token, part_offset = word_.DetectShAssignment(w)
327 if left_token:
328 preparsed_list.append(
329 ParsedAssignment(left_token, close_token, part_offset, w))
330 else:
331 done_prefix = True
332 suffix_words.append(w)
333
334 return preparsed_list, suffix_words
335
336
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Assemble a command.Simple node from its parsed pieces."""

    # FOO=(1 2 3) ls is not allowed.
    for preparsed in preparsed_list:
        if word_.HasArrayPart(preparsed.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(preparsed.w))

    # NOTE: It would be possible to add this check back. But it already happens
    # at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # Blame the very first part, BEFORE brace/tilde detection:
    #   {a,b,c}    # Use { before brace detection
    #   ~/bin/ls   # Use ~ before tilde detection
    blame_tok = location.LeftTokenForWordPart(suffix_words[0].parts[0])

    # We only do brace DETECTION here, not brace EXPANSION, so bash's
    # {~bob,~jane}/src can't work -- we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate', but
    # it doesn't seem worth it.
    detected = braces.BraceDetectAll(suffix_words)
    final_words = word_.TildeDetectAll(detected)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # do_fork=True by default
    return command.Simple(blame_tok, more_env, final_words, redirects,
                          typed_args, block, True)
382
383
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # Two parallel stacks, one frame per proc/func/shell function:
        # - self.tokens: the blame token ('proc', 'func', ...) for each frame
        # - self.names: names declared in each frame
        self.tokens = []  # type: List[Token]
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when we enter a shell function, proc, or func.

        Bash allows this, but it's confusing because it's the same as two
        functions at the top level.

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens) != 0:
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        frame = {}  # type: Dict[str, Id_t]
        self.names.append(frame)

    def Pop(self):
        # type: () -> None
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x:
          error if x was already declared in this frame
        setvar x:
          error if x is NOT declared in this frame
        setglobal x:
          no errors are possible; we would need all these many conditions
          to statically know the names:
          - no 'source'
          - shopt -u copy_env
          - AND use lib has to be static

        What about bare assignment in Hay?  Those are dynamic checks --
        there is no static check.  Hay is for building up data imperatively,
        and then LATER, right before main(), it can be type checked.

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var and
        # setvar are essentially the same there.
        if len(self.names) == 0:
            return

        top = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in top:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                top[var_name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if var_name not in top:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
472
473
class ctx_VarChecker(object):
    """Context manager: push a VarChecker frame on entry, pop it on exit."""

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        var_checker.Push(blame_tok)  # may p_die on illegal nesting
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
488
489
class ctx_CmdMode(object):
    """Context manager: temporarily switch the parser's cmd_mode."""

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode  # saved for __exit__
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
505
506
# Keywords that don't start a command, but instead close or continue a
# compound command already being parsed (see _AtSecondaryKeyword).
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
511
512
513class CommandParser(object):
514 """Recursive descent parser derived from POSIX shell grammar.
515
516 This is a BNF grammar:
517 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
518
519 - Augmented with both bash/OSH and YSH constructs.
520
521 - We use regex-like iteration rather than recursive references
522 ? means optional (0 or 1)
523 * means 0 or more
524 + means 1 or more
525
526 - Keywords are spelled in Caps:
527 If Elif Case
528
529 - Operator tokens are quoted:
530 '(' '|'
531
532 or can be spelled directly if it matters:
533
534 Op_LParen Op_Pipe
535
536 - Non-terminals are snake_case:
537 brace_group subshell
538
539 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
540 the production should be in the method docstrings, e.g.
541
542 def ParseSubshell():
543 "
544 subshell : '(' compound_list ')'
545
546 Looking at Op_LParen # Comment to say how this method is called
547 "
548
549 The grammar may be factored to make parsing easier.
550 """
551
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parse-time state (aliases, arena, sub-parser
            factories)
          parse_opts: parse-time options, e.g. parse_brace
          w_parser: the word parser this command parser drives
          lexer: for pushing hints, lookahead to (
          line_reader: for reading here doc bodies
          eof_id: token Id that ends parsing (default Id.Eof_Real)
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser instances.
        # I think this OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        # Sets the cursor state (cur_word, c_id, c_kind); must be last.
        self.Reset()
593
594 # Init_() function for "keyword arg"
595 def Init_AliasesInFlight(self, aliases_in_flight):
596 # type: (AliasesInFlight) -> None
597 self.aliases_in_flight = aliases_in_flight
598
599 def Reset(self):
600 # type: () -> None
601 """Reset our own internal state.
602
603 Called by the interactive loop.
604 """
605 # Cursor state set by _GetWord()
606 self.next_lex_mode = lex_mode_e.ShCommand
607 self.cur_word = None # type: word_t # current word
608 self.c_kind = Kind.Undefined
609 self.c_id = Id.Undefined_Tok
610
611 self.pending_here_docs = [
612 ] # type: List[Redir] # should have HereLiteral arg
613
614 def ResetInputObjects(self):
615 # type: () -> None
616 """Reset the internal state of our inputs.
617
618 Called by the interactive loop.
619 """
620 self.w_parser.Reset()
621 self.lexer.ResetInputObjects()
622 self.line_reader.Reset()
623
624 def _SetNext(self):
625 # type: () -> None
626 """Call this when you no longer need the current token.
627
628 This method is lazy. A subsequent call to _GetWord() will
629 actually read the next Token.
630 """
631 self.next_lex_mode = lex_mode_e.ShCommand
632
633 def _SetNextBrack(self):
634 # type: () -> None
635 self.next_lex_mode = lex_mode_e.ShCommandBrack
636
637 def _GetWord(self):
638 # type: () -> None
639 """Call this when you need to make a decision based on Id or Kind.
640
641 If there was an "unfulfilled" call to _SetNext(), it reads a word and sets
642 self.c_id and self.c_kind.
643
644 Otherwise it does nothing.
645 """
646 if self.next_lex_mode != lex_mode_e.Undefined:
647 w = self.w_parser.ReadWord(self.next_lex_mode)
648 #log("w %s", w)
649
650 # Here docs only happen in command mode, so other kinds of newlines don't
651 # count.
652 if w.tag() == word_e.Operator:
653 tok = cast(Token, w)
654 if tok.id == Id.Op_Newline:
655 for h in self.pending_here_docs:
656 _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
657 self.arena)
658 del self.pending_here_docs[:] # No .clear() until Python 3.3.
659
660 self.cur_word = w
661
662 self.c_kind = word_.CommandKind(self.cur_word)
663 self.c_id = word_.CommandId(self.cur_word)
664 self.next_lex_mode = lex_mode_e.Undefined
665
666 def _Eat(self, c_id, msg=None):
667 # type: (Id_t, Optional[str]) -> word_t
668 """Consume a word of a type, maybe showing a custom error message.
669
670 Args:
671 c_id: the Id we expected
672 msg: improved error message
673 """
674 self._GetWord()
675 if self.c_id != c_id:
676 if msg is None:
677 msg = 'Expected word type %s, got %s' % (
678 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
679 p_die(msg, loc.Word(self.cur_word))
680
681 skipped = self.cur_word
682 self._SetNext()
683 return skipped
684
685 def _NewlineOk(self):
686 # type: () -> None
687 """Check for optional newline and consume it."""
688 self._GetWord()
689 if self.c_id == Id.Op_Newline:
690 self._SetNext()
691
692 def _AtSecondaryKeyword(self):
693 # type: () -> bool
694 self._GetWord()
695 if self.c_id in SECONDARY_KEYWORDS:
696 return True
697 return False
698
699 def ParseRedirect(self):
700 # type: () -> Redir
701 self._GetWord()
702 assert self.c_kind == Kind.Redir, self.cur_word
703 op_tok = cast(Token, self.cur_word) # for MyPy
704
705 # Note: the lexer could take distinguish between
706 # >out
707 # 3>out
708 # {fd}>out
709 #
710 # which would make the code below faster. But small string optimization
711 # would also speed it up, since redirects are small.
712
713 # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
714 # possibly "unify" the IDs by subtracting a constant like 8 or 16?
715
716 op_val = lexer.TokenVal(op_tok)
717 if op_val[0] == '{':
718 pos = op_val.find('}')
719 assert pos != -1 # lexer ensures this
720 where = redir_loc.VarName(op_val[1:pos]) # type: redir_loc_t
721
722 elif op_val[0].isdigit():
723 pos = 1
724 if op_val[1].isdigit():
725 pos = 2
726 where = redir_loc.Fd(int(op_val[:pos]))
727
728 else:
729 where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))
730
731 self._SetNext()
732
733 self._GetWord()
734 # Other redirect
735 if self.c_kind != Kind.Word:
736 p_die('Invalid token after redirect operator',
737 loc.Word(self.cur_word))
738
739 # Here doc
740 if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
741 arg = redir_param.HereDoc.CreateNull()
742 arg.here_begin = self.cur_word
743 arg.stdin_parts = []
744
745 r = Redir(op_tok, where, arg)
746
747 self.pending_here_docs.append(r) # will be filled on next newline.
748
749 self._SetNext()
750 return r
751
752 arg_word = self.cur_word
753 tilde = word_.TildeDetect(arg_word)
754 if tilde:
755 arg_word = tilde
756 self._SetNext()
757
758 # We should never get Empty, Token, etc.
759 assert arg_word.tag() == word_e.Compound, arg_word
760 return Redir(op_tok, where, cast(CompoundWord, arg_word))
761
762 def _ParseRedirectList(self):
763 # type: () -> List[Redir]
764 """Try parsing any redirects at the cursor.
765
766 This is used for blocks only, not commands.
767 """
768 redirects = [] # type: List[Redir]
769 while True:
770 # This prediction needs to ONLY accept redirect operators. Should we
771 # make them a separate Kind?
772 self._GetWord()
773 if self.c_kind != Kind.Redir:
774 break
775
776 node = self.ParseRedirect()
777 redirects.append(node)
778 self._SetNext()
779
780 return redirects
781
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """First pass of simple command parsing: collect words and redirects.

        YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

        simple_command  : cmd_prefix cmd_word cmd_suffix
                        | cmd_prefix cmd_word
                        | cmd_prefix
                        | cmd_name cmd_suffix
                        | cmd_name
                        ;
        cmd_name        : WORD  /* Apply rule 7a */
                        ;
        cmd_word        : WORD  /* Apply rule 7b */
                        ;
        cmd_prefix      : io_redirect
                        | cmd_prefix io_redirect
                        | ASSIGNMENT_WORD
                        | cmd_prefix ASSIGNMENT_WORD
                        ;
        cmd_suffix      : io_redirect
                        | cmd_suffix io_redirect
                        | WORD
                        | cmd_suffix WORD

        YSH grammar:

        simple_command =
          cmd_prefix* word+ typed_args? BraceGroup? cmd_suffix*

        typed_args =
          '(' arglist ')'
        | '[' arglist ']'

        Notably, redirects shouldn't appear between typed args and
        BraceGroup.
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()
            if self.c_kind == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif self.c_kind == Kind.Word:
                if self.parse_opts.parse_brace():
                    # Treat { and } more like operators
                    if self.c_id == Id.Lit_LBrace:
                        if self.allow_block:  # Disabled for if/while condition, etc.

                            # allow x = 42
                            self.hay_attrs_stack.append(first_word_caps)
                            brace_group = self.ParseBraceGroup()

                            # So we can get the source code back later
                            lines = self.arena.SaveLinesAndDiscard(
                                brace_group.left, brace_group.right)
                            block = LiteralBlock(brace_group, lines)

                            self.hay_attrs_stack.pop()

                            if 0:
                                print('--')
                                block.PrettyPrint()
                                print('\n--')
                        break
                    elif self.c_id == Id.Lit_RBrace:
                        # Another thing: { echo hi }
                        # We're DONE!!!
                        break

                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            elif self.c_id == Id.Op_LParen:
                # 1. Check that there's a preceding space
                prev_byte = self.lexer.ByteLookBack()
                if prev_byte not in (SPACE_CH, TAB_CH):
                    if self.parse_opts.parse_at():
                        p_die('Space required before (',
                              loc.Word(self.cur_word))
                    else:
                        # inline func call like @sorted(x) is invalid in OSH, but the
                        # solution isn't a space
                        p_die(
                            'Unexpected left paren (might need a space before it)',
                            loc.Word(self.cur_word))

                # 2. Check that it's not (). We disallow this because it's a no-op and
                # there could be confusion with shell func defs.
                # For some reason we need to call lexer.LookPastSpace, not
                # w_parser.LookPastSpace. I think this is because we're at (, which is
                # an operator token. All the other cases are like 'x=', which is PART
                # of a word, and we don't know if it will end.
                next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
                if next_id == Id.Op_RParen:
                    p_die('Empty arg list not allowed',
                          loc.Word(self.cur_word))

                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_eager_arglist)

            elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_lazy_arglist)

            else:
                break

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1
        return redirects, words, typed_args, block
922
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node if any aliases were expanded, or None otherwise
          (caller does normal parsing).

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out
        a better place.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.
        """
        # Start a new list if there aren't any. This will be passed recursively
        # through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only a statically-known, unquoted word can be an alias name
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops. This is subtle: we want to prevent infinite
            # expansion of alias echo='echo x'. But we don't want to prevent
            # expansion of the second word in 'echo echo', so we add 'i' to
            # "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to expand
                # aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own Arena.
        # This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special care.
        # See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error.
                    # We don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1061
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Parse a simple command: words, assignments, redirects, typed args.

        Fixed transcription of the POSIX grammar (TODO: port to
        grammar/Shell.g)

        io_file : '<' filename
                | LESSAND filename
                  ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects.  Append them to separate lists.
        2) Look for the first non-assignment word.  If it's declare, etc., then
           keep parsing words AND assign words.  Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have them of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array
        literal in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted,
        e.g. <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # A location to blame when typed args/blocks appear where they can't.
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g. >out.txt # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # A bare redirect still produces a Simple command node.
            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading FOO=bar bindings from the rest of the words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            # and inside Hay Attr blocks
            # But allow X=Y at the top level
            # for interactive use foo=bar
            # for global constants GLOBAL=~/src
            # because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion.  We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x - inside procs and shell functions
                # return (x) - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors.  (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
1241
1242 def ParseBraceGroup(self):
1243 # type: () -> BraceGroup
1244 """
1245 Original:
1246 brace_group : LBrace command_list RBrace ;
1247
1248 YSH:
1249 brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;
1250
1251 The doc comment can only occur if there's a newline.
1252 """
1253 ate = self._Eat(Id.Lit_LBrace)
1254 left = word_.BraceToken(ate)
1255
1256 doc_word = None # type: word_t
1257 self._GetWord()
1258 if self.c_id == Id.Op_Newline:
1259 self._SetNext()
1260 # Set a flag so we don't skip over ###
1261 with word_.ctx_EmitDocToken(self.w_parser):
1262 self._GetWord()
1263
1264 if self.c_id == Id.Ignored_Comment:
1265 doc_word = self.cur_word
1266 self._SetNext()
1267
1268 # Id.Ignored_Comment means it's a Token, or None
1269 doc_token = cast(Token, doc_word)
1270
1271 c_list = self._ParseCommandList()
1272
1273 ate = self._Eat(Id.Lit_RBrace)
1274 right = word_.BraceToken(ate)
1275
1276 # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
1277 # would allow us to revert this back to None, which was changed in
1278 # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
1279 # behavior saves allocations, but is less type safe.
1280 return BraceGroup(left, doc_token, c_list.children, [],
1281 right) # no redirects yet
1282
1283 def ParseDoGroup(self):
1284 # type: () -> command.DoGroup
1285 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1286
1287 do_group : Do command_list Done ; /* Apply rule 6 */
1288 """
1289 ate = self._Eat(Id.KW_Do)
1290 do_kw = word_.AsKeywordToken(ate)
1291
1292 c_list = self._ParseCommandList() # could be anything
1293
1294 ate = self._Eat(Id.KW_Done)
1295 done_kw = word_.AsKeywordToken(ate)
1296
1297 return command.DoGroup(do_kw, c_list.children, done_kw)
1298
1299 def ParseForWords(self):
1300 # type: () -> Tuple[List[CompoundWord], Optional[Token]]
1301 """
1302 for_words : WORD* for_sep
1303 ;
1304 for_sep : ';' newline_ok
1305 | NEWLINES
1306 ;
1307 """
1308 words = [] # type: List[CompoundWord]
1309 # The span_id of any semi-colon, so we can remove it.
1310 semi_tok = None # type: Optional[Token]
1311
1312 while True:
1313 self._GetWord()
1314 if self.c_id == Id.Op_Semi:
1315 tok = cast(Token, self.cur_word)
1316 semi_tok = tok
1317 self._SetNext()
1318 self._NewlineOk()
1319 break
1320 elif self.c_id == Id.Op_Newline:
1321 self._SetNext()
1322 break
1323 elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1324 break
1325
1326 if self.cur_word.tag() != word_e.Compound:
1327 # TODO: Can we also show a pointer to the 'for' keyword?
1328 p_die('Invalid word in for loop', loc.Word(self.cur_word))
1329
1330 w2 = cast(CompoundWord, self.cur_word)
1331 words.append(w2)
1332 self._SetNext()
1333 return words, semi_tok
1334
1335 def _ParseForExprLoop(self, for_kw):
1336 # type: (Token) -> command.ForExpr
1337 """
1338 Shell:
1339 for '((' init ';' cond ';' update '))' for_sep? do_group
1340
1341 YSH:
1342 for '((' init ';' cond ';' update '))' for_sep? brace_group
1343 """
1344 node = self.w_parser.ReadForExpression()
1345 node.keyword = for_kw
1346
1347 self._SetNext()
1348
1349 self._GetWord()
1350 if self.c_id == Id.Op_Semi:
1351 self._SetNext()
1352 self._NewlineOk()
1353 elif self.c_id == Id.Op_Newline:
1354 self._SetNext()
1355 elif self.c_id == Id.KW_Do: # missing semicolon/newline allowed
1356 pass
1357 elif self.c_id == Id.Lit_LBrace: # does NOT require parse_brace
1358 pass
1359 else:
1360 p_die('Invalid word after for expression', loc.Word(self.cur_word))
1361
1362 if self.c_id == Id.Lit_LBrace:
1363 node.body = self.ParseBraceGroup()
1364 else:
1365 node.body = self.ParseDoGroup()
1366 return node
1367
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse 'for NAME [, NAME]* [in ...]' and its body.

        Covers POSIX 'for x in a b', 'for x; do' / 'for x do' (loop over
        "$@"), and the YSH forms 'for x in (expr) { }' and 'for x in a b { }'.
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        num_iter_names = 0
        # First, collect up to 3 loop variable names.
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    # Strip the trailing comma from 'x,'
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'. But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y) # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH: for x in (expr) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Plain word list: for x in a b c
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                # Expand brace sets and detect tilde prefixes statically.
                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1471
1472 def ParseFor(self):
1473 # type: () -> command_t
1474 """
1475 TODO: Update the grammar
1476
1477 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1478 | For '((' ... TODO
1479 """
1480 ate = self._Eat(Id.KW_For)
1481 for_kw = word_.AsKeywordToken(ate)
1482
1483 self._GetWord()
1484 if self.c_id == Id.Op_DLeftParen:
1485 if not self.parse_opts.parse_dparen():
1486 p_die("Bash for loops aren't allowed (parse_dparen)",
1487 loc.Word(self.cur_word))
1488
1489 # for (( i = 0; i < 10; i++)
1490 n1 = self._ParseForExprLoop(for_kw)
1491 n1.redirects = self._ParseRedirectList()
1492 return n1
1493 else:
1494 # for x in a b; do echo hi; done
1495 n2 = self._ParseForEachLoop(for_kw)
1496 n2.redirects = self._ParseRedirectList()
1497 return n2
1498
1499 def _ParseConditionList(self):
1500 # type: () -> condition_t
1501 """
1502 condition_list: command_list
1503
1504 This is a helper to parse a condition list for if commands and while/until
1505 loops. It will throw a parse error if there are no conditions in the list.
1506 """
1507 self.allow_block = False
1508 commands = self._ParseCommandList()
1509 self.allow_block = True
1510
1511 if len(commands.children) == 0:
1512 p_die("Expected a condition", loc.Word(self.cur_word))
1513
1514 return condition.Shell(commands.children)
1515
1516 def ParseWhileUntil(self, keyword):
1517 # type: (Token) -> command.WhileUntil
1518 """
1519 while_clause : While command_list do_group ;
1520 until_clause : Until command_list do_group ;
1521 """
1522 self._SetNext() # skip keyword
1523
1524 if (self.parse_opts.parse_paren() and
1525 self.w_parser.LookPastSpace() == Id.Op_LParen):
1526 enode = self.w_parser.ParseYshExprForCommand()
1527 cond = condition.YshExpr(enode) # type: condition_t
1528 else:
1529 cond = self._ParseConditionList()
1530
1531 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1532 # should be unchanged. To be sure we should desugar.
1533 self._GetWord()
1534 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1535 # while test -f foo {
1536 body_node = self.ParseBraceGroup() # type: command_t
1537 else:
1538 body_node = self.ParseDoGroup()
1539
1540 # no redirects yet
1541 return command.WhileUntil(keyword, cond, body_node, None)
1542
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a shell-style case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # Make the closing ')' lex as Right_CasePat instead of Op_RParen.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # Collect '|'-separated pattern words.
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.KW_Esac):
            # Non-empty arm body; _ParseCommandTerm handles multiple lines.
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id == Id.Op_DSemi:
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1599
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case statement.

        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`.  We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words: one or more words separated by '|'
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # Blame the first pattern word for errors.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1657
1658 def ParseYshCase(self, case_kw):
1659 # type: (Token) -> command.Case
1660 """
1661 ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;
1662
1663 Looking at: token after 'case'
1664 """
1665 enode = self.w_parser.ParseYshExprForCommand()
1666 to_match = case_arg.YshExpr(enode)
1667
1668 ate = self._Eat(Id.Lit_LBrace)
1669 arms_start = word_.BraceToken(ate)
1670
1671 discriminant = self.w_parser.NewlineOkForYshCase()
1672
1673 # Note: for now, zero arms are accepted, just like POSIX case $x in esac
1674 arms = [] # type: List[CaseArm]
1675 while discriminant != Id.Op_RBrace:
1676 arm = self.ParseYshCaseArm(discriminant)
1677 arms.append(arm)
1678
1679 discriminant = self.w_parser.NewlineOkForYshCase()
1680
1681 # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
1682 # token is read as an Id.Op_RBrace, but we need to store this as a
1683 # Id.Lit_RBrace.
1684 ate = self._Eat(Id.Op_RBrace)
1685 arms_end = word_.AsOperatorToken(ate)
1686 arms_end.id = Id.Lit_RBrace
1687
1688 return command.Case(case_kw, to_match, arms_start, arms, arms_end,
1689 None)
1690
1691 def ParseOldCase(self, case_kw):
1692 # type: (Token) -> command.Case
1693 """
1694 case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;
1695
1696 -> Looking at WORD
1697
1698 FYI original POSIX case list, which takes pains for DSEMI
1699
1700 case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
1701 """
1702 self._GetWord()
1703 w = self.cur_word
1704 if not self.parse_opts.parse_bare_word():
1705 ok, s, quoted = word_.StaticEval(w)
1706 if ok and not quoted:
1707 p_die(
1708 "This is a constant string. You may want a variable like $x (parse_bare_word)",
1709 loc.Word(w))
1710
1711 if w.tag() != word_e.Compound:
1712 p_die("Expected a word to match against", loc.Word(w))
1713
1714 to_match = case_arg.Word(w)
1715 self._SetNext() # past WORD
1716
1717 self._NewlineOk()
1718
1719 ate = self._Eat(Id.KW_In)
1720 arms_start = word_.AsKeywordToken(ate)
1721
1722 self._NewlineOk()
1723
1724 arms = [] # type: List[CaseArm]
1725 while True:
1726 self._GetWord()
1727 if self.c_id == Id.KW_Esac: # this is Kind.Word
1728 break
1729 # case arm should begin with a pattern word or (
1730 if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
1731 break
1732
1733 arm = self.ParseCaseArm()
1734 arms.append(arm)
1735
1736 ate = self._Eat(Id.KW_Esac)
1737 arms_end = word_.AsKeywordToken(ate)
1738
1739 # no redirects yet
1740 return command.Case(case_kw, to_match, arms_start, arms, arms_end,
1741 None)
1742
1743 def ParseCase(self):
1744 # type: () -> command.Case
1745 """
1746 case_clause : old_case # from POSIX
1747 | ysh_case
1748 ;
1749
1750 Looking at 'Case'
1751 """
1752 case_kw = word_.AsKeywordToken(self.cur_word)
1753 self._SetNext() # past 'case'
1754
1755 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1756 return self.ParseYshCase(case_kw)
1757 else:
1758 return self.ParseOldCase(case_kw)
1759
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the elif/else tail of a YSH-style if, appending to if_node.

        if test -f foo {
            echo foo
        } elif test -f bar; test -f spam {
        # ^ we parsed up to here
            echo bar
        } else {
            echo none
        }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                # elif (x > 0) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Shell-style condition; blocks not allowed inside it
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()

            arm = IfArm(elif_kw, cond, None, body.children, [elif_kw.span_id])
            arms.append(arm)

        # NOTE(review): _GetWord() was already called at the end of the loop
        # body above, so this call looks redundant -- confirm before removing.
        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1793
1794 def _ParseYshIf(self, if_kw, cond):
1795 # type: (Token, condition_t) -> command.If
1796 """if test -f foo {
1797
1798 # ^ we parsed up to here
1799 echo foo
1800 } elif test -f bar; test -f spam {
1801 echo bar
1802 } else {
1803 echo none
1804 }
1805 NOTE: If you do something like if test -n foo{, the parser keeps going, and
1806 the error is confusing because it doesn't point to the right place.
1807
1808 I think we might need strict_brace so that foo{ is disallowed. It has to
1809 be foo\{ or foo{a,b}. Or just turn that on with parse_brace? After you
1810 form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
1811 Lit_RBrace? Maybe this is pre-parsing step in the WordParser?
1812 """
1813 if_node = command.If.CreateNull(alloc_lists=True)
1814 if_node.if_kw = if_kw
1815
1816 body1 = self.ParseBraceGroup()
1817 # Every arm has 1 spid, unlike shell-style
1818 # TODO: We could get the spids from the brace group.
1819 arm = IfArm(if_kw, cond, None, body1.children, [if_kw.span_id])
1820
1821 if_node.arms.append(arm)
1822
1823 self._GetWord()
1824 if self.c_id in (Id.KW_Elif, Id.KW_Else):
1825 self._ParseYshElifElse(if_node)
1826 # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
1827 # spid because that's in the BraceGroup.
1828 return if_node
1829
1830 def _ParseElifElse(self, if_node):
1831 # type: (command.If) -> None
1832 """
1833 else_part: (Elif command_list Then command_list)* Else command_list ;
1834 """
1835 arms = if_node.arms
1836
1837 self._GetWord()
1838 while self.c_id == Id.KW_Elif:
1839 elif_kw = word_.AsKeywordToken(self.cur_word)
1840 self._SetNext() # past 'elif'
1841
1842 cond = self._ParseConditionList()
1843
1844 ate = self._Eat(Id.KW_Then)
1845 then_kw = word_.AsKeywordToken(ate)
1846
1847 body = self._ParseCommandList()
1848 arm = IfArm(elif_kw, cond, then_kw, body.children,
1849 [elif_kw.span_id, then_kw.span_id])
1850
1851 arms.append(arm)
1852
1853 self._GetWord()
1854 if self.c_id == Id.KW_Else:
1855 else_kw = word_.AsKeywordToken(self.cur_word)
1856 self._SetNext() # past 'else'
1857 body = self._ParseCommandList()
1858 if_node.else_action = body.children
1859 else:
1860 else_kw = None
1861
1862 if_node.else_kw = else_kw
1863
    def ParseIf(self):
        # type: () -> command.If
        """Parse an if command, in either shell or YSH style.

        if_clause : If command_list Then command_list else_part? Fi ;

        open      : '{' | Then
        close     : '}' | Fi

        ysh_if    : If ( command_list | '(' expr ')' )
                    open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            # YSH style: '{' instead of 'then'; hand off the rest
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children,
                    [if_kw.span_id, then_kw.span_id])
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1914
1915 def ParseTime(self):
1916 # type: () -> command_t
1917 """Time [-p] pipeline.
1918
1919 According to bash help.
1920 """
1921 time_kw = word_.AsKeywordToken(self.cur_word)
1922 self._SetNext() # skip time
1923 pipeline = self.ParsePipeline()
1924 return command.TimeBlock(time_kw, pipeline)
1925
1926 def ParseCompoundCommand(self):
1927 # type: () -> command_t
1928 """
1929 Refactoring: we put io_redirect* here instead of in function_body and
1930 command.
1931
1932 compound_command : brace_group io_redirect*
1933 | subshell io_redirect*
1934 | for_clause io_redirect*
1935 | while_clause io_redirect*
1936 | until_clause io_redirect*
1937 | if_clause io_redirect*
1938 | case_clause io_redirect*
1939
1940 # bash extensions
1941 | time_clause
1942 | [[ BoolExpr ]]
1943 | (( ArithExpr ))
1944 """
1945 self._GetWord()
1946 if self.c_id == Id.Lit_LBrace:
1947 n1 = self.ParseBraceGroup()
1948 n1.redirects = self._ParseRedirectList()
1949 return n1
1950 if self.c_id == Id.Op_LParen:
1951 n2 = self.ParseSubshell()
1952 n2.redirects = self._ParseRedirectList()
1953 return n2
1954
1955 if self.c_id == Id.KW_For:
1956 # Note: Redirects parsed in this call. POSIX for and bash for (( have
1957 # redirects, but YSH for doesn't.
1958 return self.ParseFor()
1959 if self.c_id in (Id.KW_While, Id.KW_Until):
1960 keyword = word_.AsKeywordToken(self.cur_word)
1961 n3 = self.ParseWhileUntil(keyword)
1962 n3.redirects = self._ParseRedirectList()
1963 return n3
1964
1965 if self.c_id == Id.KW_If:
1966 n4 = self.ParseIf()
1967 n4.redirects = self._ParseRedirectList()
1968 return n4
1969 if self.c_id == Id.KW_Case:
1970 n5 = self.ParseCase()
1971 n5.redirects = self._ParseRedirectList()
1972 return n5
1973
1974 if self.c_id == Id.KW_DLeftBracket:
1975 n6 = self.ParseDBracket()
1976 n6.redirects = self._ParseRedirectList()
1977 return n6
1978 if self.c_id == Id.Op_DLeftParen:
1979 if not self.parse_opts.parse_dparen():
1980 p_die('You may want a space between parens (parse_dparen)',
1981 loc.Word(self.cur_word))
1982 n7 = self.ParseDParen()
1983 n7.redirects = self._ParseRedirectList()
1984 return n7
1985
1986 # bash extensions: no redirects
1987 if self.c_id == Id.KW_Time:
1988 return self.ParseTime()
1989
1990 # Happens in function body, e.g. myfunc() oops
1991 p_die('Unexpected word while parsing compound command',
1992 loc.Word(self.cur_word))
1993 assert False # for MyPy
1994
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a POSIX-style shell function definition: name() body.

        function_header : fname '(' ')'
        function_def    : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a
        grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        # Make ')' lex as Right_ShFunction instead of Op_RParen.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            # Track variable declarations within the function body.
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            return None  # unreachable; p_die raises
2047
    def ParseKshFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a ksh-style function definition: function f { ... }.

        ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body

        Looking at the 'function' keyword.  Unlike the POSIX 'f() { ... }'
        form, the parens are optional here.

        Raises:
          error.Parse (via p_die) if the name is not a valid function name.
        """
        keyword_tok = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip past 'function'
        self._GetWord()

        cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(cur_word)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid KSH-style function name', loc.Word(cur_word))

        name_word = self.cur_word  # saved for the name_tok location below
        self._SetNext()  # skip past the function name

        # Optional '()' after the name.
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            # Make the lexer translate the matching ')' into Right_ShFunction.
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
            self._SetNext()
            self._Eat(Id.Right_ShFunction)

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        # var_checker context for the function body; errors are blamed on the
        # 'function' keyword token.
        with ctx_VarChecker(self.var_checker, keyword_tok):
            func.body = self.ParseCompoundCommand()

        func.keyword = keyword_tok
        func.name_tok = location.LeftTokenForWord(name_word)
        return func
2082
    def ParseYshProc(self):
        # type: () -> Proc
        """Parse a YSH proc definition.  Looking at KW_Proc.

        The word parser parses the signature into 'node'; this method then
        registers every declared parameter with the static variable checker,
        and finally parses the body in Proc command mode.
        """
        node = Proc.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            with ctx_CmdMode(self, cmd_mode_e.Proc):
                self.w_parser.ParseProc(node)
                if node.sig.tag() == proc_sig_e.Closed:  # Register params
                    sig = cast(proc_sig.Closed, node.sig)

                    # Treat 3 kinds of params as variables.  Each group has
                    # regular params plus an optional rest_of (...rest) param.
                    wp = sig.word
                    if wp:
                        for param in wp.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if wp.rest_of:
                            r = wp.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)
                        # We COULD register __out here but it would require a different API.
                        #if param.prefix and param.prefix.id == Id.Arith_Colon:
                        #    self.var_checker.Check(Id.KW_Var, '__' + param.name)

                    # Positional params, e.g. proc p ( ; x, y)
                    posit = sig.positional
                    if posit:
                        for param in posit.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if posit.rest_of:
                            r = posit.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    # Named params
                    named = sig.named
                    if named:
                        for param in named.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if named.rest_of:
                            r = named.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    # The trailing block param, if any, is also a variable.
                    if sig.block_param:
                        b = sig.block_param
                        self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)

                self._SetNext()
                node.body = self.ParseBraceGroup()
                # No redirects for YSH procs (only at call site)

        return node
2139
    def ParseYshFunc(self):
        # type: () -> Func
        """Parse a YSH func definition.

        ysh_func: (
          Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
        )
        Looking at KW_Func.

        Like ParseYshProc, the signature is parsed by the word parser, and all
        declared parameters are registered with the static variable checker
        before the body is parsed.
        """
        node = Func.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            self.w_parser.ParseFunc(node)

            # Positional params, plus optional ...rest param
            posit = node.positional
            if posit:
                for param in posit.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if posit.rest_of:
                    r = posit.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            # Named params, plus optional ...rest param
            named = node.named
            if named:
                for param in named.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if named.rest_of:
                    r = named.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            self._SetNext()
            # Func bodies are parsed in Func mode (affects e.g. 'const' checks).
            with ctx_CmdMode(self, cmd_mode_e.Func):
                node.body = self.ParseBraceGroup()

        return node
2179
2180 def ParseCoproc(self):
2181 # type: () -> command_t
2182 """
2183 TODO: command.Coproc?
2184 """
2185 raise NotImplementedError()
2186
2187 def ParseSubshell(self):
2188 # type: () -> command.Subshell
2189 """
2190 subshell : '(' compound_list ')'
2191
2192 Looking at Op_LParen
2193 """
2194 left = word_.AsOperatorToken(self.cur_word)
2195 self._SetNext() # skip past (
2196
2197 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2198 # translation stack, we want to delay it.
2199
2200 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2201
2202 c_list = self._ParseCommandList()
2203 if len(c_list.children) == 1:
2204 child = c_list.children[0]
2205 else:
2206 child = c_list
2207
2208 ate = self._Eat(Id.Right_Subshell)
2209 right = word_.AsOperatorToken(ate)
2210
2211 return command.Subshell(left, child, right, None) # no redirects yet
2212
2213 def ParseDBracket(self):
2214 # type: () -> command.DBracket
2215 """Pass the underlying word parser off to the boolean expression
2216 parser."""
2217 left = word_.AsKeywordToken(self.cur_word)
2218 # TODO: Test interactive. Without closing ]], you should get > prompt
2219 # (PS2)
2220
2221 self._SetNext() # skip [[
2222 b_parser = bool_parse.BoolParser(self.w_parser)
2223 bnode, right = b_parser.Parse() # May raise
2224 return command.DBracket(left, bnode, right, None) # no redirects yet
2225
2226 def ParseDParen(self):
2227 # type: () -> command.DParen
2228 left = word_.AsOperatorToken(self.cur_word)
2229
2230 self._SetNext() # skip ((
2231 anode, right = self.w_parser.ReadDParen()
2232 assert anode is not None
2233
2234 return command.DParen(left, anode, right, None) # no redirects yet
2235
    def ParseCommand(self):
        # type: () -> command_t
        """Dispatch on the current word/token to the right command parser.

        command : simple_command
                | compound_command   # OSH edit: io_redirect* folded in
                | function_def
                | ksh_function_def

                # YSH extensions
                | proc NAME ...
                | const ...
                | var ...
                | setglobal ...
                | setref ...
                | setvar ...
                | _ EXPR
                | = EXPR
                ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1
        """
        # do/done/then/etc. never BEGIN a command.
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle.  Code
            # inside procs should be YSH, full stop.  That means ysh:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                # 2024-02: This avoids bad syntax errors if you type YSH code
                # into OSH
                # proc p (x) { echo hi } would actually be parsed as a
                # command.Simple!  Shell compatibility: quote 'proc'
                p_die("proc is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func():
                return self.ParseYshFunc()
            else:
                # Same reasoning as above, for 'proc'
                p_die("func is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        # const is only allowed at the top level (Shell mode).
        if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
            p_die("const can't be inside proc or func. Use var instead.",
                  loc.Word(self.cur_word))

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Register each declared name with the static checker.
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name, lhs.left)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.KW_Call, Id.Lit_Equals):
            # = 42 + a[i]
            # call mylist->append('x')

            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            # ensured by Kind.Word
            cur_word = cast(CompoundWord, self.cur_word)

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    # NOTE(review): reads tok.tval directly, while the VarDecl
                    # below uses lexer.TokenVal(tok) -- confirm the two are
                    # equivalent for this token.
                    if (match.IsValidVarName(tok.tval) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        # Only valid when the enclosing Hay node accepts
                        # attributes (top of hay_attrs_stack is true).
                        if len(self.hay_attrs_stack
                               ) and self.hay_attrs_stack[-1]:
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here.  Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(
                                None,
                                [NameType(tok, lexer.TokenVal(tok), None)],
                                enode)
                        else:
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy
2394
    def ParsePipeline(self):
        # type: () -> command_t
        """
        pipeline : Bang? command ( '|' newline_ok command )* ;

        Returns the bare command when there is no '!' and no pipe, to keep
        the AST small.
        """
        negated = None  # type: Optional[Token]

        self._GetWord()
        if self.c_id == Id.KW_Bang:
            # Leading '!' token is recorded on the Pipeline node.
            negated = word_.AsKeywordToken(self.cur_word)
            self._SetNext()

        child = self.ParseCommand()
        assert child is not None

        children = [child]

        # One token of lookahead: is there a pipe operator?
        self._GetWord()
        if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
            if negated is not None:
                # '! cmd' with no pipe still needs a Pipeline node to hold '!'.
                node = command.Pipeline(negated, children, [])
                return node
            else:
                return child  # no pipeline

        # | or |&
        ops = []  # type: List[Token]
        while True:
            op = word_.AsOperatorToken(self.cur_word)
            ops.append(op)

            self._SetNext()  # skip past Id.Op_Pipe or Id.Op_PipeAmp
            self._NewlineOk()  # the command may continue on the next line

            child = self.ParseCommand()
            children.append(child)

            self._GetWord()
            if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
                break

        return command.Pipeline(negated, children, ops)
2437
2438 def ParseAndOr(self):
2439 # type: () -> command_t
2440 self._GetWord()
2441 if self.c_id == Id.Word_Compound:
2442 first_word_tok = word_.LiteralToken(self.cur_word)
2443 if first_word_tok is not None and first_word_tok.id == Id.Lit_TDot:
2444 # We got '...', so parse in multiline mode
2445 self._SetNext()
2446 with word_.ctx_Multiline(self.w_parser):
2447 return self._ParseAndOr()
2448
2449 # Parse in normal mode, not multiline
2450 return self._ParseAndOr()
2451
    def _ParseAndOr(self):
        # type: () -> command_t
        """
        and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
               | pipeline

        Note that it is left recursive and left associative.  We parse it
        iteratively with a token of lookahead.

        Returns the bare pipeline when there is no && or ||, to keep the
        AST small.
        """
        child = self.ParsePipeline()
        assert child is not None

        self._GetWord()
        if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
            return child

        # Flat representation: children and the parallel list of && / || ops.
        ops = []  # type: List[Token]
        children = [child]

        while True:
            ops.append(word_.AsOperatorToken(self.cur_word))

            self._SetNext()  # skip past || &&
            self._NewlineOk()  # command may continue on the next line

            child = self.ParsePipeline()
            children.append(child)

            self._GetWord()
            if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
                break

        return command.AndOr(children, ops)
2485
2486 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2487
2488 # At the top level, we execute after every line, e.g. to
2489 # - process alias (a form of dynamic parsing)
2490 # - process 'exit', because invalid syntax might appear after it
2491
2492 # On the other hand, for a while loop body, we parse the whole thing at once,
2493 # and then execute it. We don't want to parse it over and over again!
2494
2495 # COMPARE
2496 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2497 # command_term : and_or (trailer and_or)* ; # CHILDREN
2498
    def _ParseCommandLine(self):
        # type: () -> command_t
        """
        command_line : and_or (sync_op and_or)* trailer? ;
        trailer : sync_op newline_ok
                | NEWLINES;
        sync_op : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively.  Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return.  (We're only parsing a single
              line.)
           b. If there's a sync_op, process it.  Then look for a newline and
              return.  Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in command.Sentence to record the trailing ; or &.
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die('Invalid word while parsing command line',
                      loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
2553
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term : and_or (trailer and_or)* ;
        trailer : sync_op newline_ok
                | NEWLINES;
        sync_op : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in command.Sentence to record the trailing ; or &.
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2633
2634 def _ParseCommandList(self):
2635 # type: () -> command.CommandList
2636 """
2637 command_list : newline_ok command_term trailer? ;
2638
2639 This one is called by all the compound commands. It's basically a command
2640 block.
2641
2642 NOTE: Rather than translating the CFG directly, the code follows a style
2643 more like this: more like this: (and_or trailer)+. It makes capture
2644 easier.
2645 """
2646 self._NewlineOk()
2647 return self._ParseCommandTerm()
2648
2649 def ParseLogicalLine(self):
2650 # type: () -> command_t
2651 """Parse a single line for main_loop.
2652
2653 A wrapper around _ParseCommandLine(). Similar but not identical to
2654 _ParseCommandList() and ParseCommandSub().
2655
2656 Raises:
2657 ParseError
2658 """
2659 self._NewlineOk()
2660 self._GetWord()
2661 if self.c_id == Id.Eof_Real:
2662 return None # main loop checks for here docs
2663 node = self._ParseCommandLine()
2664 return node
2665
2666 def ParseInteractiveLine(self):
2667 # type: () -> parse_result_t
2668 """Parse a single line for Interactive main_loop.
2669
2670 Different from ParseLogicalLine because newlines are handled differently.
2671
2672 Raises:
2673 ParseError
2674 """
2675 self._GetWord()
2676 if self.c_id == Id.Op_Newline:
2677 return parse_result.EmptyLine
2678 if self.c_id == Id.Eof_Real:
2679 return parse_result.Eof
2680
2681 node = self._ParseCommandLine()
2682 return parse_result.Node(node)
2683
2684 def ParseCommandSub(self):
2685 # type: () -> command_t
2686 """Parse $(echo hi) and `echo hi` for word_parse.py.
2687
2688 They can have multiple lines, like this: echo $( echo one echo
2689 two )
2690 """
2691 self._NewlineOk()
2692
2693 self._GetWord()
2694 if self.c_kind == Kind.Eof: # e.g. $()
2695 return command.NoOp
2696
2697 c_list = self._ParseCommandTerm()
2698 if len(c_list.children) == 1:
2699 return c_list.children[0]
2700 else:
2701 return c_list
2702
2703 def CheckForPendingHereDocs(self):
2704 # type: () -> None
2705 # NOTE: This happens when there is no newline at the end of a file, like
2706 # osh -c 'cat <<EOF'
2707 if len(self.pending_here_docs):
2708 node = self.pending_here_docs[0] # Just show the first one?
2709 h = cast(redir_param.HereDoc, node.arg)
2710 p_die('Unterminated here doc began here', loc.Word(h.here_begin))