# OILS / osh / cmd_parse.py  (View on Github | oilshell.org)
#
# 2723 lines, 1403 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
81_ = Kind_str # for debug prints
82
83TAB_CH = 9 # ord('\t')
84SPACE_CH = 32 # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the raw lines of a here doc, up to and including its terminator.

    All lines are read eagerly, rather than parsed line-by-line, because the
    body may contain multi-line constructs:

        cat <<EOF
        1 $(echo 2
        echo 3) 4
        EOF

    Returns:
      (body lines, terminator line).  Each entry is a (SourceLine, start
      offset) pair, where the offset skips any tabs stripped by <<-.
    """
    body_lines = []  # type: List[Tuple[SourceLine, int]]
    found_delim = None  # type: Tuple[SourceLine, int]
    dedent_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # An unterminated here doc is just a warning in bash.  We make it
            # fatal because we want to be strict, and because it causes
            # problems reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        text = src_line.content

        # For <<-, skip ALL leading tabs -- not spaces, and not just the
        # first tab.
        offset = 0
        if dedent_tabs:
            num_chars = len(text)
            while offset < num_chars and text[offset] == '\t':
                offset += 1

        if text[offset:].rstrip() == delimiter:
            found_delim = (src_line, offset)
            break

        body_lines.append((src_line, offset))

    return body_lines, found_delim
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Turn each here doc line into a single literal Token.

    Used for the single-quoted delimiter rule: <<'EOF' and <<-'EOF'.

    A non-zero start offset means <<- stripped leading tabs.
    """
    # Declared with the less precise word_part_t, because List[T] is an
    # invariant type
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:
        # Maintain the lossless invariant for STRIPPED tabs: record a Token
        # in the arena, even though nothing refers to it.
        #
        # Note: We could use Lit_CharsWithoutPrefix for 'single quoted' EOF
        # here docs, but it's more complex with double quoted EOF docs.
        if do_lossless:  # avoid garbage, doesn't affect correctness
            arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0,
                           src_line)

        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line)
        parts.append(tok)
    return parts
169
170
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Called at the newline after the redirect; reads the body lines and
    attaches them to r.arg (a redir_param.HereDoc).
    """
    h = cast(redir_param.HereDoc, r.arg)
    # POSIX: "If any character in word is quoted, the delimiter shall be
    # formed by performing quote removal on word, and the here-document lines
    # shall not be expanded.  Otherwise, the delimiter shall be the word
    # itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - Literal for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - Parse as word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain lossless invariant for STRIPPED tabs: add a Token to the
    # arena invariant, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0, end_line)

    # Create a Token with the end terminator.  Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line)
207
208
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a 4-tuple from DetectShAssignment.

    Handles three LHS shapes: a plain name (s=1), an indexed name whose index
    is kept as a raw string (lossless mode), and an indexed name whose index
    is parsed as arithmetic (a[x++]=1).
    """

    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            # strip the trailing '+=' to get the variable name
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            # strip the trailing '=' to get the variable name
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # In lossless mode, keep the index between the brackets as an
        # unparsed string, so the original source can be reproduced exactly.
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            # NOTE(review): multi-line a[...]= indices are not supported here
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        # The whole word was the LHS, e.g. 'x='
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        w = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(w)
        rhs = w

    return AssignPair(left_token, lhs, op, rhs)
282
283
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (List[ParsedAssignment], List[EnvPair]) -> None
    """Convert prefix assignments into EnvPair nodes on a SimpleCommand.

    Args:
      preparsed_list: assignments detected by DetectShAssignment
      more_env: output list of EnvPair to append to
    """
    for pre in preparsed_list:
        left = pre.left

        # a[x]=1 is never a valid environment binding
        if left.id != Id.Lit_VarLike:
            p_die(
                "Environment binding shouldn't look like an array assignment",
                left)

        if lexer.IsPlusEquals(left):
            p_die('Expected = in environment binding, got +=', left)

        # strip the trailing '=' to get the variable name
        name = lexer.TokenSliceRight(left, -1)

        word_parts = pre.w.parts
        num_parts = len(word_parts)
        if pre.part_offset == num_parts:
            # The whole word was the LHS, e.g. FOO= cmd
            rhs = rhs_word.Empty  # type: rhs_word_t
        else:
            w = CompoundWord(word_parts[pre.part_offset:])
            word_.TildeDetectAssign(w)
            rhs = w

        more_env.append(EnvPair(left, name, rhs))
316
317
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
    """Second pass of SimpleCommand parsing: look for assignment words.

    Returns:
      (leading assignment words, remaining words).  Detection stops at the
      first word that isn't an assignment; everything after it is a suffix
      word even if it looks like one.
    """
    assignments = []  # type: List[ParsedAssignment]
    rest = []  # type: List[CompoundWord]

    in_prefix = True
    for w in words:
        if in_prefix:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                assignments.append(
                    ParsedAssignment(left_token, close_token, part_offset, w))
                continue
            in_prefix = False  # first non-assignment ends the prefix

        rest.append(w)

    return assignments, rest
339
340
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Assemble a command.Simple node from its parsed pieces."""

    # FOO=(1 2 3) ls is not allowed.
    for pre in preparsed_list:
        if word_.HasArrayPart(pre.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(pre.w))

    # NOTE: It would be possible to check that suffix words have no array
    # parts here, but it already happens at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on
    # echo FOO[x]=1).

    assert len(suffix_words) != 0
    # {a,b,c}  # Use { before brace detection
    # ~/bin/ls # Use ~ before tilde detection
    first_part = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(first_part)

    # NOTE: We only do brace DETECTION here, not brace EXPANSION.  Therefore
    # we can't implement bash's behavior of having say {~bob,~jane}/src work,
    # because we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate',
    # but it doesn't seem worth it.
    detected = braces.BraceDetectAll(suffix_words)
    final_words = word_.TildeDetectAll(detected)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # do_fork by default
    return command.Simple(blame_tok, more_env, final_words, redirects,
                          typed_args, block, True)
386
387
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # Two parallel stacks, pushed on entry to each shell function, proc,
        # or func:
        # - self.tokens: the blaming token for location info ('proc' or
        #   another token)
        # - self.names: declared variable name -> declaring keyword Id
        self.tokens = []  # type: List[Token]
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Enter the scope of a shell function, proc, or func.

        Bash allows nesting, but it's confusing because it's the same as two
        functions at the top level:

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens):
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        new_scope = {}  # type: Dict[str, Id_t]
        self.names.append(new_scope)

    def Pop(self):
        # type: () -> None
        """Leave the scope entered by the matching Push()."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x
          x already declared
        setvar x:
          x is not declared
        setglobal x:
          No errors are possible; we would need all these many conditions to
          statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static

        What about bare assignment in Hay?  I think these are dynamic checks
        -- there is no static check.  Hay is for building up data
        imperatively, and then LATER, right before main(), it can be type
        checked.

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var and
        # setvar are essentially the same.
        if len(self.names) == 0:
            return

        scope = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in scope:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                scope[var_name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if var_name not in scope:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
476
477
class ctx_VarChecker(object):
    """Context manager that scopes a VarChecker to a proc/func/function body.

    Pushes a scope on entry and guarantees the matching Pop() on exit.
    """

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        # Push first: Push() may p_die() on illegal nesting, in which case
        # this object is never used.
        var_checker.Push(blame_tok)
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, unused_type, unused_value, unused_traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
492
493
class ctx_CmdMode(object):
    """Context manager that temporarily switches the parser's cmd_mode.

    Restores the previous mode on exit, even if parsing raises.
    """

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode  # saved for __exit__
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, unused_type, unused_value, unused_traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
509
510
# Keywords that terminate a command list rather than start a new command,
# e.g. 'done' closing a loop or 'fi' closing an if.  The parser stops at
# these instead of consuming them; see _AtSecondaryKeyword().
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
515
516
517class CommandParser(object):
518 """Recursive descent parser derived from POSIX shell grammar.
519
520 This is a BNF grammar:
521 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
522
523 - Augmented with both bash/OSH and YSH constructs.
524
525 - We use regex-like iteration rather than recursive references
526 ? means optional (0 or 1)
527 * means 0 or more
528 + means 1 or more
529
530 - Keywords are spelled in Caps:
531 If Elif Case
532
533 - Operator tokens are quoted:
534 '(' '|'
535
536 or can be spelled directly if it matters:
537
538 Op_LParen Op_Pipe
539
540 - Non-terminals are snake_case:
541 brace_group subshell
542
543 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
544 the production should be in the method docstrings, e.g.
545
546 def ParseSubshell():
547 "
548 subshell : '(' compound_list ')'
549
550 Looking at Op_LParen # Comment to say how this method is called
551 "
552
553 The grammar may be factored to make parsing easier.
554 """
555
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parsing state (aliases, arena factories, ...)
          parse_opts: parse-time option view, e.g. parse_brace, parse_at
          w_parser: the word parser this command parser drives
          lexer: for pushing hints and lookahead
          line_reader: source of input lines, also used for here docs
          eof_id: token Id that terminates parsing (default Eof_Real)
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id  # Id that ends parsing for this instance

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser instances.
        # I think this OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
597
    # Init_() function for "keyword arg"
    def Init_AliasesInFlight(self, aliases_in_flight):
        # type: (AliasesInFlight) -> None
        # Share the caller's list, so recursive alias expansion is detected
        # across nested CommandParser instances (see _MaybeExpandAliases).
        self.aliases_in_flight = aliases_in_flight
602
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined  # Kind of cur_word
        self.c_id = Id.Undefined_Tok  # Id of cur_word

        # Here docs seen on the current line; bodies are parsed at the next
        # newline (see _GetWord).
        self.pending_here_docs = [
        ]  # type: List[Redir]  # should have HereLiteral arg
617
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.
        """
        # Reset word parser, lexer, and line reader together so they agree
        # on the current input position.
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
627
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        # The next word will be read in the normal ShCommand lexer mode.
        self.next_lex_mode = lex_mode_e.ShCommand
636
    def _SetNextBrack(self):
        # type: () -> None
        # Like _SetNext(), but read the next word in a mode where [ can
        # start a lazy typed-arg list (see the Op_LBracket case in
        # _ScanSimpleCommand).
        self.next_lex_mode = lex_mode_e.ShCommandBrack
640
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # Newline reached: fill in the bodies of all here docs
                    # opened on this line.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            self.c_id = word_.CommandId(self.cur_word)
            # Mark the cursor as fulfilled until the next _SetNext().
            self.next_lex_mode = lex_mode_e.Undefined
669
670 def _Eat(self, c_id, msg=None):
671 # type: (Id_t, Optional[str]) -> word_t
672 """Consume a word of a type, maybe showing a custom error message.
673
674 Args:
675 c_id: the Id we expected
676 msg: improved error message
677 """
678 self._GetWord()
679 if self.c_id != c_id:
680 if msg is None:
681 msg = 'Expected word type %s, got %s' % (
682 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
683 p_die(msg, loc.Word(self.cur_word))
684
685 skipped = self.cur_word
686 self._SetNext()
687 return skipped
688
689 def _NewlineOk(self):
690 # type: () -> None
691 """Check for optional newline and consume it."""
692 self._GetWord()
693 if self.c_id == Id.Op_Newline:
694 self._SetNext()
695
696 def _AtSecondaryKeyword(self):
697 # type: () -> bool
698 self._GetWord()
699 if self.c_id in SECONDARY_KEYWORDS:
700 return True
701 return False
702
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse one redirect at the cursor, e.g. >out, 2>&1, {fd}>out, <<EOF.

        Precondition: the current word is Kind.Redir.  A here doc is returned
        with an empty body; it is filled in at the next newline (see
        _GetWord and _ParseHereDocBody).
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
        # possibly "unify" the IDs by subtracting a constant like 8 or 16?

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':
            # {fd}>out style: the target is a variable name
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():
            # Explicit descriptor like 2> or 10> (at most 2 digits here)
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:
            # No explicit descriptor; use the operator's default
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
765
766 def _ParseRedirectList(self):
767 # type: () -> List[Redir]
768 """Try parsing any redirects at the cursor.
769
770 This is used for blocks only, not commands.
771 """
772 redirects = [] # type: List[Redir]
773 while True:
774 # This prediction needs to ONLY accept redirect operators. Should we
775 # make them a separate Kind?
776 self._GetWord()
777 if self.c_kind != Kind.Redir:
778 break
779
780 node = self.ParseRedirect()
781 redirects.append(node)
782 self._SetNext()
783
784 return redirects
785
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

        simple_command   : cmd_prefix cmd_word cmd_suffix
                         | cmd_prefix cmd_word
                         | cmd_prefix
                         | cmd_name cmd_suffix
                         | cmd_name
                         ;
        cmd_name         : WORD                   /* Apply rule 7a */
                         ;
        cmd_word         : WORD                   /* Apply rule 7b */
                         ;
        cmd_prefix       :            io_redirect
                         | cmd_prefix io_redirect
                         |            ASSIGNMENT_WORD
                         | cmd_prefix ASSIGNMENT_WORD
                         ;
        cmd_suffix       :            io_redirect
                         | cmd_suffix io_redirect
                         |            WORD
                         | cmd_suffix WORD

        YSH grammar:

        simple_command =
          cmd_prefix* word+ typed_args? BraceGroup? cmd_suffix*

        typed_args =
          '(' arglist ')'
        | '[' arglist ']'

        Notably, redirects shouldn't appear after between typed args and
        BraceGroup.
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0  # word index, to special-case the first word
        while True:
            self._GetWord()
            if self.c_kind == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif self.c_kind == Kind.Word:
                if self.parse_opts.parse_brace():
                    # Treat { and } more like operators
                    if self.c_id == Id.Lit_LBrace:
                        if self.allow_block:  # Disabled for if/while condition, etc.

                            # allow x = 42
                            self.hay_attrs_stack.append(first_word_caps)
                            brace_group = self.ParseBraceGroup()

                            # So we can get the source code back later
                            lines = self.arena.SaveLinesAndDiscard(
                                brace_group.left, brace_group.right)
                            block = LiteralBlock(brace_group, lines)

                            self.hay_attrs_stack.pop()

                            if 0:
                                print('--')
                                block.PrettyPrint()
                                print('\n--')
                        # A block always ends the simple command
                        break
                    elif self.c_id == Id.Lit_RBrace:
                        # Another thing: { echo hi }
                        # We're DONE!!!
                        break

                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    #
                    # Can we remove this StaticEval() call, and just look
                    # inside Token?  I think once we get rid of SHELL nodes,
                    # this will be simpler.

                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            elif self.c_id == Id.Op_LParen:
                # 1. Check that there's a preceding space
                prev_byte = self.lexer.ByteLookBack()
                if prev_byte not in (SPACE_CH, TAB_CH):
                    if self.parse_opts.parse_at():
                        p_die('Space required before (',
                              loc.Word(self.cur_word))
                    else:
                        # inline func call like @sorted(x) is invalid in OSH,
                        # but the solution isn't a space
                        p_die(
                            'Unexpected left paren (might need a space before it)',
                            loc.Word(self.cur_word))

                # 2. Check that it's not ().  We disallow this because it's a
                # no-op and there could be confusion with shell func defs.
                # For some reason we need to call lexer.LookPastSpace, not
                # w_parser.LookPastSpace.  I think this is because we're at (,
                # which is an operator token.  All the other cases are like
                # 'x=', which is PART of a word, and we don't know if it will
                # end.
                next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
                if next_id == Id.Op_RParen:
                    p_die('Empty arg list not allowed',
                          loc.Word(self.cur_word))

                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_eager_arglist)

            elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_lazy_arglist)

            else:
                break

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1
        return redirects, words, typed_args, block
931
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node, or None.

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out
        a better places.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.

        Returns:
          A command node if any aliases were expanded, or None otherwise.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]  # pieces of the replacement text
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only plain, unquoted words can be alias names
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want
            # to prevent expansion of the second word in 'echo echo', so we
            # add 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own
        # Arena.  This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error.
                    # We don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1070
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar (TODO: port to
        grammar/Shell.g)

        io_file : '<' filename
                | LESSAND filename
                  ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects.  Append them to separate lists.
        2) Look for the first non-assignment word.  If it's declare, etc., then
        keep parsing words AND assign words.  Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have the of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array literal
        in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted.  e.g.
        <<EOF vs <<'EOF'.
        """
        # Gather redirects, plain words, and (YSH only) typed args ( ) and a
        # trailing block { }.
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # A single location to blame when typed args/blocks appear where they
        # aren't allowed; typed_args.left wins over the block's brace.
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g.  >out.txt  # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # Redirect-only command: no words, no env bindings.
            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading FOO=bar bindings from the remaining command words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            # and inside Hay Attr blocks
            # But allow X=Y at the top level
            #  for interactive use foo=bar
            #  for global constants GLOBAL=~/src
            #    because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion.  We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        # Is the first word break/continue/return/exit?
        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x - inside procs and shell functions
                # return (x) - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors.  (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
1250
1251 def ParseBraceGroup(self):
1252 # type: () -> BraceGroup
1253 """
1254 Original:
1255 brace_group : LBrace command_list RBrace ;
1256
1257 YSH:
1258 brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;
1259
1260 The doc comment can only occur if there's a newline.
1261 """
1262 ate = self._Eat(Id.Lit_LBrace)
1263 left = word_.BraceToken(ate)
1264
1265 doc_word = None # type: word_t
1266 self._GetWord()
1267 if self.c_id == Id.Op_Newline:
1268 self._SetNext()
1269 # Set a flag so we don't skip over ###
1270 with word_.ctx_EmitDocToken(self.w_parser):
1271 self._GetWord()
1272
1273 if self.c_id == Id.Ignored_Comment:
1274 doc_word = self.cur_word
1275 self._SetNext()
1276
1277 # Id.Ignored_Comment means it's a Token, or None
1278 doc_token = cast(Token, doc_word)
1279
1280 c_list = self._ParseCommandList()
1281
1282 ate = self._Eat(Id.Lit_RBrace)
1283 right = word_.BraceToken(ate)
1284
1285 # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
1286 # would allow us to revert this back to None, which was changed in
1287 # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
1288 # behavior saves allocations, but is less type safe.
1289 return BraceGroup(left, doc_token, c_list.children, [],
1290 right) # no redirects yet
1291
1292 def ParseDoGroup(self):
1293 # type: () -> command.DoGroup
1294 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1295
1296 do_group : Do command_list Done ; /* Apply rule 6 */
1297 """
1298 ate = self._Eat(Id.KW_Do)
1299 do_kw = word_.AsKeywordToken(ate)
1300
1301 c_list = self._ParseCommandList() # could be anything
1302
1303 ate = self._Eat(Id.KW_Done)
1304 done_kw = word_.AsKeywordToken(ate)
1305
1306 return command.DoGroup(do_kw, c_list.children, done_kw)
1307
1308 def ParseForWords(self):
1309 # type: () -> Tuple[List[CompoundWord], Optional[Token]]
1310 """
1311 for_words : WORD* for_sep
1312 ;
1313 for_sep : ';' newline_ok
1314 | NEWLINES
1315 ;
1316 """
1317 words = [] # type: List[CompoundWord]
1318 # The token of any semi-colon, so we can remove it.
1319 semi_tok = None # type: Optional[Token]
1320
1321 while True:
1322 self._GetWord()
1323 if self.c_id == Id.Op_Semi:
1324 tok = cast(Token, self.cur_word)
1325 semi_tok = tok
1326 self._SetNext()
1327 self._NewlineOk()
1328 break
1329 elif self.c_id == Id.Op_Newline:
1330 self._SetNext()
1331 break
1332 elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1333 break
1334
1335 if self.cur_word.tag() != word_e.Compound:
1336 # TODO: Can we also show a pointer to the 'for' keyword?
1337 p_die('Invalid word in for loop', loc.Word(self.cur_word))
1338
1339 w2 = cast(CompoundWord, self.cur_word)
1340 words.append(w2)
1341 self._SetNext()
1342 return words, semi_tok
1343
1344 def _ParseForExprLoop(self, for_kw):
1345 # type: (Token) -> command.ForExpr
1346 """
1347 Shell:
1348 for '((' init ';' cond ';' update '))' for_sep? do_group
1349
1350 YSH:
1351 for '((' init ';' cond ';' update '))' for_sep? brace_group
1352 """
1353 node = self.w_parser.ReadForExpression()
1354 node.keyword = for_kw
1355
1356 self._SetNext()
1357
1358 self._GetWord()
1359 if self.c_id == Id.Op_Semi:
1360 self._SetNext()
1361 self._NewlineOk()
1362 elif self.c_id == Id.Op_Newline:
1363 self._SetNext()
1364 elif self.c_id == Id.KW_Do: # missing semicolon/newline allowed
1365 pass
1366 elif self.c_id == Id.Lit_LBrace: # does NOT require parse_brace
1367 pass
1368 else:
1369 p_die('Invalid word after for expression', loc.Word(self.cur_word))
1370
1371 if self.c_id == Id.Lit_LBrace:
1372 node.body = self.ParseBraceGroup()
1373 else:
1374 node.body = self.ParseDoGroup()
1375 return node
1376
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse a for-each loop, after 'for' has been consumed.

        Handles:
          for x in a b; do ...; done     # POSIX words
          for x, y in (expr) { ... }     # YSH expression, up to 3 loop vars
          for x; do ...; done            # no 'in': implicitly loop over "$@"
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        # Read 1 to 3 loop variable names, optionally comma-separated.
        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'. But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y)  # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH iterable expression: for x in (mylist) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Shell-style word list: for x in a b c; do ...
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                # Detect brace expansion and tilde sub statically.
                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1480
1481 def ParseFor(self):
1482 # type: () -> command_t
1483 """
1484 TODO: Update the grammar
1485
1486 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1487 | For '((' ... TODO
1488 """
1489 ate = self._Eat(Id.KW_For)
1490 for_kw = word_.AsKeywordToken(ate)
1491
1492 self._GetWord()
1493 if self.c_id == Id.Op_DLeftParen:
1494 if not self.parse_opts.parse_dparen():
1495 p_die("Bash for loops aren't allowed (parse_dparen)",
1496 loc.Word(self.cur_word))
1497
1498 # for (( i = 0; i < 10; i++)
1499 n1 = self._ParseForExprLoop(for_kw)
1500 n1.redirects = self._ParseRedirectList()
1501 return n1
1502 else:
1503 # for x in a b; do echo hi; done
1504 n2 = self._ParseForEachLoop(for_kw)
1505 n2.redirects = self._ParseRedirectList()
1506 return n2
1507
1508 def _ParseConditionList(self):
1509 # type: () -> condition_t
1510 """
1511 condition_list: command_list
1512
1513 This is a helper to parse a condition list for if commands and while/until
1514 loops. It will throw a parse error if there are no conditions in the list.
1515 """
1516 self.allow_block = False
1517 commands = self._ParseCommandList()
1518 self.allow_block = True
1519
1520 if len(commands.children) == 0:
1521 p_die("Expected a condition", loc.Word(self.cur_word))
1522
1523 return condition.Shell(commands.children)
1524
1525 def ParseWhileUntil(self, keyword):
1526 # type: (Token) -> command.WhileUntil
1527 """
1528 while_clause : While command_list do_group ;
1529 until_clause : Until command_list do_group ;
1530 """
1531 self._SetNext() # skip keyword
1532
1533 if (self.parse_opts.parse_paren() and
1534 self.w_parser.LookPastSpace() == Id.Op_LParen):
1535 enode = self.w_parser.ParseYshExprForCommand()
1536 cond = condition.YshExpr(enode) # type: condition_t
1537 else:
1538 cond = self._ParseConditionList()
1539
1540 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1541 # should be unchanged. To be sure we should desugar.
1542 self._GetWord()
1543 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1544 # while test -f foo {
1545 body_node = self.ParseBraceGroup() # type: command_t
1546 else:
1547 body_node = self.ParseDoGroup()
1548
1549 # no redirects yet
1550 return command.WhileUntil(keyword, cond, body_node, None)
1551
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a POSIX case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # Retag the next ) as the end of a case pattern, not a subshell
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # Read pattern words separated by |
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The arm body is optional; it's absent if we're already looking at a
        # terminator (;; ;& ;;&) or esac.
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp,
                             Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp):
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1609
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case statement.

        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`. We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        # '(' or '/' means pat_exprs, pat_else, or pat_eggex; anything else is
        # a word pattern.
        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words
            pat_words = [] # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # First word becomes the arm's left location.
                # NOTE(review): this reads cur_word after _SetNext; it appears
                # _SetNext is lazy and cur_word only advances on the next
                # _GetWord -- confirm.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1667
1668 def ParseYshCase(self, case_kw):
1669 # type: (Token) -> command.Case
1670 """
1671 ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;
1672
1673 Looking at: token after 'case'
1674 """
1675 enode = self.w_parser.ParseYshExprForCommand()
1676 to_match = case_arg.YshExpr(enode)
1677
1678 ate = self._Eat(Id.Lit_LBrace)
1679 arms_start = word_.BraceToken(ate)
1680
1681 discriminant = self.w_parser.NewlineOkForYshCase()
1682
1683 # Note: for now, zero arms are accepted, just like POSIX case $x in esac
1684 arms = [] # type: List[CaseArm]
1685 while discriminant != Id.Op_RBrace:
1686 arm = self.ParseYshCaseArm(discriminant)
1687 arms.append(arm)
1688
1689 discriminant = self.w_parser.NewlineOkForYshCase()
1690
1691 # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
1692 # token is read as an Id.Op_RBrace, but we need to store this as a
1693 # Id.Lit_RBrace.
1694 ate = self._Eat(Id.Op_RBrace)
1695 arms_end = word_.AsOperatorToken(ate)
1696 arms_end.id = Id.Lit_RBrace
1697
1698 return command.Case(case_kw, to_match, arms_start, arms, arms_end,
1699 None)
1700
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse a POSIX-style case.

        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
        """
        self._GetWord()
        w = self.cur_word
        # YSH disallows a bare constant here; it's almost always meant to be $x
        if not self.parse_opts.parse_bare_word():
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string.  You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:  # this is Kind.Word
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1752
1753 def ParseCase(self):
1754 # type: () -> command.Case
1755 """
1756 case_clause : old_case # from POSIX
1757 | ysh_case
1758 ;
1759
1760 Looking at 'Case'
1761 """
1762 case_kw = word_.AsKeywordToken(self.cur_word)
1763 self._SetNext() # past 'case'
1764
1765 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1766 return self.ParseYshCase(case_kw)
1767 else:
1768 return self.ParseOldCase(case_kw)
1769
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse YSH-style elif/else clauses, appending arms to if_node.

        if test -f foo {
          echo foo
        } elif test -f bar; test -f spam {
          # ^ we parsed up to here
          echo bar
        } else {
          echo none
        }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            # elif (expr) { ... } vs. elif cmd { ... }
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Disallow block args while reading the condition commands
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()  # refresh c_id for the loop condition

            arm = IfArm(elif_kw, cond, None, body.children, None)
            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1803
    def _ParseYshIf(self, if_kw, cond):
        # type: (Token, condition_t) -> command.If
        """Parse a YSH-style if, given the keyword and already-parsed condition.

        if test -f foo {
          # ^ we parsed up to here
          echo foo
        } elif test -f bar; test -f spam {
          echo bar
        } else {
          echo none
        }

        NOTE: If you do something like if test -n foo{, the parser keeps going,
        and the error is confusing because it doesn't point to the right place.

        I think we might need strict_brace so that foo{ is disallowed.  It has
        to be foo\{ or foo{a,b}.  Or just turn that on with parse_brace?  After
        you form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
        Lit_RBrace?  Maybe this is pre-parsing step in the WordParser?
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_node.if_kw = if_kw

        body1 = self.ParseBraceGroup()
        # Every arm has 1 spid, unlike shell-style
        # TODO: We could get the spids from the brace group.
        arm = IfArm(if_kw, cond, None, body1.children, None)

        if_node.arms.append(arm)

        self._GetWord()
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseYshElifElse(if_node)
        # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
        # spid because that's in the BraceGroup.
        return if_node
1839
1840 def _ParseElifElse(self, if_node):
1841 # type: (command.If) -> None
1842 """
1843 else_part: (Elif command_list Then command_list)* Else command_list ;
1844 """
1845 arms = if_node.arms
1846
1847 self._GetWord()
1848 while self.c_id == Id.KW_Elif:
1849 elif_kw = word_.AsKeywordToken(self.cur_word)
1850 self._SetNext() # past 'elif'
1851
1852 cond = self._ParseConditionList()
1853
1854 ate = self._Eat(Id.KW_Then)
1855 then_kw = word_.AsKeywordToken(ate)
1856
1857 body = self._ParseCommandList()
1858 arm = IfArm(elif_kw, cond, then_kw, body.children, then_kw)
1859
1860 arms.append(arm)
1861
1862 self._GetWord()
1863 if self.c_id == Id.KW_Else:
1864 else_kw = word_.AsKeywordToken(self.cur_word)
1865 self._SetNext() # past 'else'
1866 body = self._ParseCommandList()
1867 if_node.else_action = body.children
1868 else:
1869 else_kw = None
1870
1871 if_node.else_kw = else_kw
1872
    def ParseIf(self):
        # type: () -> command.If
        """
        if_clause        : If command_list Then command_list else_part? Fi ;

        open             : '{' | Then
        close            : '}' | Fi

        ysh_if           : If ( command_list | '(' expr ')' )
                           open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        # '{' instead of 'then' switches to the YSH form for the rest
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children, then_kw)
        if_node.arms.append(arm)

        # 2nd to Nth arm
        # NOTE(review): no _GetWord here; presumably _ParseCommandList leaves
        # c_id current -- confirm.
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1922
1923 def ParseTime(self):
1924 # type: () -> command_t
1925 """Time [-p] pipeline.
1926
1927 According to bash help.
1928 """
1929 time_kw = word_.AsKeywordToken(self.cur_word)
1930 self._SetNext() # skip time
1931 pipeline = self.ParsePipeline()
1932 return command.TimeBlock(time_kw, pipeline)
1933
1934 def ParseCompoundCommand(self):
1935 # type: () -> command_t
1936 """
1937 Refactoring: we put io_redirect* here instead of in function_body and
1938 command.
1939
1940 compound_command : brace_group io_redirect*
1941 | subshell io_redirect*
1942 | for_clause io_redirect*
1943 | while_clause io_redirect*
1944 | until_clause io_redirect*
1945 | if_clause io_redirect*
1946 | case_clause io_redirect*
1947
1948 # bash extensions
1949 | time_clause
1950 | [[ BoolExpr ]]
1951 | (( ArithExpr ))
1952 """
1953 self._GetWord()
1954 if self.c_id == Id.Lit_LBrace:
1955 n1 = self.ParseBraceGroup()
1956 n1.redirects = self._ParseRedirectList()
1957 return n1
1958 if self.c_id == Id.Op_LParen:
1959 n2 = self.ParseSubshell()
1960 n2.redirects = self._ParseRedirectList()
1961 return n2
1962
1963 if self.c_id == Id.KW_For:
1964 # Note: Redirects parsed in this call. POSIX for and bash for (( have
1965 # redirects, but YSH for doesn't.
1966 return self.ParseFor()
1967 if self.c_id in (Id.KW_While, Id.KW_Until):
1968 keyword = word_.AsKeywordToken(self.cur_word)
1969 n3 = self.ParseWhileUntil(keyword)
1970 n3.redirects = self._ParseRedirectList()
1971 return n3
1972
1973 if self.c_id == Id.KW_If:
1974 n4 = self.ParseIf()
1975 n4.redirects = self._ParseRedirectList()
1976 return n4
1977 if self.c_id == Id.KW_Case:
1978 n5 = self.ParseCase()
1979 n5.redirects = self._ParseRedirectList()
1980 return n5
1981
1982 if self.c_id == Id.KW_DLeftBracket:
1983 n6 = self.ParseDBracket()
1984 n6.redirects = self._ParseRedirectList()
1985 return n6
1986 if self.c_id == Id.Op_DLeftParen:
1987 if not self.parse_opts.parse_dparen():
1988 p_die('You may want a space between parens (parse_dparen)',
1989 loc.Word(self.cur_word))
1990 n7 = self.ParseDParen()
1991 n7.redirects = self._ParseRedirectList()
1992 return n7
1993
1994 # bash extensions: no redirects
1995 if self.c_id == Id.KW_Time:
1996 return self.ParseTime()
1997
1998 # Happens in function body, e.g. myfunc() oops
1999 p_die(
2000 'Unexpected word while parsing compound command (%s)' %
2001 Id_str(self.c_id), loc.Word(self.cur_word))
2002 assert False # for MyPy
2003
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a POSIX-style function definition: name() body.

        function_header : fname '(' ')'
        function_def    : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a
        grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        # Retag the ) as the end of the function header
        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            # Unreachable: p_die raises.  Keeps MyPy happy.
            return None
2056
    def ParseKshFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a ksh-style definition: the 'function' keyword, optional ().

        ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
        """
        keyword_tok = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip past 'function'
        self._GetWord()

        cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(cur_word)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid KSH-style function name', loc.Word(cur_word))

        name_word = self.cur_word
        self._SetNext()  # skip past the function name

        # The () pair after the name is optional in this style.
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
            self._SetNext()
            self._Eat(Id.Right_ShFunction)

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        with ctx_VarChecker(self.var_checker, keyword_tok):
            func.body = self.ParseCompoundCommand()

        func.keyword = keyword_tok
        func.name_tok = location.LeftTokenForWord(name_word)
        return func
2091
    def ParseYshProc(self):
        # type: () -> Proc
        """Parse a YSH 'proc' definition; looking at the proc keyword.

        All declared parameters are registered with var_checker so that
        variable names in the body can be statically validated.
        """
        node = Proc.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            with ctx_CmdMode(self, cmd_mode_e.Proc):
                self.w_parser.ParseProc(node)
                if node.sig.tag() == proc_sig_e.Closed:  # Register params
                    sig = cast(proc_sig.Closed, node.sig)

                    # Treat 3 kinds of params as variables.  Each group has
                    # ordinary params plus an optional rest (...) param.
                    wp = sig.word
                    if wp:
                        for param in wp.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if wp.rest_of:
                            r = wp.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)
                        # We COULD register __out here but it would require a different API.
                        #if param.prefix and param.prefix.id == Id.Arith_Colon:
                        #    self.var_checker.Check(Id.KW_Var, '__' + param.name)

                    posit = sig.positional
                    if posit:
                        for param in posit.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if posit.rest_of:
                            r = posit.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    named = sig.named
                    if named:
                        for param in named.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if named.rest_of:
                            r = named.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    if sig.block_param:
                        b = sig.block_param
                        self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)

                self._SetNext()
                node.body = self.ParseBraceGroup()
                # No redirects for YSH procs (only at call site)

        return node
2148
    def ParseYshFunc(self):
        # type: () -> Func
        """
        ysh_func: (
            Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
        )
        Looking at KW_Func
        """
        node = Func.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            self.w_parser.ParseFunc(node)

            # Register positional and named params (and their rest params)
            # with the static var checker, as ParseYshProc does.
            posit = node.positional
            if posit:
                for param in posit.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if posit.rest_of:
                    r = posit.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            named = node.named
            if named:
                for param in named.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if named.rest_of:
                    r = named.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            self._SetNext()
            with ctx_CmdMode(self, cmd_mode_e.Func):
                node.body = self.ParseBraceGroup()

        return node
2188
    def ParseCoproc(self):
        # type: () -> command_t
        """Parse the 'coproc' construct — not implemented yet.

        TODO: command.Coproc?
        """
        raise NotImplementedError()
2195
2196 def ParseSubshell(self):
2197 # type: () -> command.Subshell
2198 """
2199 subshell : '(' compound_list ')'
2200
2201 Looking at Op_LParen
2202 """
2203 left = word_.AsOperatorToken(self.cur_word)
2204 self._SetNext() # skip past (
2205
2206 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2207 # translation stack, we want to delay it.
2208
2209 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2210
2211 c_list = self._ParseCommandList()
2212 if len(c_list.children) == 1:
2213 child = c_list.children[0]
2214 else:
2215 child = c_list
2216
2217 ate = self._Eat(Id.Right_Subshell)
2218 right = word_.AsOperatorToken(ate)
2219
2220 return command.Subshell(left, child, right, None) # no redirects yet
2221
2222 def ParseDBracket(self):
2223 # type: () -> command.DBracket
2224 """Pass the underlying word parser off to the boolean expression
2225 parser."""
2226 left = word_.AsKeywordToken(self.cur_word)
2227 # TODO: Test interactive. Without closing ]], you should get > prompt
2228 # (PS2)
2229
2230 self._SetNext() # skip [[
2231 b_parser = bool_parse.BoolParser(self.w_parser)
2232 bnode, right = b_parser.Parse() # May raise
2233 return command.DBracket(left, bnode, right, None) # no redirects yet
2234
2235 def ParseDParen(self):
2236 # type: () -> command.DParen
2237 left = word_.AsOperatorToken(self.cur_word)
2238
2239 self._SetNext() # skip ((
2240 anode, right = self.w_parser.ReadDParen()
2241 assert anode is not None
2242
2243 return command.DParen(left, anode, right, None) # no redirects yet
2244
    def ParseCommand(self):
        # type: () -> command_t
        """
        command : simple_command
                | compound_command   # OSH edit: io_redirect* folded in
                | function_def
                | ksh_function_def

                # YSH extensions
                | proc NAME ...
                | const ...
                | var ...
                | setglobal ...
                | setref ...
                | setvar ...
                | _ EXPR
                | = EXPR
                ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1
        """
        # do/done/then etc. never BEGIN a command
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle.  Code
            # inside procs should be YSH, full stop.  That means ysh:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                # 2024-02: This avoids bad syntax errors if you type YSH code
                # into OSH
                # proc p (x) { echo hi } would actually be parsed as a
                # command.Simple!  Shell compatibility: quote 'proc'
                p_die("proc is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func():
                return self.ParseYshFunc()
            else:
                # Same reasoning as above, for 'proc'
                p_die("func is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
            p_die("const can't be inside proc or func. Use var instead.",
                  loc.Word(self.cur_word))

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Register each declared name with the static var checker
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name, lhs.left)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.KW_Call, Id.Lit_Equals):
            # = 42 + a[i]
            # call mylist->append('x')

            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            # ensured by Kind.Word
            cur_word = cast(CompoundWord, self.cur_word)

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            # 'f()' not preceded by 'x=' is a shell function definition
            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    if (match.IsValidVarName(lexer.LazyStr(tok)) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        # Only allowed when the innermost Hay block is in
                        # attribute mode
                        if len(self.hay_attrs_stack
                              ) and self.hay_attrs_stack[-1]:
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here.  Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(
                                None,
                                [NameType(tok, lexer.TokenVal(tok), None)],
                                enode)
                        else:
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy
2403
2404 def ParsePipeline(self):
2405 # type: () -> command_t
2406 """
2407 pipeline : Bang? command ( '|' newline_ok command )* ;
2408 """
2409 negated = None # type: Optional[Token]
2410
2411 self._GetWord()
2412 if self.c_id == Id.KW_Bang:
2413 negated = word_.AsKeywordToken(self.cur_word)
2414 self._SetNext()
2415
2416 child = self.ParseCommand()
2417 assert child is not None
2418
2419 children = [child]
2420
2421 self._GetWord()
2422 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2423 if negated is not None:
2424 node = command.Pipeline(negated, children, [])
2425 return node
2426 else:
2427 return child # no pipeline
2428
2429 # | or |&
2430 ops = [] # type: List[Token]
2431 while True:
2432 op = word_.AsOperatorToken(self.cur_word)
2433 ops.append(op)
2434
2435 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2436 self._NewlineOk()
2437
2438 child = self.ParseCommand()
2439 children.append(child)
2440
2441 self._GetWord()
2442 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2443 break
2444
2445 return command.Pipeline(negated, children, ops)
2446
2447 def ParseAndOr(self):
2448 # type: () -> command_t
2449 self._GetWord()
2450 if self.c_id == Id.Word_Compound:
2451 first_word_tok = word_.LiteralToken(self.cur_word)
2452 if first_word_tok is not None and first_word_tok.id == Id.Lit_TDot:
2453 # We got '...', so parse in multiline mode
2454 self._SetNext()
2455 with word_.ctx_Multiline(self.w_parser):
2456 return self._ParseAndOr()
2457
2458 # Parse in normal mode, not multiline
2459 return self._ParseAndOr()
2460
2461 def _ParseAndOr(self):
2462 # type: () -> command_t
2463 """
2464 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2465 | pipeline
2466
2467 Note that it is left recursive and left associative. We parse it
2468 iteratively with a token of lookahead.
2469 """
2470 child = self.ParsePipeline()
2471 assert child is not None
2472
2473 self._GetWord()
2474 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2475 return child
2476
2477 ops = [] # type: List[Token]
2478 children = [child]
2479
2480 while True:
2481 ops.append(word_.AsOperatorToken(self.cur_word))
2482
2483 self._SetNext() # skip past || &&
2484 self._NewlineOk()
2485
2486 child = self.ParsePipeline()
2487 children.append(child)
2488
2489 self._GetWord()
2490 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2491 break
2492
2493 return command.AndOr(children, ops)
2494
2495 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2496
2497 # At the top level, we execute after every line, e.g. to
2498 # - process alias (a form of dynamic parsing)
2499 # - process 'exit', because invalid syntax might appear after it
2500
2501 # On the other hand, for a while loop body, we parse the whole thing at once,
2502 # and then execute it. We don't want to parse it over and over again!
2503
2504 # COMPARE
2505 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2506 # command_term : and_or (trailer and_or)* ; # CHILDREN
2507
    def _ParseCommandLine(self):
        # type: () -> command_t
        """
        command_line : and_or (sync_op and_or)* trailer? ;
        trailer      : sync_op newline_ok
                     | NEWLINES;
        sync_op      : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively.  Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return.  (We're only parsing a single
              line.)
           b. If there's a sync_op, process it.  Then look for a newline and
              return.  Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap the command with its terminator, e.g. 'sleep 1 &'
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                # After a sync op, a newline/EOF ends the line; anything else
                # starts another and_or
                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die(
                    'Invalid word while parsing command line (%s)' %
                    Id_str(self.c_id), loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
2563
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term : and_or (trailer and_or)* ;
        trailer      : sync_op newline_ok
                     | NEWLINES;
        sync_op      : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [
            self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi,
            Id.Op_SemiAmp, Id.Op_DSemiAmp
        ]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            # The END_LIST checks below are at different nesting levels: after
            # a newline, after ';' or '&' followed by a newline, after ';' or
            # '&' alone, and directly after the command.
            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2646
    def _ParseCommandList(self):
        # type: () -> command.CommandList
        """
        command_list : newline_ok command_term trailer? ;

        This one is called by all the compound commands.  It's basically a command
        block.

        NOTE: Rather than translating the CFG directly, the code follows a style
        more like this: (and_or trailer)+.  It makes capture easier.
        """
        self._NewlineOk()
        return self._ParseCommandTerm()
2661
    def ParseLogicalLine(self):
        # type: () -> Optional[command_t]
        """Parse a single line for main_loop.

        A wrapper around _ParseCommandLine().  Similar but not identical to
        _ParseCommandList() and ParseCommandSub().

        Returns:
          The parsed command, or None at Eof_Real (the main loop then checks
          for pending here docs).

        Raises:
          ParseError
        """
        self._NewlineOk()
        self._GetWord()
        if self.c_id == Id.Eof_Real:
            return None  # main loop checks for here docs
        node = self._ParseCommandLine()
        return node
2678
2679 def ParseInteractiveLine(self):
2680 # type: () -> parse_result_t
2681 """Parse a single line for Interactive main_loop.
2682
2683 Different from ParseLogicalLine because newlines are handled differently.
2684
2685 Raises:
2686 ParseError
2687 """
2688 self._GetWord()
2689 if self.c_id == Id.Op_Newline:
2690 return parse_result.EmptyLine
2691 if self.c_id == Id.Eof_Real:
2692 return parse_result.Eof
2693
2694 node = self._ParseCommandLine()
2695 return parse_result.Node(node)
2696
2697 def ParseCommandSub(self):
2698 # type: () -> command_t
2699 """Parse $(echo hi) and `echo hi` for word_parse.py.
2700
2701 They can have multiple lines, like this: echo $( echo one echo
2702 two )
2703 """
2704 self._NewlineOk()
2705
2706 self._GetWord()
2707 if self.c_kind == Kind.Eof: # e.g. $()
2708 return command.NoOp
2709
2710 c_list = self._ParseCommandTerm()
2711 if len(c_list.children) == 1:
2712 return c_list.children[0]
2713 else:
2714 return c_list
2715
2716 def CheckForPendingHereDocs(self):
2717 # type: () -> None
2718 # NOTE: This happens when there is no newline at the end of a file, like
2719 # osh -c 'cat <<EOF'
2720 if len(self.pending_here_docs):
2721 node = self.pending_here_docs[0] # Just show the first one?
2722 h = cast(redir_param.HereDoc, node.arg)
2723 p_die('Unterminated here doc began here', loc.Word(h.here_begin))