# OILS / osh / cmd_parse.py -- "View on Github | oilshell.org"
# 2709 lines, 1397 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
81_ = Kind_str # for debug prints
82
83TAB_CH = 9 # ord('\t')
84SPACE_CH = 32 # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the raw lines of a here doc, up to its terminator.

    We read all the lines at once, instead of parsing line-by-line, because
    of cases like this:
        cat <<EOF
        1 $(echo 2
        echo 3) 4
        EOF

    Returns (body lines, terminator line); each line is paired with the
    offset of the first byte to keep, which is nonzero only when <<-
    strips leading tabs.
    """
    here_lines = []  # type: List[Tuple[SourceLine, int]]
    last_line = None  # type: Tuple[SourceLine, int]
    strip_leading_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # An unterminated here doc is only a warning in bash.  We make
            # it fatal because we want to be strict, and because it causes
            # problems reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # If op is <<-, skip over ALL leading tabs -- not spaces, and not
        # just the first tab.
        start_offset = 0
        if strip_leading_tabs:
            num_bytes = len(line)
            while start_offset < num_bytes and line[start_offset] == '\t':
                start_offset += 1

        if line[start_offset:].rstrip() == delimiter:
            last_line = (src_line, start_offset)
            break

        here_lines.append((src_line, start_offset))

    return here_lines, last_line
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Create one Lit_Chars Token per here doc line.

    Used for the single-quoted rule: <<'EOF' and <<-'EOF'.

    start_offset is nonzero for <<- (stripped leading tabs).
    """
    # Declared with the less precise element type word_part_t, because
    # List[T] is an invariant type.
    tokens = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:
        # Maintain the lossless invariant for STRIPPED tabs: add a Token to
        # the arena, but don't refer to it in the parse tree.
        if do_lossless:  # avoid garbage; doesn't affect correctness
            arena.NewToken(Id.Ignored_HereTabs, 0, start_offset, src_line,
                           None)

        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line,
                             src_line.content[start_offset:])
        tokens.append(tok)
    return tokens
165
166
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Reads lines from line_reader up to the delimiter, then stores them in
    r.arg.stdin_parts -- either as literal tokens (quoted delimiter) or as
    parsed word parts (unquoted delimiter).
    """
    h = cast(redir_param.HereDoc, r.arg)
    # POSIX: "If any character in word is quoted, the delimiter shall be
    # formed by performing quote removal on word, and the here-document
    # lines shall not be expanded.  Otherwise, the delimiter shall be the
    # word itself."
    # NOTE: \EOF counts as quoted, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - a literal Token for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - parse the lines as a word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain the lossless invariant for STRIPPED tabs: add a Token to the
    # arena, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Ignored_HereTabs, 0, start_offset, end_line, None)

    # Create a Token with the end terminator.  Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line, '')
203
204
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (DetectShAssignment).

    Handles three LHS shapes:
      s=1 / s+=1              -> sh_lhs.Name
      a[i]=1 (lossless mode)  -> sh_lhs.UnparsedIndex (index kept as string)
      a[i]=1 (otherwise)      -> sh_lhs.IndexedName (index parsed eagerly)
    """
    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            # 'name+=' -> strip the trailing '+='
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            # 'name=' -> strip the trailing '='
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        # In lossless mode, keep the index expression as an unparsed string
        # so the original source can be reproduced exactly.
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        # tmp2 is for the intersection of the C++ / MyPy type systems
        tmp2 = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(tmp2)
        rhs = tmp2

    return AssignPair(left_token, lhs, op, rhs)
279
280
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (List[ParsedAssignment], List[EnvPair]) -> None
    """Helper to build the env bindings of a SimpleCommand node.

    Args:
      preparsed_list: assignments detected by DetectShAssignment
      more_env: output list that EnvPair nodes are appended to
    """
    for preparsed in preparsed_list:
        left_token = preparsed.left

        # a[x]=1 is not a valid environment binding
        if left_token.id != Id.Lit_VarLike:
            p_die(
                "Environment binding shouldn't look like an array assignment",
                left_token)

        # FOO+=bar is not a valid environment binding either
        if lexer.IsPlusEquals(left_token):
            p_die('Expected = in environment binding, got +=', left_token)

        var_name = lexer.TokenSliceRight(left_token, -1)

        parts = preparsed.w.parts
        offset = preparsed.part_offset
        if offset == len(parts):
            val = rhs_word.Empty  # type: rhs_word_t
        else:
            val = CompoundWord(parts[offset:])

        more_env.append(EnvPair(left_token, var_name, val))
311
312
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
    """Second pass of SimpleCommand parsing: split off leading assignments.

    Returns (assignment prefix, remaining words).  The prefix ends at the
    first word that doesn't look like a shell assignment.
    """
    preparsed_list = []  # type: List[ParsedAssignment]
    suffix_words = []  # type: List[CompoundWord]

    in_prefix = True
    for w in words:
        if in_prefix:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                preparsed_list.append(
                    ParsedAssignment(left_token, close_token, part_offset, w))
                continue
            in_prefix = False

        suffix_words.append(w)

    return preparsed_list, suffix_words
334
335
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Create a command.Simple node.

    Validates the env prefix, runs brace and tilde DETECTION on the
    argument words, and converts the assignment prefix into EnvPair nodes.
    """
    # FOO=(1 2 3) ls is not allowed.
    for preparsed in preparsed_list:
        if word_.HasArrayPart(preparsed.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(preparsed.w))

    # NOTE: It would be possible to add this check back.  But it already
    # happens at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on
    # echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # {a,b,c}   # Use { before brace detection
    # ~/bin/ls  # Use ~ before tilde detection
    part0 = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(part0)

    # NOTE: We only do brace DETECTION here, not brace EXPANSION.  Therefore
    # we can't implement bash's behavior of having say {~bob,~jane}/src
    # work, because we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate',
    # but it doesn't seem worth it.
    words2 = braces.BraceDetectAll(suffix_words)
    words3 = word_.TildeDetectAll(words2)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # do_fork by default
    return command.Simple(blame_tok, more_env, words3, redirects, typed_args,
                          block, True)
381
382
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # self.tokens for location info: the 'proc'/'func' keyword or
        # another blame token, one per nested scope
        self.tokens = []  # type: List[Token]
        # One dict per scope, mapping declared names to the keyword that
        # declared them
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when we enter a shell function, proc, or func.

        Bash allows nested definitions, but it's confusing because it's the
        same as two functions at the top level:

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens) != 0:
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        entry = {}  # type: Dict[str, Id_t]
        self.names.append(entry)

    def Pop(self):
        # type: () -> None
        """Called when we leave a shell function, proc, or func."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x:
          error if x is already declared
        setvar x:
          error if x is not declared

        setglobal x:
          No errors are possible; we would need all these many conditions
          to statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static

        What about bare assignment in Hay?  I think these are dynamic
        checks -- there is no static check.  Hay is for building up data
        imperatively, and then LATER, right before main(), it can be type
        checked.

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var
        # and setvar are essentially the same.
        if len(self.names) == 0:
            return

        top = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in top:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                top[var_name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if var_name not in top:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
471
472
class ctx_VarChecker(object):
    """Push a VarChecker scope for the duration of a 'with' block."""

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        var_checker.Push(blame_tok)
        self.checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.checker.Pop()
487
488
class ctx_CmdMode(object):
    """Set the parser's cmd_mode for the duration of a 'with' block."""

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.saved_mode = cmd_parse.cmd_mode  # restored in __exit__
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.saved_mode
504
505
# Keywords that appear in the middle or at the end of compound commands;
# tested by _AtSecondaryKeyword().
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
510
511
class CommandParser(object):
    """Recursive descent parser derived from the POSIX shell grammar.

    This is the BNF grammar:
      https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10

    - Augmented with both bash/OSH and YSH constructs.

    - We use regex-like iteration rather than recursive references
        ?  means optional (0 or 1)
        *  means 0 or more
        +  means 1 or more

    - Keywords are spelled in Caps:
        If Elif Case

    - Operator tokens are quoted:
        '(' '|'

      or can be spelled directly if it matters:

        Op_LParen Op_Pipe

    - Non-terminals are snake_case:
        brace_group subshell

    Methods in this class should ROUGHLY CORRESPOND to grammar productions,
    and the production should be in the method docstrings, e.g.

      def ParseSubshell():
        "
        subshell : '(' compound_list ')'

        Looking at Op_LParen   # Comment to say how this method is called
        "

    The grammar may be factored to make parsing easier.
    """
550
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id  # word Id that terminates parsing

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x=42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser
        # instances.  I think this is OK but you can imagine different
        # behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
592
    # Init_() function for "keyword arg"
    def Init_AliasesInFlight(self, aliases_in_flight):
        # type: (AliasesInFlight) -> None
        """Share an in-flight alias list with the parent parser.

        Used by _MaybeExpandAliases() to prevent infinite alias expansion
        across recursively created CommandParser instances.
        """
        self.aliases_in_flight = aliases_in_flight
597
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined  # Kind of cur_word
        self.c_id = Id.Undefined_Tok  # Id of cur_word

        # Here docs whose bodies haven't been read yet; flushed by
        # _GetWord() at the next newline.
        self.pending_here_docs = [
        ]  # type: List[Redir]  # should have HereLiteral arg
612
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.
        """
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
622
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        self.next_lex_mode = lex_mode_e.ShCommand
631
    def _SetNextBrack(self):
        # type: () -> None
        # Like _SetNext(), but the next word is read in a mode where [ is
        # treated specially -- presumably so it can start a lazy arg list
        # (see the Op_LBracket case in _ScanSimpleCommand).  TODO: confirm.
        self.next_lex_mode = lex_mode_e.ShCommandBrack
635
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word
        and sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # A newline flushes all pending here doc bodies.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h,
                                          self.line_reader, self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            self.c_id = word_.CommandId(self.cur_word)
            self.next_lex_mode = lex_mode_e.Undefined
664
665 def _Eat(self, c_id, msg=None):
666 # type: (Id_t, Optional[str]) -> word_t
667 """Consume a word of a type, maybe showing a custom error message.
668
669 Args:
670 c_id: the Id we expected
671 msg: improved error message
672 """
673 self._GetWord()
674 if self.c_id != c_id:
675 if msg is None:
676 msg = 'Expected word type %s, got %s' % (
677 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
678 p_die(msg, loc.Word(self.cur_word))
679
680 skipped = self.cur_word
681 self._SetNext()
682 return skipped
683
    def _NewlineOk(self):
        # type: () -> None
        """Check for an optional newline and consume it."""
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
690
691 def _AtSecondaryKeyword(self):
692 # type: () -> bool
693 self._GetWord()
694 if self.c_id in SECONDARY_KEYWORDS:
695 return True
696 return False
697
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse a single redirect, e.g. >out, 3>out, {fd}>out, <<EOF.

        Precondition: the current word has Kind.Redir.
        For << and <<-, the here doc body is NOT read here; the node is
        appended to pending_here_docs and filled in at the next newline.
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        # One way to do this is with Kind.Redir and Kind.RedirNamed, and
        # then possibly "unify" the IDs by subtracting a constant like 8
        # or 16?

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':
            # {fd}>out -- variable name holds the file descriptor
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():
            # explicit descriptor like 3>out or 10>out (1 or 2 digits)
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:
            # no explicit descriptor; use the operator's default fd
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
760
761 def _ParseRedirectList(self):
762 # type: () -> List[Redir]
763 """Try parsing any redirects at the cursor.
764
765 This is used for blocks only, not commands.
766 """
767 redirects = [] # type: List[Redir]
768 while True:
769 # This prediction needs to ONLY accept redirect operators. Should we
770 # make them a separate Kind?
771 self._GetWord()
772 if self.c_kind != Kind.Redir:
773 break
774
775 node = self.ParseRedirect()
776 redirects.append(node)
777 self._SetNext()
778
779 return redirects
780
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

        simple_command   : cmd_prefix cmd_word cmd_suffix
                         | cmd_prefix cmd_word
                         | cmd_prefix
                         | cmd_name cmd_suffix
                         | cmd_name
                         ;
        cmd_name         : WORD                   /* Apply rule 7a */
                         ;
        cmd_word         : WORD                   /* Apply rule 7b */
                         ;
        cmd_prefix       :            io_redirect
                         | cmd_prefix io_redirect
                         |            ASSIGNMENT_WORD
                         | cmd_prefix ASSIGNMENT_WORD
                         ;
        cmd_suffix       :            io_redirect
                         | cmd_suffix io_redirect
                         |            WORD
                         | cmd_suffix WORD

        YSH grammar:

        simple_command =
          cmd_prefix* word+ typed_args? BraceGroup? cmd_suffix*

        typed_args =
          '(' arglist ')'
        | '[' arglist ']'

        Notably, redirects shouldn't appear between typed args and
        BraceGroup.
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()
            if self.c_kind == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif self.c_kind == Kind.Word:
                if self.parse_opts.parse_brace():
                    # Treat { and } more like operators
                    if self.c_id == Id.Lit_LBrace:
                        if self.allow_block:  # Disabled for if/while condition, etc.

                            # allow x = 42
                            self.hay_attrs_stack.append(first_word_caps)
                            brace_group = self.ParseBraceGroup()

                            # So we can get the source code back later
                            lines = self.arena.SaveLinesAndDiscard(
                                brace_group.left, brace_group.right)
                            block = LiteralBlock(brace_group, lines)

                            self.hay_attrs_stack.pop()

                        if 0:
                            print('--')
                            block.PrettyPrint()
                            print('\n--')
                        break
                    elif self.c_id == Id.Lit_RBrace:
                        # Another thing: { echo hi }
                        # We're DONE!!!
                        break

                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            elif self.c_id == Id.Op_LParen:
                # 1. Check that there's a preceding space
                prev_byte = self.lexer.ByteLookBack()
                if prev_byte not in (SPACE_CH, TAB_CH):
                    if self.parse_opts.parse_at():
                        p_die('Space required before (',
                              loc.Word(self.cur_word))
                    else:
                        # inline func call like @sorted(x) is invalid in
                        # OSH, but the solution isn't a space
                        p_die(
                            'Unexpected left paren (might need a space before it)',
                            loc.Word(self.cur_word))

                # 2. Check that it's not ().  We disallow this because it's
                # a no-op and there could be confusion with shell func defs.
                # For some reason we need to call lexer.LookPastSpace, not
                # w_parser.LookPastSpace.  I think this is because we're at
                # (, which is an operator token.  All the other cases are
                # like 'x=', which is PART of a word, and we don't know if
                # it will end.
                next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
                if next_id == Id.Op_RParen:
                    p_die('Empty arg list not allowed',
                          loc.Word(self.cur_word))

                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_eager_arglist)

            elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_lazy_arglist)

            else:
                break

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1
        return redirects, words, typed_args, block
921
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of CompoundWord

        Returns:
          A new LST node if any aliases were expanded, or None otherwise
          (in which case the caller parses the words normally).

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In
          our case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure
        out a better place.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then
        calls lookupalias().  This seems to provide a consistent behavior
        among shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only expand words that are static and unquoted.
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want
            # to prevent expansion of the second word in 'echo echo', so we
            # add 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue
                # to expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own
        # Arena.  This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands,
                    # compound commands, etc. as opposed to
                    # ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse an alias expansion is a fatal error;
                    # we don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1060
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar (TODO: port to
        grammar/Shell.g)

        io_file : '<' filename
                | LESSAND filename
                  ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part    : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects.  Append them to separate lists.
        2) Look for the first non-assignment word.  If it's declare, etc., then
           keep parsing words AND assign words.  Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        Global assignment and keyword assignments can have them, of course.
        Well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array
        literal in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted,
        e.g. <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # Location used to reject typed args / blocks where they're not allowed.
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g. >out.txt  # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # A bare redirect becomes an empty Simple command.
            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading FOO=bar prefix bindings from the rest of the words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            #  and inside Hay Attr blocks
            # But allow X=Y at the top level
            #  for interactive use foo=bar
            #  for global constants GLOBAL=~/src
            #    because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion.  We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: no suffix words, only bindings like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x    - inside procs and shell functions
                # return (x)  - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors.  (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
1240
1241 def ParseBraceGroup(self):
1242 # type: () -> BraceGroup
1243 """
1244 Original:
1245 brace_group : LBrace command_list RBrace ;
1246
1247 YSH:
1248 brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;
1249
1250 The doc comment can only occur if there's a newline.
1251 """
1252 ate = self._Eat(Id.Lit_LBrace)
1253 left = word_.BraceToken(ate)
1254
1255 doc_word = None # type: word_t
1256 self._GetWord()
1257 if self.c_id == Id.Op_Newline:
1258 self._SetNext()
1259 # Set a flag so we don't skip over ###
1260 with word_.ctx_EmitDocToken(self.w_parser):
1261 self._GetWord()
1262
1263 if self.c_id == Id.Ignored_Comment:
1264 doc_word = self.cur_word
1265 self._SetNext()
1266
1267 # Id.Ignored_Comment means it's a Token, or None
1268 doc_token = cast(Token, doc_word)
1269
1270 c_list = self._ParseCommandList()
1271
1272 ate = self._Eat(Id.Lit_RBrace)
1273 right = word_.BraceToken(ate)
1274
1275 # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
1276 # would allow us to revert this back to None, which was changed in
1277 # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
1278 # behavior saves allocations, but is less type safe.
1279 return BraceGroup(left, doc_token, c_list.children, [],
1280 right) # no redirects yet
1281
1282 def ParseDoGroup(self):
1283 # type: () -> command.DoGroup
1284 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1285
1286 do_group : Do command_list Done ; /* Apply rule 6 */
1287 """
1288 ate = self._Eat(Id.KW_Do)
1289 do_kw = word_.AsKeywordToken(ate)
1290
1291 c_list = self._ParseCommandList() # could be anything
1292
1293 ate = self._Eat(Id.KW_Done)
1294 done_kw = word_.AsKeywordToken(ate)
1295
1296 return command.DoGroup(do_kw, c_list.children, done_kw)
1297
    def ParseForWords(self):
        # type: () -> Tuple[List[CompoundWord], Optional[Token]]
        """Read the iterable words of a for loop, up to a separator.

        for_words : WORD* for_sep
                  ;
        for_sep   : ';' newline_ok
                  | NEWLINES
                  ;

        Returns:
          The words, plus the ';' Token if one ended the list (kept so tools
          can locate and remove it), or None otherwise.
        """
        words = []  # type: List[CompoundWord]
        # The span_id of any semi-colon, so we can remove it.
        semi_tok = None  # type: Optional[Token]

        while True:
            self._GetWord()
            if self.c_id == Id.Op_Semi:
                tok = cast(Token, self.cur_word)
                semi_tok = tok
                self._SetNext()
                self._NewlineOk()
                break
            elif self.c_id == Id.Op_Newline:
                self._SetNext()
                break
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                # YSH: the '{' that opens the loop body also ends the words
                break

            if self.cur_word.tag() != word_e.Compound:
                # TODO: Can we also show a pointer to the 'for' keyword?
                p_die('Invalid word in for loop', loc.Word(self.cur_word))

            w2 = cast(CompoundWord, self.cur_word)
            words.append(w2)
            self._SetNext()
        return words, semi_tok
1333
    def _ParseForExprLoop(self, for_kw):
        # type: (Token) -> command.ForExpr
        """Parse a C-style arithmetic for loop, after 'for' has been eaten.

        Shell:
          for '((' init ';' cond ';' update '))' for_sep? do_group

        YSH:
          for '((' init ';' cond ';' update '))' for_sep? brace_group
        """
        # The word parser reads the (( ... )) header; attach the 'for' keyword.
        node = self.w_parser.ReadForExpression()
        node.keyword = for_kw

        self._SetNext()

        # An optional separator may appear between '))' and the body.
        self._GetWord()
        if self.c_id == Id.Op_Semi:
            self._SetNext()
            self._NewlineOk()
        elif self.c_id == Id.Op_Newline:
            self._SetNext()
        elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
            pass
        elif self.c_id == Id.Lit_LBrace:  # does NOT require parse_brace
            pass
        else:
            p_die('Invalid word after for expression', loc.Word(self.cur_word))

        # Body is either a YSH brace group or a POSIX do...done group.
        if self.c_id == Id.Lit_LBrace:
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()
        return node
1366
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse a foreach-style loop, after 'for' has been eaten.

        Handles all of:
          for x in a b; do ...; done     # POSIX word list
          for x, y in (expr) { ... }     # YSH expression iterable
          for x; do ...; done            # implicit loop over "$@"
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        # First, collect up to 3 loop variable names.
        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    # Strip the trailing comma so StaticEval sees a bare name.
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        # Second, parse the iterable: 'in ...', or implicit "$@".
        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'.  But we still have to fix the
            # bug where we require a SPACE between in and (
            #   for x in(y)   # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH expression iterable: for x in (mylist) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Word-list iterable: for x in a b c; do ... done
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                # Expand brace patterns and detect ~ before execution.
                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        # Third, parse the body.
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1470
1471 def ParseFor(self):
1472 # type: () -> command_t
1473 """
1474 TODO: Update the grammar
1475
1476 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1477 | For '((' ... TODO
1478 """
1479 ate = self._Eat(Id.KW_For)
1480 for_kw = word_.AsKeywordToken(ate)
1481
1482 self._GetWord()
1483 if self.c_id == Id.Op_DLeftParen:
1484 if not self.parse_opts.parse_dparen():
1485 p_die("Bash for loops aren't allowed (parse_dparen)",
1486 loc.Word(self.cur_word))
1487
1488 # for (( i = 0; i < 10; i++)
1489 n1 = self._ParseForExprLoop(for_kw)
1490 n1.redirects = self._ParseRedirectList()
1491 return n1
1492 else:
1493 # for x in a b; do echo hi; done
1494 n2 = self._ParseForEachLoop(for_kw)
1495 n2.redirects = self._ParseRedirectList()
1496 return n2
1497
1498 def _ParseConditionList(self):
1499 # type: () -> condition_t
1500 """
1501 condition_list: command_list
1502
1503 This is a helper to parse a condition list for if commands and while/until
1504 loops. It will throw a parse error if there are no conditions in the list.
1505 """
1506 self.allow_block = False
1507 commands = self._ParseCommandList()
1508 self.allow_block = True
1509
1510 if len(commands.children) == 0:
1511 p_die("Expected a condition", loc.Word(self.cur_word))
1512
1513 return condition.Shell(commands.children)
1514
1515 def ParseWhileUntil(self, keyword):
1516 # type: (Token) -> command.WhileUntil
1517 """
1518 while_clause : While command_list do_group ;
1519 until_clause : Until command_list do_group ;
1520 """
1521 self._SetNext() # skip keyword
1522
1523 if (self.parse_opts.parse_paren() and
1524 self.w_parser.LookPastSpace() == Id.Op_LParen):
1525 enode = self.w_parser.ParseYshExprForCommand()
1526 cond = condition.YshExpr(enode) # type: condition_t
1527 else:
1528 cond = self._ParseConditionList()
1529
1530 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1531 # should be unchanged. To be sure we should desugar.
1532 self._GetWord()
1533 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1534 # while test -f foo {
1535 body_node = self.ParseBraceGroup() # type: command_t
1536 else:
1537 body_node = self.ParseDoGroup()
1538
1539 # no redirects yet
1540 return command.WhileUntil(keyword, cond, body_node, None)
1541
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a POSIX case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Precondition: looking at '(' or the first pattern word.
        """
        # Inside a case arm, ')' closes the pattern rather than a subshell.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # One or more '|'-separated pattern words.
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The action is optional: ';;' or 'esac' may follow immediately.
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id == Id.Op_DSemi:
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1598
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case statement.

        case_item : pattern newline_ok brace_group newline_ok
        pattern   : pat_words
                  | pat_exprs
                  | pat_eggex
                  | pat_else
        pat_words : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word  : WORD
        pat_eggex : '/' oil_eggex '/'
        pat_expr  : '(' oil_expr ')'
        pat_else  : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Args:
          discriminant: the Id of the token starting this arm, produced by
            NewlineOkForYshCase; it selects word vs expr/eggex patterns.

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`.  We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words: one or more '|'-separated words
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # Remember the start of the arm for error locations.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1656
    def ParseYshCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse a YSH case statement.

        ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;

        Precondition: looking at the token after 'case'.
        """
        enode = self.w_parser.ParseYshExprForCommand()
        to_match = case_arg.YshExpr(enode)

        ate = self._Eat(Id.Lit_LBrace)
        arms_start = word_.BraceToken(ate)

        # The "discriminant" is the Id of the first token of the next arm.
        discriminant = self.w_parser.NewlineOkForYshCase()

        # Note: for now, zero arms are accepted, just like POSIX case $x in esac
        arms = []  # type: List[CaseArm]
        while discriminant != Id.Op_RBrace:
            arm = self.ParseYshCaseArm(discriminant)
            arms.append(arm)

            discriminant = self.w_parser.NewlineOkForYshCase()

        # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr.  So the '}'
        # token is read as an Id.Op_RBrace, but we need to store this as a
        # Id.Lit_RBrace.
        ate = self._Eat(Id.Op_RBrace)
        arms_end = word_.AsOperatorToken(ate)
        arms_end.id = Id.Lit_RBrace

        # Last arg is None: redirects are attached later by the caller.
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1689
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse a POSIX-style case statement.

        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list : case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok ;
        """
        self._GetWord()
        w = self.cur_word
        if not self.parse_opts.parse_bare_word():
            # YSH requires the matched word to be quoted or a variable.
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string. You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:  # this is Kind.Word
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # Last arg is None: redirects are attached later by the caller.
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1741
1742 def ParseCase(self):
1743 # type: () -> command.Case
1744 """
1745 case_clause : old_case # from POSIX
1746 | ysh_case
1747 ;
1748
1749 Looking at 'Case'
1750 """
1751 case_kw = word_.AsKeywordToken(self.cur_word)
1752 self._SetNext() # past 'case'
1753
1754 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1755 return self.ParseYshCase(case_kw)
1756 else:
1757 return self.ParseOldCase(case_kw)
1758
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse YSH-style elif/else clauses, appending arms to if_node.

        We've already parsed up to the '}' before 'elif', e.g.:

            if test -f foo {
              echo foo
            } elif test -f bar; test -f spam {
              echo bar
            } else {
              echo none
            }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                # elif (x > 0) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Command-list condition; block args are disallowed here.
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()  # refresh c_id for the loop condition above

            arm = IfArm(elif_kw, cond, None, body.children, [elif_kw.span_id])
            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1792
1793 def _ParseYshIf(self, if_kw, cond):
1794 # type: (Token, condition_t) -> command.If
1795 """if test -f foo {
1796
1797 # ^ we parsed up to here
1798 echo foo
1799 } elif test -f bar; test -f spam {
1800 echo bar
1801 } else {
1802 echo none
1803 }
1804 NOTE: If you do something like if test -n foo{, the parser keeps going, and
1805 the error is confusing because it doesn't point to the right place.
1806
1807 I think we might need strict_brace so that foo{ is disallowed. It has to
1808 be foo\{ or foo{a,b}. Or just turn that on with parse_brace? After you
1809 form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
1810 Lit_RBrace? Maybe this is pre-parsing step in the WordParser?
1811 """
1812 if_node = command.If.CreateNull(alloc_lists=True)
1813 if_node.if_kw = if_kw
1814
1815 body1 = self.ParseBraceGroup()
1816 # Every arm has 1 spid, unlike shell-style
1817 # TODO: We could get the spids from the brace group.
1818 arm = IfArm(if_kw, cond, None, body1.children, [if_kw.span_id])
1819
1820 if_node.arms.append(arm)
1821
1822 self._GetWord()
1823 if self.c_id in (Id.KW_Elif, Id.KW_Else):
1824 self._ParseYshElifElse(if_node)
1825 # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
1826 # spid because that's in the BraceGroup.
1827 return if_node
1828
    def _ParseElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse shell-style elif/else clauses, appending arms to if_node.

        else_part: (Elif command_list Then command_list)* Else command_list ;
        """
        arms = if_node.arms

        self._GetWord()
        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'elif'

            cond = self._ParseConditionList()

            ate = self._Eat(Id.KW_Then)
            then_kw = word_.AsKeywordToken(ate)

            body = self._ParseCommandList()
            # Shell-style arms carry two spids: 'elif' and 'then'.
            arm = IfArm(elif_kw, cond, then_kw, body.children,
                        [elif_kw.span_id, then_kw.span_id])

            arms.append(arm)

            self._GetWord()
        if self.c_id == Id.KW_Else:
            else_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'else'
            body = self._ParseCommandList()
            if_node.else_action = body.children
        else:
            else_kw = None

        if_node.else_kw = else_kw
1862
    def ParseIf(self):
        # type: () -> command.If
        """Parse an 'if' statement in either shell or YSH form.

        if_clause : If command_list Then command_list else_part? Fi ;

        open : '{' | Then
        close : '}' | Fi

        ysh_if : If ( command_list | '(' expr ')' )
                 open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            # YSH brace form: the rest is handled by _ParseYshIf.
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children,
                    [if_kw.span_id, then_kw.span_id])
        if_node.arms.append(arm)

        # 2nd to Nth arm.  c_id was updated by _ParseCommandList above.
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1913
1914 def ParseTime(self):
1915 # type: () -> command_t
1916 """Time [-p] pipeline.
1917
1918 According to bash help.
1919 """
1920 time_kw = word_.AsKeywordToken(self.cur_word)
1921 self._SetNext() # skip time
1922 pipeline = self.ParsePipeline()
1923 return command.TimeBlock(time_kw, pipeline)
1924
    def ParseCompoundCommand(self):
        # type: () -> command_t
        """Dispatch on the current word to the right compound-command parser.

        Refactoring: we put io_redirect* here instead of in function_body and
        command.

        compound_command : brace_group io_redirect*
                         | subshell io_redirect*
                         | for_clause io_redirect*
                         | while_clause io_redirect*
                         | until_clause io_redirect*
                         | if_clause io_redirect*
                         | case_clause io_redirect*

                           # bash extensions
                         | time_clause
                         | [[ BoolExpr ]]
                         | (( ArithExpr ))
        """
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:
            n1 = self.ParseBraceGroup()
            n1.redirects = self._ParseRedirectList()
            return n1
        if self.c_id == Id.Op_LParen:
            n2 = self.ParseSubshell()
            n2.redirects = self._ParseRedirectList()
            return n2

        if self.c_id == Id.KW_For:
            # Note: Redirects parsed in this call.  POSIX for and bash for ((
            # have redirects, but YSH for doesn't.
            return self.ParseFor()
        if self.c_id in (Id.KW_While, Id.KW_Until):
            keyword = word_.AsKeywordToken(self.cur_word)
            n3 = self.ParseWhileUntil(keyword)
            n3.redirects = self._ParseRedirectList()
            return n3

        if self.c_id == Id.KW_If:
            n4 = self.ParseIf()
            n4.redirects = self._ParseRedirectList()
            return n4
        if self.c_id == Id.KW_Case:
            n5 = self.ParseCase()
            n5.redirects = self._ParseRedirectList()
            return n5

        if self.c_id == Id.KW_DLeftBracket:
            n6 = self.ParseDBracket()
            n6.redirects = self._ParseRedirectList()
            return n6
        if self.c_id == Id.Op_DLeftParen:
            if not self.parse_opts.parse_dparen():
                p_die('You may want a space between parens (parse_dparen)',
                      loc.Word(self.cur_word))
            n7 = self.ParseDParen()
            n7.redirects = self._ParseRedirectList()
            return n7

        # bash extensions: no redirects
        if self.c_id == Id.KW_Time:
            return self.ParseTime()

        # Happens in function body, e.g. myfunc() oops
        p_die('Unexpected word while parsing compound command',
              loc.Word(self.cur_word))
        assert False  # for MyPy
1993
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a POSIX-style function definition: name() body.

        function_header : fname '(' ')'
        function_def    : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a
        grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no
            # args would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            # Track variable declarations within the function body.
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            return None  # unreachable; p_die raises (keeps MyPy happy)
2046
    def ParseKshFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a ksh-style function definition: function name [()] body.

        ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
        """
        keyword_tok = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip past 'function'
        self._GetWord()

        cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(cur_word)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid KSH-style function name', loc.Word(cur_word))

        name_word = self.cur_word
        self._SetNext()  # skip past the function name

        # The '()' after the name is optional in ksh style.
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
            self._SetNext()
            self._Eat(Id.Right_ShFunction)

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        # Track variable declarations within the function body.
        with ctx_VarChecker(self.var_checker, keyword_tok):
            func.body = self.ParseCompoundCommand()

        func.keyword = keyword_tok
        func.name_tok = location.LeftTokenForWord(name_word)
        return func
2081
2082 def ParseYshProc(self):
2083 # type: () -> Proc
2084 node = Proc.CreateNull(alloc_lists=True)
2085
2086 keyword_tok = word_.AsKeywordToken(self.cur_word)
2087 node.keyword = keyword_tok
2088
2089 with ctx_VarChecker(self.var_checker, keyword_tok):
2090 with ctx_CmdMode(self, cmd_mode_e.Proc):
2091 self.w_parser.ParseProc(node)
2092 if node.sig.tag() == proc_sig_e.Closed: # Register params
2093 sig = cast(proc_sig.Closed, node.sig)
2094
2095 # Treat 3 kinds of params as variables.
2096 wp = sig.word
2097 if wp:
2098 for param in wp.params:
2099 self.var_checker.Check(Id.KW_Var, param.name,
2100 param.blame_tok)
2101 if wp.rest_of:
2102 r = wp.rest_of
2103 self.var_checker.Check(Id.KW_Var, r.name,
2104 r.blame_tok)
2105 # We COULD register __out here but it would require a different API.
2106 #if param.prefix and param.prefix.id == Id.Arith_Colon:
2107 # self.var_checker.Check(Id.KW_Var, '__' + param.name)
2108
2109 posit = sig.positional
2110 if posit:
2111 for param in posit.params:
2112 self.var_checker.Check(Id.KW_Var, param.name,
2113 param.blame_tok)
2114 if posit.rest_of:
2115 r = posit.rest_of
2116 self.var_checker.Check(Id.KW_Var, r.name,
2117 r.blame_tok)
2118
2119 named = sig.named
2120 if named:
2121 for param in named.params:
2122 self.var_checker.Check(Id.KW_Var, param.name,
2123 param.blame_tok)
2124 if named.rest_of:
2125 r = named.rest_of
2126 self.var_checker.Check(Id.KW_Var, r.name,
2127 r.blame_tok)
2128
2129 if sig.block_param:
2130 b = sig.block_param
2131 self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)
2132
2133 self._SetNext()
2134 node.body = self.ParseBraceGroup()
2135 # No redirects for YSH procs (only at call site)
2136
2137 return node
2138
2139 def ParseYshFunc(self):
2140 # type: () -> Func
2141 """
2142 ysh_func: (
2143 Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
2144 )
2145 Looking at KW_Func
2146 """
2147 node = Func.CreateNull(alloc_lists=True)
2148
2149 keyword_tok = word_.AsKeywordToken(self.cur_word)
2150 node.keyword = keyword_tok
2151
2152 with ctx_VarChecker(self.var_checker, keyword_tok):
2153 self.w_parser.ParseFunc(node)
2154
2155 posit = node.positional
2156 if posit:
2157 for param in posit.params:
2158 self.var_checker.Check(Id.KW_Var, param.name,
2159 param.blame_tok)
2160 if posit.rest_of:
2161 r = posit.rest_of
2162 self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)
2163
2164 named = node.named
2165 if named:
2166 for param in named.params:
2167 self.var_checker.Check(Id.KW_Var, param.name,
2168 param.blame_tok)
2169 if named.rest_of:
2170 r = named.rest_of
2171 self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)
2172
2173 self._SetNext()
2174 with ctx_CmdMode(self, cmd_mode_e.Func):
2175 node.body = self.ParseBraceGroup()
2176
2177 return node
2178
2179 def ParseCoproc(self):
2180 # type: () -> command_t
2181 """
2182 TODO: command.Coproc?
2183 """
2184 raise NotImplementedError()
2185
2186 def ParseSubshell(self):
2187 # type: () -> command.Subshell
2188 """
2189 subshell : '(' compound_list ')'
2190
2191 Looking at Op_LParen
2192 """
2193 left = word_.AsOperatorToken(self.cur_word)
2194 self._SetNext() # skip past (
2195
2196 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2197 # translation stack, we want to delay it.
2198
2199 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2200
2201 c_list = self._ParseCommandList()
2202 if len(c_list.children) == 1:
2203 child = c_list.children[0]
2204 else:
2205 child = c_list
2206
2207 ate = self._Eat(Id.Right_Subshell)
2208 right = word_.AsOperatorToken(ate)
2209
2210 return command.Subshell(left, child, right, None) # no redirects yet
2211
2212 def ParseDBracket(self):
2213 # type: () -> command.DBracket
2214 """Pass the underlying word parser off to the boolean expression
2215 parser."""
2216 left = word_.AsKeywordToken(self.cur_word)
2217 # TODO: Test interactive. Without closing ]], you should get > prompt
2218 # (PS2)
2219
2220 self._SetNext() # skip [[
2221 b_parser = bool_parse.BoolParser(self.w_parser)
2222 bnode, right = b_parser.Parse() # May raise
2223 return command.DBracket(left, bnode, right, None) # no redirects yet
2224
2225 def ParseDParen(self):
2226 # type: () -> command.DParen
2227 left = word_.AsOperatorToken(self.cur_word)
2228
2229 self._SetNext() # skip ((
2230 anode, right = self.w_parser.ReadDParen()
2231 assert anode is not None
2232
2233 return command.DParen(left, anode, right, None) # no redirects yet
2234
2235 def ParseCommand(self):
2236 # type: () -> command_t
2237 """
2238 command : simple_command
2239 | compound_command # OSH edit: io_redirect* folded in
2240 | function_def
2241 | ksh_function_def
2242
2243 # YSH extensions
2244 | proc NAME ...
2245 | const ...
2246 | var ...
2247 | setglobal ...
2248 | setref ...
2249 | setvar ...
2250 | _ EXPR
2251 | = EXPR
2252 ;
2253
2254 Note: the reason const / var are not part of compound_command is because
2255 they can't be alone in a shell function body.
2256
2257 Example:
2258 This is valid shell f() if true; then echo hi; fi
2259 This is invalid f() var x = 1
2260 """
2261 if self._AtSecondaryKeyword():
2262 p_die('Unexpected word when parsing command',
2263 loc.Word(self.cur_word))
2264
2265 # YSH Extensions
2266
2267 if self.c_id == Id.KW_Proc: # proc p { ... }
2268 # proc is hidden because of the 'local reasoning' principle. Code
2269 # inside procs should be YSH, full stop. That means ysh:upgrade is
2270 # on.
2271 if self.parse_opts.parse_proc():
2272 return self.ParseYshProc()
2273 else:
2274 # 2024-02: This avoids bad syntax errors if you type YSH code
2275 # into OSH
2276 # proc p (x) { echo hi } would actually be parsed as a
2277 # command.Simple! Shell compatibility: quote 'proc'
2278 p_die("proc is a YSH keyword, but this is OSH.",
2279 loc.Word(self.cur_word))
2280
2281 if self.c_id == Id.KW_Func: # func f(x) { ... }
2282 if self.parse_opts.parse_func():
2283 return self.ParseYshFunc()
2284 else:
2285 # Same reasoning as above, for 'proc'
2286 p_die("func is a YSH keyword, but this is OSH.",
2287 loc.Word(self.cur_word))
2288
2289 if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
2290 p_die("const can't be inside proc or func. Use var instead.",
2291 loc.Word(self.cur_word))
2292
2293 if self.c_id in (Id.KW_Var, Id.KW_Const): # var x = 1
2294 keyword_id = self.c_id
2295 kw_token = word_.LiteralToken(self.cur_word)
2296 self._SetNext()
2297 n8 = self.w_parser.ParseVarDecl(kw_token)
2298 for lhs in n8.lhs:
2299 self.var_checker.Check(keyword_id, lhs.name, lhs.left)
2300 return n8
2301
2302 if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
2303 kw_token = word_.LiteralToken(self.cur_word)
2304 self._SetNext()
2305 n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
2306 return n9
2307
2308 if self.c_id in (Id.KW_Call, Id.Lit_Equals):
2309 # = 42 + a[i]
2310 # call mylist->append('x')
2311
2312 keyword = word_.LiteralToken(self.cur_word)
2313 assert keyword is not None
2314 self._SetNext()
2315 enode = self.w_parser.ParseCommandExpr()
2316 return command.Expr(keyword, enode)
2317
2318 if self.c_id == Id.KW_Function:
2319 return self.ParseKshFunctionDef()
2320
2321 if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
2322 Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
2323 Id.KW_If, Id.KW_Case, Id.KW_Time):
2324 return self.ParseCompoundCommand()
2325
2326 # Syntax error for '}' starting a line, which all shells disallow.
2327 if self.c_id == Id.Lit_RBrace:
2328 p_die('Unexpected right brace', loc.Word(self.cur_word))
2329
2330 if self.c_kind == Kind.Redir: # Leading redirect
2331 return self.ParseSimpleCommand()
2332
2333 if self.c_kind == Kind.Word:
2334 # ensured by Kind.Word
2335 cur_word = cast(CompoundWord, self.cur_word)
2336
2337 # NOTE: At the top level, only Token and Compound are possible.
2338 # Can this be modelled better in the type system, removing asserts?
2339 #
2340 # TODO: This can be a proc INVOCATION! (Doesn't even need parse_paren)
2341 # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
2342 # That requires 2 tokens of lookahead, which we don't have
2343 #
2344 # Or maybe we don't just have ParseSimpleCommand -- we will have
2345 # ParseYshCommand or something
2346
2347 if (self.w_parser.LookAheadFuncParens() and
2348 not word_.IsVarLike(cur_word)):
2349 return self.ParseFunctionDef() # f() { echo; } # function
2350
2351 # Parse x = 1+2*3 when inside HayNode { } blocks
2352 parts = cur_word.parts
2353 if self.parse_opts.parse_equals() and len(parts) == 1:
2354 part0 = parts[0]
2355 if part0.tag() == word_part_e.Literal:
2356 tok = cast(Token, part0)
2357 if (match.IsValidVarName(tok.tval) and
2358 self.w_parser.LookPastSpace() == Id.Lit_Equals):
2359 assert tok.id == Id.Lit_Chars, tok
2360
2361 if len(self.hay_attrs_stack
2362 ) and self.hay_attrs_stack[-1]:
2363 # Note: no static var_checker.Check() for bare assignment
2364 enode = self.w_parser.ParseBareDecl()
2365 self._SetNext() # Somehow this is necessary
2366 # TODO: Use BareDecl here. Well, do that when we
2367 # treat it as const or lazy.
2368 return command.VarDecl(
2369 None,
2370 [NameType(tok, lexer.TokenVal(tok), None)],
2371 enode)
2372 else:
2373 self._SetNext()
2374 self._GetWord()
2375 p_die(
2376 'Unexpected = (Hint: use var/setvar, or quote it)',
2377 loc.Word(self.cur_word))
2378
2379 # echo foo
2380 # f=(a b c) # array
2381 # array[1+2]+=1
2382 return self.ParseSimpleCommand()
2383
2384 if self.c_kind == Kind.Eof:
2385 p_die("Unexpected EOF while parsing command",
2386 loc.Word(self.cur_word))
2387
2388 # NOTE: This only happens in batch mode in the second turn of the loop!
2389 # e.g. )
2390 p_die("Invalid word while parsing command", loc.Word(self.cur_word))
2391
2392 assert False # for MyPy
2393
2394 def ParsePipeline(self):
2395 # type: () -> command_t
2396 """
2397 pipeline : Bang? command ( '|' newline_ok command )* ;
2398 """
2399 negated = None # type: Optional[Token]
2400
2401 self._GetWord()
2402 if self.c_id == Id.KW_Bang:
2403 negated = word_.AsKeywordToken(self.cur_word)
2404 self._SetNext()
2405
2406 child = self.ParseCommand()
2407 assert child is not None
2408
2409 children = [child]
2410
2411 self._GetWord()
2412 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2413 if negated is not None:
2414 node = command.Pipeline(negated, children, [])
2415 return node
2416 else:
2417 return child # no pipeline
2418
2419 # | or |&
2420 ops = [] # type: List[Token]
2421 while True:
2422 op = word_.AsOperatorToken(self.cur_word)
2423 ops.append(op)
2424
2425 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2426 self._NewlineOk()
2427
2428 child = self.ParseCommand()
2429 children.append(child)
2430
2431 self._GetWord()
2432 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2433 break
2434
2435 return command.Pipeline(negated, children, ops)
2436
2437 def ParseAndOr(self):
2438 # type: () -> command_t
2439 self._GetWord()
2440 if self.c_id == Id.Word_Compound:
2441 first_word_tok = word_.LiteralToken(self.cur_word)
2442 if first_word_tok is not None and first_word_tok.id == Id.Lit_TDot:
2443 # We got '...', so parse in multiline mode
2444 self._SetNext()
2445 with word_.ctx_Multiline(self.w_parser):
2446 return self._ParseAndOr()
2447
2448 # Parse in normal mode, not multiline
2449 return self._ParseAndOr()
2450
2451 def _ParseAndOr(self):
2452 # type: () -> command_t
2453 """
2454 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2455 | pipeline
2456
2457 Note that it is left recursive and left associative. We parse it
2458 iteratively with a token of lookahead.
2459 """
2460 child = self.ParsePipeline()
2461 assert child is not None
2462
2463 self._GetWord()
2464 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2465 return child
2466
2467 ops = [] # type: List[Token]
2468 children = [child]
2469
2470 while True:
2471 ops.append(word_.AsOperatorToken(self.cur_word))
2472
2473 self._SetNext() # skip past || &&
2474 self._NewlineOk()
2475
2476 child = self.ParsePipeline()
2477 children.append(child)
2478
2479 self._GetWord()
2480 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2481 break
2482
2483 return command.AndOr(children, ops)
2484
2485 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2486
2487 # At the top level, we execute after every line, e.g. to
2488 # - process alias (a form of dynamic parsing)
2489 # - process 'exit', because invalid syntax might appear after it
2490
2491 # On the other hand, for a while loop body, we parse the whole thing at once,
2492 # and then execute it. We don't want to parse it over and over again!
2493
2494 # COMPARE
2495 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2496 # command_term : and_or (trailer and_or)* ; # CHILDREN
2497
    def _ParseCommandLine(self):
        # type: () -> command_t
        """
        command_line : and_or (sync_op and_or)* trailer? ;
        trailer : sync_op newline_ok
                | NEWLINES;
        sync_op : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively.  Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return.  (We're only parsing a single
              line.)
           b. If there's a sync_op, process it.  Then look for a newline and
              return.  Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in Sentence so the terminator token (; or &) is kept;
                # & means run in the background.
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                # A newline or EOF right after the sync op ends the line.
                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die('Invalid word while parsing command line',
                      loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
2552
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term : and_or (trailer and_or)* ;
        trailer : sync_op newline_ok
                | NEWLINES;
        sync_op : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different. (HOW? Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            # NOTE: the three 'self.c_id in END_LIST' checks below look alike
            # but are at different nesting depths; they cover the distinct
            # trailers '\n EOF', '; \n EOF', '; EOF', and bare EOF.
            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2632
    def _ParseCommandList(self):
        # type: () -> command.CommandList
        """
        command_list : newline_ok command_term trailer? ;

        This one is called by all the compound commands.  It's basically a
        command block.

        NOTE: Rather than translating the CFG directly, the code follows a
        style more like this: (and_or trailer)+.  It makes capture easier.
        """
        self._NewlineOk()
        return self._ParseCommandTerm()
2647
    def ParseLogicalLine(self):
        # type: () -> Optional[command_t]
        """Parse a single line for main_loop.

        A wrapper around _ParseCommandLine().  Similar but not identical to
        _ParseCommandList() and ParseCommandSub().

        Returns:
          None on Eof_Real, so the main loop can check for pending here docs.
          (Annotation widened to Optional to match this behavior.)

        Raises:
          ParseError
        """
        self._NewlineOk()
        self._GetWord()
        if self.c_id == Id.Eof_Real:
            return None  # main loop checks for here docs
        node = self._ParseCommandLine()
        return node
2664
2665 def ParseInteractiveLine(self):
2666 # type: () -> parse_result_t
2667 """Parse a single line for Interactive main_loop.
2668
2669 Different from ParseLogicalLine because newlines are handled differently.
2670
2671 Raises:
2672 ParseError
2673 """
2674 self._GetWord()
2675 if self.c_id == Id.Op_Newline:
2676 return parse_result.EmptyLine
2677 if self.c_id == Id.Eof_Real:
2678 return parse_result.Eof
2679
2680 node = self._ParseCommandLine()
2681 return parse_result.Node(node)
2682
2683 def ParseCommandSub(self):
2684 # type: () -> command_t
2685 """Parse $(echo hi) and `echo hi` for word_parse.py.
2686
2687 They can have multiple lines, like this: echo $( echo one echo
2688 two )
2689 """
2690 self._NewlineOk()
2691
2692 self._GetWord()
2693 if self.c_kind == Kind.Eof: # e.g. $()
2694 return command.NoOp
2695
2696 c_list = self._ParseCommandTerm()
2697 if len(c_list.children) == 1:
2698 return c_list.children[0]
2699 else:
2700 return c_list
2701
2702 def CheckForPendingHereDocs(self):
2703 # type: () -> None
2704 # NOTE: This happens when there is no newline at the end of a file, like
2705 # osh -c 'cat <<EOF'
2706 if len(self.pending_here_docs):
2707 node = self.pending_here_docs[0] # Just show the first one?
2708 h = cast(redir_param.HereDoc, node.arg)
2709 p_die('Unterminated here doc began here', loc.Word(h.here_begin))