OILS / osh / cmd_parse.py View on Github | oilshell.org

2773 lines, 1419 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
_ = Kind_str  # silence "unused import"; kept for debug prints

# Byte values used by _ScanSimpleCommand to check the character before '('.
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the raw lines of a here doc body, up to its terminator.

    Returns (here_lines, last_line); each entry is (line, start_offset), where
    start_offset is nonzero only for <<- (it skips the stripped leading tabs).
    Raises a fatal parse error (p_die) if EOF is hit before the terminator.

    NOTE: We read all lines at once, instead of parsing line-by-line,
    because of cases like this:
    cat <<EOF
    1 $(echo 2
    echo 3) 4
    EOF
    """
    here_lines = []  # type: List[Tuple[SourceLine, int]]
    last_line = None  # type: Tuple[SourceLine, int]
    # <<- strips leading tabs; plain << does not
    strip_leading_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # An unterminated here doc is just a warning in bash. We make it
            # fatal because we want to be strict, and because it causes problems
            # reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # If op is <<-, strip off ALL leading tabs -- not spaces, and not just
        # the first tab.
        start_offset = 0
        if strip_leading_tabs:
            n = len(line)
            i = 0  # used after loop exit
            while i < n:
                if line[i] != '\t':
                    break
                i += 1
            start_offset = i

        # Terminator line: must equal the delimiter after tab stripping and
        # removal of trailing whitespace.
        if line[start_offset:].rstrip() == delimiter:
            last_line = (src_line, start_offset)
            break

        here_lines.append((src_line, start_offset))

    return here_lines, last_line
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Make one literal Id.Lit_Chars token per here doc line.

    Used for the single-quoted rule: <<'EOF' and <<-'EOF'.

    <<- produces a non-zero start_offset (stripped tabs).
    """
    # Declared with the less precise element type word_part_t, because
    # List[T] is an invariant type.
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:

        # Maintain the lossless invariant for STRIPPED tabs: add a token to
        # the arena, but never refer to it.
        #
        # Note: Lit_CharsWithoutPrefix could also be used for 'single quoted'
        # EOF here docs, but it's more complex with double quoted EOF docs.
        if do_lossless:  # avoid garbage, doesn't affect correctness
            arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0,
                           src_line)

        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line)
        parts.append(tok)

    return parts
169
170
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Reads the body lines, then sets h.stdin_parts and h.here_end_tok on the
    redirect's HereDoc param.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # POSIX: "If any character in word is quoted, the delimiter shall be
    # formed by performing quote removal on word, and the here-document lines
    # shall not be expanded. Otherwise, the delimiter shall be the word
    # itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - Literal for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - Parse as word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain lossless invariant for STRIPPED tabs: add a Token to the
    # arena invariant, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0, end_line)

    # Create a Token with the end terminator. Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line)
207
208
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (from DetectShAssignment).

    Handles three LHS shapes: plain s=1, lossless a[x]=1 (index kept as an
    unparsed string), and a[x++]=1 (index re-parsed as arithmetic).
    """

    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            var_name = lexer.TokenSliceRight(left_token, -2)  # chop '+='
            op = assign_op_e.PlusEqual
        else:
            var_name = lexer.TokenSliceRight(left_token, -1)  # chop '='
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        # In lossless mode, keep the index as an UNPARSED string so the
        # original source can be reproduced exactly.
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # Extract the text between '[' and ']' directly from the line.
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        w = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(w)
        rhs = w

    return AssignPair(left_token, lhs, op, rhs)
282
283
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (List[ParsedAssignment], List[EnvPair]) -> None
    """Convert prefix assignments into EnvPair nodes on a SimpleCommand.

    Args:
      preparsed_list: parsed assignments from DetectShAssignment
      more_env: output list that EnvPair instances are appended to
    """
    for p in preparsed_list:
        left_tok = p.left

        # a[x]=1 is not a valid environment binding
        if left_tok.id != Id.Lit_VarLike:
            p_die(
                "Environment binding shouldn't look like an array assignment",
                left_tok)

        # FOO+=bar is not a valid environment binding either
        if lexer.IsPlusEquals(left_tok):
            p_die('Expected = in environment binding, got +=', left_tok)

        var_name = lexer.TokenSliceRight(left_tok, -1)  # chop '='

        parts = p.w.parts
        offset = p.part_offset
        if offset == len(parts):
            rhs = rhs_word.Empty  # type: rhs_word_t
        else:
            w = CompoundWord(parts[offset:])
            word_.TildeDetectAssign(w)
            rhs = w

        more_env.append(EnvPair(left_tok, var_name, rhs))
316
317
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
    """Second pass of SimpleCommand parsing: look for assignment words.

    Splits words into a prefix of assignments (FOO=bar) and the remaining
    command words.  The first word that isn't an assignment ends the prefix.
    """
    assignments = []  # type: List[ParsedAssignment]
    suffix_words = []  # type: List[CompoundWord]

    in_suffix = False
    for w in words:
        if not in_suffix:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                assignments.append(
                    ParsedAssignment(left_token, close_token, part_offset, w))
                continue
            in_suffix = True  # first non-assignment word

        suffix_words.append(w)

    return assignments, suffix_words
339
340
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Assemble a command.Simple node from its parsed pieces."""

    # FOO=(1 2 3) ls is not allowed.
    for preparsed in preparsed_list:
        if word_.HasArrayPart(preparsed.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(preparsed.w))

    # NOTE: A check that commands can't contain array literals, e.g.
    # echo FOO=(1 2 3), used to live here (but we should NOT fail on
    # echo FOO[x]=1).  It could be added back, but it already happens at
    # runtime in EvalWordSequence2.

    assert len(suffix_words) != 0
    # Blame the first part of the first word:
    #   {a,b,c}   # Use { before brace detection
    #   ~/bin/ls  # Use ~ before tilde detection
    blame_tok = location.LeftTokenForWordPart(suffix_words[0].parts[0])

    # NOTE: We only do brace DETECTION here, not brace EXPANSION.  Therefore
    # we can't implement bash's behavior of having say {~bob,~jane}/src work,
    # because we only have a BracedTree.  This is documented in
    # spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate',
    # but it doesn't seem worth it.
    brace_detected = braces.BraceDetectAll(suffix_words)
    final_words = word_.TildeDetectAll(brace_detected)

    env_pairs = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, env_pairs)

    # do_fork by default
    return command.Simple(blame_tok, env_pairs, final_words, redirects,
                          typed_args, block, True)
386
387
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # self.tokens for location info: 'proc' or another token
        self.tokens = []  # type: List[Token]
        # One dict of declared names per function/proc/func scope
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when we enter a shell function, proc, or func.

        Bash allows this, but it's confusing because it's the same as two
        functions at the top level.

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens) != 0:
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        new_scope = {}  # type: Dict[str, Id_t]
        self.names.append(new_scope)

    def Pop(self):
        # type: () -> None
        """Leave a function/proc/func scope, discarding its declared names."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x
          x already declared
        setvar x:
          x is not declared
        setglobal x:
          No errors are possible; we would need all these many conditions to
          statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static

        What about bare assignment in Hay?  I think these are dynamic checks
        -- there is no static check.  Hay is for building up data
        imperatively, and then LATER, right before main(), it can be type
        checked.

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var and
        # setvar are essentially the same.
        if len(self.names) == 0:
            return

        scope = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in scope:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                scope[var_name] = keyword_id

        elif keyword_id == Id.KW_SetVar:
            if var_name not in scope:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
476
477
class ctx_VarChecker(object):
    """Context manager: push a scope onto var_checker for the duration."""

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        self.var_checker = var_checker
        var_checker.Push(blame_tok)

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Pop the scope that __init__ pushed.
        self.var_checker.Pop()
492
493
class ctx_CmdMode(object):
    """Context manager: temporarily switch the parser's cmd_mode."""

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode  # restored on exit
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Restore the mode saved in __init__.
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
509
510
# Keywords that can only appear INSIDE a compound command (do/done, then/fi,
# etc.).  Used by _AtSecondaryKeyword() to stop parsing a command list
# without consuming the keyword.
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
515
516
517class CommandParser(object):
518 """Recursive descent parser derived from POSIX shell grammar.
519
520 This is a BNF grammar:
521 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
522
523 - Augmented with both bash/OSH and YSH constructs.
524
525 - We use regex-like iteration rather than recursive references
526 ? means optional (0 or 1)
527 * means 0 or more
528 + means 1 or more
529
530 - Keywords are spelled in Caps:
531 If Elif Case
532
533 - Operator tokens are quoted:
534 '(' '|'
535
536 or can be spelled directly if it matters:
537
538 Op_LParen Op_Pipe
539
540 - Non-terminals are snake_case:
541 brace_group subshell
542
543 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
544 the production should be in the method docstrings, e.g.
545
546 def ParseSubshell():
547 "
548 subshell : '(' compound_list ')'
549
550 Looking at Op_LParen # Comment to say how this method is called
551 "
552
553 The grammar may be factored to make parsing easier.
554 """
555
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parse-time state, including aliases to expand
          parse_opts: option view that changes what syntax is accepted
          w_parser: the word parser this command parser drives
          lexer: for pushing hints, and lookahead to (
          line_reader: for reading here doc bodies
          eof_id: the token Id that ends parsing (default Id.Eof_Real)
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x=42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser instances.
        # I think this is OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
597
598 # Init_() function for "keyword arg"
599 def Init_AliasesInFlight(self, aliases_in_flight):
600 # type: (AliasesInFlight) -> None
601 self.aliases_in_flight = aliases_in_flight
602
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined
        self.c_id = Id.Undefined_Tok

        # Here doc redirects whose bodies haven't been read yet; drained by
        # _GetWord() at the next newline.
        self.pending_here_docs = []  # type: List[Redir]
616
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.
        """
        # Reset the word parser, lexer, and line reader in turn.
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
626
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        self.next_lex_mode = lex_mode_e.ShCommand
635
    def _SetNextBrack(self):
        # type: () -> None
        """Like _SetNext(), but read the next word in ShCommandFakeBrack mode.

        NOTE(review): presumably this mode lets '[' start a lazy arg list
        (see the Op_LBracket handling in _ScanSimpleCommand) -- confirm
        against the lexer definition.
        """
        self.next_lex_mode = lex_mode_e.ShCommandFakeBrack
639
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # Drain pending here docs: fill in the body of each one
                    # that was opened on this line.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            # Has special case for Id.Lit_{LBrace,RBrace,Equals}
            self.c_id = word_.CommandId(self.cur_word)
            # Mark the cursor as fulfilled until the next _SetNext().
            self.next_lex_mode = lex_mode_e.Undefined
669
670 def _Eat(self, c_id, msg=None):
671 # type: (Id_t, Optional[str]) -> word_t
672 """Consume a word of a type, maybe showing a custom error message.
673
674 Args:
675 c_id: the Id we expected
676 msg: improved error message
677 """
678 self._GetWord()
679 if self.c_id != c_id:
680 if msg is None:
681 msg = 'Expected word type %s, got %s' % (
682 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
683 p_die(msg, loc.Word(self.cur_word))
684
685 skipped = self.cur_word
686 self._SetNext()
687 return skipped
688
    def _NewlineOk(self):
        # type: () -> None
        """Check for optional newline and consume it."""
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
695
696 def _AtSecondaryKeyword(self):
697 # type: () -> bool
698 self._GetWord()
699 if self.c_id in SECONDARY_KEYWORDS:
700 return True
701 return False
702
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse one redirect: operator, fd/var target, and argument word.

        Precondition: the cursor is at a Kind.Redir token.
        Here doc redirects are returned with an empty body and queued on
        self.pending_here_docs to be filled at the next newline.
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
        # possibly "unify" the IDs by subtracting a constant like 8 or 16?

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':  # {fd}>out -- named descriptor
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():  # 3>out or 99>out -- explicit fd number
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:  # >out -- the operator's default descriptor
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
765
766 def _ParseRedirectList(self):
767 # type: () -> List[Redir]
768 """Try parsing any redirects at the cursor.
769
770 This is used for blocks only, not commands.
771 """
772 redirects = [] # type: List[Redir]
773 while True:
774 # This prediction needs to ONLY accept redirect operators. Should we
775 # make them a separate Kind?
776 self._GetWord()
777 if self.c_kind != Kind.Redir:
778 break
779
780 node = self.ParseRedirect()
781 redirects.append(node)
782 self._SetNext()
783
784 return redirects
785
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

        simple_command   : cmd_prefix cmd_word cmd_suffix
                         | cmd_prefix cmd_word
                         | cmd_prefix
                         | cmd_name cmd_suffix
                         | cmd_name
                         ;
        cmd_name         : WORD                   /* Apply rule 7a */
                         ;
        cmd_word         : WORD                   /* Apply rule 7b */
                         ;
        cmd_prefix       :            io_redirect
                         | cmd_prefix io_redirect
                         |            ASSIGNMENT_WORD
                         | cmd_prefix ASSIGNMENT_WORD
                         ;
        cmd_suffix       :            io_redirect
                         | cmd_suffix io_redirect
                         |            WORD
                         | cmd_suffix WORD

        YSH grammar:

        redirect = redir_op WORD
        item = WORD | redirect

        typed_args =
          '(' arglist ')'
        | '[' arglist ']'

        simple_command =
          cmd_prefix* item+ typed_args? BraceGroup? cmd_suffix*

        Notably, redirects shouldn't appear after typed args, or after
        BraceGroup.

        Examples:

        This is an assignment:
           foo=1 >out

        This is a command.Simple
           >out

        What about
           >out (42)
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()

            # If we got { }, change it to something that's not Kind.Word
            kind2 = self.c_kind
            if (kind2 == Kind.Word and self.parse_opts.parse_brace() and
                    self.c_id in (Id.Lit_LBrace, Id.Lit_RBrace)):
                kind2 = Kind.Op

            if kind2 == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif kind2 == Kind.Word:
                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    #
                    # Can we remove this StaticEval() call, and just look
                    # inside Token?  I think once we get rid of SHELL nodes,
                    # this will be simpler.

                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or
                    # FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            else:
                break

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1

        # my-cmd (x) or my-cmd [x]
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            # 1. Check that there's a preceding space
            prev_byte = self.lexer.ByteLookBack()
            if prev_byte not in (SPACE_CH, TAB_CH):
                if self.parse_opts.parse_at():
                    p_die('Space required before (',
                          loc.Word(self.cur_word))
                else:
                    # inline func call like @sorted(x) is invalid in OSH, but
                    # the solution isn't a space
                    p_die(
                        'Unexpected left paren (might need a space before it)',
                        loc.Word(self.cur_word))

            # 2. Check that it's not ().  We disallow this because it's a
            # no-op and there could be confusion with shell func defs.
            # For some reason we need to call lexer.LookPastSpace, not
            # w_parser.LookPastSpace.  I think this is because we're at (,
            # which is an operator token.  All the other cases are like 'x=',
            # which is PART of a word, and we don't know if it will end.
            next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
            if next_id == Id.Op_RParen:
                p_die('Empty arg list not allowed',
                      loc.Word(self.cur_word))

            typed_args = self.w_parser.ParseProcCallArgs(
                grammar_nt.ysh_eager_arglist)

            self._SetNext()

        elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
            typed_args = self.w_parser.ParseProcCallArgs(
                grammar_nt.ysh_lazy_arglist)

            self._SetNext()

        self._GetWord()

        # Allow redirects after typed args, e.g.
        #   json write (x) > out.txt
        if self.c_kind == Kind.Redir:
            redirects.extend(self._ParseRedirectList())

        # my-cmd { echo hi }   my-cmd (x) { echo hi }   ...
        if (self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace and
                # Disabled for if/while condition, etc.
                self.allow_block):

            # allow x = 42
            self.hay_attrs_stack.append(first_word_caps)
            brace_group = self.ParseBraceGroup()

            # So we can get the source code back later
            lines = self.arena.SaveLinesAndDiscard(brace_group.left,
                                                   brace_group.right)
            block = LiteralBlock(brace_group, lines)

            self.hay_attrs_stack.pop()

        self._GetWord()

        # Allow redirects after block, e.g.
        #   cd /tmp { echo $PWD } > out.txt
        if self.c_kind == Kind.Redir:
            redirects.extend(self._ParseRedirectList())

        return redirects, words, typed_args, block
963
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node, or None.

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out
        a better places.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.

        Returns:
          A command node if any aliases were expanded, or None otherwise.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only statically-evaluable, unquoted words can be alias names.
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want
            # to prevent expansion of the second word in 'echo echo', so we
            # add 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own
        # Arena.  This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error; no
                    # more handling is needed here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1102
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar

        io_file        : '<'       filename
                       | LESSAND   filename
                         ...

        io_here        : DLESS     here_end
                       | DLESSDASH here_end

        redirect       : IO_NUMBER (io_redirect | io_here)

        prefix_part    : ASSIGNMENT_WORD | redirect
        cmd_part       : WORD | redirect

        assign_kw      : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command   : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects.  Append them to separate lists.
        2) Look for the first non-assignment word.  If it's declare, etc., then
        keep parsing words AND assign words.  Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have the of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array literal
        in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted.  e.g.
        <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # Pick a blame location for "typed args not allowed here" errors below.
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g. >out.txt # redirect without words
            # _ScanSimpleCommand only returns when it saw something, so there
            # must be at least one redirect here.
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading FOO=bar bindings from the rest of the words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            #   and inside Hay Attr blocks
            # But allow X=Y at the top level
            #   for interactive use foo=bar
            #   for global constants GLOBAL=~/src
            #     because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion.  We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        # Is the first word a control-flow keyword like break/continue/return?
        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x    - inside procs and shell functions
                # return (x)  - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors.  (ShAssignment may not need it.)
            # Control flow takes at most one argument, e.g. 'break 2'.
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
1281
    def ParseBraceGroup(self):
        # type: () -> BraceGroup
        """
        Original:
          brace_group : LBrace command_list RBrace ;

        YSH:
          brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;

        The doc comment can only occur if there's a newline.
        """
        ate = self._Eat(Id.Lit_LBrace)
        left = word_.BraceToken(ate)

        doc_word = None  # type: word_t
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
            # Set a flag so we don't skip over ###
            # (the lexer normally discards comments; this context keeps a
            # ### doc comment as an Id.Ignored_Comment token)
            with word_.ctx_EmitDocToken(self.w_parser):
                self._GetWord()

        if self.c_id == Id.Ignored_Comment:
            doc_word = self.cur_word
            self._SetNext()

        # Id.Ignored_Comment means it's a Token, or None
        doc_token = cast(Token, doc_word)

        c_list = self._ParseCommandList()

        ate = self._Eat(Id.Lit_RBrace)
        right = word_.BraceToken(ate)

        # Note(andychu): Related ASDL bug #1216.  Choosing the Python [] behavior
        # would allow us to revert this back to None, which was changed in
        # https://github.com/oilshell/oil/pull/1211.  Choosing the C++ nullptr
        # behavior saves allocations, but is less type safe.
        return BraceGroup(left, doc_token, c_list.children, [],
                          right)  # no redirects yet
1322
1323 def ParseDoGroup(self):
1324 # type: () -> command.DoGroup
1325 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1326
1327 do_group : Do command_list Done ; /* Apply rule 6 */
1328 """
1329 ate = self._Eat(Id.KW_Do)
1330 do_kw = word_.AsKeywordToken(ate)
1331
1332 c_list = self._ParseCommandList() # could be anything
1333
1334 ate = self._Eat(Id.KW_Done)
1335 done_kw = word_.AsKeywordToken(ate)
1336
1337 return command.DoGroup(do_kw, c_list.children, done_kw)
1338
    def ParseForWords(self):
        # type: () -> Tuple[List[CompoundWord], Optional[Token]]
        """Collect the iterable words of 'for x in a b c; ...'.

        for_words        : WORD* for_sep
                         ;
        for_sep          : ';' newline_ok
                         | NEWLINES
                         ;

        Returns the words plus the token of any trailing semicolon (so the
        caller can remove/blame it).
        """
        words = []  # type: List[CompoundWord]
        # The token of any semi-colon, so we can remove it.
        semi_tok = None  # type: Optional[Token]

        while True:
            self._GetWord()
            if self.c_id == Id.Op_Semi:
                tok = cast(Token, self.cur_word)
                semi_tok = tok
                self._SetNext()
                self._NewlineOk()
                break
            elif self.c_id == Id.Op_Newline:
                self._SetNext()
                break
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                # YSH style: 'for x in a b {' -- '{' ends the word list
                break

            if self.cur_word.tag() != word_e.Compound:
                # TODO: Can we also show a pointer to the 'for' keyword?
                p_die('Invalid word in for loop', loc.Word(self.cur_word))

            w2 = cast(CompoundWord, self.cur_word)
            words.append(w2)
            self._SetNext()
        return words, semi_tok
1374
    def _ParseForExprLoop(self, for_kw):
        # type: (Token) -> command.ForExpr
        """Parse the C-style for (( ... )) loop body after 'for' was consumed.

        Shell:
          for '((' init ';' cond ';' update '))' for_sep? do_group

        YSH:
          for '((' init ';' cond ';' update '))' for_sep? brace_group
        """
        node = self.w_parser.ReadForExpression()
        node.keyword = for_kw

        self._SetNext()

        # The separator between '))' and the body is optional.
        self._GetWord()
        if self.c_id == Id.Op_Semi:
            self._SetNext()
            self._NewlineOk()
        elif self.c_id == Id.Op_Newline:
            self._SetNext()
        elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
            pass
        elif self.c_id == Id.Lit_LBrace:  # does NOT require parse_brace
            pass
        else:
            p_die('Invalid word after for expression', loc.Word(self.cur_word))

        if self.c_id == Id.Lit_LBrace:
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()
        return node
1407
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse 'for x [, y [, z]] [in ...]' plus the loop body.

        Handles POSIX 'for x in words', implicit 'for x' (over "$@"), and the
        YSH forms 'for x in (expr) { ... }' and multiple loop variables.
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    # Drop the trailing comma token from the word in place.
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'.  But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y)    # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH: iterate over an expression, body must be a brace group
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # POSIX: iterate over a word list
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1511
1512 def ParseFor(self):
1513 # type: () -> command_t
1514 """
1515 TODO: Update the grammar
1516
1517 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1518 | For '((' ... TODO
1519 """
1520 ate = self._Eat(Id.KW_For)
1521 for_kw = word_.AsKeywordToken(ate)
1522
1523 self._GetWord()
1524 if self.c_id == Id.Op_DLeftParen:
1525 if not self.parse_opts.parse_dparen():
1526 p_die("Bash for loops aren't allowed (parse_dparen)",
1527 loc.Word(self.cur_word))
1528
1529 # for (( i = 0; i < 10; i++)
1530 n1 = self._ParseForExprLoop(for_kw)
1531 n1.redirects = self._ParseRedirectList()
1532 return n1
1533 else:
1534 # for x in a b; do echo hi; done
1535 n2 = self._ParseForEachLoop(for_kw)
1536 n2.redirects = self._ParseRedirectList()
1537 return n2
1538
1539 def _ParseConditionList(self):
1540 # type: () -> condition_t
1541 """
1542 condition_list: command_list
1543
1544 This is a helper to parse a condition list for if commands and while/until
1545 loops. It will throw a parse error if there are no conditions in the list.
1546 """
1547 self.allow_block = False
1548 commands = self._ParseCommandList()
1549 self.allow_block = True
1550
1551 if len(commands.children) == 0:
1552 p_die("Expected a condition", loc.Word(self.cur_word))
1553
1554 return condition.Shell(commands.children)
1555
    def ParseWhileUntil(self, keyword):
        # type: (Token) -> command.WhileUntil
        """Parse a while or until loop; 'keyword' is the already-read keyword.

        while_clause     : While command_list do_group ;
        until_clause     : Until command_list do_group ;
        """
        self._SetNext()  # skip keyword

        # YSH: while (expr) { ... }
        if (self.parse_opts.parse_paren() and
                self.w_parser.LookPastSpace() == Id.Op_LParen):
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            cond = self._ParseConditionList()

        # NOTE: The LSTs will be different for OSH and YSH, but the execution
        # should be unchanged.  To be sure we should desugar.
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            # while test -f foo {
            body_node = self.ParseBraceGroup()  # type: command_t
        else:
            body_node = self.ParseDoGroup()

        # no redirects yet
        return command.WhileUntil(keyword, cond, body_node, None)
1582
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a POSIX case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # Re-interpret the ')' that closes the pattern list as Right_CasePat.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # One or more patterns separated by |
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The action is optional: an arm may be empty before ;; or esac.
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp,
                             Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp):
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1640
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case statement.

        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`.  We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        # 'discriminant' is the Id of the first token, computed by the caller.
        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words: one or more words separated by |
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # Blame the first pattern word for the whole arm.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1698
    def ParseYshCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse the YSH form: case (expr) { arms }.

        ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;

        Looking at: token after 'case'
        """
        enode = self.w_parser.ParseYshExprForCommand()
        to_match = case_arg.YshExpr(enode)

        ate = self._Eat(Id.Lit_LBrace)
        arms_start = word_.BraceToken(ate)

        # NewlineOkForYshCase also peeks at the next token's Id so we know how
        # to parse the next arm (word pattern vs. expression/eggex).
        discriminant = self.w_parser.NewlineOkForYshCase()

        # Note: for now, zero arms are accepted, just like POSIX case $x in esac
        arms = []  # type: List[CaseArm]
        while discriminant != Id.Op_RBrace:
            arm = self.ParseYshCaseArm(discriminant)
            arms.append(arm)

            discriminant = self.w_parser.NewlineOkForYshCase()

        # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr.  So the '}'
        # token is read as an Id.Op_RBrace, but we need to store this as a
        # Id.Lit_RBrace.
        ate = self._Eat(Id.Op_RBrace)
        arms_end = word_.AsOperatorToken(ate)
        arms_end.id = Id.Lit_RBrace

        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1731
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse the POSIX form: case WORD in arms esac.

        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
        """
        self._GetWord()
        w = self.cur_word
        if not self.parse_opts.parse_bare_word():
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string.  You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1783
1784 def ParseCase(self):
1785 # type: () -> command.Case
1786 """
1787 case_clause : old_case # from POSIX
1788 | ysh_case
1789 ;
1790
1791 Looking at 'Case'
1792 """
1793 case_kw = word_.AsKeywordToken(self.cur_word)
1794 self._SetNext() # past 'case'
1795
1796 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1797 return self.ParseYshCase(case_kw)
1798 else:
1799 return self.ParseOldCase(case_kw)
1800
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse YSH-style elif/else clauses and append them to if_node.

        if test -f foo {
        } elif test -f bar; test -f spam {
          # ^ we parsed up to here
          echo bar
        } else {
          echo none
        }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()

            arm = IfArm(elif_kw, cond, None, body.children, None)
            arms.append(arm)

        # NOTE(review): _GetWord() was already called at the end of the loop
        # body above; this second call looks redundant -- confirm before removing.
        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1834
    def _ParseYshIf(self, if_kw, cond):
        # type: (Token, condition_t) -> command.If
        """Parse the rest of a YSH if, given the keyword and parsed condition.

        if test -f foo {
                       # ^ we parsed up to here
          echo foo
        } elif test -f bar; test -f spam {
          echo bar
        } else {
          echo none
        }
        NOTE: If you do something like if test -n foo{, the parser keeps going, and
        the error is confusing because it doesn't point to the right place.

        I think we might need strict_brace so that foo{ is disallowed.  It has to
        be foo\{ or foo{a,b}.  Or just turn that on with parse_brace?  After you
        form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
        Lit_RBrace?  Maybe this is pre-parsing step in the WordParser?
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_node.if_kw = if_kw

        body1 = self.ParseBraceGroup()
        # Every arm has 1 spid, unlike shell-style
        # TODO: We could get the spids from the brace group.
        arm = IfArm(if_kw, cond, None, body1.children, None)

        if_node.arms.append(arm)

        self._GetWord()
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseYshElifElse(if_node)
        # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
        # spid because that's in the BraceGroup.
        return if_node
1870
    def _ParseElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse shell-style elif/else clauses and append them to if_node.

        else_part: (Elif command_list Then command_list)* Else command_list ;
        """
        arms = if_node.arms

        self._GetWord()
        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'elif'

            cond = self._ParseConditionList()

            ate = self._Eat(Id.KW_Then)
            then_kw = word_.AsKeywordToken(ate)

            body = self._ParseCommandList()
            # 'then' doubles as the blame token for the arm's end.
            arm = IfArm(elif_kw, cond, then_kw, body.children, then_kw)

            arms.append(arm)

            self._GetWord()

        if self.c_id == Id.KW_Else:
            else_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'else'
            body = self._ParseCommandList()
            if_node.else_action = body.children
        else:
            else_kw = None

        if_node.else_kw = else_kw
1903
    def ParseIf(self):
        # type: () -> command.If
        """
        if_clause        : If command_list Then command_list else_part? Fi ;

        open             : '{' | Then
        close            : '}' | Fi

        ysh_if           : If ( command_list | '(' expr ')' )
                           open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if (self.parse_opts.parse_paren() and
                self.w_parser.LookPastSpace() == Id.Op_LParen):
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        # A '{' body means the YSH form; hand off the rest of the parse.
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children, then_kw)
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1953
1954 def ParseTime(self):
1955 # type: () -> command_t
1956 """Time [-p] pipeline.
1957
1958 According to bash help.
1959 """
1960 time_kw = word_.AsKeywordToken(self.cur_word)
1961 self._SetNext() # skip time
1962 pipeline = self.ParsePipeline()
1963 return command.TimeBlock(time_kw, pipeline)
1964
    def ParseCompoundCommand(self):
        # type: () -> command_t
        """Dispatch on the current token to the right compound-command parser.

        Refactoring: we put io_redirect* here instead of in function_body and
        command.

        compound_command : brace_group io_redirect*
                         | subshell io_redirect*
                         | for_clause io_redirect*
                         | while_clause io_redirect*
                         | until_clause io_redirect*
                         | if_clause io_redirect*
                         | case_clause io_redirect*

                         # bash extensions
                         | time_clause
                         | [[ BoolExpr ]]
                         | (( ArithExpr ))
        """
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:
            n1 = self.ParseBraceGroup()
            n1.redirects = self._ParseRedirectList()
            return n1
        if self.c_id == Id.Op_LParen:
            n2 = self.ParseSubshell()
            n2.redirects = self._ParseRedirectList()
            return n2

        if self.c_id == Id.KW_For:
            # Note: Redirects parsed in this call.  POSIX for and bash for (( have
            # redirects, but YSH for doesn't.
            return self.ParseFor()
        if self.c_id in (Id.KW_While, Id.KW_Until):
            keyword = word_.AsKeywordToken(self.cur_word)
            n3 = self.ParseWhileUntil(keyword)
            n3.redirects = self._ParseRedirectList()
            return n3

        if self.c_id == Id.KW_If:
            n4 = self.ParseIf()
            n4.redirects = self._ParseRedirectList()
            return n4
        if self.c_id == Id.KW_Case:
            n5 = self.ParseCase()
            n5.redirects = self._ParseRedirectList()
            return n5

        if self.c_id == Id.KW_DLeftBracket:
            if not self.parse_opts.parse_dbracket():
                p_die('Bash [[ not allowed in YSH (parse_dbracket)',
                      loc.Word(self.cur_word))
            n6 = self.ParseDBracket()
            n6.redirects = self._ParseRedirectList()
            return n6
        if self.c_id == Id.Op_DLeftParen:
            if not self.parse_opts.parse_dparen():
                p_die(
                    'Bash (( not allowed in YSH (parse_dparen, see OILS-ERR-14 for wart)',
                    loc.Word(self.cur_word))
            n7 = self.ParseDParen()
            n7.redirects = self._ParseRedirectList()
            return n7

        # bash extensions: no redirects
        if self.c_id == Id.KW_Time:
            return self.ParseTime()

        # Happens in function body, e.g. myfunc() oops
        p_die(
            'Unexpected word while parsing compound command (%s)' %
            Id_str(self.c_id), loc.Word(self.cur_word))
        assert False  # for MyPy
2038
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """
        function_header : fname '(' ')'
        function_def     : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            # Scope the var checker to the function body.
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            return None  # unreachable; p_die raises (kept for MyPy)
2091
2092 def ParseKshFunctionDef(self):
2093 # type: () -> command.ShFunction
2094 """
2095 ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
2096 """
2097 keyword_tok = word_.AsKeywordToken(self.cur_word)
2098
2099 self._SetNext() # skip past 'function'
2100 self._GetWord()
2101
2102 cur_word = cast(CompoundWord, self.cur_word) # caller ensures validity
2103 name = word_.ShFunctionName(cur_word)
2104 if len(name) == 0: # example: foo$x is invalid
2105 p_die('Invalid KSH-style function name', loc.Word(cur_word))
2106
2107 name_word = self.cur_word
2108 self._SetNext() # skip past 'function name
2109
2110 self._GetWord()
2111 if self.c_id == Id.Op_LParen:
2112 self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
2113 self._SetNext()
2114 self._Eat(Id.Right_ShFunction)
2115
2116 self._NewlineOk()
2117
2118 func = command.ShFunction.CreateNull()
2119 func.name = name
2120 with ctx_VarChecker(self.var_checker, keyword_tok):
2121 func.body = self.ParseCompoundCommand()
2122
2123 func.keyword = keyword_tok
2124 func.name_tok = location.LeftTokenForWord(name_word)
2125 return func
2126
2127 def ParseYshProc(self):
2128 # type: () -> Proc
2129 node = Proc.CreateNull(alloc_lists=True)
2130
2131 keyword_tok = word_.AsKeywordToken(self.cur_word)
2132 node.keyword = keyword_tok
2133
2134 with ctx_VarChecker(self.var_checker, keyword_tok):
2135 with ctx_CmdMode(self, cmd_mode_e.Proc):
2136 self.w_parser.ParseProc(node)
2137 if node.sig.tag() == proc_sig_e.Closed: # Register params
2138 sig = cast(proc_sig.Closed, node.sig)
2139
2140 # Treat 3 kinds of params as variables.
2141 wp = sig.word
2142 if wp:
2143 for param in wp.params:
2144 self.var_checker.Check(Id.KW_Var, param.name,
2145 param.blame_tok)
2146 if wp.rest_of:
2147 r = wp.rest_of
2148 self.var_checker.Check(Id.KW_Var, r.name,
2149 r.blame_tok)
2150 # We COULD register __out here but it would require a different API.
2151 #if param.prefix and param.prefix.id == Id.Arith_Colon:
2152 # self.var_checker.Check(Id.KW_Var, '__' + param.name)
2153
2154 posit = sig.positional
2155 if posit:
2156 for param in posit.params:
2157 self.var_checker.Check(Id.KW_Var, param.name,
2158 param.blame_tok)
2159 if posit.rest_of:
2160 r = posit.rest_of
2161 self.var_checker.Check(Id.KW_Var, r.name,
2162 r.blame_tok)
2163
2164 named = sig.named
2165 if named:
2166 for param in named.params:
2167 self.var_checker.Check(Id.KW_Var, param.name,
2168 param.blame_tok)
2169 if named.rest_of:
2170 r = named.rest_of
2171 self.var_checker.Check(Id.KW_Var, r.name,
2172 r.blame_tok)
2173
2174 if sig.block_param:
2175 b = sig.block_param
2176 self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)
2177
2178 self._SetNext()
2179 node.body = self.ParseBraceGroup()
2180 # No redirects for YSH procs (only at call site)
2181
2182 return node
2183
2184 def ParseYshFunc(self):
2185 # type: () -> Func
2186 """
2187 ysh_func: (
2188 Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
2189 )
2190 Looking at KW_Func
2191 """
2192 node = Func.CreateNull(alloc_lists=True)
2193
2194 keyword_tok = word_.AsKeywordToken(self.cur_word)
2195 node.keyword = keyword_tok
2196
2197 with ctx_VarChecker(self.var_checker, keyword_tok):
2198 self.w_parser.ParseFunc(node)
2199
2200 posit = node.positional
2201 if posit:
2202 for param in posit.params:
2203 self.var_checker.Check(Id.KW_Var, param.name,
2204 param.blame_tok)
2205 if posit.rest_of:
2206 r = posit.rest_of
2207 self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)
2208
2209 named = node.named
2210 if named:
2211 for param in named.params:
2212 self.var_checker.Check(Id.KW_Var, param.name,
2213 param.blame_tok)
2214 if named.rest_of:
2215 r = named.rest_of
2216 self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)
2217
2218 self._SetNext()
2219 with ctx_CmdMode(self, cmd_mode_e.Func):
2220 node.body = self.ParseBraceGroup()
2221
2222 return node
2223
2224 def ParseCoproc(self):
2225 # type: () -> command_t
2226 """
2227 TODO: command.Coproc?
2228 """
2229 raise NotImplementedError()
2230
2231 def ParseSubshell(self):
2232 # type: () -> command.Subshell
2233 """
2234 subshell : '(' compound_list ')'
2235
2236 Looking at Op_LParen
2237 """
2238 left = word_.AsOperatorToken(self.cur_word)
2239 self._SetNext() # skip past (
2240
2241 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2242 # translation stack, we want to delay it.
2243
2244 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2245
2246 c_list = self._ParseCommandList()
2247 if len(c_list.children) == 1:
2248 child = c_list.children[0]
2249 else:
2250 child = c_list
2251
2252 ate = self._Eat(Id.Right_Subshell)
2253 right = word_.AsOperatorToken(ate)
2254
2255 return command.Subshell(left, child, right, None) # no redirects yet
2256
2257 def ParseDBracket(self):
2258 # type: () -> command.DBracket
2259 """Pass the underlying word parser off to the boolean expression
2260 parser."""
2261 left = word_.AsKeywordToken(self.cur_word)
2262 # TODO: Test interactive. Without closing ]], you should get > prompt
2263 # (PS2)
2264
2265 self._SetNext() # skip [[
2266 b_parser = bool_parse.BoolParser(self.w_parser)
2267 bnode, right = b_parser.Parse() # May raise
2268 return command.DBracket(left, bnode, right, None) # no redirects yet
2269
2270 def ParseDParen(self):
2271 # type: () -> command.DParen
2272 left = word_.AsOperatorToken(self.cur_word)
2273
2274 self._SetNext() # skip ((
2275 anode, right = self.w_parser.ReadDParen()
2276 assert anode is not None
2277
2278 return command.DParen(left, anode, right, None) # no redirects yet
2279
    def ParseCommand(self):
        # type: () -> command_t
        """Top-level dispatch on the first word of a command.

        command          : simple_command
                         | compound_command   # OSH edit: io_redirect* folded in
                         | function_def
                         | ksh_function_def

                         # YSH extensions
                         | proc NAME ...
                         | typed proc NAME ...
                         | func NAME ...
                         | const ...
                         | var ...
                         | setglobal ...
                         | setref ...
                         | setvar ...
                         | call EXPR
                         | = EXPR
                         ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1

        Raises:
          error.Parse (via p_die) on any syntax error.
        """
        # do/done/then etc. cannot BEGIN a command
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle.  Code
            # inside procs should be YSH, full stop.  That means ysh:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                # 2024-02: This avoids bad syntax errors if you type YSH code
                # into OSH.
                # proc p (x) { echo hi } would actually be parsed as a
                # command.Simple!  Shell compatibility: quote 'proc'
                p_die("proc is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Typed:  # typed proc p () { ... }
            self._SetNext()
            self._GetWord()
            if self.c_id != Id.KW_Proc:
                p_die("Expected 'proc' after 'typed'", loc.Word(self.cur_word))

            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                p_die("typed is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func():
                return self.ParseYshFunc()
            else:
                # Same reasoning as above, for 'proc'
                p_die("func is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
            p_die("const can't be inside proc or func. Use var instead.",
                  loc.Word(self.cur_word))

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Statically register each declared name with the var checker
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name, lhs.left)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.KW_Call, Id.Lit_Equals):
            # = 42 + a[i]
            # call mylist->append('x')

            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            # ensured by Kind.Word
            cur_word = cast(CompoundWord, self.cur_word)

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need
            # parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    # Only a bare var name followed by '=' counts
                    if (match.IsValidVarName(lexer.LazyStr(tok)) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        if (len(self.hay_attrs_stack) and
                                self.hay_attrs_stack[-1]):
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here.  Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(
                                None,
                                [NameType(tok, lexer.TokenVal(tok), None)],
                                enode)
                        else:
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy
2452
2453 def ParsePipeline(self):
2454 # type: () -> command_t
2455 """
2456 pipeline : Bang? command ( '|' newline_ok command )* ;
2457 """
2458 negated = None # type: Optional[Token]
2459
2460 self._GetWord()
2461 if self.c_id == Id.KW_Bang:
2462 negated = word_.AsKeywordToken(self.cur_word)
2463 self._SetNext()
2464
2465 child = self.ParseCommand()
2466 assert child is not None
2467
2468 children = [child]
2469
2470 self._GetWord()
2471 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2472 if negated is not None:
2473 node = command.Pipeline(negated, children, [])
2474 return node
2475 else:
2476 return child # no pipeline
2477
2478 # | or |&
2479 ops = [] # type: List[Token]
2480 while True:
2481 op = word_.AsOperatorToken(self.cur_word)
2482 ops.append(op)
2483
2484 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2485 self._NewlineOk()
2486
2487 child = self.ParseCommand()
2488 children.append(child)
2489
2490 self._GetWord()
2491 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2492 break
2493
2494 return command.Pipeline(negated, children, ops)
2495
2496 def ParseAndOr(self):
2497 # type: () -> command_t
2498 self._GetWord()
2499 if self.c_id == Id.Lit_TDot:
2500 # We got '...', so parse in multiline mode
2501 self._SetNext()
2502 with word_.ctx_Multiline(self.w_parser):
2503 return self._ParseAndOr()
2504
2505 # Parse in normal mode, not multiline
2506 return self._ParseAndOr()
2507
2508 def _ParseAndOr(self):
2509 # type: () -> command_t
2510 """
2511 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2512 | pipeline
2513
2514 Note that it is left recursive and left associative. We parse it
2515 iteratively with a token of lookahead.
2516 """
2517 child = self.ParsePipeline()
2518 assert child is not None
2519
2520 self._GetWord()
2521 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2522 return child
2523
2524 ops = [] # type: List[Token]
2525 children = [child]
2526
2527 while True:
2528 ops.append(word_.AsOperatorToken(self.cur_word))
2529
2530 self._SetNext() # skip past || &&
2531 self._NewlineOk()
2532
2533 child = self.ParsePipeline()
2534 children.append(child)
2535
2536 self._GetWord()
2537 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2538 break
2539
2540 return command.AndOr(children, ops)
2541
2542 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2543
2544 # At the top level, we execute after every line, e.g. to
2545 # - process alias (a form of dynamic parsing)
2546 # - process 'exit', because invalid syntax might appear after it
2547
2548 # On the other hand, for a while loop body, we parse the whole thing at once,
2549 # and then execute it. We don't want to parse it over and over again!
2550
2551 # COMPARE
2552 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2553 # command_term : and_or (trailer and_or)* ; # CHILDREN
2554
2555 def _ParseCommandLine(self):
2556 # type: () -> command_t
2557 """
2558 command_line : and_or (sync_op and_or)* trailer? ;
2559 trailer : sync_op newline_ok
2560 | NEWLINES;
2561 sync_op : '&' | ';';
2562
2563 NOTE: This rule causes LL(k > 1) behavior. We would have to peek to see if
2564 there is another command word after the sync op.
2565
2566 But it's easier to express imperatively. Do the following in a loop:
2567 1. ParseAndOr
2568 2. Peek.
2569 a. If there's a newline, then return. (We're only parsing a single
2570 line.)
2571 b. If there's a sync_op, process it. Then look for a newline and
2572 return. Otherwise, parse another AndOr.
2573 """
2574 # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
2575 # I don't think we should add anything else here; otherwise it will be
2576 # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
2577 END_LIST = [Id.Op_Newline, Id.Eof_Real]
2578
2579 children = [] # type: List[command_t]
2580 done = False
2581 while not done:
2582 child = self.ParseAndOr()
2583
2584 self._GetWord()
2585 if self.c_id in (Id.Op_Semi, Id.Op_Amp):
2586 tok = cast(Token, self.cur_word) # for MyPy
2587 child = command.Sentence(child, tok)
2588 self._SetNext()
2589
2590 self._GetWord()
2591 if self.c_id in END_LIST:
2592 done = True
2593
2594 elif self.c_id in END_LIST:
2595 done = True
2596
2597 else:
2598 # e.g. echo a(b)
2599 p_die(
2600 'Invalid word while parsing command line (%s)' %
2601 Id_str(self.c_id), loc.Word(self.cur_word))
2602
2603 children.append(child)
2604
2605 # Simplify the AST.
2606 if len(children) > 1:
2607 return command.CommandList(children)
2608 else:
2609 return children[0]
2610
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term     : and_or (trailer and_or)* ;
        trailer          : sync_op newline_ok
                         | NEWLINES;
        sync_op          : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [
            self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi,
            Id.Op_SemiAmp, Id.Op_DSemiAmp
        ]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline. It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            # Peek at the token after the and_or to decide whether to stop.
            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in Sentence to record the ';' or '&' terminator
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2693
    def _ParseCommandList(self):
        # type: () -> command.CommandList
        """
        command_list     : newline_ok command_term trailer? ;

        This one is called by all the compound commands.  It's basically a
        command block.

        NOTE: Rather than translating the CFG directly, the code follows a
        style more like this: (and_or trailer)+.  It makes capture easier.
        """
        self._NewlineOk()
        return self._ParseCommandTerm()
2708
2709 def ParseLogicalLine(self):
2710 # type: () -> command_t
2711 """Parse a single line for main_loop.
2712
2713 A wrapper around _ParseCommandLine(). Similar but not identical to
2714 _ParseCommandList() and ParseCommandSub().
2715
2716 Raises:
2717 ParseError
2718 """
2719 self._NewlineOk()
2720 self._GetWord()
2721 if self.c_id == Id.Eof_Real:
2722 return None # main loop checks for here docs
2723 node = self._ParseCommandLine()
2724 return node
2725
2726 def ParseInteractiveLine(self):
2727 # type: () -> parse_result_t
2728 """Parse a single line for Interactive main_loop.
2729
2730 Different from ParseLogicalLine because newlines are handled differently.
2731
2732 Raises:
2733 ParseError
2734 """
2735 self._GetWord()
2736 if self.c_id == Id.Op_Newline:
2737 return parse_result.EmptyLine
2738 if self.c_id == Id.Eof_Real:
2739 return parse_result.Eof
2740
2741 node = self._ParseCommandLine()
2742 return parse_result.Node(node)
2743
2744 def ParseCommandSub(self):
2745 # type: () -> command_t
2746 """Parse $(echo hi) and `echo hi` for word_parse.py.
2747
2748 They can have multiple lines, like this: echo $( echo one echo
2749 two )
2750 """
2751 self._NewlineOk()
2752
2753 self._GetWord()
2754 if self.c_kind == Kind.Eof: # e.g. $()
2755 return command.NoOp
2756
2757 c_list = self._ParseCommandTerm()
2758 if len(c_list.children) == 1:
2759 return c_list.children[0]
2760 else:
2761 return c_list
2762
2763 def CheckForPendingHereDocs(self):
2764 # type: () -> None
2765 # NOTE: This happens when there is no newline at the end of a file, like
2766 # osh -c 'cat <<EOF'
2767 if len(self.pending_here_docs):
2768 node = self.pending_here_docs[0] # Just show the first one?
2769 h = cast(redir_param.HereDoc, node.arg)
2770 p_die('Unterminated here doc began here', loc.Word(h.here_begin))
2771
2772
2773# vim: sw=4