OILS / osh / cmd_parse.py View on Github | oilshell.org

2771 lines, 1417 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
_ = Kind_str  # for debug prints

# Byte values used when checking the character before '(' in _ScanSimpleCommand
# (see lexer.ByteLookBack).
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the body of a here doc, up to its terminating delimiter.

    All lines are read eagerly rather than parsed one at a time, because a
    command sub in the body may span lines:

        cat <<EOF
        1 $(echo 2
        echo 3) 4
        EOF

    Returns:
      (body lines, terminator line), where each line is paired with the
      offset of the first byte to keep -- nonzero only for <<- tab stripping.
    """
    body = []  # type: List[Tuple[SourceLine, int]]
    last_line = None  # type: Tuple[SourceLine, int]
    strip_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF before the delimiter was seen
            # bash only warns about an unterminated here doc.  We make it
            # fatal because we want to be strict, and because it causes
            # problems reporting other errors.  Blame the << operator.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # For <<-, strip ALL leading tabs -- not spaces, and not just the
        # first tab.
        start_offset = 0
        if strip_tabs:
            for ch in line:
                if ch != '\t':
                    break
                start_offset += 1

        if line[start_offset:].rstrip() == delimiter:
            last_line = (src_line, start_offset)
            break

        body.append((src_line, start_offset))

    return body, last_line
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Create one Id.Lit_Chars Token per here doc line.

    Used for <<'EOF' and <<-'EOF' -- the single quoted rule (no expansion).

    <<- has non-zero start_offset (the stripped leading tabs).
    """
    # less precise type, because List[T] is an invariant type
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:

        # Maintain the lossless invariant for STRIPPED tabs: record a Token
        # in the arena that nothing else refers to.
        #
        # Note: We could use Lit_CharsWithoutPrefix for 'single quoted' EOF
        # here docs, but it's more complex with double quoted EOF docs.

        if do_lossless:  # avoid garbage, doesn't affect correctness
            arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0,
                           src_line)

        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line)
        parts.append(tok)
    return parts
169
170
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Called when a newline is reached (see _GetWord): reads the body lines,
    then sets h.stdin_parts and h.here_end_tok on the HereDoc arg of r.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded. Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - Literal for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - Parse as word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain lossless invariant for STRIPPED tabs: add a Token to the
    # arena invariant, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0, end_line)

    # Create a Token with the end terminator.  Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line)
207
208
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (from DetectShAssignment).

    Handles three LHS shapes:
      s=1       -> sh_lhs.Name
      a[i]=1    -> sh_lhs.UnparsedIndex (lossless mode) or
                   sh_lhs.IndexedName (index parsed as arithmetic)
    """
    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            # strip the trailing '+=' from the token to get the var name
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            # strip the trailing '='
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        # a[x]=1 in lossless mode: keep the index as an unparsed string so
        # the original source can be reproduced exactly.
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        # nothing after the '=', e.g. x=
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        w = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(w)
        rhs = w

    return AssignPair(left_token, lhs, op, rhs)
282
283
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (List[ParsedAssignment], List[EnvPair]) -> None
    """Convert parsed prefix assignments into EnvPair nodes.

    Helper to modify a SimpleCommand node.

    Args:
      preparsed_list: assignments found by DetectShAssignment
      more_env: output list that EnvPair nodes are appended to
    """
    for p in preparsed_list:
        left_tok = p.left

        if left_tok.id != Id.Lit_VarLike:  # can't be a[x]=1
            p_die(
                "Environment binding shouldn't look like an array assignment",
                left_tok)

        if lexer.IsPlusEquals(left_tok):
            p_die('Expected = in environment binding, got +=', left_tok)

        # strip the trailing '=' to get the variable name
        var_name = lexer.TokenSliceRight(left_tok, -1)

        parts = p.w.parts
        offset = p.part_offset
        if offset == len(parts):  # empty RHS, e.g. FOO= cmd
            rhs = rhs_word.Empty  # type: rhs_word_t
        else:
            w = CompoundWord(parts[offset:])
            word_.TildeDetectAssign(w)
            rhs = w

        more_env.append(EnvPair(left_tok, var_name, rhs))
316
317
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
    """Second pass of SimpleCommand parsing: split off leading assignments.

    Returns:
      (assignment prefix, remaining words) -- the prefix ends at the first
      word that doesn't look like an assignment.
    """
    assignments = []  # type: List[ParsedAssignment]
    rest = []  # type: List[CompoundWord]

    in_prefix = True
    for w in words:
        if in_prefix:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                assignments.append(
                    ParsedAssignment(left_token, close_token, part_offset, w))
                continue
            in_prefix = False  # first non-assignment word ends the prefix
        rest.append(w)

    return assignments, rest
339
340
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Assemble a command.Simple node from already-split parts.

    Args:
      preparsed_list: leading assignment words, which become env bindings
      suffix_words: the command name and its arguments (must be non-empty)
      redirects, typed_args, block: attached to the node as-is
    """
    # FOO=(1 2 3) ls is not allowed.
    for p in preparsed_list:
        if word_.HasArrayPart(p.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(p.w))

    # NOTE: It would be possible to add this check back.  But it already
    # happens at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # {a,b,c}   # Use { before brace detection
    # ~/bin/ls  # Use ~ before tilde detection
    part0 = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(part0)

    # We only do brace DETECTION here, not brace EXPANSION.  Therefore we
    # can't implement bash's behavior of having say {~bob,~jane}/src work,
    # because we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # Technically we could do expansion outside of 'oshc translate', but it
    # doesn't seem worth it.
    detected_words = word_.TildeDetectAll(braces.BraceDetectAll(suffix_words))

    env_pairs = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, env_pairs)

    # do_fork by default
    return command.Simple(blame_tok, env_pairs, detected_words, redirects,
                          typed_args, block, True)
386
387
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # Parallel stacks with one entry per enclosing proc/func/function.
        # self.tokens holds the blaming token ('proc', 'func', or the shell
        # function name token); self.names maps each name declared in that
        # scope to the keyword Id that declared it.
        self.tokens = []  # type: List[Token]
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when we enter a shell function, proc, or func.

        Bash allows nesting, but it's confusing: an inner definition is the
        same as two functions at the top level.

          f() {
            g() {
              echo 'top level function defined in another one'
            }
          }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens):
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        new_scope = {}  # type: Dict[str, Id_t]
        self.names.append(new_scope)

    def Pop(self):
        # type: () -> None
        """Called when we leave a shell function, proc, or func."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x:
          error if x is already declared
        setvar x:
          error if x is not declared
        setglobal x:
          no errors possible; statically knowing all global names would
          require: no 'source', shopt -u copy_env, and a static use lib.

        Bare assignment in Hay is a dynamic check, not a static one: Hay
        builds up data imperatively, and it can be type checked LATER,
        right before main().

          Package {
            version = '3.11'
            version = '3.12'
          }
        """
        # No static checks at the global level!  Because of 'source', var
        # and setvar are essentially the same there.
        if len(self.names) == 0:
            return

        scope = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in scope:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                scope[var_name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if var_name not in scope:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
476
477
class ctx_VarChecker(object):
    """Context manager: push a VarChecker scope on entry, pop it on exit."""

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        var_checker.Push(blame_tok)
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, unused_type, unused_value, unused_traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
492
493
class ctx_CmdMode(object):
    """Context manager: temporarily set the parser's cmd_mode, restoring the
    previous mode on exit."""

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, unused_type, unused_value, unused_traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
509
510
# Keywords that continue or terminate a compound command (do/done, then/fi,
# elif/else, esac).  They never begin a new command by themselves; see
# _AtSecondaryKeyword().
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
515
516
517class CommandParser(object):
518 """Recursive descent parser derived from POSIX shell grammar.
519
520 This is a BNF grammar:
521 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
522
523 - Augmented with both bash/OSH and YSH constructs.
524
525 - We use regex-like iteration rather than recursive references
526 ? means optional (0 or 1)
527 * means 0 or more
528 + means 1 or more
529
530 - Keywords are spelled in Caps:
531 If Elif Case
532
533 - Operator tokens are quoted:
534 '(' '|'
535
536 or can be spelled directly if it matters:
537
538 Op_LParen Op_Pipe
539
540 - Non-terminals are snake_case:
541 brace_group subshell
542
543 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
544 the production should be in the method docstrings, e.g.
545
546 def ParseSubshell():
547 "
548 subshell : '(' compound_list ')'
549
550 Looking at Op_LParen # Comment to say how this method is called
551 "
552
553 The grammar may be factored to make parsing easier.
554 """
555
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parsing context; also supplies aliases and arena
          parse_opts: view of parse-time options (parse_brace, parse_at, ...)
          w_parser: word parser used for normal parsing
          lexer: used for pushing hints and lookahead to (
          line_reader: used to read here doc bodies
          eof_id: the Id that terminates parsing (Eof_Real by default)
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x=42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser instances.
        # I think this is OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
597
    # Init_() function for "keyword arg"
    def Init_AliasesInFlight(self, aliases_in_flight):
        # type: (AliasesInFlight) -> None
        """Share the aliases-in-flight list with a parent parser.

        Called by _MaybeExpandAliases() on the recursively created parser, so
        nested expansions see the same list and infinite alias expansion is
        prevented.
        """
        self.aliases_in_flight = aliases_in_flight
602
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined
        self.c_id = Id.Undefined_Tok

        # Redirects whose here doc bodies haven't been read yet; they are
        # filled in when the next newline is consumed (see _GetWord).
        self.pending_here_docs = []  # type: List[Redir]
616
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.  Resets the word parser, lexer, and
        line reader, but not this parser's own cursor state (see Reset).
        """
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
626
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        self.next_lex_mode = lex_mode_e.ShCommand
635
    def _SetNextBrack(self):
        # type: () -> None
        """Like _SetNext(), but read the next word in ShCommandFakeBrack mode.

        Used in _ScanSimpleCommand for words after the first, so that
        'my-cmd [x]' lazy arg lists can be recognized.  NOTE(review): the
        exact lexing difference of the FakeBrack mode is defined in the
        lexer -- confirm there.
        """
        self.next_lex_mode = lex_mode_e.ShCommandFakeBrack
639
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.  The newline is where pending here doc
            # bodies (queued by ParseRedirect) get filled in.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            # Has special case for Id.Lit_{LBrace,RBrace,Equals}
            self.c_id = word_.CommandId(self.cur_word)
            self.next_lex_mode = lex_mode_e.Undefined
669
670 def _Eat(self, c_id, msg=None):
671 # type: (Id_t, Optional[str]) -> word_t
672 """Consume a word of a type, maybe showing a custom error message.
673
674 Args:
675 c_id: the Id we expected
676 msg: improved error message
677 """
678 self._GetWord()
679 if self.c_id != c_id:
680 if msg is None:
681 msg = 'Expected word type %s, got %s' % (
682 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
683 p_die(msg, loc.Word(self.cur_word))
684
685 skipped = self.cur_word
686 self._SetNext()
687 return skipped
688
    def _NewlineOk(self):
        # type: () -> None
        """Check for optional newline and consume it.

        No-op when the current token isn't a newline.
        """
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
695
696 def _AtSecondaryKeyword(self):
697 # type: () -> bool
698 self._GetWord()
699 if self.c_id in SECONDARY_KEYWORDS:
700 return True
701 return False
702
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse one redirect: operator (with optional fd/varname) and target.

        For << and <<-, the returned Redir carries a HereDoc arg whose body
        is filled in later, at the next newline (see _GetWord).
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
        # possibly "unify" the IDs by subtracting a constant like 8 or 16?

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':  # {fd}>out - named descriptor variable
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():  # 3>out or 99>out - explicit descriptor
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:  # >out - default descriptor for this operator
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
765
766 def _ParseRedirectList(self):
767 # type: () -> List[Redir]
768 """Try parsing any redirects at the cursor.
769
770 This is used for blocks only, not commands.
771 """
772 redirects = [] # type: List[Redir]
773 while True:
774 # This prediction needs to ONLY accept redirect operators. Should we
775 # make them a separate Kind?
776 self._GetWord()
777 if self.c_kind != Kind.Redir:
778 break
779
780 node = self.ParseRedirect()
781 redirects.append(node)
782 self._SetNext()
783
784 return redirects
785
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

          simple_command   : cmd_prefix cmd_word cmd_suffix
                           | cmd_prefix cmd_word
                           | cmd_prefix
                           | cmd_name cmd_suffix
                           | cmd_name
                           ;
          cmd_name         : WORD                   /* Apply rule 7a */
                           ;
          cmd_word         : WORD                   /* Apply rule 7b */
                           ;
          cmd_prefix       :            io_redirect
                           | cmd_prefix io_redirect
                           |            ASSIGNMENT_WORD
                           | cmd_prefix ASSIGNMENT_WORD
                           ;
          cmd_suffix       :            io_redirect
                           | cmd_suffix io_redirect
                           |            WORD
                           | cmd_suffix WORD

        YSH grammar:

          redirect = redir_op WORD
          item = WORD | redirect

          typed_args =
            '(' arglist ')'
          | '[' arglist ']'

          simple_command =
            cmd_prefix* item+ typed_args? BraceGroup? cmd_suffix*

        Notably, redirects shouldn't appear after typed args, or after
        BraceGroup.

        Examples:

        This is an assignment:
           foo=1 >out

        This is a command.Simple
           >out

        What about
           >out (42)
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()

            # If we got { }, change it to something that's not Kind.Word
            kind2 = self.c_kind
            if (kind2 == Kind.Word and self.parse_opts.parse_brace() and
                    self.c_id in (Id.Lit_LBrace, Id.Lit_RBrace)):
                kind2 = Kind.Op

            if kind2 == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif kind2 == Kind.Word:
                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    #
                    # Can we remove this StaticEval() call, and just look
                    # inside Token? I think once we get rid of SHELL nodes,
                    # this will be simpler.

                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            else:
                break  # not a redirect or word; simple command ends here

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1

        # my-cmd (x) or my-cmd [x]
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            # 1. Check that there's a preceding space
            prev_byte = self.lexer.ByteLookBack()
            if prev_byte not in (SPACE_CH, TAB_CH):
                if self.parse_opts.parse_at():
                    p_die('Space required before (', loc.Word(self.cur_word))
                else:
                    # inline func call like @sorted(x) is invalid in OSH, but
                    # the solution isn't a space
                    p_die(
                        'Unexpected left paren (might need a space before it)',
                        loc.Word(self.cur_word))

            # 2. Check that it's not ().  We disallow this because it's a
            # no-op and there could be confusion with shell func defs.
            # For some reason we need to call lexer.LookPastSpace, not
            # w_parser.LookPastSpace.  I think this is because we're at (,
            # which is an operator token.  All the other cases are like 'x=',
            # which is PART of a word, and we don't know if it will end.
            next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
            if next_id == Id.Op_RParen:
                p_die('Empty arg list not allowed', loc.Word(self.cur_word))

            typed_args = self.w_parser.ParseProcCallArgs(
                grammar_nt.ysh_eager_arglist)

            self._SetNext()

        elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
            typed_args = self.w_parser.ParseProcCallArgs(
                grammar_nt.ysh_lazy_arglist)

            self._SetNext()

        self._GetWord()

        # Allow redirects after typed args, e.g.
        #   json write (x) > out.txt
        if self.c_kind == Kind.Redir:
            redirects.extend(self._ParseRedirectList())

        # my-cmd { echo hi }   my-cmd (x) { echo hi }   ...
        if (self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace and
                # Disabled for if/while condition, etc.
                self.allow_block):

            # allow x = 42
            self.hay_attrs_stack.append(first_word_caps)
            brace_group = self.ParseBraceGroup()

            # So we can get the source code back later
            lines = self.arena.SaveLinesAndDiscard(brace_group.left,
                                                   brace_group.right)
            block = LiteralBlock(brace_group, lines)

            self.hay_attrs_stack.pop()

            self._GetWord()

        # Allow redirects after block, e.g.
        #   cd /tmp { echo $PWD } > out.txt
        if self.c_kind == Kind.Redir:
            redirects.extend(self._ParseRedirectList())

        return redirects, words, typed_args, block
961
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node, or None.

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure
        out a better place.

        Most test cases pass, except for ones like:

          alias LBRACE='{'
          LBRACE echo one; echo two; }

          alias MULTILINE='echo 1
          echo 2
          echo 3'
          MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

          checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.

        Returns:
          A command node if any aliases were expanded, or None otherwise.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:  # quoted words are never alias-expanded
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want
            # to prevent expansion of the second word in 'echo echo', so we
            # add 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own
        # Arena.  This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error.
                    # We don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1100
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar

        io_file : '<' filename
                | LESSAND filename
                  ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects. Append them to separate lists.
        2) Look for the first non-assignment word. If it's declare, etc., then
        keep parsing words AND assign words. Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have them of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array literal
        in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted. e.g.
        <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # typed_loc is the token we blame when typed args / blocks appear in a
        # position that doesn't accept them (redirect-only, assignment,
        # control flow).
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g. >out.txt # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # A bare redirect is still a Simple command with no words; blame
            # the redirect operator itself in error messages.
            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading FOO=bar bindings from the rest of the words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            #   and inside Hay Attr blocks
            # But allow X=Y at the top level
            #   for interactive use foo=bar
            #   for global constants GLOBAL=~/src
            #     because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion. We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x   - inside procs and shell functions
                # return (x) - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            # break/continue/exit (and shell-style return) take no typed args,
            # redirects, or environment bindings.
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors. (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
1279
    def ParseBraceGroup(self):
        # type: () -> BraceGroup
        """
        Original:
            brace_group : LBrace command_list RBrace ;

        YSH:
            brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;

        The doc comment can only occur if there's a newline.
        """
        ate = self._Eat(Id.Lit_LBrace)
        left = word_.BraceToken(ate)

        doc_word = None  # type: word_t
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
            # Set a flag so we don't skip over ###
            with word_.ctx_EmitDocToken(self.w_parser):
                self._GetWord()

        if self.c_id == Id.Ignored_Comment:
            # Capture the ### doc comment as part of the brace group.
            doc_word = self.cur_word
            self._SetNext()

        # Id.Ignored_Comment means it's a Token, or None
        doc_token = cast(Token, doc_word)

        c_list = self._ParseCommandList()

        ate = self._Eat(Id.Lit_RBrace)
        right = word_.BraceToken(ate)

        # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
        # would allow us to revert this back to None, which was changed in
        # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
        # behavior saves allocations, but is less type safe.
        return BraceGroup(left, doc_token, c_list.children, [],
                          right)  # no redirects yet
1320
1321 def ParseDoGroup(self):
1322 # type: () -> command.DoGroup
1323 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1324
1325 do_group : Do command_list Done ; /* Apply rule 6 */
1326 """
1327 ate = self._Eat(Id.KW_Do)
1328 do_kw = word_.AsKeywordToken(ate)
1329
1330 c_list = self._ParseCommandList() # could be anything
1331
1332 ate = self._Eat(Id.KW_Done)
1333 done_kw = word_.AsKeywordToken(ate)
1334
1335 return command.DoGroup(do_kw, c_list.children, done_kw)
1336
1337 def ParseForWords(self):
1338 # type: () -> Tuple[List[CompoundWord], Optional[Token]]
1339 """
1340 for_words : WORD* for_sep
1341 ;
1342 for_sep : ';' newline_ok
1343 | NEWLINES
1344 ;
1345 """
1346 words = [] # type: List[CompoundWord]
1347 # The token of any semi-colon, so we can remove it.
1348 semi_tok = None # type: Optional[Token]
1349
1350 while True:
1351 self._GetWord()
1352 if self.c_id == Id.Op_Semi:
1353 tok = cast(Token, self.cur_word)
1354 semi_tok = tok
1355 self._SetNext()
1356 self._NewlineOk()
1357 break
1358 elif self.c_id == Id.Op_Newline:
1359 self._SetNext()
1360 break
1361 elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1362 break
1363
1364 if self.cur_word.tag() != word_e.Compound:
1365 # TODO: Can we also show a pointer to the 'for' keyword?
1366 p_die('Invalid word in for loop', loc.Word(self.cur_word))
1367
1368 w2 = cast(CompoundWord, self.cur_word)
1369 words.append(w2)
1370 self._SetNext()
1371 return words, semi_tok
1372
    def _ParseForExprLoop(self, for_kw):
        # type: (Token) -> command.ForExpr
        """Parse a C-style arithmetic for loop, after the 'for' keyword.

        Shell:
          for '((' init ';' cond ';' update '))' for_sep? do_group

        YSH:
          for '((' init ';' cond ';' update '))' for_sep? brace_group
        """
        # The word parser consumes '((' init ';' cond ';' update '))'
        node = self.w_parser.ReadForExpression()
        node.keyword = for_kw

        self._SetNext()

        # Optional separator between '))' and the body
        self._GetWord()
        if self.c_id == Id.Op_Semi:
            self._SetNext()
            self._NewlineOk()
        elif self.c_id == Id.Op_Newline:
            self._SetNext()
        elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
            pass
        elif self.c_id == Id.Lit_LBrace:  # does NOT require parse_brace
            pass
        else:
            p_die('Invalid word after for expression', loc.Word(self.cur_word))

        # Body is either a YSH brace group or a POSIX do/done group
        if self.c_id == Id.Lit_LBrace:
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()
        return node
1405
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse 'for x [, y ...] [in ...]' loops, after the 'for' keyword.

        Handles POSIX 'for x in a b', implicit 'for x; do' (loops over "$@"),
        and the YSH forms with multiple loop variables and (expr) iterables.
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        # First, collect 1-3 loop variable names.
        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        # Second, determine what we iterate over.
        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'. But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y)  # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH: for x in (expr) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Shell: for x in a b c; do ...
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        # Third, the loop body.
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1509
1510 def ParseFor(self):
1511 # type: () -> command_t
1512 """
1513 TODO: Update the grammar
1514
1515 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1516 | For '((' ... TODO
1517 """
1518 ate = self._Eat(Id.KW_For)
1519 for_kw = word_.AsKeywordToken(ate)
1520
1521 self._GetWord()
1522 if self.c_id == Id.Op_DLeftParen:
1523 if not self.parse_opts.parse_dparen():
1524 p_die("Bash for loops aren't allowed (parse_dparen)",
1525 loc.Word(self.cur_word))
1526
1527 # for (( i = 0; i < 10; i++)
1528 n1 = self._ParseForExprLoop(for_kw)
1529 n1.redirects = self._ParseRedirectList()
1530 return n1
1531 else:
1532 # for x in a b; do echo hi; done
1533 n2 = self._ParseForEachLoop(for_kw)
1534 n2.redirects = self._ParseRedirectList()
1535 return n2
1536
1537 def _ParseConditionList(self):
1538 # type: () -> condition_t
1539 """
1540 condition_list: command_list
1541
1542 This is a helper to parse a condition list for if commands and while/until
1543 loops. It will throw a parse error if there are no conditions in the list.
1544 """
1545 self.allow_block = False
1546 commands = self._ParseCommandList()
1547 self.allow_block = True
1548
1549 if len(commands.children) == 0:
1550 p_die("Expected a condition", loc.Word(self.cur_word))
1551
1552 return condition.Shell(commands.children)
1553
1554 def ParseWhileUntil(self, keyword):
1555 # type: (Token) -> command.WhileUntil
1556 """
1557 while_clause : While command_list do_group ;
1558 until_clause : Until command_list do_group ;
1559 """
1560 self._SetNext() # skip keyword
1561
1562 if (self.parse_opts.parse_paren() and
1563 self.w_parser.LookPastSpace() == Id.Op_LParen):
1564 enode = self.w_parser.ParseYshExprForCommand()
1565 cond = condition.YshExpr(enode) # type: condition_t
1566 else:
1567 cond = self._ParseConditionList()
1568
1569 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1570 # should be unchanged. To be sure we should desugar.
1571 self._GetWord()
1572 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1573 # while test -f foo {
1574 body_node = self.ParseBraceGroup() # type: command_t
1575 else:
1576 body_node = self.ParseDoGroup()
1577
1578 # no redirects yet
1579 return command.WhileUntil(keyword, cond, body_node, None)
1580
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """
        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # Tell the lexer that the next ')' closes a case pattern, not a subshell
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # One or more patterns separated by |
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The arm's action is optional; it's absent when the next token is
        # already a terminator or esac.
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp,
                             Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        # Terminator: ;; ;& ;;& or nothing before esac
        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp):
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1638
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """
        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`. We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        # The discriminant is the token id that ParseYshCase already peeked at.
        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words: one or more words separated by |
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1696
    def ParseYshCase(self, case_kw):
        # type: (Token) -> command.Case
        """
        ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;

        Looking at: token after 'case'
        """
        # The value being matched is a YSH expression: case (x) { ... }
        enode = self.w_parser.ParseYshExprForCommand()
        to_match = case_arg.YshExpr(enode)

        ate = self._Eat(Id.Lit_LBrace)
        arms_start = word_.BraceToken(ate)

        # NewlineOkForYshCase skips newlines AND peeks at the next token id,
        # which tells us how to parse the next arm (or stop at '}').
        discriminant = self.w_parser.NewlineOkForYshCase()

        # Note: for now, zero arms are accepted, just like POSIX case $x in esac
        arms = []  # type: List[CaseArm]
        while discriminant != Id.Op_RBrace:
            arm = self.ParseYshCaseArm(discriminant)
            arms.append(arm)

            discriminant = self.w_parser.NewlineOkForYshCase()

        # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
        # token is read as an Id.Op_RBrace, but we need to store this as a
        # Id.Lit_RBrace.
        ate = self._Eat(Id.Op_RBrace)
        arms_end = word_.AsOperatorToken(ate)
        arms_end.id = Id.Lit_RBrace

        # no redirects yet, hence the trailing None
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1729
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """
        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
        """
        self._GetWord()
        w = self.cur_word
        # In YSH mode, 'case foo' with a constant word is probably a mistake
        if not self.parse_opts.parse_bare_word():
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string. You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1781
1782 def ParseCase(self):
1783 # type: () -> command.Case
1784 """
1785 case_clause : old_case # from POSIX
1786 | ysh_case
1787 ;
1788
1789 Looking at 'Case'
1790 """
1791 case_kw = word_.AsKeywordToken(self.cur_word)
1792 self._SetNext() # past 'case'
1793
1794 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1795 return self.ParseYshCase(case_kw)
1796 else:
1797 return self.ParseOldCase(case_kw)
1798
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the elif/else chain of a YSH (brace-style) if.

        if test -f foo {
          echo foo
        } elif test -f bar; test -f spam {
          # ^ we parsed up to here
          echo bar
        } else {
          echo none
        }

        Appends arms to if_node in place.
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                # elif (x > 0) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # elif with a shell command condition; blocks not allowed there
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()  # refresh c_id for the loop condition

            arm = IfArm(elif_kw, cond, None, body.children, None)
            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1832
1833 def _ParseYshIf(self, if_kw, cond):
1834 # type: (Token, condition_t) -> command.If
1835 """
1836 if test -f foo {
1837 # ^ we parsed up to here
1838 echo foo
1839 } elif test -f bar; test -f spam {
1840 echo bar
1841 } else {
1842 echo none
1843 }
1844 NOTE: If you do something like if test -n foo{, the parser keeps going, and
1845 the error is confusing because it doesn't point to the right place.
1846
1847 I think we might need strict_brace so that foo{ is disallowed. It has to
1848 be foo\{ or foo{a,b}. Or just turn that on with parse_brace? After you
1849 form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
1850 Lit_RBrace? Maybe this is pre-parsing step in the WordParser?
1851 """
1852 if_node = command.If.CreateNull(alloc_lists=True)
1853 if_node.if_kw = if_kw
1854
1855 body1 = self.ParseBraceGroup()
1856 # Every arm has 1 spid, unlike shell-style
1857 # TODO: We could get the spids from the brace group.
1858 arm = IfArm(if_kw, cond, None, body1.children, None)
1859
1860 if_node.arms.append(arm)
1861
1862 self._GetWord()
1863 if self.c_id in (Id.KW_Elif, Id.KW_Else):
1864 self._ParseYshElifElse(if_node)
1865 # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
1866 # spid because that's in the BraceGroup.
1867 return if_node
1868
    def _ParseElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the shell-style elif/else chain, appending arms to if_node.

        else_part: (Elif command_list Then command_list)* Else command_list ;
        """
        arms = if_node.arms

        self._GetWord()
        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'elif'

            cond = self._ParseConditionList()

            ate = self._Eat(Id.KW_Then)
            then_kw = word_.AsKeywordToken(ate)

            body = self._ParseCommandList()
            arm = IfArm(elif_kw, cond, then_kw, body.children, then_kw)

            arms.append(arm)

            self._GetWord()  # refresh c_id for the loop condition

        if self.c_id == Id.KW_Else:
            else_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'else'
            body = self._ParseCommandList()
            if_node.else_action = body.children
        else:
            else_kw = None

        if_node.else_kw = else_kw
1901
    def ParseIf(self):
        # type: () -> command.If
        """
        if_clause : If command_list Then command_list else_part? Fi ;

        open      : '{' | Then
        close     : '}' | Fi

        ysh_if    : If ( command_list | '(' expr ')' )
                    open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if (self.parse_opts.parse_paren() and
                self.w_parser.LookPastSpace() == Id.Op_LParen):
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        # A '{' here means the YSH brace-style body; delegate the rest.
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children, then_kw)
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1951
1952 def ParseTime(self):
1953 # type: () -> command_t
1954 """Time [-p] pipeline.
1955
1956 According to bash help.
1957 """
1958 time_kw = word_.AsKeywordToken(self.cur_word)
1959 self._SetNext() # skip time
1960 pipeline = self.ParsePipeline()
1961 return command.TimeBlock(time_kw, pipeline)
1962
    def ParseCompoundCommand(self):
        # type: () -> command_t
        """
        Refactoring: we put io_redirect* here instead of in function_body and
        command.

        compound_command : brace_group io_redirect*
                         | subshell io_redirect*
                         | for_clause io_redirect*
                         | while_clause io_redirect*
                         | until_clause io_redirect*
                         | if_clause io_redirect*
                         | case_clause io_redirect*

                         # bash extensions
                         | time_clause
                         | [[ BoolExpr ]]
                         | (( ArithExpr ))
        """
        # NOTE(review): n1..n7 are distinct locals rather than one reused
        # name — presumably so each keeps a distinct static type for the
        # mycpp translation; confirm before consolidating.
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:
            n1 = self.ParseBraceGroup()
            n1.redirects = self._ParseRedirectList()
            return n1
        if self.c_id == Id.Op_LParen:
            n2 = self.ParseSubshell()
            n2.redirects = self._ParseRedirectList()
            return n2

        if self.c_id == Id.KW_For:
            # Note: Redirects parsed in this call. POSIX for and bash for (( have
            # redirects, but YSH for doesn't.
            return self.ParseFor()
        if self.c_id in (Id.KW_While, Id.KW_Until):
            keyword = word_.AsKeywordToken(self.cur_word)
            n3 = self.ParseWhileUntil(keyword)
            n3.redirects = self._ParseRedirectList()
            return n3

        if self.c_id == Id.KW_If:
            n4 = self.ParseIf()
            n4.redirects = self._ParseRedirectList()
            return n4
        if self.c_id == Id.KW_Case:
            n5 = self.ParseCase()
            n5.redirects = self._ParseRedirectList()
            return n5

        if self.c_id == Id.KW_DLeftBracket:
            if not self.parse_opts.parse_dbracket():
                p_die('Bash [[ not allowed in YSH (parse_dbracket)',
                      loc.Word(self.cur_word))
            n6 = self.ParseDBracket()
            n6.redirects = self._ParseRedirectList()
            return n6
        if self.c_id == Id.Op_DLeftParen:
            if not self.parse_opts.parse_dparen():
                p_die(
                    'Bash (( not allowed in YSH (parse_dparen, see OILS-ERR-14 for wart)',
                    loc.Word(self.cur_word))
            n7 = self.ParseDParen()
            n7.redirects = self._ParseRedirectList()
            return n7

        # bash extensions: no redirects
        if self.c_id == Id.KW_Time:
            return self.ParseTime()

        # Happens in function body, e.g. myfunc() oops
        p_die(
            'Unexpected word while parsing compound command (%s)' %
            Id_str(self.c_id), loc.Word(self.cur_word))
        assert False  # for MyPy
2036
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """
        function_header : fname '(' ')'
        function_def    : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        # Make the lexer report the closing ')' as Right_ShFunction
        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            # Check var declarations in the body against this function's scope
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            return None
2089
2090 def ParseKshFunctionDef(self):
2091 # type: () -> command.ShFunction
2092 """
2093 ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
2094 """
2095 keyword_tok = word_.AsKeywordToken(self.cur_word)
2096
2097 self._SetNext() # skip past 'function'
2098 self._GetWord()
2099
2100 cur_word = cast(CompoundWord, self.cur_word) # caller ensures validity
2101 name = word_.ShFunctionName(cur_word)
2102 if len(name) == 0: # example: foo$x is invalid
2103 p_die('Invalid KSH-style function name', loc.Word(cur_word))
2104
2105 name_word = self.cur_word
2106 self._SetNext() # skip past 'function name
2107
2108 self._GetWord()
2109 if self.c_id == Id.Op_LParen:
2110 self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
2111 self._SetNext()
2112 self._Eat(Id.Right_ShFunction)
2113
2114 self._NewlineOk()
2115
2116 func = command.ShFunction.CreateNull()
2117 func.name = name
2118 with ctx_VarChecker(self.var_checker, keyword_tok):
2119 func.body = self.ParseCompoundCommand()
2120
2121 func.keyword = keyword_tok
2122 func.name_tok = location.LeftTokenForWord(name_word)
2123 return func
2124
    def ParseYshProc(self):
        # type: () -> Proc
        """Parse a YSH 'proc' definition, e.g. proc p (x; y; z) { ... }

        Registers every declared parameter with the static var checker, so
        that 'setvar' etc. inside the body can be validated.
        """
        node = Proc.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        # The body is parsed with the var checker scoped to this proc, and in
        # cmd_mode_e.Proc (e.g. so 'const' can be rejected inside).
        with ctx_VarChecker(self.var_checker, keyword_tok):
            with ctx_CmdMode(self, cmd_mode_e.Proc):
                self.w_parser.ParseProc(node)
                if node.sig.tag() == proc_sig_e.Closed:  # Register params
                    sig = cast(proc_sig.Closed, node.sig)

                    # Treat 3 kinds of params as variables.
                    wp = sig.word
                    if wp:
                        for param in wp.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if wp.rest_of:
                            r = wp.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)
                        # We COULD register __out here but it would require a different API.
                        #if param.prefix and param.prefix.id == Id.Arith_Colon:
                        #    self.var_checker.Check(Id.KW_Var, '__' + param.name)

                    # Positional params, e.g. proc p (; x, y)
                    posit = sig.positional
                    if posit:
                        for param in posit.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if posit.rest_of:
                            r = posit.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    # Named params, e.g. proc p (; ; n=1)
                    named = sig.named
                    if named:
                        for param in named.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if named.rest_of:
                            r = named.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    # The block param, e.g. proc p (; ; ; b)
                    if sig.block_param:
                        b = sig.block_param
                        self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)

                self._SetNext()
                node.body = self.ParseBraceGroup()
                # No redirects for YSH procs (only at call site)

        return node
2181
    def ParseYshFunc(self):
        # type: () -> Func
        """Parse a YSH 'func' definition.

        ysh_func: (
            Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
        )
        Looking at KW_Func

        Registers declared parameters with the static var checker, like
        ParseYshProc does.
        """
        node = Func.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            self.w_parser.ParseFunc(node)

            # Positional params, e.g. func f(x, ...rest)
            posit = node.positional
            if posit:
                for param in posit.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if posit.rest_of:
                    r = posit.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            # Named params, e.g. func f(; n=1, ...rest)
            named = node.named
            if named:
                for param in named.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if named.rest_of:
                    r = named.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            self._SetNext()
            # Body is parsed in cmd_mode_e.Func, e.g. so 'return' is checked
            with ctx_CmdMode(self, cmd_mode_e.Func):
                node.body = self.ParseBraceGroup()

        return node
2221
2222 def ParseCoproc(self):
2223 # type: () -> command_t
2224 """
2225 TODO: command.Coproc?
2226 """
2227 raise NotImplementedError()
2228
2229 def ParseSubshell(self):
2230 # type: () -> command.Subshell
2231 """
2232 subshell : '(' compound_list ')'
2233
2234 Looking at Op_LParen
2235 """
2236 left = word_.AsOperatorToken(self.cur_word)
2237 self._SetNext() # skip past (
2238
2239 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2240 # translation stack, we want to delay it.
2241
2242 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2243
2244 c_list = self._ParseCommandList()
2245 if len(c_list.children) == 1:
2246 child = c_list.children[0]
2247 else:
2248 child = c_list
2249
2250 ate = self._Eat(Id.Right_Subshell)
2251 right = word_.AsOperatorToken(ate)
2252
2253 return command.Subshell(left, child, right, None) # no redirects yet
2254
2255 def ParseDBracket(self):
2256 # type: () -> command.DBracket
2257 """Pass the underlying word parser off to the boolean expression
2258 parser."""
2259 left = word_.AsKeywordToken(self.cur_word)
2260 # TODO: Test interactive. Without closing ]], you should get > prompt
2261 # (PS2)
2262
2263 self._SetNext() # skip [[
2264 b_parser = bool_parse.BoolParser(self.w_parser)
2265 bnode, right = b_parser.Parse() # May raise
2266 return command.DBracket(left, bnode, right, None) # no redirects yet
2267
2268 def ParseDParen(self):
2269 # type: () -> command.DParen
2270 left = word_.AsOperatorToken(self.cur_word)
2271
2272 self._SetNext() # skip ((
2273 anode, right = self.w_parser.ReadDParen()
2274 assert anode is not None
2275
2276 return command.DParen(left, anode, right, None) # no redirects yet
2277
    def ParseCommand(self):
        # type: () -> command_t
        """
        command : simple_command
                | compound_command   # OSH edit: io_redirect* folded in
                | function_def
                | ksh_function_def

                # YSH extensions
                | proc NAME ...
                | typed proc NAME ...
                | func NAME ...
                | const ...
                | var ...
                | setglobal ...
                | setref ...
                | setvar ...
                | call EXPR
                | = EXPR
                ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1
        """
        # Keywords like 'then' / 'do' / 'done' can't BEGIN a command
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle.  Code
            # inside procs should be YSH, full stop.  That means ysh:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                # 2024-02: This avoids bad syntax errors if you type YSH code
                # into OSH
                # proc p (x) { echo hi } would actually be parsed as a
                # command.Simple!  Shell compatibility: quote 'proc'
                p_die("proc is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Typed:  # typed proc p () { ... }
            self._SetNext()
            self._GetWord()
            if self.c_id != Id.KW_Proc:
                p_die("Expected 'proc' after 'typed'", loc.Word(self.cur_word))

            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                p_die("typed is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func():
                return self.ParseYshFunc()
            else:
                # Same reasoning as above, for 'proc'
                p_die("func is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
            p_die("const can't be inside proc or func. Use var instead.",
                  loc.Word(self.cur_word))

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Check each declared name against the static var checker
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name, lhs.left)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.KW_Call, Id.Lit_Equals):
            # = 42 + a[i]
            # call mylist->append('x')

            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            # ensured by Kind.Word
            cur_word = cast(CompoundWord, self.cur_word)

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    # Bare assignment: a single valid var name followed by '='
                    if (match.IsValidVarName(lexer.LazyStr(tok)) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        if (len(self.hay_attrs_stack) and
                                self.hay_attrs_stack[-1]):
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here.  Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(
                                None,
                                [NameType(tok, lexer.TokenVal(tok), None)],
                                enode)
                        else:
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy
2450
2451 def ParsePipeline(self):
2452 # type: () -> command_t
2453 """
2454 pipeline : Bang? command ( '|' newline_ok command )* ;
2455 """
2456 negated = None # type: Optional[Token]
2457
2458 self._GetWord()
2459 if self.c_id == Id.KW_Bang:
2460 negated = word_.AsKeywordToken(self.cur_word)
2461 self._SetNext()
2462
2463 child = self.ParseCommand()
2464 assert child is not None
2465
2466 children = [child]
2467
2468 self._GetWord()
2469 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2470 if negated is not None:
2471 node = command.Pipeline(negated, children, [])
2472 return node
2473 else:
2474 return child # no pipeline
2475
2476 # | or |&
2477 ops = [] # type: List[Token]
2478 while True:
2479 op = word_.AsOperatorToken(self.cur_word)
2480 ops.append(op)
2481
2482 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2483 self._NewlineOk()
2484
2485 child = self.ParseCommand()
2486 children.append(child)
2487
2488 self._GetWord()
2489 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2490 break
2491
2492 return command.Pipeline(negated, children, ops)
2493
2494 def ParseAndOr(self):
2495 # type: () -> command_t
2496 self._GetWord()
2497 if self.c_id == Id.Lit_TDot:
2498 # We got '...', so parse in multiline mode
2499 self._SetNext()
2500 with word_.ctx_Multiline(self.w_parser):
2501 return self._ParseAndOr()
2502
2503 # Parse in normal mode, not multiline
2504 return self._ParseAndOr()
2505
2506 def _ParseAndOr(self):
2507 # type: () -> command_t
2508 """
2509 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2510 | pipeline
2511
2512 Note that it is left recursive and left associative. We parse it
2513 iteratively with a token of lookahead.
2514 """
2515 child = self.ParsePipeline()
2516 assert child is not None
2517
2518 self._GetWord()
2519 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2520 return child
2521
2522 ops = [] # type: List[Token]
2523 children = [child]
2524
2525 while True:
2526 ops.append(word_.AsOperatorToken(self.cur_word))
2527
2528 self._SetNext() # skip past || &&
2529 self._NewlineOk()
2530
2531 child = self.ParsePipeline()
2532 children.append(child)
2533
2534 self._GetWord()
2535 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2536 break
2537
2538 return command.AndOr(children, ops)
2539
2540 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2541
2542 # At the top level, we execute after every line, e.g. to
2543 # - process alias (a form of dynamic parsing)
2544 # - process 'exit', because invalid syntax might appear after it
2545
2546 # On the other hand, for a while loop body, we parse the whole thing at once,
2547 # and then execute it. We don't want to parse it over and over again!
2548
2549 # COMPARE
2550 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2551 # command_term : and_or (trailer and_or)* ; # CHILDREN
2552
    def _ParseCommandLine(self):
        # type: () -> command_t
        """
        command_line : and_or (sync_op and_or)* trailer? ;
        trailer      : sync_op newline_ok
                     | NEWLINES;
        sync_op      : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively.  Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return.  (We're only parsing a single
              line.)
           b. If there's a sync_op, process it.  Then look for a newline and
              return.  Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Keep the terminator token (; or &) by wrapping in Sentence
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die(
                    'Invalid word while parsing command line (%s)' %
                    Id_str(self.c_id), loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
2608
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term : and_or (trailer and_or)* ;
        trailer      : sync_op newline_ok
                     | NEWLINES;
        sync_op      : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [
            self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi,
            Id.Op_SemiAmp, Id.Op_DSemiAmp
        ]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi
        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Keep the terminator token by wrapping in Sentence
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2691
    def _ParseCommandList(self):
        # type: () -> command.CommandList
        """
        command_list : newline_ok command_term trailer? ;

        This one is called by all the compound commands.  It's basically a
        command block.

        NOTE: Rather than translating the CFG directly, the code follows a
        style more like this: (and_or trailer)+.  It makes capture easier.
        """
        self._NewlineOk()
        return self._ParseCommandTerm()
2706
    def ParseLogicalLine(self):
        # type: () -> Optional[command_t]
        """Parse a single line for main_loop.

        A wrapper around _ParseCommandLine().  Similar but not identical to
        _ParseCommandList() and ParseCommandSub().

        Returns:
          The parsed command, or None on Eof_Real (the main loop then checks
          for pending here docs).

        Raises:
          ParseError
        """
        self._NewlineOk()
        self._GetWord()
        if self.c_id == Id.Eof_Real:
            return None  # main loop checks for here docs
        node = self._ParseCommandLine()
        return node
2723
2724 def ParseInteractiveLine(self):
2725 # type: () -> parse_result_t
2726 """Parse a single line for Interactive main_loop.
2727
2728 Different from ParseLogicalLine because newlines are handled differently.
2729
2730 Raises:
2731 ParseError
2732 """
2733 self._GetWord()
2734 if self.c_id == Id.Op_Newline:
2735 return parse_result.EmptyLine
2736 if self.c_id == Id.Eof_Real:
2737 return parse_result.Eof
2738
2739 node = self._ParseCommandLine()
2740 return parse_result.Node(node)
2741
2742 def ParseCommandSub(self):
2743 # type: () -> command_t
2744 """Parse $(echo hi) and `echo hi` for word_parse.py.
2745
2746 They can have multiple lines, like this: echo $( echo one echo
2747 two )
2748 """
2749 self._NewlineOk()
2750
2751 self._GetWord()
2752 if self.c_kind == Kind.Eof: # e.g. $()
2753 return command.NoOp
2754
2755 c_list = self._ParseCommandTerm()
2756 if len(c_list.children) == 1:
2757 return c_list.children[0]
2758 else:
2759 return c_list
2760
2761 def CheckForPendingHereDocs(self):
2762 # type: () -> None
2763 # NOTE: This happens when there is no newline at the end of a file, like
2764 # osh -c 'cat <<EOF'
2765 if len(self.pending_here_docs):
2766 node = self.pending_here_docs[0] # Just show the first one?
2767 h = cast(redir_param.HereDoc, node.arg)
2768 p_die('Unterminated here doc began here', loc.Word(h.here_begin))
2769
2770
2771# vim: sw=4