# OILS / osh / cmd_parse.py -- "View on Github | oilshell.org"
# 2709 lines, 1397 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
81_ = Kind_str # for debug prints
82
83TAB_CH = 9 # ord('\t')
84SPACE_CH = 32 # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the raw lines of a here doc, up to its terminator.

    We read all the lines at once, instead of parsing line-by-line, because
    of cases like this:
        cat <<EOF
        1 $(echo 2
        echo 3) 4
        EOF

    Returns (body lines, terminator line); each line is paired with the
    offset of the first byte to keep, which is nonzero only when <<-
    strips leading tabs.
    """
    here_lines = []  # type: List[Tuple[SourceLine, int]]
    last_line = None  # type: Tuple[SourceLine, int]
    strip_leading_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # An unterminated here doc is only a warning in bash.  We make
            # it fatal because we want to be strict, and because it causes
            # problems reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # If op is <<-, skip over ALL leading tabs -- not spaces, and not
        # just the first tab.
        start_offset = 0
        if strip_leading_tabs:
            num_bytes = len(line)
            while start_offset < num_bytes and line[start_offset] == '\t':
                start_offset += 1

        if line[start_offset:].rstrip() == delimiter:
            last_line = (src_line, start_offset)
            break

        here_lines.append((src_line, start_offset))

    return here_lines, last_line
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Create one Lit_Chars Token per here doc line.

    Used for the single-quoted rule: <<'EOF' and <<-'EOF'.

    start_offset is nonzero for <<- (stripped leading tabs).
    """
    # Declared with the less precise element type word_part_t, because
    # List[T] is an invariant type.
    tokens = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:
        # Maintain the lossless invariant for STRIPPED tabs: add a Token to
        # the arena, but don't refer to it in the parse tree.
        if do_lossless:  # avoid garbage; doesn't affect correctness
            arena.NewToken(Id.Ignored_HereTabs, 0, start_offset, src_line,
                           None)

        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line,
                             src_line.content[start_offset:])
        tokens.append(tok)
    return tokens
165
166
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Reads lines from line_reader up to the delimiter, then stores them in
    r.arg.stdin_parts -- either as literal tokens (quoted delimiter) or as
    parsed word parts (unquoted delimiter).
    """
    h = cast(redir_param.HereDoc, r.arg)
    # POSIX: "If any character in word is quoted, the delimiter shall be
    # formed by performing quote removal on word, and the here-document
    # lines shall not be expanded.  Otherwise, the delimiter shall be the
    # word itself."
    # NOTE: \EOF counts as quoted, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - a literal Token for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - parse the lines as a word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain the lossless invariant for STRIPPED tabs: add a Token to the
    # arena, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Ignored_HereTabs, 0, start_offset, end_line, None)

    # Create a Token with the end terminator.  Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line, '')
203
204
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (DetectShAssignment).

    Handles three LHS shapes:
      s=1 / s+=1              -> sh_lhs.Name
      a[i]=1 (lossless mode)  -> sh_lhs.UnparsedIndex (index kept as string)
      a[i]=1 (otherwise)      -> sh_lhs.IndexedName (index parsed eagerly)
    """
    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            # 'name+=' -> strip the trailing '+='
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            # 'name=' -> strip the trailing '='
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        # In lossless mode, keep the index expression as an unparsed string
        # so the original source can be reproduced exactly.
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        # tmp2 is for the intersection of the C++ / MyPy type systems
        tmp2 = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(tmp2)
        rhs = tmp2

    return AssignPair(left_token, lhs, op, rhs)
279
280
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (List[ParsedAssignment], List[EnvPair]) -> None
    """Helper to build the env bindings of a SimpleCommand node.

    Args:
      preparsed_list: assignments detected by DetectShAssignment
      more_env: output list that EnvPair nodes are appended to
    """
    for preparsed in preparsed_list:
        left_token = preparsed.left

        # a[x]=1 is not a valid environment binding
        if left_token.id != Id.Lit_VarLike:
            p_die(
                "Environment binding shouldn't look like an array assignment",
                left_token)

        # FOO+=bar is not a valid environment binding either
        if lexer.IsPlusEquals(left_token):
            p_die('Expected = in environment binding, got +=', left_token)

        var_name = lexer.TokenSliceRight(left_token, -1)

        parts = preparsed.w.parts
        offset = preparsed.part_offset
        if offset == len(parts):
            val = rhs_word.Empty  # type: rhs_word_t
        else:
            val = CompoundWord(parts[offset:])

        more_env.append(EnvPair(left_token, var_name, val))
311
312
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
    """Second pass of SimpleCommand parsing: split off leading assignments.

    Returns (assignment prefix, remaining words).  The prefix ends at the
    first word that doesn't look like a shell assignment.
    """
    preparsed_list = []  # type: List[ParsedAssignment]
    suffix_words = []  # type: List[CompoundWord]

    in_prefix = True
    for w in words:
        if in_prefix:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                preparsed_list.append(
                    ParsedAssignment(left_token, close_token, part_offset, w))
                continue
            in_prefix = False

        suffix_words.append(w)

    return preparsed_list, suffix_words
334
335
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Create a command.Simple node.

    Validates the env prefix, runs brace and tilde DETECTION on the
    argument words, and converts the assignment prefix into EnvPair nodes.
    """
    # FOO=(1 2 3) ls is not allowed.
    for preparsed in preparsed_list:
        if word_.HasArrayPart(preparsed.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(preparsed.w))

    # NOTE: It would be possible to add this check back.  But it already
    # happens at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on
    # echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # {a,b,c}   # Use { before brace detection
    # ~/bin/ls  # Use ~ before tilde detection
    part0 = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(part0)

    # NOTE: We only do brace DETECTION here, not brace EXPANSION.  Therefore
    # we can't implement bash's behavior of having say {~bob,~jane}/src
    # work, because we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate',
    # but it doesn't seem worth it.
    words2 = braces.BraceDetectAll(suffix_words)
    words3 = word_.TildeDetectAll(words2)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # do_fork by default
    return command.Simple(blame_tok, more_env, words3, redirects, typed_args,
                          block, True)
381
382
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # self.tokens for location info: the 'proc'/'func' keyword or
        # another blame token, one per nested scope
        self.tokens = []  # type: List[Token]
        # One dict per scope, mapping declared names to the keyword that
        # declared them
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when we enter a shell function, proc, or func.

        Bash allows nested definitions, but it's confusing because it's the
        same as two functions at the top level:

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens) != 0:
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        entry = {}  # type: Dict[str, Id_t]
        self.names.append(entry)

    def Pop(self):
        # type: () -> None
        """Called when we leave a shell function, proc, or func."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x:
          error if x is already declared
        setvar x:
          error if x is not declared

        setglobal x:
          No errors are possible; we would need all these many conditions
          to statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static

        What about bare assignment in Hay?  I think these are dynamic
        checks -- there is no static check.  Hay is for building up data
        imperatively, and then LATER, right before main(), it can be type
        checked.

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var
        # and setvar are essentially the same.
        if len(self.names) == 0:
            return

        top = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in top:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                top[var_name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if var_name not in top:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
471
472
class ctx_VarChecker(object):
    """Push a VarChecker scope for the duration of a 'with' block."""

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        var_checker.Push(blame_tok)
        self.checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.checker.Pop()
487
488
class ctx_CmdMode(object):
    """Set the parser's cmd_mode for the duration of a 'with' block."""

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.saved_mode = cmd_parse.cmd_mode  # restored in __exit__
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.saved_mode
504
505
# Keywords that appear in the middle or at the end of compound commands;
# tested by _AtSecondaryKeyword().
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
510
511
class CommandParser(object):
    """Recursive descent parser derived from the POSIX shell grammar.

    This is the BNF grammar:
      https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10

    - Augmented with both bash/OSH and YSH constructs.

    - We use regex-like iteration rather than recursive references
        ?  means optional (0 or 1)
        *  means 0 or more
        +  means 1 or more

    - Keywords are spelled in Caps:
        If Elif Case

    - Operator tokens are quoted:
        '(' '|'

      or can be spelled directly if it matters:

        Op_LParen Op_Pipe

    - Non-terminals are snake_case:
        brace_group subshell

    Methods in this class should ROUGHLY CORRESPOND to grammar productions,
    and the production should be in the method docstrings, e.g.

      def ParseSubshell():
        "
        subshell : '(' compound_list ')'

        Looking at Op_LParen   # Comment to say how this method is called
        "

    The grammar may be factored to make parsing easier.
    """
550
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id  # word Id that terminates parsing

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x=42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser
        # instances.  I think this is OK but you can imagine different
        # behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
592
    # Init_() function for "keyword arg"
    def Init_AliasesInFlight(self, aliases_in_flight):
        # type: (AliasesInFlight) -> None
        """Share an in-flight alias list with the parent parser.

        Used by _MaybeExpandAliases() to prevent infinite alias expansion
        across recursively created CommandParser instances.
        """
        self.aliases_in_flight = aliases_in_flight
597
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined  # Kind of cur_word
        self.c_id = Id.Undefined_Tok  # Id of cur_word

        # Here docs whose bodies haven't been read yet; flushed by
        # _GetWord() at the next newline.
        self.pending_here_docs = [
        ]  # type: List[Redir]  # should have HereLiteral arg
612
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.
        """
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
622
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        self.next_lex_mode = lex_mode_e.ShCommand
631
    def _SetNextBrack(self):
        # type: () -> None
        # Like _SetNext(), but the next word is read in a mode where [ is
        # treated specially -- presumably so it can start a lazy arg list
        # (see the Op_LBracket case in _ScanSimpleCommand).  TODO: confirm.
        self.next_lex_mode = lex_mode_e.ShCommandBrack
635
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word
        and sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # A newline flushes all pending here doc bodies.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h,
                                          self.line_reader, self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            self.c_id = word_.CommandId(self.cur_word)
            self.next_lex_mode = lex_mode_e.Undefined
664
665 def _Eat(self, c_id, msg=None):
666 # type: (Id_t, Optional[str]) -> word_t
667 """Consume a word of a type, maybe showing a custom error message.
668
669 Args:
670 c_id: the Id we expected
671 msg: improved error message
672 """
673 self._GetWord()
674 if self.c_id != c_id:
675 if msg is None:
676 msg = 'Expected word type %s, got %s' % (
677 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
678 p_die(msg, loc.Word(self.cur_word))
679
680 skipped = self.cur_word
681 self._SetNext()
682 return skipped
683
    def _NewlineOk(self):
        # type: () -> None
        """Check for an optional newline and consume it."""
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
690
691 def _AtSecondaryKeyword(self):
692 # type: () -> bool
693 self._GetWord()
694 if self.c_id in SECONDARY_KEYWORDS:
695 return True
696 return False
697
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse a single redirect, e.g. >out, 3>out, {fd}>out, <<EOF.

        Precondition: the current word has Kind.Redir.
        For << and <<-, the here doc body is NOT read here; the node is
        appended to pending_here_docs and filled in at the next newline.
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        # One way to do this is with Kind.Redir and Kind.RedirNamed, and
        # then possibly "unify" the IDs by subtracting a constant like 8
        # or 16?

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':
            # {fd}>out -- variable name holds the file descriptor
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():
            # explicit descriptor like 3>out or 10>out (1 or 2 digits)
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:
            # no explicit descriptor; use the operator's default fd
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
760
761 def _ParseRedirectList(self):
762 # type: () -> List[Redir]
763 """Try parsing any redirects at the cursor.
764
765 This is used for blocks only, not commands.
766 """
767 redirects = [] # type: List[Redir]
768 while True:
769 # This prediction needs to ONLY accept redirect operators. Should we
770 # make them a separate Kind?
771 self._GetWord()
772 if self.c_kind != Kind.Redir:
773 break
774
775 node = self.ParseRedirect()
776 redirects.append(node)
777 self._SetNext()
778
779 return redirects
780
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

        simple_command   : cmd_prefix cmd_word cmd_suffix
                         | cmd_prefix cmd_word
                         | cmd_prefix
                         | cmd_name cmd_suffix
                         | cmd_name
                         ;
        cmd_name         : WORD                   /* Apply rule 7a */
                         ;
        cmd_word         : WORD                   /* Apply rule 7b */
                         ;
        cmd_prefix       :            io_redirect
                         | cmd_prefix io_redirect
                         |            ASSIGNMENT_WORD
                         | cmd_prefix ASSIGNMENT_WORD
                         ;
        cmd_suffix       :            io_redirect
                         | cmd_suffix io_redirect
                         |            WORD
                         | cmd_suffix WORD

        YSH grammar:

        simple_command =
          cmd_prefix* word+ typed_args? BraceGroup? cmd_suffix*

        typed_args =
          '(' arglist ')'
        | '[' arglist ']'

        Notably, redirects shouldn't appear between typed args and
        BraceGroup.
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()
            if self.c_kind == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif self.c_kind == Kind.Word:
                if self.parse_opts.parse_brace():
                    # Treat { and } more like operators
                    if self.c_id == Id.Lit_LBrace:
                        if self.allow_block:  # Disabled for if/while condition, etc.

                            # allow x = 42
                            self.hay_attrs_stack.append(first_word_caps)
                            brace_group = self.ParseBraceGroup()

                            # So we can get the source code back later
                            lines = self.arena.SaveLinesAndDiscard(
                                brace_group.left, brace_group.right)
                            block = LiteralBlock(brace_group, lines)

                            self.hay_attrs_stack.pop()

                        if 0:
                            print('--')
                            block.PrettyPrint()
                            print('\n--')
                        break
                    elif self.c_id == Id.Lit_RBrace:
                        # Another thing: { echo hi }
                        # We're DONE!!!
                        break

                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            elif self.c_id == Id.Op_LParen:
                # 1. Check that there's a preceding space
                prev_byte = self.lexer.ByteLookBack()
                if prev_byte not in (SPACE_CH, TAB_CH):
                    if self.parse_opts.parse_at():
                        p_die('Space required before (',
                              loc.Word(self.cur_word))
                    else:
                        # inline func call like @sorted(x) is invalid in
                        # OSH, but the solution isn't a space
                        p_die(
                            'Unexpected left paren (might need a space before it)',
                            loc.Word(self.cur_word))

                # 2. Check that it's not ().  We disallow this because it's
                # a no-op and there could be confusion with shell func defs.
                # For some reason we need to call lexer.LookPastSpace, not
                # w_parser.LookPastSpace.  I think this is because we're at
                # (, which is an operator token.  All the other cases are
                # like 'x=', which is PART of a word, and we don't know if
                # it will end.
                next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
                if next_id == Id.Op_RParen:
                    p_die('Empty arg list not allowed',
                          loc.Word(self.cur_word))

                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_eager_arglist)

            elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_lazy_arglist)

            else:
                break

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1
        return redirects, words, typed_args, block
921
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of CompoundWord

        Returns:
          A new LST node if any aliases were expanded, or None otherwise
          (in which case the caller parses the words normally).

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In
          our case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure
        out a better place.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then
        calls lookupalias().  This seems to provide a consistent behavior
        among shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only expand words that are static and unquoted.
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want
            # to prevent expansion of the second word in 'echo echo', so we
            # add 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue
                # to expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own
        # Arena.  This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands,
                    # compound commands, etc. as opposed to
                    # ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse an alias expansion is a fatal error;
                    # we don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1060
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar (TODO: port to
        grammar/Shell.g)

        io_file : '<' filename
                | LESSAND filename
                  ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part    : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects.  Append them to separate lists.
        2) Look for the first non-assignment word.  If it's declare, etc., then
           keep parsing words AND assign words.  Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        Global assignment and keyword assignments can have them, of course.
        Well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array
        literal in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted,
        e.g. <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # Location used to reject typed args / blocks where they're not allowed.
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g. >out.txt  # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # A bare redirect becomes an empty Simple command.
            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading FOO=bar prefix bindings from the rest of the words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            #  and inside Hay Attr blocks
            # But allow X=Y at the top level
            #  for interactive use foo=bar
            #  for global constants GLOBAL=~/src
            #    because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion.  We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: no suffix words, only bindings like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x    - inside procs and shell functions
                # return (x)  - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors.  (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
1240
1241 def ParseBraceGroup(self):
1242 # type: () -> BraceGroup
1243 """
1244 Original:
1245 brace_group : LBrace command_list RBrace ;
1246
1247 YSH:
1248 brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;
1249
1250 The doc comment can only occur if there's a newline.
1251 """
1252 ate = self._Eat(Id.Lit_LBrace)
1253 left = word_.BraceToken(ate)
1254
1255 doc_word = None # type: word_t
1256 self._GetWord()
1257 if self.c_id == Id.Op_Newline:
1258 self._SetNext()
1259 # Set a flag so we don't skip over ###
1260 with word_.ctx_EmitDocToken(self.w_parser):
1261 self._GetWord()
1262
1263 if self.c_id == Id.Ignored_Comment:
1264 doc_word = self.cur_word
1265 self._SetNext()
1266
1267 # Id.Ignored_Comment means it's a Token, or None
1268 doc_token = cast(Token, doc_word)
1269
1270 c_list = self._ParseCommandList()
1271
1272 ate = self._Eat(Id.Lit_RBrace)
1273 right = word_.BraceToken(ate)
1274
1275 # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
1276 # would allow us to revert this back to None, which was changed in
1277 # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
1278 # behavior saves allocations, but is less type safe.
1279 return BraceGroup(left, doc_token, c_list.children, [],
1280 right) # no redirects yet
1281
1282 def ParseDoGroup(self):
1283 # type: () -> command.DoGroup
1284 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1285
1286 do_group : Do command_list Done ; /* Apply rule 6 */
1287 """
1288 ate = self._Eat(Id.KW_Do)
1289 do_kw = word_.AsKeywordToken(ate)
1290
1291 c_list = self._ParseCommandList() # could be anything
1292
1293 ate = self._Eat(Id.KW_Done)
1294 done_kw = word_.AsKeywordToken(ate)
1295
1296 return command.DoGroup(do_kw, c_list.children, done_kw)
1297
    def ParseForWords(self):
        # type: () -> Tuple[List[CompoundWord], Optional[Token]]
        """Read the iterable words of a for loop, up to a separator.

        for_words : WORD* for_sep
                  ;
        for_sep   : ';' newline_ok
                  | NEWLINES
                  ;

        Returns:
          The words, plus the ';' Token if one ended the list (kept so tools
          can locate and remove it), or None otherwise.
        """
        words = []  # type: List[CompoundWord]
        # The span_id of any semi-colon, so we can remove it.
        semi_tok = None  # type: Optional[Token]

        while True:
            self._GetWord()
            if self.c_id == Id.Op_Semi:
                tok = cast(Token, self.cur_word)
                semi_tok = tok
                self._SetNext()
                self._NewlineOk()
                break
            elif self.c_id == Id.Op_Newline:
                self._SetNext()
                break
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                # YSH: the '{' that opens the loop body also ends the words
                break

            if self.cur_word.tag() != word_e.Compound:
                # TODO: Can we also show a pointer to the 'for' keyword?
                p_die('Invalid word in for loop', loc.Word(self.cur_word))

            w2 = cast(CompoundWord, self.cur_word)
            words.append(w2)
            self._SetNext()
        return words, semi_tok
1333
    def _ParseForExprLoop(self, for_kw):
        # type: (Token) -> command.ForExpr
        """Parse a C-style arithmetic for loop, after 'for' has been eaten.

        Shell:
          for '((' init ';' cond ';' update '))' for_sep? do_group

        YSH:
          for '((' init ';' cond ';' update '))' for_sep? brace_group
        """
        # The word parser reads the (( ... )) header; attach the 'for' keyword.
        node = self.w_parser.ReadForExpression()
        node.keyword = for_kw

        self._SetNext()

        # An optional separator may appear between '))' and the body.
        self._GetWord()
        if self.c_id == Id.Op_Semi:
            self._SetNext()
            self._NewlineOk()
        elif self.c_id == Id.Op_Newline:
            self._SetNext()
        elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
            pass
        elif self.c_id == Id.Lit_LBrace:  # does NOT require parse_brace
            pass
        else:
            p_die('Invalid word after for expression', loc.Word(self.cur_word))

        # Body is either a YSH brace group or a POSIX do...done group.
        if self.c_id == Id.Lit_LBrace:
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()
        return node
1366
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse a foreach-style loop, after 'for' has been eaten.

        Handles all of:
          for x in a b; do ...; done     # POSIX word list
          for x, y in (expr) { ... }     # YSH expression iterable
          for x; do ...; done            # implicit loop over "$@"
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        # First, collect up to 3 loop variable names.
        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    # Strip the trailing comma so StaticEval sees a bare name.
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        # Second, parse the iterable: 'in ...', or implicit "$@".
        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'.  But we still have to fix the
            # bug where we require a SPACE between in and (
            #   for x in(y)   # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH expression iterable: for x in (mylist) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Word-list iterable: for x in a b c; do ... done
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                # Expand brace patterns and detect ~ before execution.
                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        # Third, parse the body.
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1470
1471 def ParseFor(self):
1472 # type: () -> command_t
1473 """
1474 TODO: Update the grammar
1475
1476 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1477 | For '((' ... TODO
1478 """
1479 ate = self._Eat(Id.KW_For)
1480 for_kw = word_.AsKeywordToken(ate)
1481
1482 self._GetWord()
1483 if self.c_id == Id.Op_DLeftParen:
1484 if not self.parse_opts.parse_dparen():
1485 p_die("Bash for loops aren't allowed (parse_dparen)",
1486 loc.Word(self.cur_word))
1487
1488 # for (( i = 0; i < 10; i++)
1489 n1 = self._ParseForExprLoop(for_kw)
1490 n1.redirects = self._ParseRedirectList()
1491 return n1
1492 else:
1493 # for x in a b; do echo hi; done
1494 n2 = self._ParseForEachLoop(for_kw)
1495 n2.redirects = self._ParseRedirectList()
1496 return n2
1497
1498 def _ParseConditionList(self):
1499 # type: () -> condition_t
1500 """
1501 condition_list: command_list
1502
1503 This is a helper to parse a condition list for if commands and while/until
1504 loops. It will throw a parse error if there are no conditions in the list.
1505 """
1506 self.allow_block = False
1507 commands = self._ParseCommandList()
1508 self.allow_block = True
1509
1510 if len(commands.children) == 0:
1511 p_die("Expected a condition", loc.Word(self.cur_word))
1512
1513 return condition.Shell(commands.children)
1514
1515 def ParseWhileUntil(self, keyword):
1516 # type: (Token) -> command.WhileUntil
1517 """
1518 while_clause : While command_list do_group ;
1519 until_clause : Until command_list do_group ;
1520 """
1521 self._SetNext() # skip keyword
1522
1523 if (self.parse_opts.parse_paren() and
1524 self.w_parser.LookPastSpace() == Id.Op_LParen):
1525 enode = self.w_parser.ParseYshExprForCommand()
1526 cond = condition.YshExpr(enode) # type: condition_t
1527 else:
1528 cond = self._ParseConditionList()
1529
1530 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1531 # should be unchanged. To be sure we should desugar.
1532 self._GetWord()
1533 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1534 # while test -f foo {
1535 body_node = self.ParseBraceGroup() # type: command_t
1536 else:
1537 body_node = self.ParseDoGroup()
1538
1539 # no redirects yet
1540 return command.WhileUntil(keyword, cond, body_node, None)
1541
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a POSIX case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Precondition: looking at '(' or the first pattern word.
        """
        # Inside a case arm, ')' closes the pattern rather than a subshell.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # One or more '|'-separated pattern words.
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The action is optional: ';;' or 'esac' may follow immediately.
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id == Id.Op_DSemi:
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1598
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case statement.

        case_item : pattern newline_ok brace_group newline_ok
        pattern   : pat_words
                  | pat_exprs
                  | pat_eggex
                  | pat_else
        pat_words : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word  : WORD
        pat_eggex : '/' oil_eggex '/'
        pat_expr  : '(' oil_expr ')'
        pat_else  : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Args:
          discriminant: the Id of the token starting this arm, produced by
            NewlineOkForYshCase; it selects word vs expr/eggex patterns.

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`.  We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words: one or more '|'-separated words
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # Remember the start of the arm for error locations.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1656
    def ParseYshCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse a YSH case statement.

        ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;

        Precondition: looking at the token after 'case'.
        """
        enode = self.w_parser.ParseYshExprForCommand()
        to_match = case_arg.YshExpr(enode)

        ate = self._Eat(Id.Lit_LBrace)
        arms_start = word_.BraceToken(ate)

        # The "discriminant" is the Id of the first token of the next arm.
        discriminant = self.w_parser.NewlineOkForYshCase()

        # Note: for now, zero arms are accepted, just like POSIX case $x in esac
        arms = []  # type: List[CaseArm]
        while discriminant != Id.Op_RBrace:
            arm = self.ParseYshCaseArm(discriminant)
            arms.append(arm)

            discriminant = self.w_parser.NewlineOkForYshCase()

        # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr.  So the '}'
        # token is read as an Id.Op_RBrace, but we need to store this as a
        # Id.Lit_RBrace.
        ate = self._Eat(Id.Op_RBrace)
        arms_end = word_.AsOperatorToken(ate)
        arms_end.id = Id.Lit_RBrace

        # Last arg is None: redirects are attached later by the caller.
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1689
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse a POSIX-style case statement.

        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list : case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok ;
        """
        self._GetWord()
        w = self.cur_word
        if not self.parse_opts.parse_bare_word():
            # YSH requires the matched word to be quoted or a variable.
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string. You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:  # this is Kind.Word
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # Last arg is None: redirects are attached later by the caller.
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1741
1742 def ParseCase(self):
1743 # type: () -> command.Case
1744 """
1745 case_clause : old_case # from POSIX
1746 | ysh_case
1747 ;
1748
1749 Looking at 'Case'
1750 """
1751 case_kw = word_.AsKeywordToken(self.cur_word)
1752 self._SetNext() # past 'case'
1753
1754 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1755 return self.ParseYshCase(case_kw)
1756 else:
1757 return self.ParseOldCase(case_kw)
1758
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse YSH-style elif/else clauses, appending arms to if_node.

        We've already parsed up to the '}' before 'elif', e.g.:

            if test -f foo {
              echo foo
            } elif test -f bar; test -f spam {
              echo bar
            } else {
              echo none
            }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                # elif (x > 0) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Command-list condition; block args are disallowed here.
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()  # refresh c_id for the loop condition above

            arm = IfArm(elif_kw, cond, None, body.children, [elif_kw.span_id])
            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1792
1793 def _ParseYshIf(self, if_kw, cond):
1794 # type: (Token, condition_t) -> command.If
1795 """if test -f foo {
1796
1797 # ^ we parsed up to here
1798 echo foo
1799 } elif test -f bar; test -f spam {
1800 echo bar
1801 } else {
1802 echo none
1803 }
1804 NOTE: If you do something like if test -n foo{, the parser keeps going, and
1805 the error is confusing because it doesn't point to the right place.
1806
1807 I think we might need strict_brace so that foo{ is disallowed. It has to
1808 be foo\{ or foo{a,b}. Or just turn that on with parse_brace? After you
1809 form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
1810 Lit_RBrace? Maybe this is pre-parsing step in the WordParser?
1811 """
1812 if_node = command.If.CreateNull(alloc_lists=True)
1813 if_node.if_kw = if_kw
1814
1815 body1 = self.ParseBraceGroup()
1816 # Every arm has 1 spid, unlike shell-style
1817 # TODO: We could get the spids from the brace group.
1818 arm = IfArm(if_kw, cond, None, body1.children, [if_kw.span_id])
1819
1820 if_node.arms.append(arm)
1821
1822 self._GetWord()
1823 if self.c_id in (Id.KW_Elif, Id.KW_Else):
1824 self._ParseYshElifElse(if_node)
1825 # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
1826 # spid because that's in the BraceGroup.
1827 return if_node
1828
    def _ParseElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse shell-style elif/else clauses, appending arms to if_node.

        else_part: (Elif command_list Then command_list)* Else command_list ;
        """
        arms = if_node.arms

        self._GetWord()
        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'elif'

            cond = self._ParseConditionList()

            ate = self._Eat(Id.KW_Then)
            then_kw = word_.AsKeywordToken(ate)

            body = self._ParseCommandList()
            # Shell-style arms carry two spids: 'elif' and 'then'.
            arm = IfArm(elif_kw, cond, then_kw, body.children,
                        [elif_kw.span_id, then_kw.span_id])

            arms.append(arm)

            self._GetWord()
        if self.c_id == Id.KW_Else:
            else_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'else'
            body = self._ParseCommandList()
            if_node.else_action = body.children
        else:
            else_kw = None

        if_node.else_kw = else_kw
1862
    def ParseIf(self):
        # type: () -> command.If
        """Parse an 'if' statement in either shell or YSH form.

        if_clause : If command_list Then command_list else_part? Fi ;

        open : '{' | Then
        close : '}' | Fi

        ysh_if : If ( command_list | '(' expr ')' )
                 open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            # YSH brace form: the rest is handled by _ParseYshIf.
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children,
                    [if_kw.span_id, then_kw.span_id])
        if_node.arms.append(arm)

        # 2nd to Nth arm.  c_id was updated by _ParseCommandList above.
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1913
1914 def ParseTime(self):
1915 # type: () -> command_t
1916 """Time [-p] pipeline.
1917
1918 According to bash help.
1919 """
1920 time_kw = word_.AsKeywordToken(self.cur_word)
1921 self._SetNext() # skip time
1922 pipeline = self.ParsePipeline()
1923 return command.TimeBlock(time_kw, pipeline)
1924
    def ParseCompoundCommand(self):
        # type: () -> command_t
        """Dispatch on the current word to the right compound-command parser.

        Refactoring: we put io_redirect* here instead of in function_body and
        command.

        compound_command : brace_group io_redirect*
                         | subshell io_redirect*
                         | for_clause io_redirect*
                         | while_clause io_redirect*
                         | until_clause io_redirect*
                         | if_clause io_redirect*
                         | case_clause io_redirect*

                           # bash extensions
                         | time_clause
                         | [[ BoolExpr ]]
                         | (( ArithExpr ))
        """
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:
            n1 = self.ParseBraceGroup()
            n1.redirects = self._ParseRedirectList()
            return n1
        if self.c_id == Id.Op_LParen:
            n2 = self.ParseSubshell()
            n2.redirects = self._ParseRedirectList()
            return n2

        if self.c_id == Id.KW_For:
            # Note: Redirects parsed in this call.  POSIX for and bash for ((
            # have redirects, but YSH for doesn't.
            return self.ParseFor()
        if self.c_id in (Id.KW_While, Id.KW_Until):
            keyword = word_.AsKeywordToken(self.cur_word)
            n3 = self.ParseWhileUntil(keyword)
            n3.redirects = self._ParseRedirectList()
            return n3

        if self.c_id == Id.KW_If:
            n4 = self.ParseIf()
            n4.redirects = self._ParseRedirectList()
            return n4
        if self.c_id == Id.KW_Case:
            n5 = self.ParseCase()
            n5.redirects = self._ParseRedirectList()
            return n5

        if self.c_id == Id.KW_DLeftBracket:
            n6 = self.ParseDBracket()
            n6.redirects = self._ParseRedirectList()
            return n6
        if self.c_id == Id.Op_DLeftParen:
            if not self.parse_opts.parse_dparen():
                p_die('You may want a space between parens (parse_dparen)',
                      loc.Word(self.cur_word))
            n7 = self.ParseDParen()
            n7.redirects = self._ParseRedirectList()
            return n7

        # bash extensions: no redirects
        if self.c_id == Id.KW_Time:
            return self.ParseTime()

        # Happens in function body, e.g. myfunc() oops
        p_die('Unexpected word while parsing compound command',
              loc.Word(self.cur_word))
        assert False  # for MyPy
1993
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a POSIX-style function definition: name() body.

        function_header : fname '(' ')'
        function_def    : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a
        grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no
            # args would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            # Track variable declarations within the function body.
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            return None  # unreachable; p_die raises (keeps MyPy happy)
2046
    def ParseKshFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a ksh-style function definition: function name [()] body.

        ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
        """
        keyword_tok = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip past 'function'
        self._GetWord()

        cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(cur_word)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid KSH-style function name', loc.Word(cur_word))

        name_word = self.cur_word
        self._SetNext()  # skip past the function name

        # The '()' after the name is optional in ksh style.
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
            self._SetNext()
            self._Eat(Id.Right_ShFunction)

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        # Track variable declarations within the function body.
        with ctx_VarChecker(self.var_checker, keyword_tok):
            func.body = self.ParseCompoundCommand()

        func.keyword = keyword_tok
        func.name_tok = location.LeftTokenForWord(name_word)
        return func
2081
2082 def ParseYshProc(self):
2083 # type: () -> Proc
2084 node = Proc.CreateNull(alloc_lists=True)
2085
2086 keyword_tok = word_.AsKeywordToken(self.cur_word)
2087 node.keyword = keyword_tok
2088
2089 with ctx_VarChecker(self.var_checker, keyword_tok):
2090 with ctx_CmdMode(self, cmd_mode_e.Proc):
2091 self.w_parser.ParseProc(node)
2092 if node.sig.tag() == proc_sig_e.Closed: # Register params
2093 sig = cast(proc_sig.Closed, node.sig)
2094
2095 # Treat 3 kinds of params as variables.
2096 wp = sig.word
2097 if wp:
2098 for param in wp.params:
2099 self.var_checker.Check(Id.KW_Var, param.name,
2100 param.blame_tok)
2101 if wp.rest_of:
2102 r = wp.rest_of
2103 self.var_checker.Check(Id.KW_Var, r.name,
2104 r.blame_tok)
2105 # We COULD register __out here but it would require a different API.
2106 #if param.prefix and param.prefix.id == Id.Arith_Colon:
2107 # self.var_checker.Check(Id.KW_Var, '__' + param.name)
2108
2109 posit = sig.positional
2110 if posit:
2111 for param in posit.params:
2112 self.var_checker.Check(Id.KW_Var, param.name,
2113 param.blame_tok)
2114 if posit.rest_of:
2115 r = posit.rest_of
2116 self.var_checker.Check(Id.KW_Var, r.name,
2117 r.blame_tok)
2118
2119 named = sig.named
2120 if named:
2121 for param in named.params:
2122 self.var_checker.Check(Id.KW_Var, param.name,
2123 param.blame_tok)
2124 if named.rest_of:
2125 r = named.rest_of
2126 self.var_checker.Check(Id.KW_Var, r.name,
2127 r.blame_tok)
2128
2129 if sig.block_param:
2130 b = sig.block_param
2131 self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)
2132
2133 self._SetNext()
2134 node.body = self.ParseBraceGroup()
2135 # No redirects for YSH procs (only at call site)
2136
2137 return node
2138
2139 def ParseYshFunc(self):
2140 # type: () -> Func
2141 """
2142 ysh_func: (
2143 Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
2144 )
2145 Looking at KW_Func
2146 """
2147 node = Func.CreateNull(alloc_lists=True)
2148
2149 keyword_tok = word_.AsKeywordToken(self.cur_word)
2150 node.keyword = keyword_tok
2151
2152 with ctx_VarChecker(self.var_checker, keyword_tok):
2153 self.w_parser.ParseFunc(node)
2154
2155 posit = node.positional
2156 if posit:
2157 for param in posit.params:
2158 self.var_checker.Check(Id.KW_Var, param.name,
2159 param.blame_tok)
2160 if posit.rest_of:
2161 r = posit.rest_of
2162 self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)
2163
2164 named = node.named
2165 if named:
2166 for param in named.params:
2167 self.var_checker.Check(Id.KW_Var, param.name,
2168 param.blame_tok)
2169 if named.rest_of:
2170 r = named.rest_of
2171 self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)
2172
2173 self._SetNext()
2174 with ctx_CmdMode(self, cmd_mode_e.Func):
2175 node.body = self.ParseBraceGroup()
2176
2177 return node
2178
2179 def ParseCoproc(self):
2180 # type: () -> command_t
2181 """
2182 TODO: command.Coproc?
2183 """
2184 raise NotImplementedError()
2185
2186 def ParseSubshell(self):
2187 # type: () -> command.Subshell
2188 """
2189 subshell : '(' compound_list ')'
2190
2191 Looking at Op_LParen
2192 """
2193 left = word_.AsOperatorToken(self.cur_word)
2194 self._SetNext() # skip past (
2195
2196 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2197 # translation stack, we want to delay it.
2198
2199 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2200
2201 c_list = self._ParseCommandList()
2202 if len(c_list.children) == 1:
2203 child = c_list.children[0]
2204 else:
2205 child = c_list
2206
2207 ate = self._Eat(Id.Right_Subshell)
2208 right = word_.AsOperatorToken(ate)
2209
2210 return command.Subshell(left, child, right, None) # no redirects yet
2211
2212 def ParseDBracket(self):
2213 # type: () -> command.DBracket
2214 """Pass the underlying word parser off to the boolean expression
2215 parser."""
2216 left = word_.AsKeywordToken(self.cur_word)
2217 # TODO: Test interactive. Without closing ]], you should get > prompt
2218 # (PS2)
2219
2220 self._SetNext() # skip [[
2221 b_parser = bool_parse.BoolParser(self.w_parser)
2222 bnode, right = b_parser.Parse() # May raise
2223 return command.DBracket(left, bnode, right, None) # no redirects yet
2224
2225 def ParseDParen(self):
2226 # type: () -> command.DParen
2227 left = word_.AsOperatorToken(self.cur_word)
2228
2229 self._SetNext() # skip ((
2230 anode, right = self.w_parser.ReadDParen()
2231 assert anode is not None
2232
2233 return command.DParen(left, anode, right, None) # no redirects yet
2234
2235 def ParseCommand(self):
2236 # type: () -> command_t
2237 """
2238 command : simple_command
2239 | compound_command # OSH edit: io_redirect* folded in
2240 | function_def
2241 | ksh_function_def
2242
2243 # YSH extensions
2244 | proc NAME ...
2245 | const ...
2246 | var ...
2247 | setglobal ...
2248 | setref ...
2249 | setvar ...
2250 | _ EXPR
2251 | = EXPR
2252 ;
2253
2254 Note: the reason const / var are not part of compound_command is because
2255 they can't be alone in a shell function body.
2256
2257 Example:
2258 This is valid shell f() if true; then echo hi; fi
2259 This is invalid f() var x = 1
2260 """
2261 if self._AtSecondaryKeyword():
2262 p_die('Unexpected word when parsing command',
2263 loc.Word(self.cur_word))
2264
2265 # YSH Extensions
2266
2267 if self.c_id == Id.KW_Proc: # proc p { ... }
2268 # proc is hidden because of the 'local reasoning' principle. Code
2269 # inside procs should be YSH, full stop. That means ysh:upgrade is
2270 # on.
2271 if self.parse_opts.parse_proc():
2272 return self.ParseYshProc()
2273 else:
2274 # 2024-02: This avoids bad syntax errors if you type YSH code
2275 # into OSH
2276 # proc p (x) { echo hi } would actually be parsed as a
2277 # command.Simple! Shell compatibility: quote 'proc'
2278 p_die("proc is a YSH keyword, but this is OSH.",
2279 loc.Word(self.cur_word))
2280
2281 if self.c_id == Id.KW_Func: # func f(x) { ... }
2282 if self.parse_opts.parse_func():
2283 return self.ParseYshFunc()
2284 else:
2285 # Same reasoning as above, for 'proc'
2286 p_die("func is a YSH keyword, but this is OSH.",
2287 loc.Word(self.cur_word))
2288
2289 if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
2290 p_die("const can't be inside proc or func. Use var instead.",
2291 loc.Word(self.cur_word))
2292
2293 if self.c_id in (Id.KW_Var, Id.KW_Const): # var x = 1
2294 keyword_id = self.c_id
2295 kw_token = word_.LiteralToken(self.cur_word)
2296 self._SetNext()
2297 n8 = self.w_parser.ParseVarDecl(kw_token)
2298 for lhs in n8.lhs:
2299 self.var_checker.Check(keyword_id, lhs.name, lhs.left)
2300 return n8
2301
2302 if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
2303 kw_token = word_.LiteralToken(self.cur_word)
2304 self._SetNext()
2305 n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
2306 return n9
2307
2308 if self.c_id in (Id.KW_Call, Id.Lit_Equals):
2309 # = 42 + a[i]
2310 # call mylist->append('x')
2311
2312 keyword = word_.LiteralToken(self.cur_word)
2313 assert keyword is not None
2314 self._SetNext()
2315 enode = self.w_parser.ParseCommandExpr()
2316 return command.Expr(keyword, enode)
2317
2318 if self.c_id == Id.KW_Function:
2319 return self.ParseKshFunctionDef()
2320
2321 if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
2322 Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
2323 Id.KW_If, Id.KW_Case, Id.KW_Time):
2324 return self.ParseCompoundCommand()
2325
2326 # Syntax error for '}' starting a line, which all shells disallow.
2327 if self.c_id == Id.Lit_RBrace:
2328 p_die('Unexpected right brace', loc.Word(self.cur_word))
2329
2330 if self.c_kind == Kind.Redir: # Leading redirect
2331 return self.ParseSimpleCommand()
2332
2333 if self.c_kind == Kind.Word:
2334 # ensured by Kind.Word
2335 cur_word = cast(CompoundWord, self.cur_word)
2336
2337 # NOTE: At the top level, only Token and Compound are possible.
2338 # Can this be modelled better in the type system, removing asserts?
2339 #
2340 # TODO: This can be a proc INVOCATION! (Doesn't even need parse_paren)
2341 # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
2342 # That requires 2 tokens of lookahead, which we don't have
2343 #
2344 # Or maybe we don't just have ParseSimpleCommand -- we will have
2345 # ParseYshCommand or something
2346
2347 if (self.w_parser.LookAheadFuncParens() and
2348 not word_.IsVarLike(cur_word)):
2349 return self.ParseFunctionDef() # f() { echo; } # function
2350
2351 # Parse x = 1+2*3 when inside HayNode { } blocks
2352 parts = cur_word.parts
2353 if self.parse_opts.parse_equals() and len(parts) == 1:
2354 part0 = parts[0]
2355 if part0.tag() == word_part_e.Literal:
2356 tok = cast(Token, part0)
2357 if (match.IsValidVarName(tok.tval) and
2358 self.w_parser.LookPastSpace() == Id.Lit_Equals):
2359 assert tok.id == Id.Lit_Chars, tok
2360
2361 if len(self.hay_attrs_stack
2362 ) and self.hay_attrs_stack[-1]:
2363 # Note: no static var_checker.Check() for bare assignment
2364 enode = self.w_parser.ParseBareDecl()
2365 self._SetNext() # Somehow this is necessary
2366 # TODO: Use BareDecl here. Well, do that when we
2367 # treat it as const or lazy.
2368 return command.VarDecl(
2369 None,
2370 [NameType(tok, lexer.TokenVal(tok), None)],
2371 enode)
2372 else:
2373 self._SetNext()
2374 self._GetWord()
2375 p_die(
2376 'Unexpected = (Hint: use var/setvar, or quote it)',
2377 loc.Word(self.cur_word))
2378
2379 # echo foo
2380 # f=(a b c) # array
2381 # array[1+2]+=1
2382 return self.ParseSimpleCommand()
2383
2384 if self.c_kind == Kind.Eof:
2385 p_die("Unexpected EOF while parsing command",
2386 loc.Word(self.cur_word))
2387
2388 # NOTE: This only happens in batch mode in the second turn of the loop!
2389 # e.g. )
2390 p_die("Invalid word while parsing command", loc.Word(self.cur_word))
2391
2392 assert False # for MyPy
2393
2394 def ParsePipeline(self):
2395 # type: () -> command_t
2396 """
2397 pipeline : Bang? command ( '|' newline_ok command )* ;
2398 """
2399 negated = None # type: Optional[Token]
2400
2401 self._GetWord()
2402 if self.c_id == Id.KW_Bang:
2403 negated = word_.AsKeywordToken(self.cur_word)
2404 self._SetNext()
2405
2406 child = self.ParseCommand()
2407 assert child is not None
2408
2409 children = [child]
2410
2411 self._GetWord()
2412 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2413 if negated is not None:
2414 node = command.Pipeline(negated, children, [])
2415 return node
2416 else:
2417 return child # no pipeline
2418
2419 # | or |&
2420 ops = [] # type: List[Token]
2421 while True:
2422 op = word_.AsOperatorToken(self.cur_word)
2423 ops.append(op)
2424
2425 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2426 self._NewlineOk()
2427
2428 child = self.ParseCommand()
2429 children.append(child)
2430
2431 self._GetWord()
2432 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2433 break
2434
2435 return command.Pipeline(negated, children, ops)
2436
2437 def ParseAndOr(self):
2438 # type: () -> command_t
2439 self._GetWord()
2440 if self.c_id == Id.Word_Compound:
2441 first_word_tok = word_.LiteralToken(self.cur_word)
2442 if first_word_tok is not None and first_word_tok.id == Id.Lit_TDot:
2443 # We got '...', so parse in multiline mode
2444 self._SetNext()
2445 with word_.ctx_Multiline(self.w_parser):
2446 return self._ParseAndOr()
2447
2448 # Parse in normal mode, not multiline
2449 return self._ParseAndOr()
2450
2451 def _ParseAndOr(self):
2452 # type: () -> command_t
2453 """
2454 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2455 | pipeline
2456
2457 Note that it is left recursive and left associative. We parse it
2458 iteratively with a token of lookahead.
2459 """
2460 child = self.ParsePipeline()
2461 assert child is not None
2462
2463 self._GetWord()
2464 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2465 return child
2466
2467 ops = [] # type: List[Token]
2468 children = [child]
2469
2470 while True:
2471 ops.append(word_.AsOperatorToken(self.cur_word))
2472
2473 self._SetNext() # skip past || &&
2474 self._NewlineOk()
2475
2476 child = self.ParsePipeline()
2477 children.append(child)
2478
2479 self._GetWord()
2480 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2481 break
2482
2483 return command.AndOr(children, ops)
2484
2485 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2486
2487 # At the top level, we execute after every line, e.g. to
2488 # - process alias (a form of dynamic parsing)
2489 # - process 'exit', because invalid syntax might appear after it
2490
2491 # On the other hand, for a while loop body, we parse the whole thing at once,
2492 # and then execute it. We don't want to parse it over and over again!
2493
2494 # COMPARE
2495 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2496 # command_term : and_or (trailer and_or)* ; # CHILDREN
2497
    def _ParseCommandLine(self):
        # type: () -> command_t
        """
        command_line : and_or (sync_op and_or)* trailer? ;
        trailer : sync_op newline_ok
                | NEWLINES;
        sync_op : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively.  Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return.  (We're only parsing a single
              line.)
           b. If there's a sync_op, process it.  Then look for a newline and
              return.  Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in Sentence so the terminator token (; or &) is kept;
                # & means run in the background.
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                # A newline or EOF right after the sync op ends the line.
                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die('Invalid word while parsing command line',
                      loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
2552
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term : and_or (trailer and_or)* ;
        trailer : sync_op newline_ok
                | NEWLINES;
        sync_op : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different. (HOW? Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            # NOTE: the three 'self.c_id in END_LIST' checks below look alike
            # but are at different nesting depths; they cover the distinct
            # trailers '\n EOF', '; \n EOF', '; EOF', and bare EOF.
            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2632
    def _ParseCommandList(self):
        # type: () -> command.CommandList
        """
        command_list : newline_ok command_term trailer? ;

        This one is called by all the compound commands.  It's basically a
        command block.

        NOTE: Rather than translating the CFG directly, the code follows a
        style more like this: (and_or trailer)+.  It makes capture easier.
        """
        self._NewlineOk()
        return self._ParseCommandTerm()
2647
    def ParseLogicalLine(self):
        # type: () -> Optional[command_t]
        """Parse a single line for main_loop.

        A wrapper around _ParseCommandLine().  Similar but not identical to
        _ParseCommandList() and ParseCommandSub().

        Returns:
          None on Eof_Real, so the main loop can check for pending here docs.
          (Annotation widened to Optional to match this behavior.)

        Raises:
          ParseError
        """
        self._NewlineOk()
        self._GetWord()
        if self.c_id == Id.Eof_Real:
            return None  # main loop checks for here docs
        node = self._ParseCommandLine()
        return node
2664
2665 def ParseInteractiveLine(self):
2666 # type: () -> parse_result_t
2667 """Parse a single line for Interactive main_loop.
2668
2669 Different from ParseLogicalLine because newlines are handled differently.
2670
2671 Raises:
2672 ParseError
2673 """
2674 self._GetWord()
2675 if self.c_id == Id.Op_Newline:
2676 return parse_result.EmptyLine
2677 if self.c_id == Id.Eof_Real:
2678 return parse_result.Eof
2679
2680 node = self._ParseCommandLine()
2681 return parse_result.Node(node)
2682
2683 def ParseCommandSub(self):
2684 # type: () -> command_t
2685 """Parse $(echo hi) and `echo hi` for word_parse.py.
2686
2687 They can have multiple lines, like this: echo $( echo one echo
2688 two )
2689 """
2690 self._NewlineOk()
2691
2692 self._GetWord()
2693 if self.c_kind == Kind.Eof: # e.g. $()
2694 return command.NoOp
2695
2696 c_list = self._ParseCommandTerm()
2697 if len(c_list.children) == 1:
2698 return c_list.children[0]
2699 else:
2700 return c_list
2701
2702 def CheckForPendingHereDocs(self):
2703 # type: () -> None
2704 # NOTE: This happens when there is no newline at the end of a file, like
2705 # osh -c 'cat <<EOF'
2706 if len(self.pending_here_docs):
2707 node = self.pending_here_docs[0] # Just show the first one?
2708 h = cast(redir_param.HereDoc, node.arg)
2709 p_die('Unterminated here doc began here', loc.Word(h.here_begin))