OILS / osh / cmd_parse.py View on Github | oilshell.org

2710 lines, 1399 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
_ = Kind_str  # silence "unused" warnings; Kind_str is kept for debug prints

# Byte values used to check the character BEFORE '(' in typed-arg calls.
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect here doc body lines, up to (but not including) the terminator.

    We slurp all the lines at once, instead of parsing line-by-line, because
    of cases like this:
        cat <<EOF
        1 $(echo 2
        echo 3) 4
        EOF

    Returns:
      (body lines, terminator line), each paired with the offset of stripped
      leading tabs (always 0 unless the operator is <<-).
    """
    body_lines = []  # type: List[Tuple[SourceLine, int]]
    terminator_line = None  # type: Tuple[SourceLine, int]
    strip_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF before we saw the delimiter
            # An unterminated here doc is just a warning in bash. We make it
            # fatal because we want to be strict, and because it causes
            # problems reporting other errors.  Blame the << operator.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        text = src_line.content

        # For <<-, strip ALL leading tabs -- not spaces, and not just the
        # first tab.
        col = 0
        if strip_tabs:
            n = len(text)
            while col < n and text[col] == '\t':
                col += 1

        if text[col:].rstrip() == delimiter:
            terminator_line = (src_line, col)
            break

        body_lines.append((src_line, col))

    return body_lines, terminator_line
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Create one Id.Lit_Chars Token per here doc line.

    Used for the single-quoted rule: <<'EOF' and <<-'EOF'.

    <<- lines have non-zero start_offset (the stripped tabs).
    """
    # Declared as the less precise List[word_part_t], because List[T] is an
    # invariant type.
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:

        if do_lossless:  # avoid garbage, doesn't affect correctness
            # Keep the lossless invariant for STRIPPED tabs: the Token is
            # added to the arena, but nothing refers to it.
            arena.NewToken(Id.Ignored_HereTabs, 0, start_offset, src_line,
                           None)

        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line,
                             src_line.content[start_offset:])
        parts.append(tok)
    return parts
165
166
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Reads body lines until the delimiter, then fills in h.stdin_parts (either
    literal tokens or parsed word parts, depending on whether the delimiter
    was quoted) and h.here_end_tok.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # POSIX: "If any character in word is quoted, the delimiter shall be
    # formed by performing quote removal on word, and the here-document lines
    # shall not be expanded. Otherwise, the delimiter shall be the word
    # itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - Literal for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - Parse as word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain lossless invariant for STRIPPED tabs: add a Token to the
    # arena invariant, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Ignored_HereTabs, 0, start_offset, end_line, None)

    # Create a Token with the end terminator. Maintains the invariant that the
    # tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line, '')
203
204
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (from DetectShAssignment).

    Handles three LHS shapes: plain s=1, and a[i]=1 with the index either
    kept as an unparsed string (lossless mode) or parsed as arithmetic.
    """

    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        # slice off the trailing '+=' or '='
        if lexer.IsPlusEquals(left_token):
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        # a[i]=1 in lossless mode: keep the index text UNPARSED so it can be
        # round-tripped (sh_lhs.UnparsedIndex)
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # extract the text between 'a[' and ']='
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()  # DetectShAssignment only produces the above

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        # nothing after the '=', e.g. s=
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        w = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(w)
        rhs = w

    return AssignPair(left_token, lhs, op, rhs)
278
279
280def _AppendMoreEnv(preparsed_list, more_env):
281 # type: (List[ParsedAssignment], List[EnvPair]) -> None
282 """Helper to modify a SimpleCommand node.
283
284 Args:
285 preparsed: a list of 4-tuples from DetectShAssignment
286 more_env: a list to append env_pairs to
287 """
288 for preparsed in preparsed_list:
289 left_token = preparsed.left
290
291 if left_token.id != Id.Lit_VarLike: # can't be a[x]=1
292 p_die(
293 "Environment binding shouldn't look like an array assignment",
294 left_token)
295
296 if lexer.IsPlusEquals(left_token):
297 p_die('Expected = in environment binding, got +=', left_token)
298
299 var_name = lexer.TokenSliceRight(left_token, -1)
300
301 parts = preparsed.w.parts
302 n = len(parts)
303 offset = preparsed.part_offset
304 if offset == n:
305 rhs = rhs_word.Empty # type: rhs_word_t
306 else:
307 w = CompoundWord(parts[offset:])
308 word_.TildeDetectAssign(w)
309 rhs = w
310
311 more_env.append(EnvPair(left_token, var_name, rhs))
312
313
314def _SplitSimpleCommandPrefix(words):
315 # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
316 """Second pass of SimpleCommand parsing: look for assignment words."""
317 preparsed_list = [] # type: List[ParsedAssignment]
318 suffix_words = [] # type: List[CompoundWord]
319
320 done_prefix = False
321 for w in words:
322 if done_prefix:
323 suffix_words.append(w)
324 continue
325
326 left_token, close_token, part_offset = word_.DetectShAssignment(w)
327 if left_token:
328 preparsed_list.append(
329 ParsedAssignment(left_token, close_token, part_offset, w))
330 else:
331 done_prefix = True
332 suffix_words.append(w)
333
334 return preparsed_list, suffix_words
335
336
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Assemble a command.Simple node from its parsed pieces."""

    # FOO=(1 2 3) ls is not allowed.
    for preparsed in preparsed_list:
        if word_.HasArrayPart(preparsed.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(preparsed.w))

    # NOTE: It would be possible to add this check back. But it already happens
    # at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # Blame the very first part, BEFORE brace/tilde detection:
    #   {a,b,c}    # Use { before brace detection
    #   ~/bin/ls   # Use ~ before tilde detection
    blame_tok = location.LeftTokenForWordPart(suffix_words[0].parts[0])

    # We only do brace DETECTION here, not brace EXPANSION, so bash's
    # {~bob,~jane}/src can't work -- we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate', but
    # it doesn't seem worth it.
    detected = braces.BraceDetectAll(suffix_words)
    final_words = word_.TildeDetectAll(detected)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # do_fork=True by default
    return command.Simple(blame_tok, more_env, final_words, redirects,
                          typed_args, block, True)
382
383
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # Two parallel stacks, one frame per proc/func/shell function:
        # - self.tokens: the blame token ('proc', 'func', ...) for each frame
        # - self.names: names declared in each frame
        self.tokens = []  # type: List[Token]
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when we enter a shell function, proc, or func.

        Bash allows this, but it's confusing because it's the same as two
        functions at the top level.

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens) != 0:
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        frame = {}  # type: Dict[str, Id_t]
        self.names.append(frame)

    def Pop(self):
        # type: () -> None
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x:
          error if x was already declared in this frame
        setvar x:
          error if x is NOT declared in this frame
        setglobal x:
          no errors are possible; we would need all these many conditions
          to statically know the names:
          - no 'source'
          - shopt -u copy_env
          - AND use lib has to be static

        What about bare assignment in Hay?  Those are dynamic checks --
        there is no static check.  Hay is for building up data imperatively,
        and then LATER, right before main(), it can be type checked.

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var and
        # setvar are essentially the same there.
        if len(self.names) == 0:
            return

        top = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in top:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                top[var_name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if var_name not in top:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
472
473
class ctx_VarChecker(object):
    """Context manager: push a VarChecker frame on entry, pop it on exit."""

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        var_checker.Push(blame_tok)  # may p_die on illegal nesting
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
488
489
class ctx_CmdMode(object):
    """Context manager: temporarily switch the parser's cmd_mode."""

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode  # saved for __exit__
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
505
506
# Keywords that don't start a command, but instead close or continue a
# compound command already being parsed (see _AtSecondaryKeyword).
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
511
512
513class CommandParser(object):
514 """Recursive descent parser derived from POSIX shell grammar.
515
516 This is a BNF grammar:
517 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
518
519 - Augmented with both bash/OSH and YSH constructs.
520
521 - We use regex-like iteration rather than recursive references
522 ? means optional (0 or 1)
523 * means 0 or more
524 + means 1 or more
525
526 - Keywords are spelled in Caps:
527 If Elif Case
528
529 - Operator tokens are quoted:
530 '(' '|'
531
532 or can be spelled directly if it matters:
533
534 Op_LParen Op_Pipe
535
536 - Non-terminals are snake_case:
537 brace_group subshell
538
539 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
540 the production should be in the method docstrings, e.g.
541
542 def ParseSubshell():
543 "
544 subshell : '(' compound_list ')'
545
546 Looking at Op_LParen # Comment to say how this method is called
547 "
548
549 The grammar may be factored to make parsing easier.
550 """
551
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parse-time state (aliases, arena, sub-parser
            factories)
          parse_opts: parse-time options, e.g. parse_brace
          w_parser: the word parser this command parser drives
          lexer: for pushing hints, lookahead to (
          line_reader: for reading here doc bodies
          eof_id: token Id that ends parsing (default Id.Eof_Real)
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser instances.
        # I think this OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        # Sets the cursor state (cur_word, c_id, c_kind); must be last.
        self.Reset()
593
594 # Init_() function for "keyword arg"
595 def Init_AliasesInFlight(self, aliases_in_flight):
596 # type: (AliasesInFlight) -> None
597 self.aliases_in_flight = aliases_in_flight
598
599 def Reset(self):
600 # type: () -> None
601 """Reset our own internal state.
602
603 Called by the interactive loop.
604 """
605 # Cursor state set by _GetWord()
606 self.next_lex_mode = lex_mode_e.ShCommand
607 self.cur_word = None # type: word_t # current word
608 self.c_kind = Kind.Undefined
609 self.c_id = Id.Undefined_Tok
610
611 self.pending_here_docs = [
612 ] # type: List[Redir] # should have HereLiteral arg
613
614 def ResetInputObjects(self):
615 # type: () -> None
616 """Reset the internal state of our inputs.
617
618 Called by the interactive loop.
619 """
620 self.w_parser.Reset()
621 self.lexer.ResetInputObjects()
622 self.line_reader.Reset()
623
624 def _SetNext(self):
625 # type: () -> None
626 """Call this when you no longer need the current token.
627
628 This method is lazy. A subsequent call to _GetWord() will
629 actually read the next Token.
630 """
631 self.next_lex_mode = lex_mode_e.ShCommand
632
633 def _SetNextBrack(self):
634 # type: () -> None
635 self.next_lex_mode = lex_mode_e.ShCommandBrack
636
637 def _GetWord(self):
638 # type: () -> None
639 """Call this when you need to make a decision based on Id or Kind.
640
641 If there was an "unfulfilled" call to _SetNext(), it reads a word and sets
642 self.c_id and self.c_kind.
643
644 Otherwise it does nothing.
645 """
646 if self.next_lex_mode != lex_mode_e.Undefined:
647 w = self.w_parser.ReadWord(self.next_lex_mode)
648 #log("w %s", w)
649
650 # Here docs only happen in command mode, so other kinds of newlines don't
651 # count.
652 if w.tag() == word_e.Operator:
653 tok = cast(Token, w)
654 if tok.id == Id.Op_Newline:
655 for h in self.pending_here_docs:
656 _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
657 self.arena)
658 del self.pending_here_docs[:] # No .clear() until Python 3.3.
659
660 self.cur_word = w
661
662 self.c_kind = word_.CommandKind(self.cur_word)
663 self.c_id = word_.CommandId(self.cur_word)
664 self.next_lex_mode = lex_mode_e.Undefined
665
666 def _Eat(self, c_id, msg=None):
667 # type: (Id_t, Optional[str]) -> word_t
668 """Consume a word of a type, maybe showing a custom error message.
669
670 Args:
671 c_id: the Id we expected
672 msg: improved error message
673 """
674 self._GetWord()
675 if self.c_id != c_id:
676 if msg is None:
677 msg = 'Expected word type %s, got %s' % (
678 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
679 p_die(msg, loc.Word(self.cur_word))
680
681 skipped = self.cur_word
682 self._SetNext()
683 return skipped
684
685 def _NewlineOk(self):
686 # type: () -> None
687 """Check for optional newline and consume it."""
688 self._GetWord()
689 if self.c_id == Id.Op_Newline:
690 self._SetNext()
691
692 def _AtSecondaryKeyword(self):
693 # type: () -> bool
694 self._GetWord()
695 if self.c_id in SECONDARY_KEYWORDS:
696 return True
697 return False
698
699 def ParseRedirect(self):
700 # type: () -> Redir
701 self._GetWord()
702 assert self.c_kind == Kind.Redir, self.cur_word
703 op_tok = cast(Token, self.cur_word) # for MyPy
704
705 # Note: the lexer could take distinguish between
706 # >out
707 # 3>out
708 # {fd}>out
709 #
710 # which would make the code below faster. But small string optimization
711 # would also speed it up, since redirects are small.
712
713 # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
714 # possibly "unify" the IDs by subtracting a constant like 8 or 16?
715
716 op_val = lexer.TokenVal(op_tok)
717 if op_val[0] == '{':
718 pos = op_val.find('}')
719 assert pos != -1 # lexer ensures this
720 where = redir_loc.VarName(op_val[1:pos]) # type: redir_loc_t
721
722 elif op_val[0].isdigit():
723 pos = 1
724 if op_val[1].isdigit():
725 pos = 2
726 where = redir_loc.Fd(int(op_val[:pos]))
727
728 else:
729 where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))
730
731 self._SetNext()
732
733 self._GetWord()
734 # Other redirect
735 if self.c_kind != Kind.Word:
736 p_die('Invalid token after redirect operator',
737 loc.Word(self.cur_word))
738
739 # Here doc
740 if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
741 arg = redir_param.HereDoc.CreateNull()
742 arg.here_begin = self.cur_word
743 arg.stdin_parts = []
744
745 r = Redir(op_tok, where, arg)
746
747 self.pending_here_docs.append(r) # will be filled on next newline.
748
749 self._SetNext()
750 return r
751
752 arg_word = self.cur_word
753 tilde = word_.TildeDetect(arg_word)
754 if tilde:
755 arg_word = tilde
756 self._SetNext()
757
758 # We should never get Empty, Token, etc.
759 assert arg_word.tag() == word_e.Compound, arg_word
760 return Redir(op_tok, where, cast(CompoundWord, arg_word))
761
762 def _ParseRedirectList(self):
763 # type: () -> List[Redir]
764 """Try parsing any redirects at the cursor.
765
766 This is used for blocks only, not commands.
767 """
768 redirects = [] # type: List[Redir]
769 while True:
770 # This prediction needs to ONLY accept redirect operators. Should we
771 # make them a separate Kind?
772 self._GetWord()
773 if self.c_kind != Kind.Redir:
774 break
775
776 node = self.ParseRedirect()
777 redirects.append(node)
778 self._SetNext()
779
780 return redirects
781
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """First pass of simple command parsing: collect words and redirects.

        YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

        simple_command  : cmd_prefix cmd_word cmd_suffix
                        | cmd_prefix cmd_word
                        | cmd_prefix
                        | cmd_name cmd_suffix
                        | cmd_name
                        ;
        cmd_name        : WORD  /* Apply rule 7a */
                        ;
        cmd_word        : WORD  /* Apply rule 7b */
                        ;
        cmd_prefix      : io_redirect
                        | cmd_prefix io_redirect
                        | ASSIGNMENT_WORD
                        | cmd_prefix ASSIGNMENT_WORD
                        ;
        cmd_suffix      : io_redirect
                        | cmd_suffix io_redirect
                        | WORD
                        | cmd_suffix WORD

        YSH grammar:

        simple_command =
          cmd_prefix* word+ typed_args? BraceGroup? cmd_suffix*

        typed_args =
          '(' arglist ')'
        | '[' arglist ']'

        Notably, redirects shouldn't appear between typed args and
        BraceGroup.
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()
            if self.c_kind == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif self.c_kind == Kind.Word:
                if self.parse_opts.parse_brace():
                    # Treat { and } more like operators
                    if self.c_id == Id.Lit_LBrace:
                        if self.allow_block:  # Disabled for if/while condition, etc.

                            # allow x = 42
                            self.hay_attrs_stack.append(first_word_caps)
                            brace_group = self.ParseBraceGroup()

                            # So we can get the source code back later
                            lines = self.arena.SaveLinesAndDiscard(
                                brace_group.left, brace_group.right)
                            block = LiteralBlock(brace_group, lines)

                            self.hay_attrs_stack.pop()

                            if 0:
                                print('--')
                                block.PrettyPrint()
                                print('\n--')
                        break
                    elif self.c_id == Id.Lit_RBrace:
                        # Another thing: { echo hi }
                        # We're DONE!!!
                        break

                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            elif self.c_id == Id.Op_LParen:
                # 1. Check that there's a preceding space
                prev_byte = self.lexer.ByteLookBack()
                if prev_byte not in (SPACE_CH, TAB_CH):
                    if self.parse_opts.parse_at():
                        p_die('Space required before (',
                              loc.Word(self.cur_word))
                    else:
                        # inline func call like @sorted(x) is invalid in OSH, but the
                        # solution isn't a space
                        p_die(
                            'Unexpected left paren (might need a space before it)',
                            loc.Word(self.cur_word))

                # 2. Check that it's not (). We disallow this because it's a no-op and
                # there could be confusion with shell func defs.
                # For some reason we need to call lexer.LookPastSpace, not
                # w_parser.LookPastSpace. I think this is because we're at (, which is
                # an operator token. All the other cases are like 'x=', which is PART
                # of a word, and we don't know if it will end.
                next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
                if next_id == Id.Op_RParen:
                    p_die('Empty arg list not allowed',
                          loc.Word(self.cur_word))

                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_eager_arglist)

            elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_lazy_arglist)

            else:
                break

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1
        return redirects, words, typed_args, block
922
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node if any aliases were expanded, or None otherwise
          (caller does normal parsing).

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out
        a better place.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.
        """
        # Start a new list if there aren't any. This will be passed recursively
        # through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only a statically-known, unquoted word can be an alias name
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops. This is subtle: we want to prevent infinite
            # expansion of alias echo='echo x'. But we don't want to prevent
            # expansion of the second word in 'echo echo', so we add 'i' to
            # "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to expand
                # aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own Arena.
        # This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special care.
        # See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error.
                    # We don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1061
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Parse a simple command: words, assignments, redirects, typed args.

        Fixed transcription of the POSIX grammar (TODO: port to
        grammar/Shell.g)

        io_file : '<' filename
                | LESSAND filename
                  ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects.  Append them to separate lists.
        2) Look for the first non-assignment word.  If it's declare, etc., then
           keep parsing words AND assign words.  Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have them of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array
        literal in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted,
        e.g. <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # A location to blame when typed args/blocks appear where they can't.
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g. >out.txt # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # A bare redirect still produces a Simple command node.
            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading FOO=bar bindings from the rest of the words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            # and inside Hay Attr blocks
            # But allow X=Y at the top level
            # for interactive use foo=bar
            # for global constants GLOBAL=~/src
            # because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion.  We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x - inside procs and shell functions
                # return (x) - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors.  (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
1241
1242 def ParseBraceGroup(self):
1243 # type: () -> BraceGroup
1244 """
1245 Original:
1246 brace_group : LBrace command_list RBrace ;
1247
1248 YSH:
1249 brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;
1250
1251 The doc comment can only occur if there's a newline.
1252 """
1253 ate = self._Eat(Id.Lit_LBrace)
1254 left = word_.BraceToken(ate)
1255
1256 doc_word = None # type: word_t
1257 self._GetWord()
1258 if self.c_id == Id.Op_Newline:
1259 self._SetNext()
1260 # Set a flag so we don't skip over ###
1261 with word_.ctx_EmitDocToken(self.w_parser):
1262 self._GetWord()
1263
1264 if self.c_id == Id.Ignored_Comment:
1265 doc_word = self.cur_word
1266 self._SetNext()
1267
1268 # Id.Ignored_Comment means it's a Token, or None
1269 doc_token = cast(Token, doc_word)
1270
1271 c_list = self._ParseCommandList()
1272
1273 ate = self._Eat(Id.Lit_RBrace)
1274 right = word_.BraceToken(ate)
1275
1276 # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
1277 # would allow us to revert this back to None, which was changed in
1278 # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
1279 # behavior saves allocations, but is less type safe.
1280 return BraceGroup(left, doc_token, c_list.children, [],
1281 right) # no redirects yet
1282
1283 def ParseDoGroup(self):
1284 # type: () -> command.DoGroup
1285 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1286
1287 do_group : Do command_list Done ; /* Apply rule 6 */
1288 """
1289 ate = self._Eat(Id.KW_Do)
1290 do_kw = word_.AsKeywordToken(ate)
1291
1292 c_list = self._ParseCommandList() # could be anything
1293
1294 ate = self._Eat(Id.KW_Done)
1295 done_kw = word_.AsKeywordToken(ate)
1296
1297 return command.DoGroup(do_kw, c_list.children, done_kw)
1298
1299 def ParseForWords(self):
1300 # type: () -> Tuple[List[CompoundWord], Optional[Token]]
1301 """
1302 for_words : WORD* for_sep
1303 ;
1304 for_sep : ';' newline_ok
1305 | NEWLINES
1306 ;
1307 """
1308 words = [] # type: List[CompoundWord]
1309 # The span_id of any semi-colon, so we can remove it.
1310 semi_tok = None # type: Optional[Token]
1311
1312 while True:
1313 self._GetWord()
1314 if self.c_id == Id.Op_Semi:
1315 tok = cast(Token, self.cur_word)
1316 semi_tok = tok
1317 self._SetNext()
1318 self._NewlineOk()
1319 break
1320 elif self.c_id == Id.Op_Newline:
1321 self._SetNext()
1322 break
1323 elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1324 break
1325
1326 if self.cur_word.tag() != word_e.Compound:
1327 # TODO: Can we also show a pointer to the 'for' keyword?
1328 p_die('Invalid word in for loop', loc.Word(self.cur_word))
1329
1330 w2 = cast(CompoundWord, self.cur_word)
1331 words.append(w2)
1332 self._SetNext()
1333 return words, semi_tok
1334
1335 def _ParseForExprLoop(self, for_kw):
1336 # type: (Token) -> command.ForExpr
1337 """
1338 Shell:
1339 for '((' init ';' cond ';' update '))' for_sep? do_group
1340
1341 YSH:
1342 for '((' init ';' cond ';' update '))' for_sep? brace_group
1343 """
1344 node = self.w_parser.ReadForExpression()
1345 node.keyword = for_kw
1346
1347 self._SetNext()
1348
1349 self._GetWord()
1350 if self.c_id == Id.Op_Semi:
1351 self._SetNext()
1352 self._NewlineOk()
1353 elif self.c_id == Id.Op_Newline:
1354 self._SetNext()
1355 elif self.c_id == Id.KW_Do: # missing semicolon/newline allowed
1356 pass
1357 elif self.c_id == Id.Lit_LBrace: # does NOT require parse_brace
1358 pass
1359 else:
1360 p_die('Invalid word after for expression', loc.Word(self.cur_word))
1361
1362 if self.c_id == Id.Lit_LBrace:
1363 node.body = self.ParseBraceGroup()
1364 else:
1365 node.body = self.ParseDoGroup()
1366 return node
1367
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse 'for NAME [, NAME]* [in ...]' and its body.

        Covers POSIX 'for x in a b', 'for x; do' / 'for x do' (loop over
        "$@"), and the YSH forms 'for x in (expr) { }' and 'for x in a b { }'.
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        num_iter_names = 0
        # First, collect up to 3 loop variable names.
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    # Strip the trailing comma from 'x,'
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'. But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y) # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH: for x in (expr) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Plain word list: for x in a b c
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                # Expand brace sets and detect tilde prefixes statically.
                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1471
1472 def ParseFor(self):
1473 # type: () -> command_t
1474 """
1475 TODO: Update the grammar
1476
1477 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1478 | For '((' ... TODO
1479 """
1480 ate = self._Eat(Id.KW_For)
1481 for_kw = word_.AsKeywordToken(ate)
1482
1483 self._GetWord()
1484 if self.c_id == Id.Op_DLeftParen:
1485 if not self.parse_opts.parse_dparen():
1486 p_die("Bash for loops aren't allowed (parse_dparen)",
1487 loc.Word(self.cur_word))
1488
1489 # for (( i = 0; i < 10; i++)
1490 n1 = self._ParseForExprLoop(for_kw)
1491 n1.redirects = self._ParseRedirectList()
1492 return n1
1493 else:
1494 # for x in a b; do echo hi; done
1495 n2 = self._ParseForEachLoop(for_kw)
1496 n2.redirects = self._ParseRedirectList()
1497 return n2
1498
1499 def _ParseConditionList(self):
1500 # type: () -> condition_t
1501 """
1502 condition_list: command_list
1503
1504 This is a helper to parse a condition list for if commands and while/until
1505 loops. It will throw a parse error if there are no conditions in the list.
1506 """
1507 self.allow_block = False
1508 commands = self._ParseCommandList()
1509 self.allow_block = True
1510
1511 if len(commands.children) == 0:
1512 p_die("Expected a condition", loc.Word(self.cur_word))
1513
1514 return condition.Shell(commands.children)
1515
1516 def ParseWhileUntil(self, keyword):
1517 # type: (Token) -> command.WhileUntil
1518 """
1519 while_clause : While command_list do_group ;
1520 until_clause : Until command_list do_group ;
1521 """
1522 self._SetNext() # skip keyword
1523
1524 if (self.parse_opts.parse_paren() and
1525 self.w_parser.LookPastSpace() == Id.Op_LParen):
1526 enode = self.w_parser.ParseYshExprForCommand()
1527 cond = condition.YshExpr(enode) # type: condition_t
1528 else:
1529 cond = self._ParseConditionList()
1530
1531 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1532 # should be unchanged. To be sure we should desugar.
1533 self._GetWord()
1534 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1535 # while test -f foo {
1536 body_node = self.ParseBraceGroup() # type: command_t
1537 else:
1538 body_node = self.ParseDoGroup()
1539
1540 # no redirects yet
1541 return command.WhileUntil(keyword, cond, body_node, None)
1542
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a shell-style case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # Make the closing ')' lex as Right_CasePat instead of Op_RParen.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # Collect '|'-separated pattern words.
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.KW_Esac):
            # Non-empty arm body; _ParseCommandTerm handles multiple lines.
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id == Id.Op_DSemi:
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1599
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case statement.

        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`.  We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words: one or more words separated by '|'
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # Blame the first pattern word for errors.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1657
1658 def ParseYshCase(self, case_kw):
1659 # type: (Token) -> command.Case
1660 """
1661 ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;
1662
1663 Looking at: token after 'case'
1664 """
1665 enode = self.w_parser.ParseYshExprForCommand()
1666 to_match = case_arg.YshExpr(enode)
1667
1668 ate = self._Eat(Id.Lit_LBrace)
1669 arms_start = word_.BraceToken(ate)
1670
1671 discriminant = self.w_parser.NewlineOkForYshCase()
1672
1673 # Note: for now, zero arms are accepted, just like POSIX case $x in esac
1674 arms = [] # type: List[CaseArm]
1675 while discriminant != Id.Op_RBrace:
1676 arm = self.ParseYshCaseArm(discriminant)
1677 arms.append(arm)
1678
1679 discriminant = self.w_parser.NewlineOkForYshCase()
1680
1681 # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
1682 # token is read as an Id.Op_RBrace, but we need to store this as a
1683 # Id.Lit_RBrace.
1684 ate = self._Eat(Id.Op_RBrace)
1685 arms_end = word_.AsOperatorToken(ate)
1686 arms_end.id = Id.Lit_RBrace
1687
1688 return command.Case(case_kw, to_match, arms_start, arms, arms_end,
1689 None)
1690
1691 def ParseOldCase(self, case_kw):
1692 # type: (Token) -> command.Case
1693 """
1694 case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;
1695
1696 -> Looking at WORD
1697
1698 FYI original POSIX case list, which takes pains for DSEMI
1699
1700 case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
1701 """
1702 self._GetWord()
1703 w = self.cur_word
1704 if not self.parse_opts.parse_bare_word():
1705 ok, s, quoted = word_.StaticEval(w)
1706 if ok and not quoted:
1707 p_die(
1708 "This is a constant string. You may want a variable like $x (parse_bare_word)",
1709 loc.Word(w))
1710
1711 if w.tag() != word_e.Compound:
1712 p_die("Expected a word to match against", loc.Word(w))
1713
1714 to_match = case_arg.Word(w)
1715 self._SetNext() # past WORD
1716
1717 self._NewlineOk()
1718
1719 ate = self._Eat(Id.KW_In)
1720 arms_start = word_.AsKeywordToken(ate)
1721
1722 self._NewlineOk()
1723
1724 arms = [] # type: List[CaseArm]
1725 while True:
1726 self._GetWord()
1727 if self.c_id == Id.KW_Esac: # this is Kind.Word
1728 break
1729 # case arm should begin with a pattern word or (
1730 if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
1731 break
1732
1733 arm = self.ParseCaseArm()
1734 arms.append(arm)
1735
1736 ate = self._Eat(Id.KW_Esac)
1737 arms_end = word_.AsKeywordToken(ate)
1738
1739 # no redirects yet
1740 return command.Case(case_kw, to_match, arms_start, arms, arms_end,
1741 None)
1742
1743 def ParseCase(self):
1744 # type: () -> command.Case
1745 """
1746 case_clause : old_case # from POSIX
1747 | ysh_case
1748 ;
1749
1750 Looking at 'Case'
1751 """
1752 case_kw = word_.AsKeywordToken(self.cur_word)
1753 self._SetNext() # past 'case'
1754
1755 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1756 return self.ParseYshCase(case_kw)
1757 else:
1758 return self.ParseOldCase(case_kw)
1759
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the elif/else tail of a YSH-style if, appending to if_node.

        if test -f foo {
            echo foo
        } elif test -f bar; test -f spam {
        # ^ we parsed up to here
            echo bar
        } else {
            echo none
        }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                # elif (x > 0) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Shell-style condition; blocks not allowed inside it
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()

            arm = IfArm(elif_kw, cond, None, body.children, [elif_kw.span_id])
            arms.append(arm)

        # NOTE(review): _GetWord() was already called at the end of the loop
        # body above, so this call looks redundant -- confirm before removing.
        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1793
1794 def _ParseYshIf(self, if_kw, cond):
1795 # type: (Token, condition_t) -> command.If
1796 """if test -f foo {
1797
1798 # ^ we parsed up to here
1799 echo foo
1800 } elif test -f bar; test -f spam {
1801 echo bar
1802 } else {
1803 echo none
1804 }
1805 NOTE: If you do something like if test -n foo{, the parser keeps going, and
1806 the error is confusing because it doesn't point to the right place.
1807
1808 I think we might need strict_brace so that foo{ is disallowed. It has to
1809 be foo\{ or foo{a,b}. Or just turn that on with parse_brace? After you
1810 form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
1811 Lit_RBrace? Maybe this is pre-parsing step in the WordParser?
1812 """
1813 if_node = command.If.CreateNull(alloc_lists=True)
1814 if_node.if_kw = if_kw
1815
1816 body1 = self.ParseBraceGroup()
1817 # Every arm has 1 spid, unlike shell-style
1818 # TODO: We could get the spids from the brace group.
1819 arm = IfArm(if_kw, cond, None, body1.children, [if_kw.span_id])
1820
1821 if_node.arms.append(arm)
1822
1823 self._GetWord()
1824 if self.c_id in (Id.KW_Elif, Id.KW_Else):
1825 self._ParseYshElifElse(if_node)
1826 # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
1827 # spid because that's in the BraceGroup.
1828 return if_node
1829
1830 def _ParseElifElse(self, if_node):
1831 # type: (command.If) -> None
1832 """
1833 else_part: (Elif command_list Then command_list)* Else command_list ;
1834 """
1835 arms = if_node.arms
1836
1837 self._GetWord()
1838 while self.c_id == Id.KW_Elif:
1839 elif_kw = word_.AsKeywordToken(self.cur_word)
1840 self._SetNext() # past 'elif'
1841
1842 cond = self._ParseConditionList()
1843
1844 ate = self._Eat(Id.KW_Then)
1845 then_kw = word_.AsKeywordToken(ate)
1846
1847 body = self._ParseCommandList()
1848 arm = IfArm(elif_kw, cond, then_kw, body.children,
1849 [elif_kw.span_id, then_kw.span_id])
1850
1851 arms.append(arm)
1852
1853 self._GetWord()
1854 if self.c_id == Id.KW_Else:
1855 else_kw = word_.AsKeywordToken(self.cur_word)
1856 self._SetNext() # past 'else'
1857 body = self._ParseCommandList()
1858 if_node.else_action = body.children
1859 else:
1860 else_kw = None
1861
1862 if_node.else_kw = else_kw
1863
    def ParseIf(self):
        # type: () -> command.If
        """Parse an if command, in either shell or YSH style.

        if_clause : If command_list Then command_list else_part? Fi ;

        open      : '{' | Then
        close     : '}' | Fi

        ysh_if    : If ( command_list | '(' expr ')' )
                    open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            # YSH style: '{' instead of 'then'; hand off the rest
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children,
                    [if_kw.span_id, then_kw.span_id])
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1914
1915 def ParseTime(self):
1916 # type: () -> command_t
1917 """Time [-p] pipeline.
1918
1919 According to bash help.
1920 """
1921 time_kw = word_.AsKeywordToken(self.cur_word)
1922 self._SetNext() # skip time
1923 pipeline = self.ParsePipeline()
1924 return command.TimeBlock(time_kw, pipeline)
1925
1926 def ParseCompoundCommand(self):
1927 # type: () -> command_t
1928 """
1929 Refactoring: we put io_redirect* here instead of in function_body and
1930 command.
1931
1932 compound_command : brace_group io_redirect*
1933 | subshell io_redirect*
1934 | for_clause io_redirect*
1935 | while_clause io_redirect*
1936 | until_clause io_redirect*
1937 | if_clause io_redirect*
1938 | case_clause io_redirect*
1939
1940 # bash extensions
1941 | time_clause
1942 | [[ BoolExpr ]]
1943 | (( ArithExpr ))
1944 """
1945 self._GetWord()
1946 if self.c_id == Id.Lit_LBrace:
1947 n1 = self.ParseBraceGroup()
1948 n1.redirects = self._ParseRedirectList()
1949 return n1
1950 if self.c_id == Id.Op_LParen:
1951 n2 = self.ParseSubshell()
1952 n2.redirects = self._ParseRedirectList()
1953 return n2
1954
1955 if self.c_id == Id.KW_For:
1956 # Note: Redirects parsed in this call. POSIX for and bash for (( have
1957 # redirects, but YSH for doesn't.
1958 return self.ParseFor()
1959 if self.c_id in (Id.KW_While, Id.KW_Until):
1960 keyword = word_.AsKeywordToken(self.cur_word)
1961 n3 = self.ParseWhileUntil(keyword)
1962 n3.redirects = self._ParseRedirectList()
1963 return n3
1964
1965 if self.c_id == Id.KW_If:
1966 n4 = self.ParseIf()
1967 n4.redirects = self._ParseRedirectList()
1968 return n4
1969 if self.c_id == Id.KW_Case:
1970 n5 = self.ParseCase()
1971 n5.redirects = self._ParseRedirectList()
1972 return n5
1973
1974 if self.c_id == Id.KW_DLeftBracket:
1975 n6 = self.ParseDBracket()
1976 n6.redirects = self._ParseRedirectList()
1977 return n6
1978 if self.c_id == Id.Op_DLeftParen:
1979 if not self.parse_opts.parse_dparen():
1980 p_die('You may want a space between parens (parse_dparen)',
1981 loc.Word(self.cur_word))
1982 n7 = self.ParseDParen()
1983 n7.redirects = self._ParseRedirectList()
1984 return n7
1985
1986 # bash extensions: no redirects
1987 if self.c_id == Id.KW_Time:
1988 return self.ParseTime()
1989
1990 # Happens in function body, e.g. myfunc() oops
1991 p_die('Unexpected word while parsing compound command',
1992 loc.Word(self.cur_word))
1993 assert False # for MyPy
1994
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a POSIX-style shell function definition: name() body.

        function_header : fname '(' ')'
        function_def    : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a
        grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        # Make ')' lex as Right_ShFunction instead of Op_RParen.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            # Track variable declarations within the function body.
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            return None  # unreachable; p_die raises
2047
    def ParseKshFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a ksh-style function definition: function f { ... }.

        ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body

        Looking at the 'function' keyword.  Unlike the POSIX 'f() { ... }'
        form, the parens are optional here.

        Raises:
          error.Parse (via p_die) if the name is not a valid function name.
        """
        keyword_tok = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip past 'function'
        self._GetWord()

        cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(cur_word)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid KSH-style function name', loc.Word(cur_word))

        name_word = self.cur_word  # saved for the name_tok location below
        self._SetNext()  # skip past the function name

        # Optional '()' after the name.
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            # Make the lexer translate the matching ')' into Right_ShFunction.
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
            self._SetNext()
            self._Eat(Id.Right_ShFunction)

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        # var_checker context for the function body; errors are blamed on the
        # 'function' keyword token.
        with ctx_VarChecker(self.var_checker, keyword_tok):
            func.body = self.ParseCompoundCommand()

        func.keyword = keyword_tok
        func.name_tok = location.LeftTokenForWord(name_word)
        return func
2082
    def ParseYshProc(self):
        # type: () -> Proc
        """Parse a YSH proc definition.  Looking at KW_Proc.

        The word parser parses the signature into 'node'; this method then
        registers every declared parameter with the static variable checker,
        and finally parses the body in Proc command mode.
        """
        node = Proc.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            with ctx_CmdMode(self, cmd_mode_e.Proc):
                self.w_parser.ParseProc(node)
                if node.sig.tag() == proc_sig_e.Closed:  # Register params
                    sig = cast(proc_sig.Closed, node.sig)

                    # Treat 3 kinds of params as variables.  Each group has
                    # regular params plus an optional rest_of (...rest) param.
                    wp = sig.word
                    if wp:
                        for param in wp.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if wp.rest_of:
                            r = wp.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)
                        # We COULD register __out here but it would require a different API.
                        #if param.prefix and param.prefix.id == Id.Arith_Colon:
                        #    self.var_checker.Check(Id.KW_Var, '__' + param.name)

                    # Positional params, e.g. proc p ( ; x, y)
                    posit = sig.positional
                    if posit:
                        for param in posit.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if posit.rest_of:
                            r = posit.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    # Named params
                    named = sig.named
                    if named:
                        for param in named.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if named.rest_of:
                            r = named.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    # The trailing block param, if any, is also a variable.
                    if sig.block_param:
                        b = sig.block_param
                        self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)

                self._SetNext()
                node.body = self.ParseBraceGroup()
                # No redirects for YSH procs (only at call site)

        return node
2139
    def ParseYshFunc(self):
        # type: () -> Func
        """Parse a YSH func definition.

        ysh_func: (
          Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
        )
        Looking at KW_Func.

        Like ParseYshProc, the signature is parsed by the word parser, and all
        declared parameters are registered with the static variable checker
        before the body is parsed.
        """
        node = Func.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            self.w_parser.ParseFunc(node)

            # Positional params, plus optional ...rest param
            posit = node.positional
            if posit:
                for param in posit.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if posit.rest_of:
                    r = posit.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            # Named params, plus optional ...rest param
            named = node.named
            if named:
                for param in named.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if named.rest_of:
                    r = named.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            self._SetNext()
            # Func bodies are parsed in Func mode (affects e.g. 'const' checks).
            with ctx_CmdMode(self, cmd_mode_e.Func):
                node.body = self.ParseBraceGroup()

        return node
2179
2180 def ParseCoproc(self):
2181 # type: () -> command_t
2182 """
2183 TODO: command.Coproc?
2184 """
2185 raise NotImplementedError()
2186
2187 def ParseSubshell(self):
2188 # type: () -> command.Subshell
2189 """
2190 subshell : '(' compound_list ')'
2191
2192 Looking at Op_LParen
2193 """
2194 left = word_.AsOperatorToken(self.cur_word)
2195 self._SetNext() # skip past (
2196
2197 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2198 # translation stack, we want to delay it.
2199
2200 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2201
2202 c_list = self._ParseCommandList()
2203 if len(c_list.children) == 1:
2204 child = c_list.children[0]
2205 else:
2206 child = c_list
2207
2208 ate = self._Eat(Id.Right_Subshell)
2209 right = word_.AsOperatorToken(ate)
2210
2211 return command.Subshell(left, child, right, None) # no redirects yet
2212
2213 def ParseDBracket(self):
2214 # type: () -> command.DBracket
2215 """Pass the underlying word parser off to the boolean expression
2216 parser."""
2217 left = word_.AsKeywordToken(self.cur_word)
2218 # TODO: Test interactive. Without closing ]], you should get > prompt
2219 # (PS2)
2220
2221 self._SetNext() # skip [[
2222 b_parser = bool_parse.BoolParser(self.w_parser)
2223 bnode, right = b_parser.Parse() # May raise
2224 return command.DBracket(left, bnode, right, None) # no redirects yet
2225
2226 def ParseDParen(self):
2227 # type: () -> command.DParen
2228 left = word_.AsOperatorToken(self.cur_word)
2229
2230 self._SetNext() # skip ((
2231 anode, right = self.w_parser.ReadDParen()
2232 assert anode is not None
2233
2234 return command.DParen(left, anode, right, None) # no redirects yet
2235
    def ParseCommand(self):
        # type: () -> command_t
        """Dispatch on the current word/token to the right command parser.

        command : simple_command
                | compound_command   # OSH edit: io_redirect* folded in
                | function_def
                | ksh_function_def

                # YSH extensions
                | proc NAME ...
                | const ...
                | var ...
                | setglobal ...
                | setref ...
                | setvar ...
                | _ EXPR
                | = EXPR
                ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1
        """
        # do/done/then/etc. never BEGIN a command.
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle.  Code
            # inside procs should be YSH, full stop.  That means ysh:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                # 2024-02: This avoids bad syntax errors if you type YSH code
                # into OSH
                # proc p (x) { echo hi } would actually be parsed as a
                # command.Simple!  Shell compatibility: quote 'proc'
                p_die("proc is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func():
                return self.ParseYshFunc()
            else:
                # Same reasoning as above, for 'proc'
                p_die("func is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        # const is only allowed at the top level (Shell mode).
        if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
            p_die("const can't be inside proc or func. Use var instead.",
                  loc.Word(self.cur_word))

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Register each declared name with the static checker.
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name, lhs.left)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.KW_Call, Id.Lit_Equals):
            # = 42 + a[i]
            # call mylist->append('x')

            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            # ensured by Kind.Word
            cur_word = cast(CompoundWord, self.cur_word)

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    # NOTE(review): reads tok.tval directly, while the VarDecl
                    # below uses lexer.TokenVal(tok) -- confirm the two are
                    # equivalent for this token.
                    if (match.IsValidVarName(tok.tval) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        # Only valid when the enclosing Hay node accepts
                        # attributes (top of hay_attrs_stack is true).
                        if len(self.hay_attrs_stack
                               ) and self.hay_attrs_stack[-1]:
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here.  Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(
                                None,
                                [NameType(tok, lexer.TokenVal(tok), None)],
                                enode)
                        else:
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy
2394
    def ParsePipeline(self):
        # type: () -> command_t
        """
        pipeline : Bang? command ( '|' newline_ok command )* ;

        Returns the bare command when there is no '!' and no pipe, to keep
        the AST small.
        """
        negated = None  # type: Optional[Token]

        self._GetWord()
        if self.c_id == Id.KW_Bang:
            # Leading '!' token is recorded on the Pipeline node.
            negated = word_.AsKeywordToken(self.cur_word)
            self._SetNext()

        child = self.ParseCommand()
        assert child is not None

        children = [child]

        # One token of lookahead: is there a pipe operator?
        self._GetWord()
        if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
            if negated is not None:
                # '! cmd' with no pipe still needs a Pipeline node to hold '!'.
                node = command.Pipeline(negated, children, [])
                return node
            else:
                return child  # no pipeline

        # | or |&
        ops = []  # type: List[Token]
        while True:
            op = word_.AsOperatorToken(self.cur_word)
            ops.append(op)

            self._SetNext()  # skip past Id.Op_Pipe or Id.Op_PipeAmp
            self._NewlineOk()  # the command may continue on the next line

            child = self.ParseCommand()
            children.append(child)

            self._GetWord()
            if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
                break

        return command.Pipeline(negated, children, ops)
2437
2438 def ParseAndOr(self):
2439 # type: () -> command_t
2440 self._GetWord()
2441 if self.c_id == Id.Word_Compound:
2442 first_word_tok = word_.LiteralToken(self.cur_word)
2443 if first_word_tok is not None and first_word_tok.id == Id.Lit_TDot:
2444 # We got '...', so parse in multiline mode
2445 self._SetNext()
2446 with word_.ctx_Multiline(self.w_parser):
2447 return self._ParseAndOr()
2448
2449 # Parse in normal mode, not multiline
2450 return self._ParseAndOr()
2451
    def _ParseAndOr(self):
        # type: () -> command_t
        """
        and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
               | pipeline

        Note that it is left recursive and left associative.  We parse it
        iteratively with a token of lookahead.

        Returns the bare pipeline when there is no && or ||, to keep the
        AST small.
        """
        child = self.ParsePipeline()
        assert child is not None

        self._GetWord()
        if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
            return child

        # Flat representation: children and the parallel list of && / || ops.
        ops = []  # type: List[Token]
        children = [child]

        while True:
            ops.append(word_.AsOperatorToken(self.cur_word))

            self._SetNext()  # skip past || &&
            self._NewlineOk()  # command may continue on the next line

            child = self.ParsePipeline()
            children.append(child)

            self._GetWord()
            if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
                break

        return command.AndOr(children, ops)
2485
2486 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2487
2488 # At the top level, we execute after every line, e.g. to
2489 # - process alias (a form of dynamic parsing)
2490 # - process 'exit', because invalid syntax might appear after it
2491
2492 # On the other hand, for a while loop body, we parse the whole thing at once,
2493 # and then execute it. We don't want to parse it over and over again!
2494
2495 # COMPARE
2496 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2497 # command_term : and_or (trailer and_or)* ; # CHILDREN
2498
    def _ParseCommandLine(self):
        # type: () -> command_t
        """
        command_line : and_or (sync_op and_or)* trailer? ;
        trailer : sync_op newline_ok
                | NEWLINES;
        sync_op : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively.  Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return.  (We're only parsing a single
              line.)
           b. If there's a sync_op, process it.  Then look for a newline and
              return.  Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in command.Sentence to record the trailing ; or &.
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die('Invalid word while parsing command line',
                      loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
2553
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term : and_or (trailer and_or)* ;
        trailer : sync_op newline_ok
                | NEWLINES;
        sync_op : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in command.Sentence to record the trailing ; or &.
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2633
2634 def _ParseCommandList(self):
2635 # type: () -> command.CommandList
2636 """
2637 command_list : newline_ok command_term trailer? ;
2638
2639 This one is called by all the compound commands. It's basically a command
2640 block.
2641
2642 NOTE: Rather than translating the CFG directly, the code follows a style
2643 more like this: more like this: (and_or trailer)+. It makes capture
2644 easier.
2645 """
2646 self._NewlineOk()
2647 return self._ParseCommandTerm()
2648
2649 def ParseLogicalLine(self):
2650 # type: () -> command_t
2651 """Parse a single line for main_loop.
2652
2653 A wrapper around _ParseCommandLine(). Similar but not identical to
2654 _ParseCommandList() and ParseCommandSub().
2655
2656 Raises:
2657 ParseError
2658 """
2659 self._NewlineOk()
2660 self._GetWord()
2661 if self.c_id == Id.Eof_Real:
2662 return None # main loop checks for here docs
2663 node = self._ParseCommandLine()
2664 return node
2665
2666 def ParseInteractiveLine(self):
2667 # type: () -> parse_result_t
2668 """Parse a single line for Interactive main_loop.
2669
2670 Different from ParseLogicalLine because newlines are handled differently.
2671
2672 Raises:
2673 ParseError
2674 """
2675 self._GetWord()
2676 if self.c_id == Id.Op_Newline:
2677 return parse_result.EmptyLine
2678 if self.c_id == Id.Eof_Real:
2679 return parse_result.Eof
2680
2681 node = self._ParseCommandLine()
2682 return parse_result.Node(node)
2683
2684 def ParseCommandSub(self):
2685 # type: () -> command_t
2686 """Parse $(echo hi) and `echo hi` for word_parse.py.
2687
2688 They can have multiple lines, like this: echo $( echo one echo
2689 two )
2690 """
2691 self._NewlineOk()
2692
2693 self._GetWord()
2694 if self.c_kind == Kind.Eof: # e.g. $()
2695 return command.NoOp
2696
2697 c_list = self._ParseCommandTerm()
2698 if len(c_list.children) == 1:
2699 return c_list.children[0]
2700 else:
2701 return c_list
2702
2703 def CheckForPendingHereDocs(self):
2704 # type: () -> None
2705 # NOTE: This happens when there is no newline at the end of a file, like
2706 # osh -c 'cat <<EOF'
2707 if len(self.pending_here_docs):
2708 node = self.pending_here_docs[0] # Just show the first one?
2709 h = cast(redir_param.HereDoc, node.arg)
2710 p_die('Unterminated here doc began here', loc.Word(h.here_begin))