# OILS / osh / cmd_parse.py  (View on Github | oilshell.org)
#
# 2723 lines, 1403 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
81_ = Kind_str # for debug prints
82
83TAB_CH = 9 # ord('\t')
84SPACE_CH = 32 # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the raw lines of a here doc, up to and including its terminator.

    All lines are read eagerly, rather than parsed line-by-line, because the
    body may contain multi-line constructs:

        cat <<EOF
        1 $(echo 2
        echo 3) 4
        EOF

    Returns:
      (body lines, terminator line).  Each entry is a (SourceLine, start
      offset) pair, where the offset skips any tabs stripped by <<-.
    """
    body_lines = []  # type: List[Tuple[SourceLine, int]]
    found_delim = None  # type: Tuple[SourceLine, int]
    dedent_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # An unterminated here doc is just a warning in bash.  We make it
            # fatal because we want to be strict, and because it causes
            # problems reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        text = src_line.content

        # For <<-, skip ALL leading tabs -- not spaces, and not just the
        # first tab.
        offset = 0
        if dedent_tabs:
            num_chars = len(text)
            while offset < num_chars and text[offset] == '\t':
                offset += 1

        if text[offset:].rstrip() == delimiter:
            found_delim = (src_line, offset)
            break

        body_lines.append((src_line, offset))

    return body_lines, found_delim
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Turn each here doc line into a single literal Token.

    Used for the single-quoted delimiter rule: <<'EOF' and <<-'EOF'.

    A non-zero start offset means <<- stripped leading tabs.
    """
    # Declared with the less precise word_part_t, because List[T] is an
    # invariant type
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:
        # Maintain the lossless invariant for STRIPPED tabs: record a Token
        # in the arena, even though nothing refers to it.
        #
        # Note: We could use Lit_CharsWithoutPrefix for 'single quoted' EOF
        # here docs, but it's more complex with double quoted EOF docs.
        if do_lossless:  # avoid garbage, doesn't affect correctness
            arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0,
                           src_line)

        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line)
        parts.append(tok)
    return parts
169
170
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Called at the newline after the redirect; reads the body lines and
    attaches them to r.arg (a redir_param.HereDoc).
    """
    h = cast(redir_param.HereDoc, r.arg)
    # POSIX: "If any character in word is quoted, the delimiter shall be
    # formed by performing quote removal on word, and the here-document lines
    # shall not be expanded.  Otherwise, the delimiter shall be the word
    # itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - Literal for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - Parse as word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain lossless invariant for STRIPPED tabs: add a Token to the
    # arena invariant, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0, end_line)

    # Create a Token with the end terminator.  Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line)
207
208
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a 4-tuple from DetectShAssignment.

    Handles three LHS shapes: a plain name (s=1), an indexed name whose index
    is kept as a raw string (lossless mode), and an indexed name whose index
    is parsed as arithmetic (a[x++]=1).
    """

    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            # strip the trailing '+=' to get the variable name
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            # strip the trailing '=' to get the variable name
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # In lossless mode, keep the index between the brackets as an
        # unparsed string, so the original source can be reproduced exactly.
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            # NOTE(review): multi-line a[...]= indices are not supported here
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        # The whole word was the LHS, e.g. 'x='
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        w = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(w)
        rhs = w

    return AssignPair(left_token, lhs, op, rhs)
282
283
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (List[ParsedAssignment], List[EnvPair]) -> None
    """Convert prefix assignments into EnvPair nodes on a SimpleCommand.

    Args:
      preparsed_list: assignments detected by DetectShAssignment
      more_env: output list of EnvPair to append to
    """
    for pre in preparsed_list:
        left = pre.left

        # a[x]=1 is never a valid environment binding
        if left.id != Id.Lit_VarLike:
            p_die(
                "Environment binding shouldn't look like an array assignment",
                left)

        if lexer.IsPlusEquals(left):
            p_die('Expected = in environment binding, got +=', left)

        # strip the trailing '=' to get the variable name
        name = lexer.TokenSliceRight(left, -1)

        word_parts = pre.w.parts
        num_parts = len(word_parts)
        if pre.part_offset == num_parts:
            # The whole word was the LHS, e.g. FOO= cmd
            rhs = rhs_word.Empty  # type: rhs_word_t
        else:
            w = CompoundWord(word_parts[pre.part_offset:])
            word_.TildeDetectAssign(w)
            rhs = w

        more_env.append(EnvPair(left, name, rhs))
316
317
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
    """Second pass of SimpleCommand parsing: look for assignment words.

    Returns:
      (leading assignment words, remaining words).  Detection stops at the
      first word that isn't an assignment; everything after it is a suffix
      word even if it looks like one.
    """
    assignments = []  # type: List[ParsedAssignment]
    rest = []  # type: List[CompoundWord]

    in_prefix = True
    for w in words:
        if in_prefix:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                assignments.append(
                    ParsedAssignment(left_token, close_token, part_offset, w))
                continue
            in_prefix = False  # first non-assignment ends the prefix

        rest.append(w)

    return assignments, rest
339
340
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Assemble a command.Simple node from its parsed pieces."""

    # FOO=(1 2 3) ls is not allowed.
    for pre in preparsed_list:
        if word_.HasArrayPart(pre.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(pre.w))

    # NOTE: It would be possible to check that suffix words have no array
    # parts here, but it already happens at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on
    # echo FOO[x]=1).

    assert len(suffix_words) != 0
    # {a,b,c}  # Use { before brace detection
    # ~/bin/ls # Use ~ before tilde detection
    first_part = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(first_part)

    # NOTE: We only do brace DETECTION here, not brace EXPANSION.  Therefore
    # we can't implement bash's behavior of having say {~bob,~jane}/src work,
    # because we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate',
    # but it doesn't seem worth it.
    detected = braces.BraceDetectAll(suffix_words)
    final_words = word_.TildeDetectAll(detected)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # do_fork by default
    return command.Simple(blame_tok, more_env, final_words, redirects,
                          typed_args, block, True)
386
387
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # Two parallel stacks, pushed on entry to each shell function, proc,
        # or func:
        # - self.tokens: the blaming token for location info ('proc' or
        #   another token)
        # - self.names: declared variable name -> declaring keyword Id
        self.tokens = []  # type: List[Token]
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Enter the scope of a shell function, proc, or func.

        Bash allows nesting, but it's confusing because it's the same as two
        functions at the top level:

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens):
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        new_scope = {}  # type: Dict[str, Id_t]
        self.names.append(new_scope)

    def Pop(self):
        # type: () -> None
        """Leave the scope entered by the matching Push()."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x
          x already declared
        setvar x:
          x is not declared
        setglobal x:
          No errors are possible; we would need all these many conditions to
          statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static

        What about bare assignment in Hay?  I think these are dynamic checks
        -- there is no static check.  Hay is for building up data
        imperatively, and then LATER, right before main(), it can be type
        checked.

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var and
        # setvar are essentially the same.
        if len(self.names) == 0:
            return

        scope = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in scope:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                scope[var_name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if var_name not in scope:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
476
477
class ctx_VarChecker(object):
    """Context manager that scopes a VarChecker to a proc/func/function body.

    Pushes a scope on entry and guarantees the matching Pop() on exit.
    """

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        # Push first: Push() may p_die() on illegal nesting, in which case
        # this object is never used.
        var_checker.Push(blame_tok)
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, unused_type, unused_value, unused_traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
492
493
class ctx_CmdMode(object):
    """Context manager that temporarily switches the parser's cmd_mode.

    Restores the previous mode on exit, even if parsing raises.
    """

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode  # saved for __exit__
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, unused_type, unused_value, unused_traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
509
510
# Keywords that terminate a command list rather than start a new command,
# e.g. 'done' closing a loop or 'fi' closing an if.  The parser stops at
# these instead of consuming them; see _AtSecondaryKeyword().
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
515
516
517class CommandParser(object):
518 """Recursive descent parser derived from POSIX shell grammar.
519
520 This is a BNF grammar:
521 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
522
523 - Augmented with both bash/OSH and YSH constructs.
524
525 - We use regex-like iteration rather than recursive references
526 ? means optional (0 or 1)
527 * means 0 or more
528 + means 1 or more
529
530 - Keywords are spelled in Caps:
531 If Elif Case
532
533 - Operator tokens are quoted:
534 '(' '|'
535
536 or can be spelled directly if it matters:
537
538 Op_LParen Op_Pipe
539
540 - Non-terminals are snake_case:
541 brace_group subshell
542
543 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
544 the production should be in the method docstrings, e.g.
545
546 def ParseSubshell():
547 "
548 subshell : '(' compound_list ')'
549
550 Looking at Op_LParen # Comment to say how this method is called
551 "
552
553 The grammar may be factored to make parsing easier.
554 """
555
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parsing state (aliases, arena factories, ...)
          parse_opts: parse-time option view, e.g. parse_brace, parse_at
          w_parser: the word parser this command parser drives
          lexer: for pushing hints and lookahead
          line_reader: source of input lines, also used for here docs
          eof_id: token Id that terminates parsing (default Eof_Real)
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id  # Id that ends parsing for this instance

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser instances.
        # I think this OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
597
    # Init_() function for "keyword arg"
    def Init_AliasesInFlight(self, aliases_in_flight):
        # type: (AliasesInFlight) -> None
        # Share the caller's list, so recursive alias expansion is detected
        # across nested CommandParser instances (see _MaybeExpandAliases).
        self.aliases_in_flight = aliases_in_flight
602
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined  # Kind of cur_word
        self.c_id = Id.Undefined_Tok  # Id of cur_word

        # Here docs seen on the current line; bodies are parsed at the next
        # newline (see _GetWord).
        self.pending_here_docs = [
        ]  # type: List[Redir]  # should have HereLiteral arg
617
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.
        """
        # Reset word parser, lexer, and line reader together so they agree
        # on the current input position.
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
627
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        # The next word will be read in the normal ShCommand lexer mode.
        self.next_lex_mode = lex_mode_e.ShCommand
636
    def _SetNextBrack(self):
        # type: () -> None
        # Like _SetNext(), but read the next word in a mode where [ can
        # start a lazy typed-arg list (see the Op_LBracket case in
        # _ScanSimpleCommand).
        self.next_lex_mode = lex_mode_e.ShCommandBrack
640
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # Newline reached: fill in the bodies of all here docs
                    # opened on this line.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            self.c_id = word_.CommandId(self.cur_word)
            # Mark the cursor as fulfilled until the next _SetNext().
            self.next_lex_mode = lex_mode_e.Undefined
669
670 def _Eat(self, c_id, msg=None):
671 # type: (Id_t, Optional[str]) -> word_t
672 """Consume a word of a type, maybe showing a custom error message.
673
674 Args:
675 c_id: the Id we expected
676 msg: improved error message
677 """
678 self._GetWord()
679 if self.c_id != c_id:
680 if msg is None:
681 msg = 'Expected word type %s, got %s' % (
682 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
683 p_die(msg, loc.Word(self.cur_word))
684
685 skipped = self.cur_word
686 self._SetNext()
687 return skipped
688
689 def _NewlineOk(self):
690 # type: () -> None
691 """Check for optional newline and consume it."""
692 self._GetWord()
693 if self.c_id == Id.Op_Newline:
694 self._SetNext()
695
696 def _AtSecondaryKeyword(self):
697 # type: () -> bool
698 self._GetWord()
699 if self.c_id in SECONDARY_KEYWORDS:
700 return True
701 return False
702
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse one redirect at the cursor, e.g. >out, 2>&1, {fd}>out, <<EOF.

        Precondition: the current word is Kind.Redir.  A here doc is returned
        with an empty body; it is filled in at the next newline (see
        _GetWord and _ParseHereDocBody).
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
        # possibly "unify" the IDs by subtracting a constant like 8 or 16?

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':
            # {fd}>out style: the target is a variable name
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():
            # Explicit descriptor like 2> or 10> (at most 2 digits here)
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:
            # No explicit descriptor; use the operator's default
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
765
766 def _ParseRedirectList(self):
767 # type: () -> List[Redir]
768 """Try parsing any redirects at the cursor.
769
770 This is used for blocks only, not commands.
771 """
772 redirects = [] # type: List[Redir]
773 while True:
774 # This prediction needs to ONLY accept redirect operators. Should we
775 # make them a separate Kind?
776 self._GetWord()
777 if self.c_kind != Kind.Redir:
778 break
779
780 node = self.ParseRedirect()
781 redirects.append(node)
782 self._SetNext()
783
784 return redirects
785
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

        simple_command   : cmd_prefix cmd_word cmd_suffix
                         | cmd_prefix cmd_word
                         | cmd_prefix
                         | cmd_name cmd_suffix
                         | cmd_name
                         ;
        cmd_name         : WORD                   /* Apply rule 7a */
                         ;
        cmd_word         : WORD                   /* Apply rule 7b */
                         ;
        cmd_prefix       :            io_redirect
                         | cmd_prefix io_redirect
                         |            ASSIGNMENT_WORD
                         | cmd_prefix ASSIGNMENT_WORD
                         ;
        cmd_suffix       :            io_redirect
                         | cmd_suffix io_redirect
                         |            WORD
                         | cmd_suffix WORD

        YSH grammar:

        simple_command =
          cmd_prefix* word+ typed_args? BraceGroup? cmd_suffix*

        typed_args =
          '(' arglist ')'
        | '[' arglist ']'

        Notably, redirects shouldn't appear after between typed args and
        BraceGroup.
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0  # word index, to special-case the first word
        while True:
            self._GetWord()
            if self.c_kind == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif self.c_kind == Kind.Word:
                if self.parse_opts.parse_brace():
                    # Treat { and } more like operators
                    if self.c_id == Id.Lit_LBrace:
                        if self.allow_block:  # Disabled for if/while condition, etc.

                            # allow x = 42
                            self.hay_attrs_stack.append(first_word_caps)
                            brace_group = self.ParseBraceGroup()

                            # So we can get the source code back later
                            lines = self.arena.SaveLinesAndDiscard(
                                brace_group.left, brace_group.right)
                            block = LiteralBlock(brace_group, lines)

                            self.hay_attrs_stack.pop()

                            if 0:
                                print('--')
                                block.PrettyPrint()
                                print('\n--')
                        # A block always ends the simple command
                        break
                    elif self.c_id == Id.Lit_RBrace:
                        # Another thing: { echo hi }
                        # We're DONE!!!
                        break

                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    #
                    # Can we remove this StaticEval() call, and just look
                    # inside Token?  I think once we get rid of SHELL nodes,
                    # this will be simpler.

                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            elif self.c_id == Id.Op_LParen:
                # 1. Check that there's a preceding space
                prev_byte = self.lexer.ByteLookBack()
                if prev_byte not in (SPACE_CH, TAB_CH):
                    if self.parse_opts.parse_at():
                        p_die('Space required before (',
                              loc.Word(self.cur_word))
                    else:
                        # inline func call like @sorted(x) is invalid in OSH,
                        # but the solution isn't a space
                        p_die(
                            'Unexpected left paren (might need a space before it)',
                            loc.Word(self.cur_word))

                # 2. Check that it's not ().  We disallow this because it's a
                # no-op and there could be confusion with shell func defs.
                # For some reason we need to call lexer.LookPastSpace, not
                # w_parser.LookPastSpace.  I think this is because we're at (,
                # which is an operator token.  All the other cases are like
                # 'x=', which is PART of a word, and we don't know if it will
                # end.
                next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
                if next_id == Id.Op_RParen:
                    p_die('Empty arg list not allowed',
                          loc.Word(self.cur_word))

                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_eager_arglist)

            elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_lazy_arglist)

            else:
                break

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1
        return redirects, words, typed_args, block
931
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node, or None.

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out
        a better places.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.

        Returns:
          A command node if any aliases were expanded, or None otherwise.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]  # pieces of the replacement text
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only plain, unquoted words can be alias names
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want
            # to prevent expansion of the second word in 'echo echo', so we
            # add 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own
        # Arena.  This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error.
                    # We don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1070
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar (TODO: port to
        grammar/Shell.g)

        io_file : '<' filename
                | LESSAND filename
                  ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects.  Append them to separate lists.
        2) Look for the first non-assignment word.  If it's declare, etc., then
        keep parsing words AND assign words.  Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have the of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array literal
        in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted.  e.g.
        <<EOF vs <<'EOF'.
        """
        # Gather redirects, plain words, and (YSH only) typed args ( ) and a
        # trailing block { }.
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # A single location to blame when typed args/blocks appear where they
        # aren't allowed; typed_args.left wins over the block's brace.
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g.  >out.txt  # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # Redirect-only command: no words, no env bindings.
            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading FOO=bar bindings from the remaining command words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            # and inside Hay Attr blocks
            # But allow X=Y at the top level
            #  for interactive use foo=bar
            #  for global constants GLOBAL=~/src
            #    because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion.  We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        # Is the first word break/continue/return/exit?
        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x - inside procs and shell functions
                # return (x) - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors.  (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
1250
1251 def ParseBraceGroup(self):
1252 # type: () -> BraceGroup
1253 """
1254 Original:
1255 brace_group : LBrace command_list RBrace ;
1256
1257 YSH:
1258 brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;
1259
1260 The doc comment can only occur if there's a newline.
1261 """
1262 ate = self._Eat(Id.Lit_LBrace)
1263 left = word_.BraceToken(ate)
1264
1265 doc_word = None # type: word_t
1266 self._GetWord()
1267 if self.c_id == Id.Op_Newline:
1268 self._SetNext()
1269 # Set a flag so we don't skip over ###
1270 with word_.ctx_EmitDocToken(self.w_parser):
1271 self._GetWord()
1272
1273 if self.c_id == Id.Ignored_Comment:
1274 doc_word = self.cur_word
1275 self._SetNext()
1276
1277 # Id.Ignored_Comment means it's a Token, or None
1278 doc_token = cast(Token, doc_word)
1279
1280 c_list = self._ParseCommandList()
1281
1282 ate = self._Eat(Id.Lit_RBrace)
1283 right = word_.BraceToken(ate)
1284
1285 # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
1286 # would allow us to revert this back to None, which was changed in
1287 # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
1288 # behavior saves allocations, but is less type safe.
1289 return BraceGroup(left, doc_token, c_list.children, [],
1290 right) # no redirects yet
1291
1292 def ParseDoGroup(self):
1293 # type: () -> command.DoGroup
1294 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1295
1296 do_group : Do command_list Done ; /* Apply rule 6 */
1297 """
1298 ate = self._Eat(Id.KW_Do)
1299 do_kw = word_.AsKeywordToken(ate)
1300
1301 c_list = self._ParseCommandList() # could be anything
1302
1303 ate = self._Eat(Id.KW_Done)
1304 done_kw = word_.AsKeywordToken(ate)
1305
1306 return command.DoGroup(do_kw, c_list.children, done_kw)
1307
1308 def ParseForWords(self):
1309 # type: () -> Tuple[List[CompoundWord], Optional[Token]]
1310 """
1311 for_words : WORD* for_sep
1312 ;
1313 for_sep : ';' newline_ok
1314 | NEWLINES
1315 ;
1316 """
1317 words = [] # type: List[CompoundWord]
1318 # The token of any semi-colon, so we can remove it.
1319 semi_tok = None # type: Optional[Token]
1320
1321 while True:
1322 self._GetWord()
1323 if self.c_id == Id.Op_Semi:
1324 tok = cast(Token, self.cur_word)
1325 semi_tok = tok
1326 self._SetNext()
1327 self._NewlineOk()
1328 break
1329 elif self.c_id == Id.Op_Newline:
1330 self._SetNext()
1331 break
1332 elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1333 break
1334
1335 if self.cur_word.tag() != word_e.Compound:
1336 # TODO: Can we also show a pointer to the 'for' keyword?
1337 p_die('Invalid word in for loop', loc.Word(self.cur_word))
1338
1339 w2 = cast(CompoundWord, self.cur_word)
1340 words.append(w2)
1341 self._SetNext()
1342 return words, semi_tok
1343
1344 def _ParseForExprLoop(self, for_kw):
1345 # type: (Token) -> command.ForExpr
1346 """
1347 Shell:
1348 for '((' init ';' cond ';' update '))' for_sep? do_group
1349
1350 YSH:
1351 for '((' init ';' cond ';' update '))' for_sep? brace_group
1352 """
1353 node = self.w_parser.ReadForExpression()
1354 node.keyword = for_kw
1355
1356 self._SetNext()
1357
1358 self._GetWord()
1359 if self.c_id == Id.Op_Semi:
1360 self._SetNext()
1361 self._NewlineOk()
1362 elif self.c_id == Id.Op_Newline:
1363 self._SetNext()
1364 elif self.c_id == Id.KW_Do: # missing semicolon/newline allowed
1365 pass
1366 elif self.c_id == Id.Lit_LBrace: # does NOT require parse_brace
1367 pass
1368 else:
1369 p_die('Invalid word after for expression', loc.Word(self.cur_word))
1370
1371 if self.c_id == Id.Lit_LBrace:
1372 node.body = self.ParseBraceGroup()
1373 else:
1374 node.body = self.ParseDoGroup()
1375 return node
1376
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse a for-each loop, after 'for' has been consumed.

        Handles:
          for x in a b; do ...; done     # POSIX words
          for x, y in (expr) { ... }     # YSH expression, up to 3 loop vars
          for x; do ...; done            # no 'in': implicitly loop over "$@"
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        # Read 1 to 3 loop variable names, optionally comma-separated.
        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'. But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y)  # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH iterable expression: for x in (mylist) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Shell-style word list: for x in a b c; do ...
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                # Detect brace expansion and tilde sub statically.
                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1480
1481 def ParseFor(self):
1482 # type: () -> command_t
1483 """
1484 TODO: Update the grammar
1485
1486 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1487 | For '((' ... TODO
1488 """
1489 ate = self._Eat(Id.KW_For)
1490 for_kw = word_.AsKeywordToken(ate)
1491
1492 self._GetWord()
1493 if self.c_id == Id.Op_DLeftParen:
1494 if not self.parse_opts.parse_dparen():
1495 p_die("Bash for loops aren't allowed (parse_dparen)",
1496 loc.Word(self.cur_word))
1497
1498 # for (( i = 0; i < 10; i++)
1499 n1 = self._ParseForExprLoop(for_kw)
1500 n1.redirects = self._ParseRedirectList()
1501 return n1
1502 else:
1503 # for x in a b; do echo hi; done
1504 n2 = self._ParseForEachLoop(for_kw)
1505 n2.redirects = self._ParseRedirectList()
1506 return n2
1507
1508 def _ParseConditionList(self):
1509 # type: () -> condition_t
1510 """
1511 condition_list: command_list
1512
1513 This is a helper to parse a condition list for if commands and while/until
1514 loops. It will throw a parse error if there are no conditions in the list.
1515 """
1516 self.allow_block = False
1517 commands = self._ParseCommandList()
1518 self.allow_block = True
1519
1520 if len(commands.children) == 0:
1521 p_die("Expected a condition", loc.Word(self.cur_word))
1522
1523 return condition.Shell(commands.children)
1524
1525 def ParseWhileUntil(self, keyword):
1526 # type: (Token) -> command.WhileUntil
1527 """
1528 while_clause : While command_list do_group ;
1529 until_clause : Until command_list do_group ;
1530 """
1531 self._SetNext() # skip keyword
1532
1533 if (self.parse_opts.parse_paren() and
1534 self.w_parser.LookPastSpace() == Id.Op_LParen):
1535 enode = self.w_parser.ParseYshExprForCommand()
1536 cond = condition.YshExpr(enode) # type: condition_t
1537 else:
1538 cond = self._ParseConditionList()
1539
1540 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1541 # should be unchanged. To be sure we should desugar.
1542 self._GetWord()
1543 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1544 # while test -f foo {
1545 body_node = self.ParseBraceGroup() # type: command_t
1546 else:
1547 body_node = self.ParseDoGroup()
1548
1549 # no redirects yet
1550 return command.WhileUntil(keyword, cond, body_node, None)
1551
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a POSIX case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # Retag the next ) as the end of a case pattern, not a subshell
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # Read pattern words separated by |
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The arm body is optional; it's absent if we're already looking at a
        # terminator (;; ;& ;;&) or esac.
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp,
                             Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp):
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1609
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case statement.

        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`. We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        # '(' or '/' means pat_exprs, pat_else, or pat_eggex; anything else is
        # a word pattern.
        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words
            pat_words = [] # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # First word becomes the arm's left location.
                # NOTE(review): this reads cur_word after _SetNext; it appears
                # _SetNext is lazy and cur_word only advances on the next
                # _GetWord -- confirm.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1667
1668 def ParseYshCase(self, case_kw):
1669 # type: (Token) -> command.Case
1670 """
1671 ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;
1672
1673 Looking at: token after 'case'
1674 """
1675 enode = self.w_parser.ParseYshExprForCommand()
1676 to_match = case_arg.YshExpr(enode)
1677
1678 ate = self._Eat(Id.Lit_LBrace)
1679 arms_start = word_.BraceToken(ate)
1680
1681 discriminant = self.w_parser.NewlineOkForYshCase()
1682
1683 # Note: for now, zero arms are accepted, just like POSIX case $x in esac
1684 arms = [] # type: List[CaseArm]
1685 while discriminant != Id.Op_RBrace:
1686 arm = self.ParseYshCaseArm(discriminant)
1687 arms.append(arm)
1688
1689 discriminant = self.w_parser.NewlineOkForYshCase()
1690
1691 # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
1692 # token is read as an Id.Op_RBrace, but we need to store this as a
1693 # Id.Lit_RBrace.
1694 ate = self._Eat(Id.Op_RBrace)
1695 arms_end = word_.AsOperatorToken(ate)
1696 arms_end.id = Id.Lit_RBrace
1697
1698 return command.Case(case_kw, to_match, arms_start, arms, arms_end,
1699 None)
1700
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse a POSIX-style case.

        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
        """
        self._GetWord()
        w = self.cur_word
        # YSH disallows a bare constant here; it's almost always meant to be $x
        if not self.parse_opts.parse_bare_word():
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string.  You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:  # this is Kind.Word
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1752
1753 def ParseCase(self):
1754 # type: () -> command.Case
1755 """
1756 case_clause : old_case # from POSIX
1757 | ysh_case
1758 ;
1759
1760 Looking at 'Case'
1761 """
1762 case_kw = word_.AsKeywordToken(self.cur_word)
1763 self._SetNext() # past 'case'
1764
1765 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1766 return self.ParseYshCase(case_kw)
1767 else:
1768 return self.ParseOldCase(case_kw)
1769
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse YSH-style elif/else clauses, appending arms to if_node.

        if test -f foo {
          echo foo
        } elif test -f bar; test -f spam {
          # ^ we parsed up to here
          echo bar
        } else {
          echo none
        }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            # elif (expr) { ... } vs. elif cmd { ... }
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Disallow block args while reading the condition commands
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()  # refresh c_id for the loop condition

            arm = IfArm(elif_kw, cond, None, body.children, None)
            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1803
    def _ParseYshIf(self, if_kw, cond):
        # type: (Token, condition_t) -> command.If
        """Parse a YSH-style if, given the keyword and already-parsed condition.

        if test -f foo {
          # ^ we parsed up to here
          echo foo
        } elif test -f bar; test -f spam {
          echo bar
        } else {
          echo none
        }

        NOTE: If you do something like if test -n foo{, the parser keeps going,
        and the error is confusing because it doesn't point to the right place.

        I think we might need strict_brace so that foo{ is disallowed.  It has
        to be foo\{ or foo{a,b}.  Or just turn that on with parse_brace?  After
        you form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
        Lit_RBrace?  Maybe this is pre-parsing step in the WordParser?
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_node.if_kw = if_kw

        body1 = self.ParseBraceGroup()
        # Every arm has 1 spid, unlike shell-style
        # TODO: We could get the spids from the brace group.
        arm = IfArm(if_kw, cond, None, body1.children, None)

        if_node.arms.append(arm)

        self._GetWord()
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseYshElifElse(if_node)
        # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
        # spid because that's in the BraceGroup.
        return if_node
1839
1840 def _ParseElifElse(self, if_node):
1841 # type: (command.If) -> None
1842 """
1843 else_part: (Elif command_list Then command_list)* Else command_list ;
1844 """
1845 arms = if_node.arms
1846
1847 self._GetWord()
1848 while self.c_id == Id.KW_Elif:
1849 elif_kw = word_.AsKeywordToken(self.cur_word)
1850 self._SetNext() # past 'elif'
1851
1852 cond = self._ParseConditionList()
1853
1854 ate = self._Eat(Id.KW_Then)
1855 then_kw = word_.AsKeywordToken(ate)
1856
1857 body = self._ParseCommandList()
1858 arm = IfArm(elif_kw, cond, then_kw, body.children, then_kw)
1859
1860 arms.append(arm)
1861
1862 self._GetWord()
1863 if self.c_id == Id.KW_Else:
1864 else_kw = word_.AsKeywordToken(self.cur_word)
1865 self._SetNext() # past 'else'
1866 body = self._ParseCommandList()
1867 if_node.else_action = body.children
1868 else:
1869 else_kw = None
1870
1871 if_node.else_kw = else_kw
1872
    def ParseIf(self):
        # type: () -> command.If
        """
        if_clause        : If command_list Then command_list else_part? Fi ;

        open             : '{' | Then
        close            : '}' | Fi

        ysh_if           : If ( command_list | '(' expr ')' )
                           open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        # '{' instead of 'then' switches to the YSH form for the rest
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children, then_kw)
        if_node.arms.append(arm)

        # 2nd to Nth arm
        # NOTE(review): no _GetWord here; presumably _ParseCommandList leaves
        # c_id current -- confirm.
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1922
1923 def ParseTime(self):
1924 # type: () -> command_t
1925 """Time [-p] pipeline.
1926
1927 According to bash help.
1928 """
1929 time_kw = word_.AsKeywordToken(self.cur_word)
1930 self._SetNext() # skip time
1931 pipeline = self.ParsePipeline()
1932 return command.TimeBlock(time_kw, pipeline)
1933
1934 def ParseCompoundCommand(self):
1935 # type: () -> command_t
1936 """
1937 Refactoring: we put io_redirect* here instead of in function_body and
1938 command.
1939
1940 compound_command : brace_group io_redirect*
1941 | subshell io_redirect*
1942 | for_clause io_redirect*
1943 | while_clause io_redirect*
1944 | until_clause io_redirect*
1945 | if_clause io_redirect*
1946 | case_clause io_redirect*
1947
1948 # bash extensions
1949 | time_clause
1950 | [[ BoolExpr ]]
1951 | (( ArithExpr ))
1952 """
1953 self._GetWord()
1954 if self.c_id == Id.Lit_LBrace:
1955 n1 = self.ParseBraceGroup()
1956 n1.redirects = self._ParseRedirectList()
1957 return n1
1958 if self.c_id == Id.Op_LParen:
1959 n2 = self.ParseSubshell()
1960 n2.redirects = self._ParseRedirectList()
1961 return n2
1962
1963 if self.c_id == Id.KW_For:
1964 # Note: Redirects parsed in this call. POSIX for and bash for (( have
1965 # redirects, but YSH for doesn't.
1966 return self.ParseFor()
1967 if self.c_id in (Id.KW_While, Id.KW_Until):
1968 keyword = word_.AsKeywordToken(self.cur_word)
1969 n3 = self.ParseWhileUntil(keyword)
1970 n3.redirects = self._ParseRedirectList()
1971 return n3
1972
1973 if self.c_id == Id.KW_If:
1974 n4 = self.ParseIf()
1975 n4.redirects = self._ParseRedirectList()
1976 return n4
1977 if self.c_id == Id.KW_Case:
1978 n5 = self.ParseCase()
1979 n5.redirects = self._ParseRedirectList()
1980 return n5
1981
1982 if self.c_id == Id.KW_DLeftBracket:
1983 n6 = self.ParseDBracket()
1984 n6.redirects = self._ParseRedirectList()
1985 return n6
1986 if self.c_id == Id.Op_DLeftParen:
1987 if not self.parse_opts.parse_dparen():
1988 p_die('You may want a space between parens (parse_dparen)',
1989 loc.Word(self.cur_word))
1990 n7 = self.ParseDParen()
1991 n7.redirects = self._ParseRedirectList()
1992 return n7
1993
1994 # bash extensions: no redirects
1995 if self.c_id == Id.KW_Time:
1996 return self.ParseTime()
1997
1998 # Happens in function body, e.g. myfunc() oops
1999 p_die(
2000 'Unexpected word while parsing compound command (%s)' %
2001 Id_str(self.c_id), loc.Word(self.cur_word))
2002 assert False # for MyPy
2003
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a POSIX-style function definition: name() body.

        function_header : fname '(' ')'
        function_def    : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a
        grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        # Retag the ) as the end of the function header
        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            # Unreachable: p_die raises.  Keeps MyPy happy.
            return None
2056
    def ParseKshFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a ksh-style definition: the 'function' keyword, optional ().

        ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
        """
        keyword_tok = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip past 'function'
        self._GetWord()

        cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(cur_word)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid KSH-style function name', loc.Word(cur_word))

        name_word = self.cur_word
        self._SetNext()  # skip past the function name

        # The () pair after the name is optional in this style.
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
            self._SetNext()
            self._Eat(Id.Right_ShFunction)

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        with ctx_VarChecker(self.var_checker, keyword_tok):
            func.body = self.ParseCompoundCommand()

        func.keyword = keyword_tok
        func.name_tok = location.LeftTokenForWord(name_word)
        return func
2091
    def ParseYshProc(self):
        # type: () -> Proc
        """Parse a YSH 'proc' definition; looking at the proc keyword.

        All declared parameters are registered with var_checker so that
        variable names in the body can be statically validated.
        """
        node = Proc.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            with ctx_CmdMode(self, cmd_mode_e.Proc):
                self.w_parser.ParseProc(node)
                if node.sig.tag() == proc_sig_e.Closed:  # Register params
                    sig = cast(proc_sig.Closed, node.sig)

                    # Treat 3 kinds of params as variables.  Each group has
                    # ordinary params plus an optional rest (...) param.
                    wp = sig.word
                    if wp:
                        for param in wp.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if wp.rest_of:
                            r = wp.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)
                        # We COULD register __out here but it would require a different API.
                        #if param.prefix and param.prefix.id == Id.Arith_Colon:
                        #    self.var_checker.Check(Id.KW_Var, '__' + param.name)

                    posit = sig.positional
                    if posit:
                        for param in posit.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if posit.rest_of:
                            r = posit.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    named = sig.named
                    if named:
                        for param in named.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if named.rest_of:
                            r = named.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    if sig.block_param:
                        b = sig.block_param
                        self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)

                self._SetNext()
                node.body = self.ParseBraceGroup()
                # No redirects for YSH procs (only at call site)

        return node
2148
    def ParseYshFunc(self):
        # type: () -> Func
        """
        ysh_func: (
            Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
        )
        Looking at KW_Func
        """
        node = Func.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            self.w_parser.ParseFunc(node)

            # Register positional and named params (and their rest params)
            # with the static var checker, as ParseYshProc does.
            posit = node.positional
            if posit:
                for param in posit.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if posit.rest_of:
                    r = posit.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            named = node.named
            if named:
                for param in named.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if named.rest_of:
                    r = named.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            self._SetNext()
            with ctx_CmdMode(self, cmd_mode_e.Func):
                node.body = self.ParseBraceGroup()

        return node
2188
    def ParseCoproc(self):
        # type: () -> command_t
        """Parse the 'coproc' construct — not implemented yet.

        TODO: command.Coproc?
        """
        raise NotImplementedError()
2195
2196 def ParseSubshell(self):
2197 # type: () -> command.Subshell
2198 """
2199 subshell : '(' compound_list ')'
2200
2201 Looking at Op_LParen
2202 """
2203 left = word_.AsOperatorToken(self.cur_word)
2204 self._SetNext() # skip past (
2205
2206 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2207 # translation stack, we want to delay it.
2208
2209 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2210
2211 c_list = self._ParseCommandList()
2212 if len(c_list.children) == 1:
2213 child = c_list.children[0]
2214 else:
2215 child = c_list
2216
2217 ate = self._Eat(Id.Right_Subshell)
2218 right = word_.AsOperatorToken(ate)
2219
2220 return command.Subshell(left, child, right, None) # no redirects yet
2221
2222 def ParseDBracket(self):
2223 # type: () -> command.DBracket
2224 """Pass the underlying word parser off to the boolean expression
2225 parser."""
2226 left = word_.AsKeywordToken(self.cur_word)
2227 # TODO: Test interactive. Without closing ]], you should get > prompt
2228 # (PS2)
2229
2230 self._SetNext() # skip [[
2231 b_parser = bool_parse.BoolParser(self.w_parser)
2232 bnode, right = b_parser.Parse() # May raise
2233 return command.DBracket(left, bnode, right, None) # no redirects yet
2234
2235 def ParseDParen(self):
2236 # type: () -> command.DParen
2237 left = word_.AsOperatorToken(self.cur_word)
2238
2239 self._SetNext() # skip ((
2240 anode, right = self.w_parser.ReadDParen()
2241 assert anode is not None
2242
2243 return command.DParen(left, anode, right, None) # no redirects yet
2244
    def ParseCommand(self):
        # type: () -> command_t
        """
        command : simple_command
                | compound_command   # OSH edit: io_redirect* folded in
                | function_def
                | ksh_function_def

                # YSH extensions
                | proc NAME ...
                | const ...
                | var ...
                | setglobal ...
                | setref ...
                | setvar ...
                | _ EXPR
                | = EXPR
                ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1
        """
        # do/done/then etc. never BEGIN a command
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle.  Code
            # inside procs should be YSH, full stop.  That means ysh:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                # 2024-02: This avoids bad syntax errors if you type YSH code
                # into OSH
                # proc p (x) { echo hi } would actually be parsed as a
                # command.Simple!  Shell compatibility: quote 'proc'
                p_die("proc is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func():
                return self.ParseYshFunc()
            else:
                # Same reasoning as above, for 'proc'
                p_die("func is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
            p_die("const can't be inside proc or func. Use var instead.",
                  loc.Word(self.cur_word))

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Register each declared name with the static var checker
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name, lhs.left)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.KW_Call, Id.Lit_Equals):
            # = 42 + a[i]
            # call mylist->append('x')

            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            # ensured by Kind.Word
            cur_word = cast(CompoundWord, self.cur_word)

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            # 'f()' not preceded by 'x=' is a shell function definition
            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    if (match.IsValidVarName(lexer.LazyStr(tok)) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        # Only allowed when the innermost Hay block is in
                        # attribute mode
                        if len(self.hay_attrs_stack
                              ) and self.hay_attrs_stack[-1]:
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here.  Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(
                                None,
                                [NameType(tok, lexer.TokenVal(tok), None)],
                                enode)
                        else:
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy
2403
2404 def ParsePipeline(self):
2405 # type: () -> command_t
2406 """
2407 pipeline : Bang? command ( '|' newline_ok command )* ;
2408 """
2409 negated = None # type: Optional[Token]
2410
2411 self._GetWord()
2412 if self.c_id == Id.KW_Bang:
2413 negated = word_.AsKeywordToken(self.cur_word)
2414 self._SetNext()
2415
2416 child = self.ParseCommand()
2417 assert child is not None
2418
2419 children = [child]
2420
2421 self._GetWord()
2422 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2423 if negated is not None:
2424 node = command.Pipeline(negated, children, [])
2425 return node
2426 else:
2427 return child # no pipeline
2428
2429 # | or |&
2430 ops = [] # type: List[Token]
2431 while True:
2432 op = word_.AsOperatorToken(self.cur_word)
2433 ops.append(op)
2434
2435 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2436 self._NewlineOk()
2437
2438 child = self.ParseCommand()
2439 children.append(child)
2440
2441 self._GetWord()
2442 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2443 break
2444
2445 return command.Pipeline(negated, children, ops)
2446
2447 def ParseAndOr(self):
2448 # type: () -> command_t
2449 self._GetWord()
2450 if self.c_id == Id.Word_Compound:
2451 first_word_tok = word_.LiteralToken(self.cur_word)
2452 if first_word_tok is not None and first_word_tok.id == Id.Lit_TDot:
2453 # We got '...', so parse in multiline mode
2454 self._SetNext()
2455 with word_.ctx_Multiline(self.w_parser):
2456 return self._ParseAndOr()
2457
2458 # Parse in normal mode, not multiline
2459 return self._ParseAndOr()
2460
2461 def _ParseAndOr(self):
2462 # type: () -> command_t
2463 """
2464 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2465 | pipeline
2466
2467 Note that it is left recursive and left associative. We parse it
2468 iteratively with a token of lookahead.
2469 """
2470 child = self.ParsePipeline()
2471 assert child is not None
2472
2473 self._GetWord()
2474 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2475 return child
2476
2477 ops = [] # type: List[Token]
2478 children = [child]
2479
2480 while True:
2481 ops.append(word_.AsOperatorToken(self.cur_word))
2482
2483 self._SetNext() # skip past || &&
2484 self._NewlineOk()
2485
2486 child = self.ParsePipeline()
2487 children.append(child)
2488
2489 self._GetWord()
2490 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2491 break
2492
2493 return command.AndOr(children, ops)
2494
2495 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2496
2497 # At the top level, we execute after every line, e.g. to
2498 # - process alias (a form of dynamic parsing)
2499 # - process 'exit', because invalid syntax might appear after it
2500
2501 # On the other hand, for a while loop body, we parse the whole thing at once,
2502 # and then execute it. We don't want to parse it over and over again!
2503
2504 # COMPARE
2505 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2506 # command_term : and_or (trailer and_or)* ; # CHILDREN
2507
    def _ParseCommandLine(self):
        # type: () -> command_t
        """
        command_line : and_or (sync_op and_or)* trailer? ;
        trailer      : sync_op newline_ok
                     | NEWLINES;
        sync_op      : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively.  Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return.  (We're only parsing a single
              line.)
           b. If there's a sync_op, process it.  Then look for a newline and
              return.  Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap the command with its terminator, e.g. 'sleep 1 &'
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                # After a sync op, a newline/EOF ends the line; anything else
                # starts another and_or
                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die(
                    'Invalid word while parsing command line (%s)' %
                    Id_str(self.c_id), loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
2563
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term : and_or (trailer and_or)* ;
        trailer      : sync_op newline_ok
                     | NEWLINES;
        sync_op      : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [
            self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi,
            Id.Op_SemiAmp, Id.Op_DSemiAmp
        ]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            # The END_LIST checks below are at different nesting levels: after
            # a newline, after ';' or '&' followed by a newline, after ';' or
            # '&' alone, and directly after the command.
            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2646
    def _ParseCommandList(self):
        # type: () -> command.CommandList
        """
        command_list : newline_ok command_term trailer? ;

        This one is called by all the compound commands.  It's basically a command
        block.

        NOTE: Rather than translating the CFG directly, the code follows a style
        more like this: (and_or trailer)+.  It makes capture easier.
        """
        self._NewlineOk()
        return self._ParseCommandTerm()
2661
    def ParseLogicalLine(self):
        # type: () -> Optional[command_t]
        """Parse a single line for main_loop.

        A wrapper around _ParseCommandLine().  Similar but not identical to
        _ParseCommandList() and ParseCommandSub().

        Returns:
          The parsed command, or None at Eof_Real (the main loop then checks
          for pending here docs).

        Raises:
          ParseError
        """
        self._NewlineOk()
        self._GetWord()
        if self.c_id == Id.Eof_Real:
            return None  # main loop checks for here docs
        node = self._ParseCommandLine()
        return node
2678
2679 def ParseInteractiveLine(self):
2680 # type: () -> parse_result_t
2681 """Parse a single line for Interactive main_loop.
2682
2683 Different from ParseLogicalLine because newlines are handled differently.
2684
2685 Raises:
2686 ParseError
2687 """
2688 self._GetWord()
2689 if self.c_id == Id.Op_Newline:
2690 return parse_result.EmptyLine
2691 if self.c_id == Id.Eof_Real:
2692 return parse_result.Eof
2693
2694 node = self._ParseCommandLine()
2695 return parse_result.Node(node)
2696
2697 def ParseCommandSub(self):
2698 # type: () -> command_t
2699 """Parse $(echo hi) and `echo hi` for word_parse.py.
2700
2701 They can have multiple lines, like this: echo $( echo one echo
2702 two )
2703 """
2704 self._NewlineOk()
2705
2706 self._GetWord()
2707 if self.c_kind == Kind.Eof: # e.g. $()
2708 return command.NoOp
2709
2710 c_list = self._ParseCommandTerm()
2711 if len(c_list.children) == 1:
2712 return c_list.children[0]
2713 else:
2714 return c_list
2715
2716 def CheckForPendingHereDocs(self):
2717 # type: () -> None
2718 # NOTE: This happens when there is no newline at the end of a file, like
2719 # osh -c 'cat <<EOF'
2720 if len(self.pending_here_docs):
2721 node = self.pending_here_docs[0] # Just show the first one?
2722 h = cast(redir_param.HereDoc, node.arg)
2723 p_die('Unterminated here doc began here', loc.Word(h.here_begin))