OILS / osh / cmd_parse.py View on Github | oilshell.org

2773 lines, 1419 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
_ = Kind_str  # silence "unused import"; kept for debug prints

# Byte values used by _ScanSimpleCommand to check the character before '('.
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the raw lines of a here doc body, up to its terminator.

    Returns (here_lines, last_line); each entry is (line, start_offset), where
    start_offset is nonzero only for <<- (it skips the stripped leading tabs).
    Raises a fatal parse error (p_die) if EOF is hit before the terminator.

    NOTE: We read all lines at once, instead of parsing line-by-line,
    because of cases like this:
    cat <<EOF
    1 $(echo 2
    echo 3) 4
    EOF
    """
    here_lines = []  # type: List[Tuple[SourceLine, int]]
    last_line = None  # type: Tuple[SourceLine, int]
    # <<- strips leading tabs; plain << does not
    strip_leading_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # An unterminated here doc is just a warning in bash. We make it
            # fatal because we want to be strict, and because it causes problems
            # reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # If op is <<-, strip off ALL leading tabs -- not spaces, and not just
        # the first tab.
        start_offset = 0
        if strip_leading_tabs:
            n = len(line)
            i = 0  # used after loop exit
            while i < n:
                if line[i] != '\t':
                    break
                i += 1
            start_offset = i

        # Terminator line: must equal the delimiter after tab stripping and
        # removal of trailing whitespace.
        if line[start_offset:].rstrip() == delimiter:
            last_line = (src_line, start_offset)
            break

        here_lines.append((src_line, start_offset))

    return here_lines, last_line
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Make one literal Id.Lit_Chars token per here doc line.

    Used for the single-quoted rule: <<'EOF' and <<-'EOF'.

    <<- produces a non-zero start_offset (stripped tabs).
    """
    # Declared with the less precise element type word_part_t, because
    # List[T] is an invariant type.
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:

        # Maintain the lossless invariant for STRIPPED tabs: add a token to
        # the arena, but never refer to it.
        #
        # Note: Lit_CharsWithoutPrefix could also be used for 'single quoted'
        # EOF here docs, but it's more complex with double quoted EOF docs.
        if do_lossless:  # avoid garbage, doesn't affect correctness
            arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0,
                           src_line)

        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line)
        parts.append(tok)

    return parts
169
170
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Reads the body lines, then sets h.stdin_parts and h.here_end_tok on the
    redirect's HereDoc param.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # POSIX: "If any character in word is quoted, the delimiter shall be
    # formed by performing quote removal on word, and the here-document lines
    # shall not be expanded. Otherwise, the delimiter shall be the word
    # itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - Literal for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - Parse as word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain lossless invariant for STRIPPED tabs: add a Token to the
    # arena invariant, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0, end_line)

    # Create a Token with the end terminator. Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line)
207
208
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (from DetectShAssignment).

    Handles three LHS shapes: plain s=1, lossless a[x]=1 (index kept as an
    unparsed string), and a[x++]=1 (index re-parsed as arithmetic).
    """

    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            var_name = lexer.TokenSliceRight(left_token, -2)  # chop '+='
            op = assign_op_e.PlusEqual
        else:
            var_name = lexer.TokenSliceRight(left_token, -1)  # chop '='
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        # In lossless mode, keep the index as an UNPARSED string so the
        # original source can be reproduced exactly.
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # Extract the text between '[' and ']' directly from the line.
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        w = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(w)
        rhs = w

    return AssignPair(left_token, lhs, op, rhs)
282
283
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (List[ParsedAssignment], List[EnvPair]) -> None
    """Convert prefix assignments into EnvPair nodes on a SimpleCommand.

    Args:
      preparsed_list: parsed assignments from DetectShAssignment
      more_env: output list that EnvPair instances are appended to
    """
    for p in preparsed_list:
        left_tok = p.left

        # a[x]=1 is not a valid environment binding
        if left_tok.id != Id.Lit_VarLike:
            p_die(
                "Environment binding shouldn't look like an array assignment",
                left_tok)

        # FOO+=bar is not a valid environment binding either
        if lexer.IsPlusEquals(left_tok):
            p_die('Expected = in environment binding, got +=', left_tok)

        var_name = lexer.TokenSliceRight(left_tok, -1)  # chop '='

        parts = p.w.parts
        offset = p.part_offset
        if offset == len(parts):
            rhs = rhs_word.Empty  # type: rhs_word_t
        else:
            w = CompoundWord(parts[offset:])
            word_.TildeDetectAssign(w)
            rhs = w

        more_env.append(EnvPair(left_tok, var_name, rhs))
316
317
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
    """Second pass of SimpleCommand parsing: look for assignment words.

    Splits words into a prefix of assignments (FOO=bar) and the remaining
    command words.  The first word that isn't an assignment ends the prefix.
    """
    assignments = []  # type: List[ParsedAssignment]
    suffix_words = []  # type: List[CompoundWord]

    in_suffix = False
    for w in words:
        if not in_suffix:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                assignments.append(
                    ParsedAssignment(left_token, close_token, part_offset, w))
                continue
            in_suffix = True  # first non-assignment word

        suffix_words.append(w)

    return assignments, suffix_words
339
340
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Assemble a command.Simple node from its parsed pieces."""

    # FOO=(1 2 3) ls is not allowed.
    for preparsed in preparsed_list:
        if word_.HasArrayPart(preparsed.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(preparsed.w))

    # NOTE: A check that commands can't contain array literals, e.g.
    # echo FOO=(1 2 3), used to live here (but we should NOT fail on
    # echo FOO[x]=1).  It could be added back, but it already happens at
    # runtime in EvalWordSequence2.

    assert len(suffix_words) != 0
    # Blame the first part of the first word:
    #   {a,b,c}   # Use { before brace detection
    #   ~/bin/ls  # Use ~ before tilde detection
    blame_tok = location.LeftTokenForWordPart(suffix_words[0].parts[0])

    # NOTE: We only do brace DETECTION here, not brace EXPANSION.  Therefore
    # we can't implement bash's behavior of having say {~bob,~jane}/src work,
    # because we only have a BracedTree.  This is documented in
    # spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate',
    # but it doesn't seem worth it.
    brace_detected = braces.BraceDetectAll(suffix_words)
    final_words = word_.TildeDetectAll(brace_detected)

    env_pairs = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, env_pairs)

    # do_fork by default
    return command.Simple(blame_tok, env_pairs, final_words, redirects,
                          typed_args, block, True)
386
387
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # self.tokens for location info: 'proc' or another token
        self.tokens = []  # type: List[Token]
        # One dict of declared names per function/proc/func scope
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when we enter a shell function, proc, or func.

        Bash allows this, but it's confusing because it's the same as two
        functions at the top level.

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens) != 0:
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        new_scope = {}  # type: Dict[str, Id_t]
        self.names.append(new_scope)

    def Pop(self):
        # type: () -> None
        """Leave a function/proc/func scope, discarding its declared names."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x
          x already declared
        setvar x:
          x is not declared
        setglobal x:
          No errors are possible; we would need all these many conditions to
          statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static

        What about bare assignment in Hay?  I think these are dynamic checks
        -- there is no static check.  Hay is for building up data
        imperatively, and then LATER, right before main(), it can be type
        checked.

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var and
        # setvar are essentially the same.
        if len(self.names) == 0:
            return

        scope = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in scope:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                scope[var_name] = keyword_id

        elif keyword_id == Id.KW_SetVar:
            if var_name not in scope:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
476
477
class ctx_VarChecker(object):
    """Context manager: push a scope onto var_checker for the duration."""

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        self.var_checker = var_checker
        var_checker.Push(blame_tok)

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Pop the scope that __init__ pushed.
        self.var_checker.Pop()
492
493
class ctx_CmdMode(object):
    """Context manager: temporarily switch the parser's cmd_mode."""

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode  # restored on exit
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Restore the mode saved in __init__.
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
509
510
# Keywords that can only appear INSIDE a compound command (do/done, then/fi,
# etc.).  Used by _AtSecondaryKeyword() to stop parsing a command list
# without consuming the keyword.
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
515
516
517class CommandParser(object):
518 """Recursive descent parser derived from POSIX shell grammar.
519
520 This is a BNF grammar:
521 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
522
523 - Augmented with both bash/OSH and YSH constructs.
524
525 - We use regex-like iteration rather than recursive references
526 ? means optional (0 or 1)
527 * means 0 or more
528 + means 1 or more
529
530 - Keywords are spelled in Caps:
531 If Elif Case
532
533 - Operator tokens are quoted:
534 '(' '|'
535
536 or can be spelled directly if it matters:
537
538 Op_LParen Op_Pipe
539
540 - Non-terminals are snake_case:
541 brace_group subshell
542
543 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
544 the production should be in the method docstrings, e.g.
545
546 def ParseSubshell():
547 "
548 subshell : '(' compound_list ')'
549
550 Looking at Op_LParen # Comment to say how this method is called
551 "
552
553 The grammar may be factored to make parsing easier.
554 """
555
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parse-time state, including aliases to expand
          parse_opts: option view that changes what syntax is accepted
          w_parser: the word parser this command parser drives
          lexer: for pushing hints, and lookahead to (
          line_reader: for reading here doc bodies
          eof_id: the token Id that ends parsing (default Id.Eof_Real)
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x=42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser instances.
        # I think this is OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
597
598 # Init_() function for "keyword arg"
599 def Init_AliasesInFlight(self, aliases_in_flight):
600 # type: (AliasesInFlight) -> None
601 self.aliases_in_flight = aliases_in_flight
602
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined
        self.c_id = Id.Undefined_Tok

        # Here doc redirects whose bodies haven't been read yet; drained by
        # _GetWord() at the next newline.
        self.pending_here_docs = []  # type: List[Redir]
616
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.
        """
        # Reset the word parser, lexer, and line reader in turn.
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
626
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        self.next_lex_mode = lex_mode_e.ShCommand
635
    def _SetNextBrack(self):
        # type: () -> None
        """Like _SetNext(), but read the next word in ShCommandFakeBrack mode.

        NOTE(review): presumably this mode lets '[' start a lazy arg list
        (see the Op_LBracket handling in _ScanSimpleCommand) -- confirm
        against the lexer definition.
        """
        self.next_lex_mode = lex_mode_e.ShCommandFakeBrack
639
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # Drain pending here docs: fill in the body of each one
                    # that was opened on this line.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            # Has special case for Id.Lit_{LBrace,RBrace,Equals}
            self.c_id = word_.CommandId(self.cur_word)
            # Mark the cursor as fulfilled until the next _SetNext().
            self.next_lex_mode = lex_mode_e.Undefined
669
670 def _Eat(self, c_id, msg=None):
671 # type: (Id_t, Optional[str]) -> word_t
672 """Consume a word of a type, maybe showing a custom error message.
673
674 Args:
675 c_id: the Id we expected
676 msg: improved error message
677 """
678 self._GetWord()
679 if self.c_id != c_id:
680 if msg is None:
681 msg = 'Expected word type %s, got %s' % (
682 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
683 p_die(msg, loc.Word(self.cur_word))
684
685 skipped = self.cur_word
686 self._SetNext()
687 return skipped
688
    def _NewlineOk(self):
        # type: () -> None
        """Check for optional newline and consume it."""
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
695
696 def _AtSecondaryKeyword(self):
697 # type: () -> bool
698 self._GetWord()
699 if self.c_id in SECONDARY_KEYWORDS:
700 return True
701 return False
702
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse one redirect: operator, fd/var target, and argument word.

        Precondition: the cursor is at a Kind.Redir token.
        Here doc redirects are returned with an empty body and queued on
        self.pending_here_docs to be filled at the next newline.
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
        # possibly "unify" the IDs by subtracting a constant like 8 or 16?

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':  # {fd}>out -- named descriptor
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():  # 3>out or 99>out -- explicit fd number
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:  # >out -- the operator's default descriptor
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
765
766 def _ParseRedirectList(self):
767 # type: () -> List[Redir]
768 """Try parsing any redirects at the cursor.
769
770 This is used for blocks only, not commands.
771 """
772 redirects = [] # type: List[Redir]
773 while True:
774 # This prediction needs to ONLY accept redirect operators. Should we
775 # make them a separate Kind?
776 self._GetWord()
777 if self.c_kind != Kind.Redir:
778 break
779
780 node = self.ParseRedirect()
781 redirects.append(node)
782 self._SetNext()
783
784 return redirects
785
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

        simple_command   : cmd_prefix cmd_word cmd_suffix
                         | cmd_prefix cmd_word
                         | cmd_prefix
                         | cmd_name cmd_suffix
                         | cmd_name
                         ;
        cmd_name         : WORD                   /* Apply rule 7a */
                         ;
        cmd_word         : WORD                   /* Apply rule 7b */
                         ;
        cmd_prefix       :            io_redirect
                         | cmd_prefix io_redirect
                         |            ASSIGNMENT_WORD
                         | cmd_prefix ASSIGNMENT_WORD
                         ;
        cmd_suffix       :            io_redirect
                         | cmd_suffix io_redirect
                         |            WORD
                         | cmd_suffix WORD

        YSH grammar:

        redirect = redir_op WORD
        item = WORD | redirect

        typed_args =
          '(' arglist ')'
        | '[' arglist ']'

        simple_command =
          cmd_prefix* item+ typed_args? BraceGroup? cmd_suffix*

        Notably, redirects shouldn't appear after typed args, or after
        BraceGroup.

        Examples:

        This is an assignment:
           foo=1 >out

        This is a command.Simple
           >out

        What about
           >out (42)
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()

            # If we got { }, change it to something that's not Kind.Word
            kind2 = self.c_kind
            if (kind2 == Kind.Word and self.parse_opts.parse_brace() and
                    self.c_id in (Id.Lit_LBrace, Id.Lit_RBrace)):
                kind2 = Kind.Op

            if kind2 == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif kind2 == Kind.Word:
                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    #
                    # Can we remove this StaticEval() call, and just look
                    # inside Token?  I think once we get rid of SHELL nodes,
                    # this will be simpler.

                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or
                    # FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            else:
                break

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1

        # my-cmd (x) or my-cmd [x]
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            # 1. Check that there's a preceding space
            prev_byte = self.lexer.ByteLookBack()
            if prev_byte not in (SPACE_CH, TAB_CH):
                if self.parse_opts.parse_at():
                    p_die('Space required before (',
                          loc.Word(self.cur_word))
                else:
                    # inline func call like @sorted(x) is invalid in OSH, but
                    # the solution isn't a space
                    p_die(
                        'Unexpected left paren (might need a space before it)',
                        loc.Word(self.cur_word))

            # 2. Check that it's not ().  We disallow this because it's a
            # no-op and there could be confusion with shell func defs.
            # For some reason we need to call lexer.LookPastSpace, not
            # w_parser.LookPastSpace.  I think this is because we're at (,
            # which is an operator token.  All the other cases are like 'x=',
            # which is PART of a word, and we don't know if it will end.
            next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
            if next_id == Id.Op_RParen:
                p_die('Empty arg list not allowed',
                      loc.Word(self.cur_word))

            typed_args = self.w_parser.ParseProcCallArgs(
                grammar_nt.ysh_eager_arglist)

            self._SetNext()

        elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
            typed_args = self.w_parser.ParseProcCallArgs(
                grammar_nt.ysh_lazy_arglist)

            self._SetNext()

        self._GetWord()

        # Allow redirects after typed args, e.g.
        #   json write (x) > out.txt
        if self.c_kind == Kind.Redir:
            redirects.extend(self._ParseRedirectList())

        # my-cmd { echo hi }   my-cmd (x) { echo hi }   ...
        if (self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace and
                # Disabled for if/while condition, etc.
                self.allow_block):

            # allow x = 42
            self.hay_attrs_stack.append(first_word_caps)
            brace_group = self.ParseBraceGroup()

            # So we can get the source code back later
            lines = self.arena.SaveLinesAndDiscard(brace_group.left,
                                                   brace_group.right)
            block = LiteralBlock(brace_group, lines)

            self.hay_attrs_stack.pop()

        self._GetWord()

        # Allow redirects after block, e.g.
        #   cd /tmp { echo $PWD } > out.txt
        if self.c_kind == Kind.Redir:
            redirects.extend(self._ParseRedirectList())

        return redirects, words, typed_args, block
963
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node, or None.

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out
        a better places.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.

        Returns:
          A command node if any aliases were expanded, or None otherwise.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only statically-evaluable, unquoted words can be alias names.
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want
            # to prevent expansion of the second word in 'echo echo', so we
            # add 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own
        # Arena.  This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error; no
                    # more handling is needed here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1102
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar

        io_file        : '<'       filename
                       | LESSAND   filename
                         ...

        io_here        : DLESS     here_end
                       | DLESSDASH here_end

        redirect       : IO_NUMBER (io_redirect | io_here)

        prefix_part    : ASSIGNMENT_WORD | redirect
        cmd_part       : WORD | redirect

        assign_kw      : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command   : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects.  Append them to separate lists.
        2) Look for the first non-assignment word.  If it's declare, etc., then
        keep parsing words AND assign words.  Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have the of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array literal
        in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted.  e.g.
        <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # Pick a blame location for "typed args not allowed here" errors below.
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g. >out.txt # redirect without words
            # _ScanSimpleCommand only returns when it saw something, so there
            # must be at least one redirect here.
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading FOO=bar bindings from the rest of the words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            #   and inside Hay Attr blocks
            # But allow X=Y at the top level
            #   for interactive use foo=bar
            #   for global constants GLOBAL=~/src
            #     because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion.  We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        # Is the first word a control-flow keyword like break/continue/return?
        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x    - inside procs and shell functions
                # return (x)  - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors.  (ShAssignment may not need it.)
            # Control flow takes at most one argument, e.g. 'break 2'.
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
1281
    def ParseBraceGroup(self):
        # type: () -> BraceGroup
        """
        Original:
          brace_group : LBrace command_list RBrace ;

        YSH:
          brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;

        The doc comment can only occur if there's a newline.
        """
        ate = self._Eat(Id.Lit_LBrace)
        left = word_.BraceToken(ate)

        doc_word = None  # type: word_t
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
            # Set a flag so we don't skip over ###
            # (the lexer normally discards comments; this context keeps a
            # ### doc comment as an Id.Ignored_Comment token)
            with word_.ctx_EmitDocToken(self.w_parser):
                self._GetWord()

        if self.c_id == Id.Ignored_Comment:
            doc_word = self.cur_word
            self._SetNext()

        # Id.Ignored_Comment means it's a Token, or None
        doc_token = cast(Token, doc_word)

        c_list = self._ParseCommandList()

        ate = self._Eat(Id.Lit_RBrace)
        right = word_.BraceToken(ate)

        # Note(andychu): Related ASDL bug #1216.  Choosing the Python [] behavior
        # would allow us to revert this back to None, which was changed in
        # https://github.com/oilshell/oil/pull/1211.  Choosing the C++ nullptr
        # behavior saves allocations, but is less type safe.
        return BraceGroup(left, doc_token, c_list.children, [],
                          right)  # no redirects yet
1322
1323 def ParseDoGroup(self):
1324 # type: () -> command.DoGroup
1325 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1326
1327 do_group : Do command_list Done ; /* Apply rule 6 */
1328 """
1329 ate = self._Eat(Id.KW_Do)
1330 do_kw = word_.AsKeywordToken(ate)
1331
1332 c_list = self._ParseCommandList() # could be anything
1333
1334 ate = self._Eat(Id.KW_Done)
1335 done_kw = word_.AsKeywordToken(ate)
1336
1337 return command.DoGroup(do_kw, c_list.children, done_kw)
1338
    def ParseForWords(self):
        # type: () -> Tuple[List[CompoundWord], Optional[Token]]
        """Collect the iterable words of 'for x in a b c; ...'.

        for_words        : WORD* for_sep
                         ;
        for_sep          : ';' newline_ok
                         | NEWLINES
                         ;

        Returns the words plus the token of any trailing semicolon (so the
        caller can remove/blame it).
        """
        words = []  # type: List[CompoundWord]
        # The token of any semi-colon, so we can remove it.
        semi_tok = None  # type: Optional[Token]

        while True:
            self._GetWord()
            if self.c_id == Id.Op_Semi:
                tok = cast(Token, self.cur_word)
                semi_tok = tok
                self._SetNext()
                self._NewlineOk()
                break
            elif self.c_id == Id.Op_Newline:
                self._SetNext()
                break
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                # YSH style: 'for x in a b {' -- '{' ends the word list
                break

            if self.cur_word.tag() != word_e.Compound:
                # TODO: Can we also show a pointer to the 'for' keyword?
                p_die('Invalid word in for loop', loc.Word(self.cur_word))

            w2 = cast(CompoundWord, self.cur_word)
            words.append(w2)
            self._SetNext()
        return words, semi_tok
1374
    def _ParseForExprLoop(self, for_kw):
        # type: (Token) -> command.ForExpr
        """Parse the C-style for (( ... )) loop body after 'for' was consumed.

        Shell:
          for '((' init ';' cond ';' update '))' for_sep? do_group

        YSH:
          for '((' init ';' cond ';' update '))' for_sep? brace_group
        """
        node = self.w_parser.ReadForExpression()
        node.keyword = for_kw

        self._SetNext()

        # The separator between '))' and the body is optional.
        self._GetWord()
        if self.c_id == Id.Op_Semi:
            self._SetNext()
            self._NewlineOk()
        elif self.c_id == Id.Op_Newline:
            self._SetNext()
        elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
            pass
        elif self.c_id == Id.Lit_LBrace:  # does NOT require parse_brace
            pass
        else:
            p_die('Invalid word after for expression', loc.Word(self.cur_word))

        if self.c_id == Id.Lit_LBrace:
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()
        return node
1407
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse 'for x [, y [, z]] [in ...]' plus the loop body.

        Handles POSIX 'for x in words', implicit 'for x' (over "$@"), and the
        YSH forms 'for x in (expr) { ... }' and multiple loop variables.
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    # Drop the trailing comma token from the word in place.
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'.  But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y)    # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH: iterate over an expression, body must be a brace group
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # POSIX: iterate over a word list
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1511
1512 def ParseFor(self):
1513 # type: () -> command_t
1514 """
1515 TODO: Update the grammar
1516
1517 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1518 | For '((' ... TODO
1519 """
1520 ate = self._Eat(Id.KW_For)
1521 for_kw = word_.AsKeywordToken(ate)
1522
1523 self._GetWord()
1524 if self.c_id == Id.Op_DLeftParen:
1525 if not self.parse_opts.parse_dparen():
1526 p_die("Bash for loops aren't allowed (parse_dparen)",
1527 loc.Word(self.cur_word))
1528
1529 # for (( i = 0; i < 10; i++)
1530 n1 = self._ParseForExprLoop(for_kw)
1531 n1.redirects = self._ParseRedirectList()
1532 return n1
1533 else:
1534 # for x in a b; do echo hi; done
1535 n2 = self._ParseForEachLoop(for_kw)
1536 n2.redirects = self._ParseRedirectList()
1537 return n2
1538
1539 def _ParseConditionList(self):
1540 # type: () -> condition_t
1541 """
1542 condition_list: command_list
1543
1544 This is a helper to parse a condition list for if commands and while/until
1545 loops. It will throw a parse error if there are no conditions in the list.
1546 """
1547 self.allow_block = False
1548 commands = self._ParseCommandList()
1549 self.allow_block = True
1550
1551 if len(commands.children) == 0:
1552 p_die("Expected a condition", loc.Word(self.cur_word))
1553
1554 return condition.Shell(commands.children)
1555
    def ParseWhileUntil(self, keyword):
        # type: (Token) -> command.WhileUntil
        """Parse a while or until loop; 'keyword' is the already-read keyword.

        while_clause     : While command_list do_group ;
        until_clause     : Until command_list do_group ;
        """
        self._SetNext()  # skip keyword

        # YSH: while (expr) { ... }
        if (self.parse_opts.parse_paren() and
                self.w_parser.LookPastSpace() == Id.Op_LParen):
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            cond = self._ParseConditionList()

        # NOTE: The LSTs will be different for OSH and YSH, but the execution
        # should be unchanged.  To be sure we should desugar.
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            # while test -f foo {
            body_node = self.ParseBraceGroup()  # type: command_t
        else:
            body_node = self.ParseDoGroup()

        # no redirects yet
        return command.WhileUntil(keyword, cond, body_node, None)
1582
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a POSIX case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # Re-interpret the ')' that closes the pattern list as Right_CasePat.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # One or more patterns separated by |
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The action is optional: an arm may be empty before ;; or esac.
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp,
                             Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp):
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1640
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case statement.

        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`.  We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        # 'discriminant' is the Id of the first token, computed by the caller.
        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words: one or more words separated by |
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # Blame the first pattern word for the whole arm.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1698
    def ParseYshCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse the YSH form: case (expr) { arms }.

        ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;

        Looking at: token after 'case'
        """
        enode = self.w_parser.ParseYshExprForCommand()
        to_match = case_arg.YshExpr(enode)

        ate = self._Eat(Id.Lit_LBrace)
        arms_start = word_.BraceToken(ate)

        # NewlineOkForYshCase also peeks at the next token's Id so we know how
        # to parse the next arm (word pattern vs. expression/eggex).
        discriminant = self.w_parser.NewlineOkForYshCase()

        # Note: for now, zero arms are accepted, just like POSIX case $x in esac
        arms = []  # type: List[CaseArm]
        while discriminant != Id.Op_RBrace:
            arm = self.ParseYshCaseArm(discriminant)
            arms.append(arm)

            discriminant = self.w_parser.NewlineOkForYshCase()

        # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr.  So the '}'
        # token is read as an Id.Op_RBrace, but we need to store this as a
        # Id.Lit_RBrace.
        ate = self._Eat(Id.Op_RBrace)
        arms_end = word_.AsOperatorToken(ate)
        arms_end.id = Id.Lit_RBrace

        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1731
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse the POSIX form: case WORD in arms esac.

        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
        """
        self._GetWord()
        w = self.cur_word
        if not self.parse_opts.parse_bare_word():
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string.  You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1783
1784 def ParseCase(self):
1785 # type: () -> command.Case
1786 """
1787 case_clause : old_case # from POSIX
1788 | ysh_case
1789 ;
1790
1791 Looking at 'Case'
1792 """
1793 case_kw = word_.AsKeywordToken(self.cur_word)
1794 self._SetNext() # past 'case'
1795
1796 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1797 return self.ParseYshCase(case_kw)
1798 else:
1799 return self.ParseOldCase(case_kw)
1800
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse YSH-style elif/else clauses and append them to if_node.

        if test -f foo {
        } elif test -f bar; test -f spam {
          # ^ we parsed up to here
          echo bar
        } else {
          echo none
        }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()

            arm = IfArm(elif_kw, cond, None, body.children, None)
            arms.append(arm)

        # NOTE(review): _GetWord() was already called at the end of the loop
        # body above; this second call looks redundant -- confirm before removing.
        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1834
    def _ParseYshIf(self, if_kw, cond):
        # type: (Token, condition_t) -> command.If
        """Parse the rest of a YSH if, given the keyword and parsed condition.

        if test -f foo {
                       # ^ we parsed up to here
          echo foo
        } elif test -f bar; test -f spam {
          echo bar
        } else {
          echo none
        }
        NOTE: If you do something like if test -n foo{, the parser keeps going, and
        the error is confusing because it doesn't point to the right place.

        I think we might need strict_brace so that foo{ is disallowed.  It has to
        be foo\{ or foo{a,b}.  Or just turn that on with parse_brace?  After you
        form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
        Lit_RBrace?  Maybe this is pre-parsing step in the WordParser?
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_node.if_kw = if_kw

        body1 = self.ParseBraceGroup()
        # Every arm has 1 spid, unlike shell-style
        # TODO: We could get the spids from the brace group.
        arm = IfArm(if_kw, cond, None, body1.children, None)

        if_node.arms.append(arm)

        self._GetWord()
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseYshElifElse(if_node)
        # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
        # spid because that's in the BraceGroup.
        return if_node
1870
    def _ParseElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse shell-style elif/else clauses and append them to if_node.

        else_part: (Elif command_list Then command_list)* Else command_list ;
        """
        arms = if_node.arms

        self._GetWord()
        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'elif'

            cond = self._ParseConditionList()

            ate = self._Eat(Id.KW_Then)
            then_kw = word_.AsKeywordToken(ate)

            body = self._ParseCommandList()
            # 'then' doubles as the blame token for the arm's end.
            arm = IfArm(elif_kw, cond, then_kw, body.children, then_kw)

            arms.append(arm)

            self._GetWord()

        if self.c_id == Id.KW_Else:
            else_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'else'
            body = self._ParseCommandList()
            if_node.else_action = body.children
        else:
            else_kw = None

        if_node.else_kw = else_kw
1903
    def ParseIf(self):
        # type: () -> command.If
        """
        if_clause        : If command_list Then command_list else_part? Fi ;

        open             : '{' | Then
        close            : '}' | Fi

        ysh_if           : If ( command_list | '(' expr ')' )
                           open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if (self.parse_opts.parse_paren() and
                self.w_parser.LookPastSpace() == Id.Op_LParen):
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        # A '{' body means the YSH form; hand off the rest of the parse.
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children, then_kw)
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1953
1954 def ParseTime(self):
1955 # type: () -> command_t
1956 """Time [-p] pipeline.
1957
1958 According to bash help.
1959 """
1960 time_kw = word_.AsKeywordToken(self.cur_word)
1961 self._SetNext() # skip time
1962 pipeline = self.ParsePipeline()
1963 return command.TimeBlock(time_kw, pipeline)
1964
    def ParseCompoundCommand(self):
        # type: () -> command_t
        """Dispatch on the current token to the right compound-command parser.

        Refactoring: we put io_redirect* here instead of in function_body and
        command.

        compound_command : brace_group io_redirect*
                         | subshell io_redirect*
                         | for_clause io_redirect*
                         | while_clause io_redirect*
                         | until_clause io_redirect*
                         | if_clause io_redirect*
                         | case_clause io_redirect*

                         # bash extensions
                         | time_clause
                         | [[ BoolExpr ]]
                         | (( ArithExpr ))
        """
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:
            n1 = self.ParseBraceGroup()
            n1.redirects = self._ParseRedirectList()
            return n1
        if self.c_id == Id.Op_LParen:
            n2 = self.ParseSubshell()
            n2.redirects = self._ParseRedirectList()
            return n2

        if self.c_id == Id.KW_For:
            # Note: Redirects parsed in this call.  POSIX for and bash for (( have
            # redirects, but YSH for doesn't.
            return self.ParseFor()
        if self.c_id in (Id.KW_While, Id.KW_Until):
            keyword = word_.AsKeywordToken(self.cur_word)
            n3 = self.ParseWhileUntil(keyword)
            n3.redirects = self._ParseRedirectList()
            return n3

        if self.c_id == Id.KW_If:
            n4 = self.ParseIf()
            n4.redirects = self._ParseRedirectList()
            return n4
        if self.c_id == Id.KW_Case:
            n5 = self.ParseCase()
            n5.redirects = self._ParseRedirectList()
            return n5

        if self.c_id == Id.KW_DLeftBracket:
            if not self.parse_opts.parse_dbracket():
                p_die('Bash [[ not allowed in YSH (parse_dbracket)',
                      loc.Word(self.cur_word))
            n6 = self.ParseDBracket()
            n6.redirects = self._ParseRedirectList()
            return n6
        if self.c_id == Id.Op_DLeftParen:
            if not self.parse_opts.parse_dparen():
                p_die(
                    'Bash (( not allowed in YSH (parse_dparen, see OILS-ERR-14 for wart)',
                    loc.Word(self.cur_word))
            n7 = self.ParseDParen()
            n7.redirects = self._ParseRedirectList()
            return n7

        # bash extensions: no redirects
        if self.c_id == Id.KW_Time:
            return self.ParseTime()

        # Happens in function body, e.g. myfunc() oops
        p_die(
            'Unexpected word while parsing compound command (%s)' %
            Id_str(self.c_id), loc.Word(self.cur_word))
        assert False  # for MyPy
2038
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """
        function_header : fname '(' ')'
        function_def     : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            # Scope the var checker to the function body.
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            return None  # unreachable; p_die raises (kept for MyPy)
2091
2092 def ParseKshFunctionDef(self):
2093 # type: () -> command.ShFunction
2094 """
2095 ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
2096 """
2097 keyword_tok = word_.AsKeywordToken(self.cur_word)
2098
2099 self._SetNext() # skip past 'function'
2100 self._GetWord()
2101
2102 cur_word = cast(CompoundWord, self.cur_word) # caller ensures validity
2103 name = word_.ShFunctionName(cur_word)
2104 if len(name) == 0: # example: foo$x is invalid
2105 p_die('Invalid KSH-style function name', loc.Word(cur_word))
2106
2107 name_word = self.cur_word
2108 self._SetNext() # skip past 'function name
2109
2110 self._GetWord()
2111 if self.c_id == Id.Op_LParen:
2112 self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
2113 self._SetNext()
2114 self._Eat(Id.Right_ShFunction)
2115
2116 self._NewlineOk()
2117
2118 func = command.ShFunction.CreateNull()
2119 func.name = name
2120 with ctx_VarChecker(self.var_checker, keyword_tok):
2121 func.body = self.ParseCompoundCommand()
2122
2123 func.keyword = keyword_tok
2124 func.name_tok = location.LeftTokenForWord(name_word)
2125 return func
2126
2127 def ParseYshProc(self):
2128 # type: () -> Proc
2129 node = Proc.CreateNull(alloc_lists=True)
2130
2131 keyword_tok = word_.AsKeywordToken(self.cur_word)
2132 node.keyword = keyword_tok
2133
2134 with ctx_VarChecker(self.var_checker, keyword_tok):
2135 with ctx_CmdMode(self, cmd_mode_e.Proc):
2136 self.w_parser.ParseProc(node)
2137 if node.sig.tag() == proc_sig_e.Closed: # Register params
2138 sig = cast(proc_sig.Closed, node.sig)
2139
2140 # Treat 3 kinds of params as variables.
2141 wp = sig.word
2142 if wp:
2143 for param in wp.params:
2144 self.var_checker.Check(Id.KW_Var, param.name,
2145 param.blame_tok)
2146 if wp.rest_of:
2147 r = wp.rest_of
2148 self.var_checker.Check(Id.KW_Var, r.name,
2149 r.blame_tok)
2150 # We COULD register __out here but it would require a different API.
2151 #if param.prefix and param.prefix.id == Id.Arith_Colon:
2152 # self.var_checker.Check(Id.KW_Var, '__' + param.name)
2153
2154 posit = sig.positional
2155 if posit:
2156 for param in posit.params:
2157 self.var_checker.Check(Id.KW_Var, param.name,
2158 param.blame_tok)
2159 if posit.rest_of:
2160 r = posit.rest_of
2161 self.var_checker.Check(Id.KW_Var, r.name,
2162 r.blame_tok)
2163
2164 named = sig.named
2165 if named:
2166 for param in named.params:
2167 self.var_checker.Check(Id.KW_Var, param.name,
2168 param.blame_tok)
2169 if named.rest_of:
2170 r = named.rest_of
2171 self.var_checker.Check(Id.KW_Var, r.name,
2172 r.blame_tok)
2173
2174 if sig.block_param:
2175 b = sig.block_param
2176 self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)
2177
2178 self._SetNext()
2179 node.body = self.ParseBraceGroup()
2180 # No redirects for YSH procs (only at call site)
2181
2182 return node
2183
2184 def ParseYshFunc(self):
2185 # type: () -> Func
2186 """
2187 ysh_func: (
2188 Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
2189 )
2190 Looking at KW_Func
2191 """
2192 node = Func.CreateNull(alloc_lists=True)
2193
2194 keyword_tok = word_.AsKeywordToken(self.cur_word)
2195 node.keyword = keyword_tok
2196
2197 with ctx_VarChecker(self.var_checker, keyword_tok):
2198 self.w_parser.ParseFunc(node)
2199
2200 posit = node.positional
2201 if posit:
2202 for param in posit.params:
2203 self.var_checker.Check(Id.KW_Var, param.name,
2204 param.blame_tok)
2205 if posit.rest_of:
2206 r = posit.rest_of
2207 self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)
2208
2209 named = node.named
2210 if named:
2211 for param in named.params:
2212 self.var_checker.Check(Id.KW_Var, param.name,
2213 param.blame_tok)
2214 if named.rest_of:
2215 r = named.rest_of
2216 self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)
2217
2218 self._SetNext()
2219 with ctx_CmdMode(self, cmd_mode_e.Func):
2220 node.body = self.ParseBraceGroup()
2221
2222 return node
2223
2224 def ParseCoproc(self):
2225 # type: () -> command_t
2226 """
2227 TODO: command.Coproc?
2228 """
2229 raise NotImplementedError()
2230
2231 def ParseSubshell(self):
2232 # type: () -> command.Subshell
2233 """
2234 subshell : '(' compound_list ')'
2235
2236 Looking at Op_LParen
2237 """
2238 left = word_.AsOperatorToken(self.cur_word)
2239 self._SetNext() # skip past (
2240
2241 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2242 # translation stack, we want to delay it.
2243
2244 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2245
2246 c_list = self._ParseCommandList()
2247 if len(c_list.children) == 1:
2248 child = c_list.children[0]
2249 else:
2250 child = c_list
2251
2252 ate = self._Eat(Id.Right_Subshell)
2253 right = word_.AsOperatorToken(ate)
2254
2255 return command.Subshell(left, child, right, None) # no redirects yet
2256
2257 def ParseDBracket(self):
2258 # type: () -> command.DBracket
2259 """Pass the underlying word parser off to the boolean expression
2260 parser."""
2261 left = word_.AsKeywordToken(self.cur_word)
2262 # TODO: Test interactive. Without closing ]], you should get > prompt
2263 # (PS2)
2264
2265 self._SetNext() # skip [[
2266 b_parser = bool_parse.BoolParser(self.w_parser)
2267 bnode, right = b_parser.Parse() # May raise
2268 return command.DBracket(left, bnode, right, None) # no redirects yet
2269
2270 def ParseDParen(self):
2271 # type: () -> command.DParen
2272 left = word_.AsOperatorToken(self.cur_word)
2273
2274 self._SetNext() # skip ((
2275 anode, right = self.w_parser.ReadDParen()
2276 assert anode is not None
2277
2278 return command.DParen(left, anode, right, None) # no redirects yet
2279
    def ParseCommand(self):
        # type: () -> command_t
        """Top-level dispatch on the first word of a command.

        command          : simple_command
                         | compound_command   # OSH edit: io_redirect* folded in
                         | function_def
                         | ksh_function_def

                         # YSH extensions
                         | proc NAME ...
                         | typed proc NAME ...
                         | func NAME ...
                         | const ...
                         | var ...
                         | setglobal ...
                         | setref ...
                         | setvar ...
                         | call EXPR
                         | = EXPR
                         ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1

        Raises:
          error.Parse (via p_die) on any syntax error.
        """
        # do/done/then etc. cannot BEGIN a command
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle.  Code
            # inside procs should be YSH, full stop.  That means ysh:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                # 2024-02: This avoids bad syntax errors if you type YSH code
                # into OSH.
                # proc p (x) { echo hi } would actually be parsed as a
                # command.Simple!  Shell compatibility: quote 'proc'
                p_die("proc is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Typed:  # typed proc p () { ... }
            self._SetNext()
            self._GetWord()
            if self.c_id != Id.KW_Proc:
                p_die("Expected 'proc' after 'typed'", loc.Word(self.cur_word))

            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                p_die("typed is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func():
                return self.ParseYshFunc()
            else:
                # Same reasoning as above, for 'proc'
                p_die("func is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
            p_die("const can't be inside proc or func. Use var instead.",
                  loc.Word(self.cur_word))

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Statically register each declared name with the var checker
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name, lhs.left)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.KW_Call, Id.Lit_Equals):
            # = 42 + a[i]
            # call mylist->append('x')

            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            # ensured by Kind.Word
            cur_word = cast(CompoundWord, self.cur_word)

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need
            # parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    # Only a bare var name followed by '=' counts
                    if (match.IsValidVarName(lexer.LazyStr(tok)) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        if (len(self.hay_attrs_stack) and
                                self.hay_attrs_stack[-1]):
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here.  Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(
                                None,
                                [NameType(tok, lexer.TokenVal(tok), None)],
                                enode)
                        else:
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy
2452
2453 def ParsePipeline(self):
2454 # type: () -> command_t
2455 """
2456 pipeline : Bang? command ( '|' newline_ok command )* ;
2457 """
2458 negated = None # type: Optional[Token]
2459
2460 self._GetWord()
2461 if self.c_id == Id.KW_Bang:
2462 negated = word_.AsKeywordToken(self.cur_word)
2463 self._SetNext()
2464
2465 child = self.ParseCommand()
2466 assert child is not None
2467
2468 children = [child]
2469
2470 self._GetWord()
2471 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2472 if negated is not None:
2473 node = command.Pipeline(negated, children, [])
2474 return node
2475 else:
2476 return child # no pipeline
2477
2478 # | or |&
2479 ops = [] # type: List[Token]
2480 while True:
2481 op = word_.AsOperatorToken(self.cur_word)
2482 ops.append(op)
2483
2484 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2485 self._NewlineOk()
2486
2487 child = self.ParseCommand()
2488 children.append(child)
2489
2490 self._GetWord()
2491 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2492 break
2493
2494 return command.Pipeline(negated, children, ops)
2495
2496 def ParseAndOr(self):
2497 # type: () -> command_t
2498 self._GetWord()
2499 if self.c_id == Id.Lit_TDot:
2500 # We got '...', so parse in multiline mode
2501 self._SetNext()
2502 with word_.ctx_Multiline(self.w_parser):
2503 return self._ParseAndOr()
2504
2505 # Parse in normal mode, not multiline
2506 return self._ParseAndOr()
2507
2508 def _ParseAndOr(self):
2509 # type: () -> command_t
2510 """
2511 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2512 | pipeline
2513
2514 Note that it is left recursive and left associative. We parse it
2515 iteratively with a token of lookahead.
2516 """
2517 child = self.ParsePipeline()
2518 assert child is not None
2519
2520 self._GetWord()
2521 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2522 return child
2523
2524 ops = [] # type: List[Token]
2525 children = [child]
2526
2527 while True:
2528 ops.append(word_.AsOperatorToken(self.cur_word))
2529
2530 self._SetNext() # skip past || &&
2531 self._NewlineOk()
2532
2533 child = self.ParsePipeline()
2534 children.append(child)
2535
2536 self._GetWord()
2537 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2538 break
2539
2540 return command.AndOr(children, ops)
2541
2542 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2543
2544 # At the top level, we execute after every line, e.g. to
2545 # - process alias (a form of dynamic parsing)
2546 # - process 'exit', because invalid syntax might appear after it
2547
2548 # On the other hand, for a while loop body, we parse the whole thing at once,
2549 # and then execute it. We don't want to parse it over and over again!
2550
2551 # COMPARE
2552 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2553 # command_term : and_or (trailer and_or)* ; # CHILDREN
2554
2555 def _ParseCommandLine(self):
2556 # type: () -> command_t
2557 """
2558 command_line : and_or (sync_op and_or)* trailer? ;
2559 trailer : sync_op newline_ok
2560 | NEWLINES;
2561 sync_op : '&' | ';';
2562
2563 NOTE: This rule causes LL(k > 1) behavior. We would have to peek to see if
2564 there is another command word after the sync op.
2565
2566 But it's easier to express imperatively. Do the following in a loop:
2567 1. ParseAndOr
2568 2. Peek.
2569 a. If there's a newline, then return. (We're only parsing a single
2570 line.)
2571 b. If there's a sync_op, process it. Then look for a newline and
2572 return. Otherwise, parse another AndOr.
2573 """
2574 # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
2575 # I don't think we should add anything else here; otherwise it will be
2576 # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
2577 END_LIST = [Id.Op_Newline, Id.Eof_Real]
2578
2579 children = [] # type: List[command_t]
2580 done = False
2581 while not done:
2582 child = self.ParseAndOr()
2583
2584 self._GetWord()
2585 if self.c_id in (Id.Op_Semi, Id.Op_Amp):
2586 tok = cast(Token, self.cur_word) # for MyPy
2587 child = command.Sentence(child, tok)
2588 self._SetNext()
2589
2590 self._GetWord()
2591 if self.c_id in END_LIST:
2592 done = True
2593
2594 elif self.c_id in END_LIST:
2595 done = True
2596
2597 else:
2598 # e.g. echo a(b)
2599 p_die(
2600 'Invalid word while parsing command line (%s)' %
2601 Id_str(self.c_id), loc.Word(self.cur_word))
2602
2603 children.append(child)
2604
2605 # Simplify the AST.
2606 if len(children) > 1:
2607 return command.CommandList(children)
2608 else:
2609 return children[0]
2610
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term     : and_or (trailer and_or)* ;
        trailer          : sync_op newline_ok
                         | NEWLINES;
        sync_op          : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [
            self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi,
            Id.Op_SemiAmp, Id.Op_DSemiAmp
        ]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline. It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            # Peek at the token after the and_or to decide whether to stop.
            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in Sentence to record the ';' or '&' terminator
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2693
    def _ParseCommandList(self):
        # type: () -> command.CommandList
        """
        command_list     : newline_ok command_term trailer? ;

        This one is called by all the compound commands.  It's basically a
        command block.

        NOTE: Rather than translating the CFG directly, the code follows a
        style more like this: (and_or trailer)+.  It makes capture easier.
        """
        self._NewlineOk()
        return self._ParseCommandTerm()
2708
2709 def ParseLogicalLine(self):
2710 # type: () -> command_t
2711 """Parse a single line for main_loop.
2712
2713 A wrapper around _ParseCommandLine(). Similar but not identical to
2714 _ParseCommandList() and ParseCommandSub().
2715
2716 Raises:
2717 ParseError
2718 """
2719 self._NewlineOk()
2720 self._GetWord()
2721 if self.c_id == Id.Eof_Real:
2722 return None # main loop checks for here docs
2723 node = self._ParseCommandLine()
2724 return node
2725
2726 def ParseInteractiveLine(self):
2727 # type: () -> parse_result_t
2728 """Parse a single line for Interactive main_loop.
2729
2730 Different from ParseLogicalLine because newlines are handled differently.
2731
2732 Raises:
2733 ParseError
2734 """
2735 self._GetWord()
2736 if self.c_id == Id.Op_Newline:
2737 return parse_result.EmptyLine
2738 if self.c_id == Id.Eof_Real:
2739 return parse_result.Eof
2740
2741 node = self._ParseCommandLine()
2742 return parse_result.Node(node)
2743
2744 def ParseCommandSub(self):
2745 # type: () -> command_t
2746 """Parse $(echo hi) and `echo hi` for word_parse.py.
2747
2748 They can have multiple lines, like this: echo $( echo one echo
2749 two )
2750 """
2751 self._NewlineOk()
2752
2753 self._GetWord()
2754 if self.c_kind == Kind.Eof: # e.g. $()
2755 return command.NoOp
2756
2757 c_list = self._ParseCommandTerm()
2758 if len(c_list.children) == 1:
2759 return c_list.children[0]
2760 else:
2761 return c_list
2762
2763 def CheckForPendingHereDocs(self):
2764 # type: () -> None
2765 # NOTE: This happens when there is no newline at the end of a file, like
2766 # osh -c 'cat <<EOF'
2767 if len(self.pending_here_docs):
2768 node = self.pending_here_docs[0] # Just show the first one?
2769 h = cast(redir_param.HereDoc, node.arg)
2770 p_die('Unterminated here doc began here', loc.Word(h.here_begin))
2771
2772
2773# vim: sw=4