OILS / osh / cmd_parse.py View on Github | oilshell.org

2771 lines, 1417 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from core import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
_ = Kind_str  # for debug prints

# Byte values used when checking the character before '(' in _ScanSimpleCommand
# (see lexer.ByteLookBack).
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the body of a here doc, up to its terminating delimiter.

    All lines are read eagerly rather than parsed one at a time, because a
    command sub in the body may span lines:

        cat <<EOF
        1 $(echo 2
        echo 3) 4
        EOF

    Returns:
      (body lines, terminator line), where each line is paired with the
      offset of the first byte to keep -- nonzero only for <<- tab stripping.
    """
    body = []  # type: List[Tuple[SourceLine, int]]
    last_line = None  # type: Tuple[SourceLine, int]
    strip_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF before the delimiter was seen
            # bash only warns about an unterminated here doc.  We make it
            # fatal because we want to be strict, and because it causes
            # problems reporting other errors.  Blame the << operator.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # For <<-, strip ALL leading tabs -- not spaces, and not just the
        # first tab.
        start_offset = 0
        if strip_tabs:
            for ch in line:
                if ch != '\t':
                    break
                start_offset += 1

        if line[start_offset:].rstrip() == delimiter:
            last_line = (src_line, start_offset)
            break

        body.append((src_line, start_offset))

    return body, last_line
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Create one Id.Lit_Chars Token per here doc line.

    Used for <<'EOF' and <<-'EOF' -- the single quoted rule (no expansion).

    <<- has non-zero start_offset (the stripped leading tabs).
    """
    # less precise type, because List[T] is an invariant type
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:

        # Maintain the lossless invariant for STRIPPED tabs: record a Token
        # in the arena that nothing else refers to.
        #
        # Note: We could use Lit_CharsWithoutPrefix for 'single quoted' EOF
        # here docs, but it's more complex with double quoted EOF docs.

        if do_lossless:  # avoid garbage, doesn't affect correctness
            arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0,
                           src_line)

        tok = arena.NewToken(Id.Lit_Chars, start_offset,
                             len(src_line.content), src_line)
        parts.append(tok)
    return parts
169
170
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Called when a newline is reached (see _GetWord): reads the body lines,
    then sets h.stdin_parts and h.here_end_tok on the HereDoc arg of r.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded. Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - Literal for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - Parse as word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain lossless invariant for STRIPPED tabs: add a Token to the
    # arena invariant, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0, end_line)

    # Create a Token with the end terminator.  Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line)
207
208
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (from DetectShAssignment).

    Handles three LHS shapes:
      s=1       -> sh_lhs.Name
      a[i]=1    -> sh_lhs.UnparsedIndex (lossless mode) or
                   sh_lhs.IndexedName (index parsed as arithmetic)
    """
    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            # strip the trailing '+=' from the token to get the var name
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            # strip the trailing '='
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        # a[x]=1 in lossless mode: keep the index as an unparsed string so
        # the original source can be reproduced exactly.
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        # nothing after the '=', e.g. x=
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        w = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(w)
        rhs = w

    return AssignPair(left_token, lhs, op, rhs)
282
283
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (List[ParsedAssignment], List[EnvPair]) -> None
    """Convert parsed prefix assignments into EnvPair nodes.

    Helper to modify a SimpleCommand node.

    Args:
      preparsed_list: assignments found by DetectShAssignment
      more_env: output list that EnvPair nodes are appended to
    """
    for p in preparsed_list:
        left_tok = p.left

        if left_tok.id != Id.Lit_VarLike:  # can't be a[x]=1
            p_die(
                "Environment binding shouldn't look like an array assignment",
                left_tok)

        if lexer.IsPlusEquals(left_tok):
            p_die('Expected = in environment binding, got +=', left_tok)

        # strip the trailing '=' to get the variable name
        var_name = lexer.TokenSliceRight(left_tok, -1)

        parts = p.w.parts
        offset = p.part_offset
        if offset == len(parts):  # empty RHS, e.g. FOO= cmd
            rhs = rhs_word.Empty  # type: rhs_word_t
        else:
            w = CompoundWord(parts[offset:])
            word_.TildeDetectAssign(w)
            rhs = w

        more_env.append(EnvPair(left_tok, var_name, rhs))
316
317
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
    """Second pass of SimpleCommand parsing: split off leading assignments.

    Returns:
      (assignment prefix, remaining words) -- the prefix ends at the first
      word that doesn't look like an assignment.
    """
    assignments = []  # type: List[ParsedAssignment]
    rest = []  # type: List[CompoundWord]

    in_prefix = True
    for w in words:
        if in_prefix:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                assignments.append(
                    ParsedAssignment(left_token, close_token, part_offset, w))
                continue
            in_prefix = False  # first non-assignment word ends the prefix
        rest.append(w)

    return assignments, rest
339
340
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Assemble a command.Simple node from already-split parts.

    Args:
      preparsed_list: leading assignment words, which become env bindings
      suffix_words: the command name and its arguments (must be non-empty)
      redirects, typed_args, block: attached to the node as-is
    """
    # FOO=(1 2 3) ls is not allowed.
    for p in preparsed_list:
        if word_.HasArrayPart(p.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(p.w))

    # NOTE: It would be possible to add this check back.  But it already
    # happens at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # {a,b,c}   # Use { before brace detection
    # ~/bin/ls  # Use ~ before tilde detection
    part0 = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(part0)

    # We only do brace DETECTION here, not brace EXPANSION.  Therefore we
    # can't implement bash's behavior of having say {~bob,~jane}/src work,
    # because we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # Technically we could do expansion outside of 'oshc translate', but it
    # doesn't seem worth it.
    detected_words = word_.TildeDetectAll(braces.BraceDetectAll(suffix_words))

    env_pairs = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, env_pairs)

    # do_fork by default
    return command.Simple(blame_tok, env_pairs, detected_words, redirects,
                          typed_args, block, True)
386
387
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # Parallel stacks with one entry per enclosing proc/func/function.
        # self.tokens holds the blaming token ('proc', 'func', or the shell
        # function name token); self.names maps each name declared in that
        # scope to the keyword Id that declared it.
        self.tokens = []  # type: List[Token]
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when we enter a shell function, proc, or func.

        Bash allows nesting, but it's confusing: an inner definition is the
        same as two functions at the top level.

          f() {
            g() {
              echo 'top level function defined in another one'
            }
          }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens):
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        new_scope = {}  # type: Dict[str, Id_t]
        self.names.append(new_scope)

    def Pop(self):
        # type: () -> None
        """Called when we leave a shell function, proc, or func."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x:
          error if x is already declared
        setvar x:
          error if x is not declared
        setglobal x:
          no errors possible; statically knowing all global names would
          require: no 'source', shopt -u copy_env, and a static use lib.

        Bare assignment in Hay is a dynamic check, not a static one: Hay
        builds up data imperatively, and it can be type checked LATER,
        right before main().

          Package {
            version = '3.11'
            version = '3.12'
          }
        """
        # No static checks at the global level!  Because of 'source', var
        # and setvar are essentially the same there.
        if len(self.names) == 0:
            return

        scope = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in scope:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                scope[var_name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if var_name not in scope:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
476
477
class ctx_VarChecker(object):
    """Context manager: push a VarChecker scope on entry, pop it on exit."""

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        var_checker.Push(blame_tok)
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, unused_type, unused_value, unused_traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
492
493
class ctx_CmdMode(object):
    """Context manager: temporarily set the parser's cmd_mode, restoring the
    previous mode on exit."""

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, unused_type, unused_value, unused_traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
509
510
# Keywords that continue or terminate a compound command (do/done, then/fi,
# elif/else, esac).  They never begin a new command by themselves; see
# _AtSecondaryKeyword().
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
515
516
517class CommandParser(object):
518 """Recursive descent parser derived from POSIX shell grammar.
519
520 This is a BNF grammar:
521 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
522
523 - Augmented with both bash/OSH and YSH constructs.
524
525 - We use regex-like iteration rather than recursive references
526 ? means optional (0 or 1)
527 * means 0 or more
528 + means 1 or more
529
530 - Keywords are spelled in Caps:
531 If Elif Case
532
533 - Operator tokens are quoted:
534 '(' '|'
535
536 or can be spelled directly if it matters:
537
538 Op_LParen Op_Pipe
539
540 - Non-terminals are snake_case:
541 brace_group subshell
542
543 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
544 the production should be in the method docstrings, e.g.
545
546 def ParseSubshell():
547 "
548 subshell : '(' compound_list ')'
549
550 Looking at Op_LParen # Comment to say how this method is called
551 "
552
553 The grammar may be factored to make parsing easier.
554 """
555
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parsing context; also supplies aliases and arena
          parse_opts: view of parse-time options (parse_brace, parse_at, ...)
          w_parser: word parser used for normal parsing
          lexer: used for pushing hints and lookahead to (
          line_reader: used to read here doc bodies
          eof_id: the Id that terminates parsing (Eof_Real by default)
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x=42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser instances.
        # I think this is OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
597
    # Init_() function for "keyword arg"
    def Init_AliasesInFlight(self, aliases_in_flight):
        # type: (AliasesInFlight) -> None
        """Share the aliases-in-flight list with a parent parser.

        Called by _MaybeExpandAliases() on the recursively created parser, so
        nested expansions see the same list and infinite alias expansion is
        prevented.
        """
        self.aliases_in_flight = aliases_in_flight
602
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined
        self.c_id = Id.Undefined_Tok

        # Redirects whose here doc bodies haven't been read yet; they are
        # filled in when the next newline is consumed (see _GetWord).
        self.pending_here_docs = []  # type: List[Redir]
616
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Called by the interactive loop.  Resets the word parser, lexer, and
        line reader, but not this parser's own cursor state (see Reset).
        """
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
626
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        self.next_lex_mode = lex_mode_e.ShCommand
635
    def _SetNextBrack(self):
        # type: () -> None
        """Like _SetNext(), but read the next word in ShCommandFakeBrack mode.

        Used in _ScanSimpleCommand for words after the first, so that
        'my-cmd [x]' lazy arg lists can be recognized.  NOTE(review): the
        exact lexing difference of the FakeBrack mode is defined in the
        lexer -- confirm there.
        """
        self.next_lex_mode = lex_mode_e.ShCommandFakeBrack
639
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.  The newline is where pending here doc
            # bodies (queued by ParseRedirect) get filled in.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            # Has special case for Id.Lit_{LBrace,RBrace,Equals}
            self.c_id = word_.CommandId(self.cur_word)
            self.next_lex_mode = lex_mode_e.Undefined
669
670 def _Eat(self, c_id, msg=None):
671 # type: (Id_t, Optional[str]) -> word_t
672 """Consume a word of a type, maybe showing a custom error message.
673
674 Args:
675 c_id: the Id we expected
676 msg: improved error message
677 """
678 self._GetWord()
679 if self.c_id != c_id:
680 if msg is None:
681 msg = 'Expected word type %s, got %s' % (
682 ui.PrettyId(c_id), ui.PrettyId(self.c_id))
683 p_die(msg, loc.Word(self.cur_word))
684
685 skipped = self.cur_word
686 self._SetNext()
687 return skipped
688
    def _NewlineOk(self):
        # type: () -> None
        """Check for optional newline and consume it.

        No-op when the current token isn't a newline.
        """
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
695
696 def _AtSecondaryKeyword(self):
697 # type: () -> bool
698 self._GetWord()
699 if self.c_id in SECONDARY_KEYWORDS:
700 return True
701 return False
702
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse one redirect: operator (with optional fd/varname) and target.

        For << and <<-, the returned Redir carries a HereDoc arg whose body
        is filled in later, at the next newline (see _GetWord).
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
        # possibly "unify" the IDs by subtracting a constant like 8 or 16?

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':  # {fd}>out - named descriptor variable
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():  # 3>out or 99>out - explicit descriptor
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:  # >out - default descriptor for this operator
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
765
766 def _ParseRedirectList(self):
767 # type: () -> List[Redir]
768 """Try parsing any redirects at the cursor.
769
770 This is used for blocks only, not commands.
771 """
772 redirects = [] # type: List[Redir]
773 while True:
774 # This prediction needs to ONLY accept redirect operators. Should we
775 # make them a separate Kind?
776 self._GetWord()
777 if self.c_kind != Kind.Redir:
778 break
779
780 node = self.ParseRedirect()
781 redirects.append(node)
782 self._SetNext()
783
784 return redirects
785
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

          simple_command   : cmd_prefix cmd_word cmd_suffix
                           | cmd_prefix cmd_word
                           | cmd_prefix
                           | cmd_name cmd_suffix
                           | cmd_name
                           ;
          cmd_name         : WORD                   /* Apply rule 7a */
                           ;
          cmd_word         : WORD                   /* Apply rule 7b */
                           ;
          cmd_prefix       :            io_redirect
                           | cmd_prefix io_redirect
                           |            ASSIGNMENT_WORD
                           | cmd_prefix ASSIGNMENT_WORD
                           ;
          cmd_suffix       :            io_redirect
                           | cmd_suffix io_redirect
                           |            WORD
                           | cmd_suffix WORD

        YSH grammar:

          redirect = redir_op WORD
          item = WORD | redirect

          typed_args =
            '(' arglist ')'
          | '[' arglist ']'

          simple_command =
            cmd_prefix* item+ typed_args? BraceGroup? cmd_suffix*

        Notably, redirects shouldn't appear after typed args, or after
        BraceGroup.

        Examples:

        This is an assignment:
           foo=1 >out

        This is a command.Simple
           >out

        What about
           >out (42)
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()

            # If we got { }, change it to something that's not Kind.Word
            kind2 = self.c_kind
            if (kind2 == Kind.Word and self.parse_opts.parse_brace() and
                    self.c_id in (Id.Lit_LBrace, Id.Lit_RBrace)):
                kind2 = Kind.Op

            if kind2 == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif kind2 == Kind.Word:
                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    #
                    # Can we remove this StaticEval() call, and just look
                    # inside Token? I think once we get rid of SHELL nodes,
                    # this will be simpler.

                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            else:
                break  # not a redirect or word; simple command ends here

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1

        # my-cmd (x) or my-cmd [x]
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            # 1. Check that there's a preceding space
            prev_byte = self.lexer.ByteLookBack()
            if prev_byte not in (SPACE_CH, TAB_CH):
                if self.parse_opts.parse_at():
                    p_die('Space required before (', loc.Word(self.cur_word))
                else:
                    # inline func call like @sorted(x) is invalid in OSH, but
                    # the solution isn't a space
                    p_die(
                        'Unexpected left paren (might need a space before it)',
                        loc.Word(self.cur_word))

            # 2. Check that it's not ().  We disallow this because it's a
            # no-op and there could be confusion with shell func defs.
            # For some reason we need to call lexer.LookPastSpace, not
            # w_parser.LookPastSpace.  I think this is because we're at (,
            # which is an operator token.  All the other cases are like 'x=',
            # which is PART of a word, and we don't know if it will end.
            next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
            if next_id == Id.Op_RParen:
                p_die('Empty arg list not allowed', loc.Word(self.cur_word))

            typed_args = self.w_parser.ParseProcCallArgs(
                grammar_nt.ysh_eager_arglist)

            self._SetNext()

        elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
            typed_args = self.w_parser.ParseProcCallArgs(
                grammar_nt.ysh_lazy_arglist)

            self._SetNext()

        self._GetWord()

        # Allow redirects after typed args, e.g.
        #   json write (x) > out.txt
        if self.c_kind == Kind.Redir:
            redirects.extend(self._ParseRedirectList())

        # my-cmd { echo hi }   my-cmd (x) { echo hi }   ...
        if (self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace and
                # Disabled for if/while condition, etc.
                self.allow_block):

            # allow x = 42
            self.hay_attrs_stack.append(first_word_caps)
            brace_group = self.ParseBraceGroup()

            # So we can get the source code back later
            lines = self.arena.SaveLinesAndDiscard(brace_group.left,
                                                   brace_group.right)
            block = LiteralBlock(brace_group, lines)

            self.hay_attrs_stack.pop()

            self._GetWord()

        # Allow redirects after block, e.g.
        #   cd /tmp { echo $PWD } > out.txt
        if self.c_kind == Kind.Redir:
            redirects.extend(self._ParseRedirectList())

        return redirects, words, typed_args, block
961
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node, or None.

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure
        out a better place.

        Most test cases pass, except for ones like:

          alias LBRACE='{'
          LBRACE echo one; echo two; }

          alias MULTILINE='echo 1
          echo 2
          echo 3'
          MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

          checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.

        Returns:
          A command node if any aliases were expanded, or None otherwise.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:  # quoted words are never alias-expanded
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want
            # to prevent expansion of the second word in 'echo echo', so we
            # add 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own
        # Arena.  This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error.
                    # We don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1100
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar

        io_file : '<' filename
                | LESSAND filename
                  ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects. Append them to separate lists.
        2) Look for the first non-assignment word. If it's declare, etc., then
        keep parsing words AND assign words. Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have them of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array literal
        in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted. e.g.
        <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # typed_loc is the token we blame when typed args / blocks appear in a
        # position that doesn't accept them (redirect-only, assignment,
        # control flow).
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g. >out.txt # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # A bare redirect is still a Simple command with no words; blame
            # the redirect operator itself in error messages.
            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading FOO=bar bindings from the rest of the words.
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            #   and inside Hay Attr blocks
            # But allow X=Y at the top level
            #   for interactive use foo=bar
            #   for global constants GLOBAL=~/src
            #     because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion. We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x   - inside procs and shell functions
                # return (x) - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            # break/continue/exit (and shell-style return) take no typed args,
            # redirects, or environment bindings.
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors. (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
1279
    def ParseBraceGroup(self):
        # type: () -> BraceGroup
        """
        Original:
            brace_group : LBrace command_list RBrace ;

        YSH:
            brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;

        The doc comment can only occur if there's a newline.
        """
        ate = self._Eat(Id.Lit_LBrace)
        left = word_.BraceToken(ate)

        doc_word = None  # type: word_t
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
            # Set a flag so we don't skip over ###
            with word_.ctx_EmitDocToken(self.w_parser):
                self._GetWord()

        if self.c_id == Id.Ignored_Comment:
            # Capture the ### doc comment as part of the brace group.
            doc_word = self.cur_word
            self._SetNext()

        # Id.Ignored_Comment means it's a Token, or None
        doc_token = cast(Token, doc_word)

        c_list = self._ParseCommandList()

        ate = self._Eat(Id.Lit_RBrace)
        right = word_.BraceToken(ate)

        # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
        # would allow us to revert this back to None, which was changed in
        # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
        # behavior saves allocations, but is less type safe.
        return BraceGroup(left, doc_token, c_list.children, [],
                          right)  # no redirects yet
1320
1321 def ParseDoGroup(self):
1322 # type: () -> command.DoGroup
1323 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1324
1325 do_group : Do command_list Done ; /* Apply rule 6 */
1326 """
1327 ate = self._Eat(Id.KW_Do)
1328 do_kw = word_.AsKeywordToken(ate)
1329
1330 c_list = self._ParseCommandList() # could be anything
1331
1332 ate = self._Eat(Id.KW_Done)
1333 done_kw = word_.AsKeywordToken(ate)
1334
1335 return command.DoGroup(do_kw, c_list.children, done_kw)
1336
1337 def ParseForWords(self):
1338 # type: () -> Tuple[List[CompoundWord], Optional[Token]]
1339 """
1340 for_words : WORD* for_sep
1341 ;
1342 for_sep : ';' newline_ok
1343 | NEWLINES
1344 ;
1345 """
1346 words = [] # type: List[CompoundWord]
1347 # The token of any semi-colon, so we can remove it.
1348 semi_tok = None # type: Optional[Token]
1349
1350 while True:
1351 self._GetWord()
1352 if self.c_id == Id.Op_Semi:
1353 tok = cast(Token, self.cur_word)
1354 semi_tok = tok
1355 self._SetNext()
1356 self._NewlineOk()
1357 break
1358 elif self.c_id == Id.Op_Newline:
1359 self._SetNext()
1360 break
1361 elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1362 break
1363
1364 if self.cur_word.tag() != word_e.Compound:
1365 # TODO: Can we also show a pointer to the 'for' keyword?
1366 p_die('Invalid word in for loop', loc.Word(self.cur_word))
1367
1368 w2 = cast(CompoundWord, self.cur_word)
1369 words.append(w2)
1370 self._SetNext()
1371 return words, semi_tok
1372
    def _ParseForExprLoop(self, for_kw):
        # type: (Token) -> command.ForExpr
        """Parse a C-style arithmetic for loop, after the 'for' keyword.

        Shell:
          for '((' init ';' cond ';' update '))' for_sep? do_group

        YSH:
          for '((' init ';' cond ';' update '))' for_sep? brace_group
        """
        # The word parser consumes '((' init ';' cond ';' update '))'
        node = self.w_parser.ReadForExpression()
        node.keyword = for_kw

        self._SetNext()

        # Optional separator between '))' and the body
        self._GetWord()
        if self.c_id == Id.Op_Semi:
            self._SetNext()
            self._NewlineOk()
        elif self.c_id == Id.Op_Newline:
            self._SetNext()
        elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed
            pass
        elif self.c_id == Id.Lit_LBrace:  # does NOT require parse_brace
            pass
        else:
            p_die('Invalid word after for expression', loc.Word(self.cur_word))

        # Body is either a YSH brace group or a POSIX do/done group
        if self.c_id == Id.Lit_LBrace:
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()
        return node
1405
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse 'for x [, y ...] [in ...]' loops, after the 'for' keyword.

        Handles POSIX 'for x in a b', implicit 'for x; do' (loops over "$@"),
        and the YSH forms with multiple loop variables and (expr) iterables.
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        # First, collect 1-3 loop variable names.
        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        # Second, determine what we iterate over.
        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'. But we still have to fix the bug
            # where we require a SPACE between in and (
            # for x in(y)  # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH: for x in (expr) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Shell: for x in a b c; do ...
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        # Third, the loop body.
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1509
1510 def ParseFor(self):
1511 # type: () -> command_t
1512 """
1513 TODO: Update the grammar
1514
1515 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1516 | For '((' ... TODO
1517 """
1518 ate = self._Eat(Id.KW_For)
1519 for_kw = word_.AsKeywordToken(ate)
1520
1521 self._GetWord()
1522 if self.c_id == Id.Op_DLeftParen:
1523 if not self.parse_opts.parse_dparen():
1524 p_die("Bash for loops aren't allowed (parse_dparen)",
1525 loc.Word(self.cur_word))
1526
1527 # for (( i = 0; i < 10; i++)
1528 n1 = self._ParseForExprLoop(for_kw)
1529 n1.redirects = self._ParseRedirectList()
1530 return n1
1531 else:
1532 # for x in a b; do echo hi; done
1533 n2 = self._ParseForEachLoop(for_kw)
1534 n2.redirects = self._ParseRedirectList()
1535 return n2
1536
1537 def _ParseConditionList(self):
1538 # type: () -> condition_t
1539 """
1540 condition_list: command_list
1541
1542 This is a helper to parse a condition list for if commands and while/until
1543 loops. It will throw a parse error if there are no conditions in the list.
1544 """
1545 self.allow_block = False
1546 commands = self._ParseCommandList()
1547 self.allow_block = True
1548
1549 if len(commands.children) == 0:
1550 p_die("Expected a condition", loc.Word(self.cur_word))
1551
1552 return condition.Shell(commands.children)
1553
1554 def ParseWhileUntil(self, keyword):
1555 # type: (Token) -> command.WhileUntil
1556 """
1557 while_clause : While command_list do_group ;
1558 until_clause : Until command_list do_group ;
1559 """
1560 self._SetNext() # skip keyword
1561
1562 if (self.parse_opts.parse_paren() and
1563 self.w_parser.LookPastSpace() == Id.Op_LParen):
1564 enode = self.w_parser.ParseYshExprForCommand()
1565 cond = condition.YshExpr(enode) # type: condition_t
1566 else:
1567 cond = self._ParseConditionList()
1568
1569 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1570 # should be unchanged. To be sure we should desugar.
1571 self._GetWord()
1572 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1573 # while test -f foo {
1574 body_node = self.ParseBraceGroup() # type: command_t
1575 else:
1576 body_node = self.ParseDoGroup()
1577
1578 # no redirects yet
1579 return command.WhileUntil(keyword, cond, body_node, None)
1580
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """
        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # Tell the lexer that the next ')' closes a case pattern, not a subshell
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # One or more patterns separated by |
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The arm's action is optional; it's absent when the next token is
        # already a terminator or esac.
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp,
                             Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        # Terminator: ;; ;& ;;& or nothing before esac
        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp):
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1638
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """
        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`. We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        # The discriminant is the token id that ParseYshCase already peeked at.
        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words: one or more words separated by |
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1696
    def ParseYshCase(self, case_kw):
        # type: (Token) -> command.Case
        """
        ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;

        Looking at: token after 'case'
        """
        # The value being matched is a YSH expression: case (x) { ... }
        enode = self.w_parser.ParseYshExprForCommand()
        to_match = case_arg.YshExpr(enode)

        ate = self._Eat(Id.Lit_LBrace)
        arms_start = word_.BraceToken(ate)

        # NewlineOkForYshCase skips newlines AND peeks at the next token id,
        # which tells us how to parse the next arm (or stop at '}').
        discriminant = self.w_parser.NewlineOkForYshCase()

        # Note: for now, zero arms are accepted, just like POSIX case $x in esac
        arms = []  # type: List[CaseArm]
        while discriminant != Id.Op_RBrace:
            arm = self.ParseYshCaseArm(discriminant)
            arms.append(arm)

            discriminant = self.w_parser.NewlineOkForYshCase()

        # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
        # token is read as an Id.Op_RBrace, but we need to store this as a
        # Id.Lit_RBrace.
        ate = self._Eat(Id.Op_RBrace)
        arms_end = word_.AsOperatorToken(ate)
        arms_end.id = Id.Lit_RBrace

        # no redirects yet, hence the trailing None
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1729
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """
        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
        """
        self._GetWord()
        w = self.cur_word
        # In YSH mode, 'case foo' with a constant word is probably a mistake
        if not self.parse_opts.parse_bare_word():
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string. You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
1781
1782 def ParseCase(self):
1783 # type: () -> command.Case
1784 """
1785 case_clause : old_case # from POSIX
1786 | ysh_case
1787 ;
1788
1789 Looking at 'Case'
1790 """
1791 case_kw = word_.AsKeywordToken(self.cur_word)
1792 self._SetNext() # past 'case'
1793
1794 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1795 return self.ParseYshCase(case_kw)
1796 else:
1797 return self.ParseOldCase(case_kw)
1798
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the elif/else chain of a YSH (brace-style) if.

        if test -f foo {
          echo foo
        } elif test -f bar; test -f spam {
          # ^ we parsed up to here
          echo bar
        } else {
          echo none
        }

        Appends arms to if_node in place.
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                # elif (x > 0) { ... }
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # elif with a shell command condition; blocks not allowed there
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()  # refresh c_id for the loop condition

            arm = IfArm(elif_kw, cond, None, body.children, None)
            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1832
1833 def _ParseYshIf(self, if_kw, cond):
1834 # type: (Token, condition_t) -> command.If
1835 """
1836 if test -f foo {
1837 # ^ we parsed up to here
1838 echo foo
1839 } elif test -f bar; test -f spam {
1840 echo bar
1841 } else {
1842 echo none
1843 }
1844 NOTE: If you do something like if test -n foo{, the parser keeps going, and
1845 the error is confusing because it doesn't point to the right place.
1846
1847 I think we might need strict_brace so that foo{ is disallowed. It has to
1848 be foo\{ or foo{a,b}. Or just turn that on with parse_brace? After you
1849 form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
1850 Lit_RBrace? Maybe this is pre-parsing step in the WordParser?
1851 """
1852 if_node = command.If.CreateNull(alloc_lists=True)
1853 if_node.if_kw = if_kw
1854
1855 body1 = self.ParseBraceGroup()
1856 # Every arm has 1 spid, unlike shell-style
1857 # TODO: We could get the spids from the brace group.
1858 arm = IfArm(if_kw, cond, None, body1.children, None)
1859
1860 if_node.arms.append(arm)
1861
1862 self._GetWord()
1863 if self.c_id in (Id.KW_Elif, Id.KW_Else):
1864 self._ParseYshElifElse(if_node)
1865 # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
1866 # spid because that's in the BraceGroup.
1867 return if_node
1868
    def _ParseElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the shell-style elif/else chain, appending arms to if_node.

        else_part: (Elif command_list Then command_list)* Else command_list ;
        """
        arms = if_node.arms

        self._GetWord()
        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'elif'

            cond = self._ParseConditionList()

            ate = self._Eat(Id.KW_Then)
            then_kw = word_.AsKeywordToken(ate)

            body = self._ParseCommandList()
            arm = IfArm(elif_kw, cond, then_kw, body.children, then_kw)

            arms.append(arm)

            self._GetWord()  # refresh c_id for the loop condition

        if self.c_id == Id.KW_Else:
            else_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'else'
            body = self._ParseCommandList()
            if_node.else_action = body.children
        else:
            else_kw = None

        if_node.else_kw = else_kw
1901
    def ParseIf(self):
        # type: () -> command.If
        """
        if_clause : If command_list Then command_list else_part? Fi ;

        open      : '{' | Then
        close     : '}' | Fi

        ysh_if    : If ( command_list | '(' expr ')' )
                    open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if (self.parse_opts.parse_paren() and
                self.w_parser.LookPastSpace() == Id.Op_LParen):
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        # A '{' here means the YSH brace-style body; delegate the rest.
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children, then_kw)
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
1951
1952 def ParseTime(self):
1953 # type: () -> command_t
1954 """Time [-p] pipeline.
1955
1956 According to bash help.
1957 """
1958 time_kw = word_.AsKeywordToken(self.cur_word)
1959 self._SetNext() # skip time
1960 pipeline = self.ParsePipeline()
1961 return command.TimeBlock(time_kw, pipeline)
1962
    def ParseCompoundCommand(self):
        # type: () -> command_t
        """
        Refactoring: we put io_redirect* here instead of in function_body and
        command.

        compound_command : brace_group io_redirect*
                         | subshell io_redirect*
                         | for_clause io_redirect*
                         | while_clause io_redirect*
                         | until_clause io_redirect*
                         | if_clause io_redirect*
                         | case_clause io_redirect*

                         # bash extensions
                         | time_clause
                         | [[ BoolExpr ]]
                         | (( ArithExpr ))
        """
        # NOTE(review): n1..n7 are distinct locals rather than one reused
        # name — presumably so each keeps a distinct static type for the
        # mycpp translation; confirm before consolidating.
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:
            n1 = self.ParseBraceGroup()
            n1.redirects = self._ParseRedirectList()
            return n1
        if self.c_id == Id.Op_LParen:
            n2 = self.ParseSubshell()
            n2.redirects = self._ParseRedirectList()
            return n2

        if self.c_id == Id.KW_For:
            # Note: Redirects parsed in this call. POSIX for and bash for (( have
            # redirects, but YSH for doesn't.
            return self.ParseFor()
        if self.c_id in (Id.KW_While, Id.KW_Until):
            keyword = word_.AsKeywordToken(self.cur_word)
            n3 = self.ParseWhileUntil(keyword)
            n3.redirects = self._ParseRedirectList()
            return n3

        if self.c_id == Id.KW_If:
            n4 = self.ParseIf()
            n4.redirects = self._ParseRedirectList()
            return n4
        if self.c_id == Id.KW_Case:
            n5 = self.ParseCase()
            n5.redirects = self._ParseRedirectList()
            return n5

        if self.c_id == Id.KW_DLeftBracket:
            if not self.parse_opts.parse_dbracket():
                p_die('Bash [[ not allowed in YSH (parse_dbracket)',
                      loc.Word(self.cur_word))
            n6 = self.ParseDBracket()
            n6.redirects = self._ParseRedirectList()
            return n6
        if self.c_id == Id.Op_DLeftParen:
            if not self.parse_opts.parse_dparen():
                p_die(
                    'Bash (( not allowed in YSH (parse_dparen, see OILS-ERR-14 for wart)',
                    loc.Word(self.cur_word))
            n7 = self.ParseDParen()
            n7.redirects = self._ParseRedirectList()
            return n7

        # bash extensions: no redirects
        if self.c_id == Id.KW_Time:
            return self.ParseTime()

        # Happens in function body, e.g. myfunc() oops
        p_die(
            'Unexpected word while parsing compound command (%s)' %
            Id_str(self.c_id), loc.Word(self.cur_word))
        assert False  # for MyPy
2036
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """
        function_header : fname '(' ')'
        function_def    : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a grammar.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        # Make the lexer report the closing ')' as Right_ShFunction
        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            # Check var declarations in the body against this function's scope
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            return None
2089
2090 def ParseKshFunctionDef(self):
2091 # type: () -> command.ShFunction
2092 """
2093 ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body
2094 """
2095 keyword_tok = word_.AsKeywordToken(self.cur_word)
2096
2097 self._SetNext() # skip past 'function'
2098 self._GetWord()
2099
2100 cur_word = cast(CompoundWord, self.cur_word) # caller ensures validity
2101 name = word_.ShFunctionName(cur_word)
2102 if len(name) == 0: # example: foo$x is invalid
2103 p_die('Invalid KSH-style function name', loc.Word(cur_word))
2104
2105 name_word = self.cur_word
2106 self._SetNext() # skip past 'function name
2107
2108 self._GetWord()
2109 if self.c_id == Id.Op_LParen:
2110 self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
2111 self._SetNext()
2112 self._Eat(Id.Right_ShFunction)
2113
2114 self._NewlineOk()
2115
2116 func = command.ShFunction.CreateNull()
2117 func.name = name
2118 with ctx_VarChecker(self.var_checker, keyword_tok):
2119 func.body = self.ParseCompoundCommand()
2120
2121 func.keyword = keyword_tok
2122 func.name_tok = location.LeftTokenForWord(name_word)
2123 return func
2124
    def ParseYshProc(self):
        # type: () -> Proc
        """Parse a YSH 'proc' definition, e.g. proc p (x; y; z) { ... }

        Registers every declared parameter with the static var checker, so
        that 'setvar' etc. inside the body can be validated.
        """
        node = Proc.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        # The body is parsed with the var checker scoped to this proc, and in
        # cmd_mode_e.Proc (e.g. so 'const' can be rejected inside).
        with ctx_VarChecker(self.var_checker, keyword_tok):
            with ctx_CmdMode(self, cmd_mode_e.Proc):
                self.w_parser.ParseProc(node)
                if node.sig.tag() == proc_sig_e.Closed:  # Register params
                    sig = cast(proc_sig.Closed, node.sig)

                    # Treat 3 kinds of params as variables.
                    wp = sig.word
                    if wp:
                        for param in wp.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if wp.rest_of:
                            r = wp.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)
                        # We COULD register __out here but it would require a different API.
                        #if param.prefix and param.prefix.id == Id.Arith_Colon:
                        #    self.var_checker.Check(Id.KW_Var, '__' + param.name)

                    # Positional params, e.g. proc p (; x, y)
                    posit = sig.positional
                    if posit:
                        for param in posit.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if posit.rest_of:
                            r = posit.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    # Named params, e.g. proc p (; ; n=1)
                    named = sig.named
                    if named:
                        for param in named.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if named.rest_of:
                            r = named.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    # The block param, e.g. proc p (; ; ; b)
                    if sig.block_param:
                        b = sig.block_param
                        self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)

                self._SetNext()
                node.body = self.ParseBraceGroup()
                # No redirects for YSH procs (only at call site)

        return node
2181
    def ParseYshFunc(self):
        # type: () -> Func
        """Parse a YSH 'func' definition.

        ysh_func: (
            Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
        )
        Looking at KW_Func

        Registers declared parameters with the static var checker, like
        ParseYshProc does.
        """
        node = Func.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        with ctx_VarChecker(self.var_checker, keyword_tok):
            self.w_parser.ParseFunc(node)

            # Positional params, e.g. func f(x, ...rest)
            posit = node.positional
            if posit:
                for param in posit.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if posit.rest_of:
                    r = posit.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            # Named params, e.g. func f(; n=1, ...rest)
            named = node.named
            if named:
                for param in named.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if named.rest_of:
                    r = named.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            self._SetNext()
            # Body is parsed in cmd_mode_e.Func, e.g. so 'return' is checked
            with ctx_CmdMode(self, cmd_mode_e.Func):
                node.body = self.ParseBraceGroup()

        return node
2221
2222 def ParseCoproc(self):
2223 # type: () -> command_t
2224 """
2225 TODO: command.Coproc?
2226 """
2227 raise NotImplementedError()
2228
2229 def ParseSubshell(self):
2230 # type: () -> command.Subshell
2231 """
2232 subshell : '(' compound_list ')'
2233
2234 Looking at Op_LParen
2235 """
2236 left = word_.AsOperatorToken(self.cur_word)
2237 self._SetNext() # skip past (
2238
2239 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2240 # translation stack, we want to delay it.
2241
2242 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2243
2244 c_list = self._ParseCommandList()
2245 if len(c_list.children) == 1:
2246 child = c_list.children[0]
2247 else:
2248 child = c_list
2249
2250 ate = self._Eat(Id.Right_Subshell)
2251 right = word_.AsOperatorToken(ate)
2252
2253 return command.Subshell(left, child, right, None) # no redirects yet
2254
2255 def ParseDBracket(self):
2256 # type: () -> command.DBracket
2257 """Pass the underlying word parser off to the boolean expression
2258 parser."""
2259 left = word_.AsKeywordToken(self.cur_word)
2260 # TODO: Test interactive. Without closing ]], you should get > prompt
2261 # (PS2)
2262
2263 self._SetNext() # skip [[
2264 b_parser = bool_parse.BoolParser(self.w_parser)
2265 bnode, right = b_parser.Parse() # May raise
2266 return command.DBracket(left, bnode, right, None) # no redirects yet
2267
2268 def ParseDParen(self):
2269 # type: () -> command.DParen
2270 left = word_.AsOperatorToken(self.cur_word)
2271
2272 self._SetNext() # skip ((
2273 anode, right = self.w_parser.ReadDParen()
2274 assert anode is not None
2275
2276 return command.DParen(left, anode, right, None) # no redirects yet
2277
    def ParseCommand(self):
        # type: () -> command_t
        """
        command : simple_command
                | compound_command   # OSH edit: io_redirect* folded in
                | function_def
                | ksh_function_def

                # YSH extensions
                | proc NAME ...
                | typed proc NAME ...
                | func NAME ...
                | const ...
                | var ...
                | setglobal ...
                | setref ...
                | setvar ...
                | call EXPR
                | = EXPR
                ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1
        """
        # Keywords like 'then' / 'do' / 'done' can't BEGIN a command
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle.  Code
            # inside procs should be YSH, full stop.  That means ysh:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                # 2024-02: This avoids bad syntax errors if you type YSH code
                # into OSH
                # proc p (x) { echo hi } would actually be parsed as a
                # command.Simple!  Shell compatibility: quote 'proc'
                p_die("proc is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Typed:  # typed proc p () { ... }
            self._SetNext()
            self._GetWord()
            if self.c_id != Id.KW_Proc:
                p_die("Expected 'proc' after 'typed'", loc.Word(self.cur_word))

            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                p_die("typed is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func():
                return self.ParseYshFunc()
            else:
                # Same reasoning as above, for 'proc'
                p_die("func is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
            p_die("const can't be inside proc or func. Use var instead.",
                  loc.Word(self.cur_word))

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Check each declared name against the static var checker
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name, lhs.left)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.KW_Call, Id.Lit_Equals):
            # = 42 + a[i]
            # call mylist->append('x')

            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            # ensured by Kind.Word
            cur_word = cast(CompoundWord, self.cur_word)

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    # Bare assignment: a single valid var name followed by '='
                    if (match.IsValidVarName(lexer.LazyStr(tok)) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        if (len(self.hay_attrs_stack) and
                                self.hay_attrs_stack[-1]):
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here.  Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(
                                None,
                                [NameType(tok, lexer.TokenVal(tok), None)],
                                enode)
                        else:
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy
2450
2451 def ParsePipeline(self):
2452 # type: () -> command_t
2453 """
2454 pipeline : Bang? command ( '|' newline_ok command )* ;
2455 """
2456 negated = None # type: Optional[Token]
2457
2458 self._GetWord()
2459 if self.c_id == Id.KW_Bang:
2460 negated = word_.AsKeywordToken(self.cur_word)
2461 self._SetNext()
2462
2463 child = self.ParseCommand()
2464 assert child is not None
2465
2466 children = [child]
2467
2468 self._GetWord()
2469 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2470 if negated is not None:
2471 node = command.Pipeline(negated, children, [])
2472 return node
2473 else:
2474 return child # no pipeline
2475
2476 # | or |&
2477 ops = [] # type: List[Token]
2478 while True:
2479 op = word_.AsOperatorToken(self.cur_word)
2480 ops.append(op)
2481
2482 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2483 self._NewlineOk()
2484
2485 child = self.ParseCommand()
2486 children.append(child)
2487
2488 self._GetWord()
2489 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2490 break
2491
2492 return command.Pipeline(negated, children, ops)
2493
2494 def ParseAndOr(self):
2495 # type: () -> command_t
2496 self._GetWord()
2497 if self.c_id == Id.Lit_TDot:
2498 # We got '...', so parse in multiline mode
2499 self._SetNext()
2500 with word_.ctx_Multiline(self.w_parser):
2501 return self._ParseAndOr()
2502
2503 # Parse in normal mode, not multiline
2504 return self._ParseAndOr()
2505
2506 def _ParseAndOr(self):
2507 # type: () -> command_t
2508 """
2509 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2510 | pipeline
2511
2512 Note that it is left recursive and left associative. We parse it
2513 iteratively with a token of lookahead.
2514 """
2515 child = self.ParsePipeline()
2516 assert child is not None
2517
2518 self._GetWord()
2519 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2520 return child
2521
2522 ops = [] # type: List[Token]
2523 children = [child]
2524
2525 while True:
2526 ops.append(word_.AsOperatorToken(self.cur_word))
2527
2528 self._SetNext() # skip past || &&
2529 self._NewlineOk()
2530
2531 child = self.ParsePipeline()
2532 children.append(child)
2533
2534 self._GetWord()
2535 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2536 break
2537
2538 return command.AndOr(children, ops)
2539
2540 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2541
2542 # At the top level, we execute after every line, e.g. to
2543 # - process alias (a form of dynamic parsing)
2544 # - process 'exit', because invalid syntax might appear after it
2545
2546 # On the other hand, for a while loop body, we parse the whole thing at once,
2547 # and then execute it. We don't want to parse it over and over again!
2548
2549 # COMPARE
2550 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2551 # command_term : and_or (trailer and_or)* ; # CHILDREN
2552
    def _ParseCommandLine(self):
        # type: () -> command_t
        """
        command_line : and_or (sync_op and_or)* trailer? ;
        trailer      : sync_op newline_ok
                     | NEWLINES;
        sync_op      : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior.  We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively.  Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return.  (We're only parsing a single
              line.)
           b. If there's a sync_op, process it.  Then look for a newline and
              return.  Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Keep the terminator token (; or &) by wrapping in Sentence
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die(
                    'Invalid word while parsing command line (%s)' %
                    Id_str(self.c_id), loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
2608
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term : and_or (trailer and_or)* ;
        trailer      : sync_op newline_ok
                     | NEWLINES;
        sync_op      : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [
            self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi,
            Id.Op_SemiAmp, Id.Op_DSemiAmp
        ]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi
        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Keep the terminator token by wrapping in Sentence
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2691
    def _ParseCommandList(self):
        # type: () -> command.CommandList
        """
        command_list : newline_ok command_term trailer? ;

        This one is called by all the compound commands.  It's basically a
        command block.

        NOTE: Rather than translating the CFG directly, the code follows a
        style more like this: (and_or trailer)+.  It makes capture easier.
        """
        self._NewlineOk()
        return self._ParseCommandTerm()
2706
    def ParseLogicalLine(self):
        # type: () -> Optional[command_t]
        """Parse a single line for main_loop.

        A wrapper around _ParseCommandLine().  Similar but not identical to
        _ParseCommandList() and ParseCommandSub().

        Returns:
          The parsed command, or None on Eof_Real (the main loop then checks
          for pending here docs).

        Raises:
          ParseError
        """
        self._NewlineOk()
        self._GetWord()
        if self.c_id == Id.Eof_Real:
            return None  # main loop checks for here docs
        node = self._ParseCommandLine()
        return node
2723
2724 def ParseInteractiveLine(self):
2725 # type: () -> parse_result_t
2726 """Parse a single line for Interactive main_loop.
2727
2728 Different from ParseLogicalLine because newlines are handled differently.
2729
2730 Raises:
2731 ParseError
2732 """
2733 self._GetWord()
2734 if self.c_id == Id.Op_Newline:
2735 return parse_result.EmptyLine
2736 if self.c_id == Id.Eof_Real:
2737 return parse_result.Eof
2738
2739 node = self._ParseCommandLine()
2740 return parse_result.Node(node)
2741
2742 def ParseCommandSub(self):
2743 # type: () -> command_t
2744 """Parse $(echo hi) and `echo hi` for word_parse.py.
2745
2746 They can have multiple lines, like this: echo $( echo one echo
2747 two )
2748 """
2749 self._NewlineOk()
2750
2751 self._GetWord()
2752 if self.c_kind == Kind.Eof: # e.g. $()
2753 return command.NoOp
2754
2755 c_list = self._ParseCommandTerm()
2756 if len(c_list.children) == 1:
2757 return c_list.children[0]
2758 else:
2759 return c_list
2760
2761 def CheckForPendingHereDocs(self):
2762 # type: () -> None
2763 # NOTE: This happens when there is no newline at the end of a file, like
2764 # osh -c 'cat <<EOF'
2765 if len(self.pending_here_docs):
2766 node = self.pending_here_docs[0] # Just show the first one?
2767 h = cast(redir_param.HereDoc, node.arg)
2768 p_die('Unterminated here doc began here', loc.Word(h.here_begin))
2769
2770
2771# vim: sw=4