| 1 | # Copyright 2016 Andy Chu. All rights reserved.
|
| 2 | # Licensed under the Apache License, Version 2.0 (the "License");
|
| 3 | # you may not use this file except in compliance with the License.
|
| 4 | # You may obtain a copy of the License at
|
| 5 | #
|
| 6 | # http://www.apache.org/licenses/LICENSE-2.0
|
| 7 | """
|
| 8 | cmd_parse.py - Parse high level shell commands.
|
| 9 | """
|
| 10 | from __future__ import print_function
|
| 11 |
|
| 12 | from _devbuild.gen import grammar_nt
|
| 13 | from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind, Kind_str
|
| 14 | from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
|
| 15 | from _devbuild.gen.syntax_asdl import (
|
| 16 | loc,
|
| 17 | SourceLine,
|
| 18 | source,
|
| 19 | parse_result,
|
| 20 | parse_result_t,
|
| 21 | command,
|
| 22 | command_t,
|
| 23 | condition,
|
| 24 | condition_t,
|
| 25 | for_iter,
|
| 26 | ArgList,
|
| 27 | BraceGroup,
|
| 28 | LiteralBlock,
|
| 29 | CaseArm,
|
| 30 | case_arg,
|
| 31 | IfArm,
|
| 32 | pat,
|
| 33 | pat_t,
|
| 34 | Redir,
|
| 35 | redir_param,
|
| 36 | redir_loc,
|
| 37 | redir_loc_t,
|
| 38 | word_e,
|
| 39 | word_t,
|
| 40 | CompoundWord,
|
| 41 | Token,
|
| 42 | word_part_e,
|
| 43 | word_part_t,
|
| 44 | rhs_word,
|
| 45 | rhs_word_t,
|
| 46 | sh_lhs,
|
| 47 | sh_lhs_t,
|
| 48 | AssignPair,
|
| 49 | EnvPair,
|
| 50 | ParsedAssignment,
|
| 51 | assign_op_e,
|
| 52 | NameType,
|
| 53 | proc_sig,
|
| 54 | proc_sig_e,
|
| 55 | Proc,
|
| 56 | Func,
|
| 57 | )
|
| 58 | from core import alloc
|
| 59 | from core import error
|
| 60 | from core.error import p_die
|
| 61 | from core import ui
|
| 62 | from frontend import consts
|
| 63 | from frontend import lexer
|
| 64 | from frontend import location
|
| 65 | from frontend import match
|
| 66 | from frontend import reader
|
| 67 | from mycpp.mylib import log
|
| 68 | from osh import braces
|
| 69 | from osh import bool_parse
|
| 70 | from osh import word_
|
| 71 |
|
| 72 | from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
|
| 73 | if TYPE_CHECKING:
|
| 74 | from core.alloc import Arena
|
| 75 | from core import optview
|
| 76 | from frontend.lexer import Lexer
|
| 77 | from frontend.parse_lib import ParseContext, AliasesInFlight
|
| 78 | from frontend.reader import _Reader
|
| 79 | from osh.word_parse import WordParser
|
| 80 |
|
_ = Kind_str  # silence "unused import"; Kind_str is handy for debug prints

# Byte values used when checking the character that precedes '(' in a
# YSH proc call, e.g. 'f (x)' requires a space or tab before the paren.
TAB_CH = 9  # ord('\t')
SPACE_CH = 32  # ord(' ')
|
| 85 |
|
| 86 |
|
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the body lines of a here doc, up to and including its terminator.

    All lines are read eagerly rather than parsed line-by-line, because the
    body may contain multi-line constructs such as:

        cat <<EOF
        1 $(echo 2
        echo 3) 4
        EOF

    Returns:
      (body_lines, terminator_line) - each entry is a (SourceLine, offset)
      pair, where offset skips leading tabs stripped by the <<- operator.
    """
    body = []  # type: List[Tuple[SourceLine, int]]
    found_delim = None  # type: Tuple[SourceLine, int]
    strip_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF before the terminator
            # bash merely warns about an unterminated here doc.  We make it
            # fatal to be strict, and because it confuses the reporting of
            # other errors.  Blame the << operator itself.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # With <<-, strip ALL leading tabs -- not spaces, and not just the
        # first tab.
        start_offset = 0
        if strip_tabs:
            num_chars = len(line)
            j = 0
            while j < num_chars and line[j] == '\t':
                j += 1
            start_offset = j

        if line[start_offset:].rstrip() == delimiter:
            found_delim = (src_line, start_offset)
            break

        body.append((src_line, start_offset))

    return body, found_delim
|
| 137 |
|
| 138 |
|
| 139 | def _MakeLiteralHereLines(
|
| 140 | here_lines, # type: List[Tuple[SourceLine, int]]
|
| 141 | arena, # type: Arena
|
| 142 | do_lossless, # type: bool
|
| 143 | ):
|
| 144 | # type: (...) -> List[word_part_t]
|
| 145 | """Create a Token for each line.
|
| 146 |
|
| 147 | For <<'EOF' and <<-'EOF' - single quoted rule
|
| 148 |
|
| 149 | <<- has non-zero start_offset
|
| 150 | """
|
| 151 | # less precise type, because List[T] is an invariant type
|
| 152 | tokens = [] # type: List[word_part_t]
|
| 153 | for src_line, start_offset in here_lines:
|
| 154 |
|
| 155 | # Maintain lossless invariant for STRIPPED tabs: add a Token to the
|
| 156 | # arena invariant, but don't refer to it.
|
| 157 | if do_lossless: # avoid garbage, doesn't affect correctness
|
| 158 | arena.NewToken(Id.Ignored_HereTabs, 0, start_offset, src_line,
|
| 159 | None)
|
| 160 |
|
| 161 | t = arena.NewToken(Id.Lit_Chars, start_offset, len(src_line.content),
|
| 162 | src_line, src_line.content[start_offset:])
|
| 163 | tokens.append(t)
|
| 164 | return tokens
|
| 165 |
|
| 166 |
|
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in the attributes of a pending here doc node."""
    h = cast(redir_param.HereDoc, r.arg)

    # POSIX: "If any character in word is quoted, the delimiter shall be
    # formed by performing quote removal on word, and the here-document lines
    # shall not be expanded.  Otherwise, the delimiter shall be the word
    # itself."
    # NOTE: \EOF counts as quoted, and so does E\OF.
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - one literal token per line
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - parse the body as a word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    if parse_ctx.do_lossless:  # avoid garbage; doesn't affect correctness
        # Lossless invariant for tabs stripped from the terminator line:
        # record a token in the arena, but don't reference it.
        arena.NewToken(Id.Ignored_HereTabs, 0, start_offset, end_line, None)

    # Token for the end terminator, which keeps the invariant that tokens
    # "add up" to the whole source.
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line, '')
|
| 203 |
|
| 204 |
|
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (see DetectShAssignment).

    Handles three LHS shapes:
      s=1     -> sh_lhs.Name
      a[i]=1  -> sh_lhs.UnparsedIndex in lossless mode (index kept as text)
      a[x+1]=1 -> sh_lhs.IndexedName (index parsed as arithmetic now)
    """

    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        # The token text is 'name=' or 'name+='; slice off the operator.
        if lexer.IsPlusEquals(left_token):
            var_name = lexer.TokenSliceRight(left_token, -2)
            op = assign_op_e.PlusEqual
        else:
            var_name = lexer.TokenSliceRight(left_token, -1)
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        # Lossless mode: keep the index expression as an UNPARSED string so
        # the original source text can be reproduced exactly.
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # Extract the raw text between 'a[' and ']='.
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            # Multi-line index expressions aren't supported here.
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS; reparse the index with the proper source
        # attribution so errors point at the original location.
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        # Nothing after the '=', e.g. s=
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        # tmp2 is for intersection of C++/MyPy type systems
        tmp2 = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(tmp2)
        rhs = tmp2

    return AssignPair(left_token, lhs, op, rhs)
|
| 279 |
|
| 280 |
|
| 281 | def _AppendMoreEnv(preparsed_list, more_env):
|
| 282 | # type: (List[ParsedAssignment], List[EnvPair]) -> None
|
| 283 | """Helper to modify a SimpleCommand node.
|
| 284 |
|
| 285 | Args:
|
| 286 | preparsed: a list of 4-tuples from DetectShAssignment
|
| 287 | more_env: a list to append env_pairs to
|
| 288 | """
|
| 289 | for preparsed in preparsed_list:
|
| 290 | left_token = preparsed.left
|
| 291 |
|
| 292 | if left_token.id != Id.Lit_VarLike: # can't be a[x]=1
|
| 293 | p_die(
|
| 294 | "Environment binding shouldn't look like an array assignment",
|
| 295 | left_token)
|
| 296 |
|
| 297 | if lexer.IsPlusEquals(left_token):
|
| 298 | p_die('Expected = in environment binding, got +=', left_token)
|
| 299 |
|
| 300 | var_name = lexer.TokenSliceRight(left_token, -1)
|
| 301 |
|
| 302 | parts = preparsed.w.parts
|
| 303 | n = len(parts)
|
| 304 | offset = preparsed.part_offset
|
| 305 | if offset == n:
|
| 306 | val = rhs_word.Empty # type: rhs_word_t
|
| 307 | else:
|
| 308 | val = CompoundWord(parts[offset:])
|
| 309 |
|
| 310 | more_env.append(EnvPair(left_token, var_name, val))
|
| 311 |
|
| 312 |
|
| 313 | def _SplitSimpleCommandPrefix(words):
|
| 314 | # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
|
| 315 | """Second pass of SimpleCommand parsing: look for assignment words."""
|
| 316 | preparsed_list = [] # type: List[ParsedAssignment]
|
| 317 | suffix_words = [] # type: List[CompoundWord]
|
| 318 |
|
| 319 | done_prefix = False
|
| 320 | for w in words:
|
| 321 | if done_prefix:
|
| 322 | suffix_words.append(w)
|
| 323 | continue
|
| 324 |
|
| 325 | left_token, close_token, part_offset = word_.DetectShAssignment(w)
|
| 326 | if left_token:
|
| 327 | preparsed_list.append(
|
| 328 | ParsedAssignment(left_token, close_token, part_offset, w))
|
| 329 | else:
|
| 330 | done_prefix = True
|
| 331 | suffix_words.append(w)
|
| 332 |
|
| 333 | return preparsed_list, suffix_words
|
| 334 |
|
| 335 |
|
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        redirects,  # type: List[Redir]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Assemble a command.Simple node from its already-parsed pieces."""

    # FOO=(1 2 3) ls is not allowed.
    for p in preparsed_list:
        if word_.HasArrayPart(p.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(p.w))

    # NOTE: It would be possible to add this check back.  But it already
    # happens at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on
    # echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # {a,b,c}   # Use { before brace detection
    # ~/bin/ls  # Use ~ before tilde detection
    first_part = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(first_part)

    # Only brace DETECTION happens here, not brace EXPANSION, so bash's
    # behavior of {~bob,~jane}/src working can't be implemented -- we only
    # have a BracedTree.  Documented in spec/brace-expansion.
    # NOTE: Technically expansion could be done outside of 'oshc translate',
    # but it doesn't seem worth it.
    detected = braces.BraceDetectAll(suffix_words)
    final_words = word_.TildeDetectAll(detected)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # do_fork by default
    return command.Simple(blame_tok, more_env, final_words, redirects,
                          typed_args, block, True)
|
| 381 |
|
| 382 |
|
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        """Initialize empty scope stacks.

        self.tokens holds a blame token per open scope ('proc', 'func', or a
        shell function's token); self.names holds the variables declared in
        the corresponding scope.
        """
        self.tokens = []  # type: List[Token]
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when entering a shell function, proc, or func.

        Bash allows nesting, but it's confusing because it's the same as two
        functions at the top level:

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens) != 0:
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        new_scope = {}  # type: Dict[str, Id_t]
        self.names.append(new_scope)

    def Pop(self):
        # type: () -> None
        """Leave the scope opened by the matching Push()."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x:
          error if x was already declared in this scope
        setvar x:
          error if x is not declared
        setglobal x:
          no static errors are possible; we'd need all of these conditions
          to know the names statically:
          - no 'source'
          - shopt -u copy_env
          - AND use lib has to be static

        Bare assignment in Hay is checked dynamically, not here -- Hay builds
        up data imperatively, and can be type checked LATER, right before
        main():

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var and
        # setvar are essentially the same there.
        if len(self.names) == 0:
            return

        scope = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in scope:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                scope[var_name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if var_name not in scope:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
|
| 471 |
|
| 472 |
|
class ctx_VarChecker(object):
    """Context manager: push a VarChecker scope on entry, pop it on exit."""

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        var_checker.Push(blame_tok)  # may p_die on illegal nesting
        self.var_checker = var_checker

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
|
| 488 |
|
class ctx_CmdMode(object):
    """Context manager: temporarily switch the parser's cmd_mode."""

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.cmd_parse = cmd_parse
        self.prev_cmd_mode = cmd_parse.cmd_mode  # saved for restore on exit
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Restore whatever mode was in effect before entry.
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
|
| 505 |
|
# Keywords that can only CONTINUE or CLOSE a compound command; they never
# start a new command, so seeing one means the current construct ends.
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
|
| 510 |
|
| 511 |
|
| 512 | class CommandParser(object):
|
| 513 | """Recursive descent parser derived from POSIX shell grammar.
|
| 514 |
|
| 515 | This is a BNF grammar:
|
| 516 | https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
|
| 517 |
|
| 518 | - Augmented with both bash/OSH and YSH constructs.
|
| 519 |
|
| 520 | - We use regex-like iteration rather than recursive references
|
| 521 | ? means optional (0 or 1)
|
| 522 | * means 0 or more
|
| 523 | + means 1 or more
|
| 524 |
|
| 525 | - Keywords are spelled in Caps:
|
| 526 | If Elif Case
|
| 527 |
|
| 528 | - Operator tokens are quoted:
|
| 529 | '(' '|'
|
| 530 |
|
| 531 | or can be spelled directly if it matters:
|
| 532 |
|
| 533 | Op_LParen Op_Pipe
|
| 534 |
|
| 535 | - Non-terminals are snake_case:
|
| 536 | brace_group subshell
|
| 537 |
|
| 538 | Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
|
| 539 | the production should be in the method docstrings, e.g.
|
| 540 |
|
| 541 | def ParseSubshell():
|
| 542 | "
|
| 543 | subshell : '(' compound_list ')'
|
| 544 |
|
| 545 | Looking at Op_LParen # Comment to say how this method is called
|
| 546 | "
|
| 547 |
|
| 548 | The grammar may be factored to make parsing easier.
|
| 549 | """
|
| 550 |
|
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parse-time state, including aliases
          parse_opts: option view (parse_brace, parse_at, ...)
          w_parser: word parser used for normal parsing
          lexer: for pushing hints and lookahead to '('
          line_reader: source of lines, also used for here doc bodies
          eof_id: Id that ends parsing, e.g. Eof_Real
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x=42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which
        # means that two 'proc foo' -- inside a command sub and outside --
        # don't conflict, because they use different CommandParser instances.
        # I think this is OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
|
| 592 |
|
| 593 | # Init_() function for "keyword arg"
|
| 594 | def Init_AliasesInFlight(self, aliases_in_flight):
|
| 595 | # type: (AliasesInFlight) -> None
|
| 596 | self.aliases_in_flight = aliases_in_flight
|
| 597 |
|
| 598 | def Reset(self):
|
| 599 | # type: () -> None
|
| 600 | """Reset our own internal state.
|
| 601 |
|
| 602 | Called by the interactive loop.
|
| 603 | """
|
| 604 | # Cursor state set by _GetWord()
|
| 605 | self.next_lex_mode = lex_mode_e.ShCommand
|
| 606 | self.cur_word = None # type: word_t # current word
|
| 607 | self.c_kind = Kind.Undefined
|
| 608 | self.c_id = Id.Undefined_Tok
|
| 609 |
|
| 610 | self.pending_here_docs = [
|
| 611 | ] # type: List[Redir] # should have HereLiteral arg
|
| 612 |
|
| 613 | def ResetInputObjects(self):
|
| 614 | # type: () -> None
|
| 615 | """Reset the internal state of our inputs.
|
| 616 |
|
| 617 | Called by the interactive loop.
|
| 618 | """
|
| 619 | self.w_parser.Reset()
|
| 620 | self.lexer.ResetInputObjects()
|
| 621 | self.line_reader.Reset()
|
| 622 |
|
| 623 | def _SetNext(self):
|
| 624 | # type: () -> None
|
| 625 | """Call this when you no longer need the current token.
|
| 626 |
|
| 627 | This method is lazy. A subsequent call to _GetWord() will
|
| 628 | actually read the next Token.
|
| 629 | """
|
| 630 | self.next_lex_mode = lex_mode_e.ShCommand
|
| 631 |
|
| 632 | def _SetNextBrack(self):
|
| 633 | # type: () -> None
|
| 634 | self.next_lex_mode = lex_mode_e.ShCommandBrack
|
| 635 |
|
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # A newline "activates" all pending here docs: their
                    # bodies are read from the line reader now.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            self.c_id = word_.CommandId(self.cur_word)
            # Marks the word as "fulfilled" until the next _SetNext().
            self.next_lex_mode = lex_mode_e.Undefined
|
| 664 |
|
| 665 | def _Eat(self, c_id, msg=None):
|
| 666 | # type: (Id_t, Optional[str]) -> word_t
|
| 667 | """Consume a word of a type, maybe showing a custom error message.
|
| 668 |
|
| 669 | Args:
|
| 670 | c_id: the Id we expected
|
| 671 | msg: improved error message
|
| 672 | """
|
| 673 | self._GetWord()
|
| 674 | if self.c_id != c_id:
|
| 675 | if msg is None:
|
| 676 | msg = 'Expected word type %s, got %s' % (
|
| 677 | ui.PrettyId(c_id), ui.PrettyId(self.c_id))
|
| 678 | p_die(msg, loc.Word(self.cur_word))
|
| 679 |
|
| 680 | skipped = self.cur_word
|
| 681 | self._SetNext()
|
| 682 | return skipped
|
| 683 |
|
| 684 | def _NewlineOk(self):
|
| 685 | # type: () -> None
|
| 686 | """Check for optional newline and consume it."""
|
| 687 | self._GetWord()
|
| 688 | if self.c_id == Id.Op_Newline:
|
| 689 | self._SetNext()
|
| 690 |
|
| 691 | def _AtSecondaryKeyword(self):
|
| 692 | # type: () -> bool
|
| 693 | self._GetWord()
|
| 694 | if self.c_id in SECONDARY_KEYWORDS:
|
| 695 | return True
|
| 696 | return False
|
| 697 |
|
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse one redirect at the cursor.

        Handles file redirects (>out, 3>out, {fd}>out) and here docs
        (<<EOF, <<-EOF).  A here doc's body is NOT read here; the node is
        queued on self.pending_here_docs and filled in at the next newline.
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #      >out
        #     3>out
        #  {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
        # possibly "unify" the IDs by subtracting a constant like 8 or 16?

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':
            # {fd}>out - redirect to a named descriptor variable
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():
            # 3>out or 10>out - explicit descriptor number (1 or 2 digits)
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:
            # >out - use the operator's default descriptor, e.g. 1 for >
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
|
| 760 |
|
| 761 | def _ParseRedirectList(self):
|
| 762 | # type: () -> List[Redir]
|
| 763 | """Try parsing any redirects at the cursor.
|
| 764 |
|
| 765 | This is used for blocks only, not commands.
|
| 766 | """
|
| 767 | redirects = [] # type: List[Redir]
|
| 768 | while True:
|
| 769 | # This prediction needs to ONLY accept redirect operators. Should we
|
| 770 | # make them a separate Kind?
|
| 771 | self._GetWord()
|
| 772 | if self.c_kind != Kind.Redir:
|
| 773 | break
|
| 774 |
|
| 775 | node = self.ParseRedirect()
|
| 776 | redirects.append(node)
|
| 777 | self._SetNext()
|
| 778 |
|
| 779 | return redirects
|
| 780 |
|
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

        simple_command   : cmd_prefix cmd_word cmd_suffix
                         | cmd_prefix cmd_word
                         | cmd_prefix
                         | cmd_name cmd_suffix
                         | cmd_name
                         ;
        cmd_name         : WORD                   /* Apply rule 7a */
                         ;
        cmd_word         : WORD                   /* Apply rule 7b */
                         ;
        cmd_prefix       :            io_redirect
                         | cmd_prefix io_redirect
                         |            ASSIGNMENT_WORD
                         | cmd_prefix ASSIGNMENT_WORD
                         ;
        cmd_suffix       :            io_redirect
                         | cmd_suffix io_redirect
                         |            WORD
                         | cmd_suffix WORD

        YSH grammar:

        simple_command =
          cmd_prefix* word+ typed_args? BraceGroup? cmd_suffix*

        typed_args =
          '(' arglist ')'
        | '[' arglist ']'

        Notably, redirects shouldn't appear between typed args and
        BraceGroup.
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()
            if self.c_kind == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif self.c_kind == Kind.Word:
                if self.parse_opts.parse_brace():
                    # Treat { and } more like operators
                    if self.c_id == Id.Lit_LBrace:
                        if self.allow_block:  # Disabled for if/while condition, etc.

                            # allow x = 42
                            self.hay_attrs_stack.append(first_word_caps)
                            brace_group = self.ParseBraceGroup()

                            # So we can get the source code back later
                            lines = self.arena.SaveLinesAndDiscard(
                                brace_group.left, brace_group.right)
                            block = LiteralBlock(brace_group, lines)

                            self.hay_attrs_stack.pop()

                            if 0:
                                print('--')
                                block.PrettyPrint()
                                print('\n--')
                        # A block always ends the simple command.
                        break
                    elif self.c_id == Id.Lit_RBrace:
                        # Another thing: { echo hi }
                        # We're DONE!!!
                        break

                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            elif self.c_id == Id.Op_LParen:
                # 1. Check that there's a preceding space
                prev_byte = self.lexer.ByteLookBack()
                if prev_byte not in (SPACE_CH, TAB_CH):
                    if self.parse_opts.parse_at():
                        p_die('Space required before (',
                              loc.Word(self.cur_word))
                    else:
                        # inline func call like @sorted(x) is invalid in OSH,
                        # but the solution isn't a space
                        p_die(
                            'Unexpected left paren (might need a space before it)',
                            loc.Word(self.cur_word))

                # 2. Check that it's not ().  We disallow this because it's a
                # no-op and there could be confusion with shell func defs.
                # For some reason we need to call lexer.LookPastSpace, not
                # w_parser.LookPastSpace.  I think this is because we're at (,
                # which is an operator token.  All the other cases are like
                # 'x=', which is PART of a word, and we don't know if it will
                # end.
                next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
                if next_id == Id.Op_RParen:
                    p_die('Empty arg list not allowed',
                          loc.Word(self.cur_word))

                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_eager_arglist)

            elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
                typed_args = self.w_parser.ParseProcCallArgs(
                    grammar_nt.ysh_lazy_arglist)

            else:
                break

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1
        return redirects, words, typed_args, block
|
| 921 |
|
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node, or None.

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out a
        better place.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.

        Returns:
          A command node if any aliases were expanded, or None otherwise.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        # Pieces of the expanded source text, joined below
        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        # Walk words left to right, replacing each word that names an alias
        # with its expansion, until a non-alias (or quoted) word stops us.
        while i < n:
            w = words[i]

            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want to
            # prevent expansion of the second word in 'echo echo', so we add
            # 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own Arena.
        # This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        # Re-parse the expanded text with a fresh CommandParser that shares
        # our aliases_in_flight list (to keep the loop-prevention state).
        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error
                    # We don't need more handling here
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
|
| 1060 |
|
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar (TODO: port to
        grammar/Shell.g)

        io_file : '<' filename
                | LESSAND filename
                  ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in
        # shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a
        # "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects.  Append them to separate lists.
        2) Look for the first non-assignment word.  If it's declare, etc.,
           then keep parsing words AND assign words.  Otherwise, just parse
           words.
        3) If there are no non-assignment words, then it's a global
           assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have them of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array
        literal in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted.
        e.g. <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # Location to blame if typed args/blocks appear where they can't
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g.  >out.txt  # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # Build a Simple command with no words, only redirects
            simple = command.Simple.CreateNull()
            simple.blame_tok = redirects[0].op
            simple.more_env = []
            simple.words = []
            simple.redirects = redirects
            return simple

        # Split leading NAME=val words from the rest of the command
        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            #                and inside Hay Attr blocks
            # But allow X=Y at the top level
            #   for interactive use foo=bar
            #   for global constants GLOBAL=~/src
            #     because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion.  We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            return command.ShAssignment(left_tok, pairs, redirects)

        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if kw_token.id == Id.ControlFlow_Return:
                # return x - inside procs and shell functions
                # return (x) - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    # Typed return (x): exactly one positional, no named args
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)

            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            # Attach the token for errors.  (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or
        # block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, redirects, more_env)
                return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, redirects,
                                  typed_args, block)
        return node
|
| 1240 |
|
| 1241 | def ParseBraceGroup(self):
|
| 1242 | # type: () -> BraceGroup
|
| 1243 | """
|
| 1244 | Original:
|
| 1245 | brace_group : LBrace command_list RBrace ;
|
| 1246 |
|
| 1247 | YSH:
|
| 1248 | brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;
|
| 1249 |
|
| 1250 | The doc comment can only occur if there's a newline.
|
| 1251 | """
|
| 1252 | ate = self._Eat(Id.Lit_LBrace)
|
| 1253 | left = word_.BraceToken(ate)
|
| 1254 |
|
| 1255 | doc_word = None # type: word_t
|
| 1256 | self._GetWord()
|
| 1257 | if self.c_id == Id.Op_Newline:
|
| 1258 | self._SetNext()
|
| 1259 | # Set a flag so we don't skip over ###
|
| 1260 | with word_.ctx_EmitDocToken(self.w_parser):
|
| 1261 | self._GetWord()
|
| 1262 |
|
| 1263 | if self.c_id == Id.Ignored_Comment:
|
| 1264 | doc_word = self.cur_word
|
| 1265 | self._SetNext()
|
| 1266 |
|
| 1267 | # Id.Ignored_Comment means it's a Token, or None
|
| 1268 | doc_token = cast(Token, doc_word)
|
| 1269 |
|
| 1270 | c_list = self._ParseCommandList()
|
| 1271 |
|
| 1272 | ate = self._Eat(Id.Lit_RBrace)
|
| 1273 | right = word_.BraceToken(ate)
|
| 1274 |
|
| 1275 | # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
|
| 1276 | # would allow us to revert this back to None, which was changed in
|
| 1277 | # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
|
| 1278 | # behavior saves allocations, but is less type safe.
|
| 1279 | return BraceGroup(left, doc_token, c_list.children, [],
|
| 1280 | right) # no redirects yet
|
| 1281 |
|
| 1282 | def ParseDoGroup(self):
|
| 1283 | # type: () -> command.DoGroup
|
| 1284 | """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
|
| 1285 |
|
| 1286 | do_group : Do command_list Done ; /* Apply rule 6 */
|
| 1287 | """
|
| 1288 | ate = self._Eat(Id.KW_Do)
|
| 1289 | do_kw = word_.AsKeywordToken(ate)
|
| 1290 |
|
| 1291 | c_list = self._ParseCommandList() # could be anything
|
| 1292 |
|
| 1293 | ate = self._Eat(Id.KW_Done)
|
| 1294 | done_kw = word_.AsKeywordToken(ate)
|
| 1295 |
|
| 1296 | return command.DoGroup(do_kw, c_list.children, done_kw)
|
| 1297 |
|
| 1298 | def ParseForWords(self):
|
| 1299 | # type: () -> Tuple[List[CompoundWord], Optional[Token]]
|
| 1300 | """
|
| 1301 | for_words : WORD* for_sep
|
| 1302 | ;
|
| 1303 | for_sep : ';' newline_ok
|
| 1304 | | NEWLINES
|
| 1305 | ;
|
| 1306 | """
|
| 1307 | words = [] # type: List[CompoundWord]
|
| 1308 | # The span_id of any semi-colon, so we can remove it.
|
| 1309 | semi_tok = None # type: Optional[Token]
|
| 1310 |
|
| 1311 | while True:
|
| 1312 | self._GetWord()
|
| 1313 | if self.c_id == Id.Op_Semi:
|
| 1314 | tok = cast(Token, self.cur_word)
|
| 1315 | semi_tok = tok
|
| 1316 | self._SetNext()
|
| 1317 | self._NewlineOk()
|
| 1318 | break
|
| 1319 | elif self.c_id == Id.Op_Newline:
|
| 1320 | self._SetNext()
|
| 1321 | break
|
| 1322 | elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
|
| 1323 | break
|
| 1324 |
|
| 1325 | if self.cur_word.tag() != word_e.Compound:
|
| 1326 | # TODO: Can we also show a pointer to the 'for' keyword?
|
| 1327 | p_die('Invalid word in for loop', loc.Word(self.cur_word))
|
| 1328 |
|
| 1329 | w2 = cast(CompoundWord, self.cur_word)
|
| 1330 | words.append(w2)
|
| 1331 | self._SetNext()
|
| 1332 | return words, semi_tok
|
| 1333 |
|
| 1334 | def _ParseForExprLoop(self, for_kw):
|
| 1335 | # type: (Token) -> command.ForExpr
|
| 1336 | """
|
| 1337 | Shell:
|
| 1338 | for '((' init ';' cond ';' update '))' for_sep? do_group
|
| 1339 |
|
| 1340 | YSH:
|
| 1341 | for '((' init ';' cond ';' update '))' for_sep? brace_group
|
| 1342 | """
|
| 1343 | node = self.w_parser.ReadForExpression()
|
| 1344 | node.keyword = for_kw
|
| 1345 |
|
| 1346 | self._SetNext()
|
| 1347 |
|
| 1348 | self._GetWord()
|
| 1349 | if self.c_id == Id.Op_Semi:
|
| 1350 | self._SetNext()
|
| 1351 | self._NewlineOk()
|
| 1352 | elif self.c_id == Id.Op_Newline:
|
| 1353 | self._SetNext()
|
| 1354 | elif self.c_id == Id.KW_Do: # missing semicolon/newline allowed
|
| 1355 | pass
|
| 1356 | elif self.c_id == Id.Lit_LBrace: # does NOT require parse_brace
|
| 1357 | pass
|
| 1358 | else:
|
| 1359 | p_die('Invalid word after for expression', loc.Word(self.cur_word))
|
| 1360 |
|
| 1361 | if self.c_id == Id.Lit_LBrace:
|
| 1362 | node.body = self.ParseBraceGroup()
|
| 1363 | else:
|
| 1364 | node.body = self.ParseDoGroup()
|
| 1365 | return node
|
| 1366 |
|
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse 'for NAME... in ...' after the 'for' keyword was consumed.

        Handles the shell form (for x in a b; do), the implicit "$@" forms
        (for x; do / for x do), and the YSH expression form
        (for x in (expr) { ... }) with up to 3 loop variables.
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        num_iter_names = 0
        # First, collect the loop variable names, e.g. 'i, x' in 'for i, x in'
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    w.parts.pop()  # drop the trailing comma token

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        self._GetWord()
        if self.c_id == Id.KW_In:
            # Ideally we would want ( not 'in'.  But we still have to fix the
            # bug where we require a SPACE between in and (
            # for x in(y)  # should be accepted, but isn't

            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in
            if self.w_parser.LookPastSpace() == Id.Op_LParen:
                # YSH form: iterate over an expression, e.g. for x in (mylist)
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # For simplicity, we don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))
            else:
                # Shell form: iterate over a list of words
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                # Expand braces and detect tildes statically
                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
|
| 1470 |
|
| 1471 | def ParseFor(self):
|
| 1472 | # type: () -> command_t
|
| 1473 | """
|
| 1474 | TODO: Update the grammar
|
| 1475 |
|
| 1476 | for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
|
| 1477 | | For '((' ... TODO
|
| 1478 | """
|
| 1479 | ate = self._Eat(Id.KW_For)
|
| 1480 | for_kw = word_.AsKeywordToken(ate)
|
| 1481 |
|
| 1482 | self._GetWord()
|
| 1483 | if self.c_id == Id.Op_DLeftParen:
|
| 1484 | if not self.parse_opts.parse_dparen():
|
| 1485 | p_die("Bash for loops aren't allowed (parse_dparen)",
|
| 1486 | loc.Word(self.cur_word))
|
| 1487 |
|
| 1488 | # for (( i = 0; i < 10; i++)
|
| 1489 | n1 = self._ParseForExprLoop(for_kw)
|
| 1490 | n1.redirects = self._ParseRedirectList()
|
| 1491 | return n1
|
| 1492 | else:
|
| 1493 | # for x in a b; do echo hi; done
|
| 1494 | n2 = self._ParseForEachLoop(for_kw)
|
| 1495 | n2.redirects = self._ParseRedirectList()
|
| 1496 | return n2
|
| 1497 |
|
| 1498 | def _ParseConditionList(self):
|
| 1499 | # type: () -> condition_t
|
| 1500 | """
|
| 1501 | condition_list: command_list
|
| 1502 |
|
| 1503 | This is a helper to parse a condition list for if commands and while/until
|
| 1504 | loops. It will throw a parse error if there are no conditions in the list.
|
| 1505 | """
|
| 1506 | self.allow_block = False
|
| 1507 | commands = self._ParseCommandList()
|
| 1508 | self.allow_block = True
|
| 1509 |
|
| 1510 | if len(commands.children) == 0:
|
| 1511 | p_die("Expected a condition", loc.Word(self.cur_word))
|
| 1512 |
|
| 1513 | return condition.Shell(commands.children)
|
| 1514 |
|
| 1515 | def ParseWhileUntil(self, keyword):
|
| 1516 | # type: (Token) -> command.WhileUntil
|
| 1517 | """
|
| 1518 | while_clause : While command_list do_group ;
|
| 1519 | until_clause : Until command_list do_group ;
|
| 1520 | """
|
| 1521 | self._SetNext() # skip keyword
|
| 1522 |
|
| 1523 | if (self.parse_opts.parse_paren() and
|
| 1524 | self.w_parser.LookPastSpace() == Id.Op_LParen):
|
| 1525 | enode = self.w_parser.ParseYshExprForCommand()
|
| 1526 | cond = condition.YshExpr(enode) # type: condition_t
|
| 1527 | else:
|
| 1528 | cond = self._ParseConditionList()
|
| 1529 |
|
| 1530 | # NOTE: The LSTs will be different for OSH and YSH, but the execution
|
| 1531 | # should be unchanged. To be sure we should desugar.
|
| 1532 | self._GetWord()
|
| 1533 | if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
|
| 1534 | # while test -f foo {
|
| 1535 | body_node = self.ParseBraceGroup() # type: command_t
|
| 1536 | else:
|
| 1537 | body_node = self.ParseDoGroup()
|
| 1538 |
|
| 1539 | # no redirects yet
|
| 1540 | return command.WhileUntil(keyword, cond, body_node, None)
|
| 1541 |
|
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a POSIX case statement.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        # Inside a case arm, ')' closes the pattern list instead of a
        # subshell, so hint the lexer to translate the token
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # Collect the '|'-separated pattern words
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The arm body is optional, e.g. 'pat) ;;' has no commands
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id == Id.Op_DSemi:
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
|
| 1598 |
|
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case.

        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`.  We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # NOTE(review): this captures cur_word AFTER _SetNext(), i.e.
                # the token following the first pattern word -- confirm that
                # this is the intended blame location
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
|
| 1656 |
|
| 1657 | def ParseYshCase(self, case_kw):
|
| 1658 | # type: (Token) -> command.Case
|
| 1659 | """
|
| 1660 | ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;
|
| 1661 |
|
| 1662 | Looking at: token after 'case'
|
| 1663 | """
|
| 1664 | enode = self.w_parser.ParseYshExprForCommand()
|
| 1665 | to_match = case_arg.YshExpr(enode)
|
| 1666 |
|
| 1667 | ate = self._Eat(Id.Lit_LBrace)
|
| 1668 | arms_start = word_.BraceToken(ate)
|
| 1669 |
|
| 1670 | discriminant = self.w_parser.NewlineOkForYshCase()
|
| 1671 |
|
| 1672 | # Note: for now, zero arms are accepted, just like POSIX case $x in esac
|
| 1673 | arms = [] # type: List[CaseArm]
|
| 1674 | while discriminant != Id.Op_RBrace:
|
| 1675 | arm = self.ParseYshCaseArm(discriminant)
|
| 1676 | arms.append(arm)
|
| 1677 |
|
| 1678 | discriminant = self.w_parser.NewlineOkForYshCase()
|
| 1679 |
|
| 1680 | # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
|
| 1681 | # token is read as an Id.Op_RBrace, but we need to store this as a
|
| 1682 | # Id.Lit_RBrace.
|
| 1683 | ate = self._Eat(Id.Op_RBrace)
|
| 1684 | arms_end = word_.AsOperatorToken(ate)
|
| 1685 | arms_end.id = Id.Lit_RBrace
|
| 1686 |
|
| 1687 | return command.Case(case_kw, to_match, arms_start, arms, arms_end,
|
| 1688 | None)
|
| 1689 |
|
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse the POSIX-style 'case WORD in ... esac' construct.

        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;

        Args:
          case_kw: the already-consumed 'case' keyword token.
        """
        self._GetWord()
        w = self.cur_word
        if not self.parse_opts.parse_bare_word():
            # With parse_bare_word off, a statically-constant unquoted word is
            # rejected -- it's almost always a mistake to match a constant.
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string. You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:  # this is Kind.Word
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end,
                            None)
|
| 1741 |
|
| 1742 | def ParseCase(self):
|
| 1743 | # type: () -> command.Case
|
| 1744 | """
|
| 1745 | case_clause : old_case # from POSIX
|
| 1746 | | ysh_case
|
| 1747 | ;
|
| 1748 |
|
| 1749 | Looking at 'Case'
|
| 1750 | """
|
| 1751 | case_kw = word_.AsKeywordToken(self.cur_word)
|
| 1752 | self._SetNext() # past 'case'
|
| 1753 |
|
| 1754 | if self.w_parser.LookPastSpace() == Id.Op_LParen:
|
| 1755 | return self.ParseYshCase(case_kw)
|
| 1756 | else:
|
| 1757 | return self.ParseOldCase(case_kw)
|
| 1758 |
|
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the elif/else tail of a brace-style (YSH) if.

        if test -f foo {
          echo foo
        } elif test -f bar; test -f spam {
          # ^ we parsed up to here
          echo bar
        } else {
          echo none
        }

        Appends arms to if_node and may set if_node.else_action.
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                # elif (x > 0) { ... } -- expression condition
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Shell-style condition; allow_block is turned off while parsing
                # it -- NOTE(review): presumably so the '{' that follows is seen
                # as the arm body rather than a block argument; confirm.
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            self._GetWord()

            arm = IfArm(elif_kw, cond, None, body.children, [elif_kw.span_id])
            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
|
| 1792 |
|
    def _ParseYshIf(self, if_kw, cond):
        # type: (Token, condition_t) -> command.If
        """Parse the body of a brace-style (YSH) if, given its condition.

        if test -f foo {
                         # ^ we parsed up to here
          echo foo
        } elif test -f bar; test -f spam {
          echo bar
        } else {
          echo none
        }

        NOTE: If you do something like if test -n foo{, the parser keeps going,
        and the error is confusing because it doesn't point to the right place.

        I think we might need strict_brace so that foo{ is disallowed.  It has
        to be foo\{ or foo{a,b}.  Or just turn that on with parse_brace?  After
        you form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
        Lit_RBrace?  Maybe this is pre-parsing step in the WordParser?
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_node.if_kw = if_kw

        body1 = self.ParseBraceGroup()
        # Every arm has 1 spid, unlike shell-style
        # TODO: We could get the spids from the brace group.
        arm = IfArm(if_kw, cond, None, body1.children, [if_kw.span_id])

        if_node.arms.append(arm)

        self._GetWord()
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseYshElifElse(if_node)
        # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
        # spid because that's in the BraceGroup.
        return if_node
|
| 1828 |
|
    def _ParseElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse the elif/else tail of a shell-style if.

        else_part: (Elif command_list Then command_list)* Else command_list ;

        Appends arms to if_node, and sets if_node.else_kw / else_action.
        """
        arms = if_node.arms

        self._GetWord()
        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'elif'

            cond = self._ParseConditionList()

            ate = self._Eat(Id.KW_Then)
            then_kw = word_.AsKeywordToken(ate)

            body = self._ParseCommandList()
            # Shell-style arms carry 2 spids: the 'elif' and the 'then'
            arm = IfArm(elif_kw, cond, then_kw, body.children,
                        [elif_kw.span_id, then_kw.span_id])

            arms.append(arm)

        self._GetWord()
        if self.c_id == Id.KW_Else:
            else_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # past 'else'
            body = self._ParseCommandList()
            if_node.else_action = body.children
        else:
            else_kw = None  # no 'else' clause

        if_node.else_kw = else_kw
|
| 1862 |
|
    def ParseIf(self):
        # type: () -> command.If
        """
        if_clause        : If command_list Then command_list else_part? Fi ;

        open             : '{' | Then
        close            : '}' | Fi

        ysh_if           : If ( command_list | '(' expr ')' )
                           open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace

        Looking at 'if'.
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if self.parse_opts.parse_paren() and self.w_parser.LookPastSpace(
        ) == Id.Op_LParen:
            # if (x + 1) -- expression condition
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            # Brace-style body: hand off to the YSH variant
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children,
                    [if_kw.span_id, then_kw.span_id])
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
|
| 1913 |
|
| 1914 | def ParseTime(self):
|
| 1915 | # type: () -> command_t
|
| 1916 | """Time [-p] pipeline.
|
| 1917 |
|
| 1918 | According to bash help.
|
| 1919 | """
|
| 1920 | time_kw = word_.AsKeywordToken(self.cur_word)
|
| 1921 | self._SetNext() # skip time
|
| 1922 | pipeline = self.ParsePipeline()
|
| 1923 | return command.TimeBlock(time_kw, pipeline)
|
| 1924 |
|
    def ParseCompoundCommand(self):
        # type: () -> command_t
        """
        Refactoring: we put io_redirect* here instead of in function_body and
        command.

        compound_command : brace_group io_redirect*
                         | subshell io_redirect*
                         | for_clause io_redirect*
                         | while_clause io_redirect*
                         | until_clause io_redirect*
                         | if_clause io_redirect*
                         | case_clause io_redirect*

                         # bash extensions
                         | time_clause
                         | [[ BoolExpr ]]
                         | (( ArithExpr ))

        NOTE(review): distinct local names n1..n7 keep each branch's inferred
        node type separate -- presumably for MyPy/mycpp; confirm before merging.
        """
        self._GetWord()
        if self.c_id == Id.Lit_LBrace:
            n1 = self.ParseBraceGroup()
            n1.redirects = self._ParseRedirectList()
            return n1
        if self.c_id == Id.Op_LParen:
            n2 = self.ParseSubshell()
            n2.redirects = self._ParseRedirectList()
            return n2

        if self.c_id == Id.KW_For:
            # Note: Redirects parsed in this call. POSIX for and bash for (( have
            # redirects, but YSH for doesn't.
            return self.ParseFor()
        if self.c_id in (Id.KW_While, Id.KW_Until):
            keyword = word_.AsKeywordToken(self.cur_word)
            n3 = self.ParseWhileUntil(keyword)
            n3.redirects = self._ParseRedirectList()
            return n3

        if self.c_id == Id.KW_If:
            n4 = self.ParseIf()
            n4.redirects = self._ParseRedirectList()
            return n4
        if self.c_id == Id.KW_Case:
            n5 = self.ParseCase()
            n5.redirects = self._ParseRedirectList()
            return n5

        if self.c_id == Id.KW_DLeftBracket:
            n6 = self.ParseDBracket()
            n6.redirects = self._ParseRedirectList()
            return n6
        if self.c_id == Id.Op_DLeftParen:
            if not self.parse_opts.parse_dparen():
                p_die('You may want a space between parens (parse_dparen)',
                      loc.Word(self.cur_word))
            n7 = self.ParseDParen()
            n7.redirects = self._ParseRedirectList()
            return n7

        # bash extensions: no redirects
        if self.c_id == Id.KW_Time:
            return self.ParseTime()

        # Happens in function body, e.g. myfunc() oops
        p_die('Unexpected word while parsing compound command',
              loc.Word(self.cur_word))
        assert False  # for MyPy
|
| 1993 |
|
    def ParseFunctionDef(self):
        # type: () -> command.ShFunction
        """
        function_header : fname '(' ')'
        function_def     : function_header newline_ok function_body ;

        Precondition: Looking at the function name.

        NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

        Bash only accepts the latter, though it doesn't really follow a grammar.

        Raises:
          p_die on an invalid function name or missing ')'.
        """
        word0 = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(word0)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid function name', loc.Word(word0))

        part0 = word0.parts[0]
        # If we got a non-empty string from ShFunctionName, this should be true.
        assert part0.tag() == word_part_e.Literal
        blame_tok = cast(Token, part0)  # for ctx_VarChecker

        self._SetNext()  # move past function name

        # Must be true because of lookahead
        self._GetWord()
        assert self.c_id == Id.Op_LParen, self.cur_word

        # The ')' will come back as Right_ShFunction instead of Op_RParen
        self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
        self._SetNext()

        self._GetWord()
        if self.c_id == Id.Right_ShFunction:
            # 'f ()' implies a function definition, since invoking it with no args
            # would just be 'f'
            self._SetNext()

            self._NewlineOk()

            func = command.ShFunction.CreateNull()
            func.name = name
            # Function params/locals are checked in a fresh scope
            with ctx_VarChecker(self.var_checker, blame_tok):
                func.body = self.ParseCompoundCommand()

            func.name_tok = location.LeftTokenForCompoundWord(word0)
            return func
        else:
            p_die('Expected ) in function definition', loc.Word(self.cur_word))
            return None  # unreachable; p_die raises (keeps MyPy happy)
|
| 2046 |
|
    def ParseKshFunctionDef(self):
        # type: () -> command.ShFunction
        """
        ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body

        Looking at the 'function' keyword.
        """
        keyword_tok = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip past 'function'
        self._GetWord()

        cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(cur_word)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid KSH-style function name', loc.Word(cur_word))

        name_word = self.cur_word
        self._SetNext()  # skip past the function name

        self._GetWord()
        if self.c_id == Id.Op_LParen:
            # Optional '()' after the name in the ksh form
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
            self._SetNext()
            self._Eat(Id.Right_ShFunction)

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        # Check variables in the body in a fresh scope
        with ctx_VarChecker(self.var_checker, keyword_tok):
            func.body = self.ParseCompoundCommand()

        func.keyword = keyword_tok
        func.name_tok = location.LeftTokenForWord(name_word)
        return func
|
| 2081 |
|
| 2082 | def ParseYshProc(self):
|
| 2083 | # type: () -> Proc
|
| 2084 | node = Proc.CreateNull(alloc_lists=True)
|
| 2085 |
|
| 2086 | keyword_tok = word_.AsKeywordToken(self.cur_word)
|
| 2087 | node.keyword = keyword_tok
|
| 2088 |
|
| 2089 | with ctx_VarChecker(self.var_checker, keyword_tok):
|
| 2090 | with ctx_CmdMode(self, cmd_mode_e.Proc):
|
| 2091 | self.w_parser.ParseProc(node)
|
| 2092 | if node.sig.tag() == proc_sig_e.Closed: # Register params
|
| 2093 | sig = cast(proc_sig.Closed, node.sig)
|
| 2094 |
|
| 2095 | # Treat 3 kinds of params as variables.
|
| 2096 | wp = sig.word
|
| 2097 | if wp:
|
| 2098 | for param in wp.params:
|
| 2099 | self.var_checker.Check(Id.KW_Var, param.name,
|
| 2100 | param.blame_tok)
|
| 2101 | if wp.rest_of:
|
| 2102 | r = wp.rest_of
|
| 2103 | self.var_checker.Check(Id.KW_Var, r.name,
|
| 2104 | r.blame_tok)
|
| 2105 | # We COULD register __out here but it would require a different API.
|
| 2106 | #if param.prefix and param.prefix.id == Id.Arith_Colon:
|
| 2107 | # self.var_checker.Check(Id.KW_Var, '__' + param.name)
|
| 2108 |
|
| 2109 | posit = sig.positional
|
| 2110 | if posit:
|
| 2111 | for param in posit.params:
|
| 2112 | self.var_checker.Check(Id.KW_Var, param.name,
|
| 2113 | param.blame_tok)
|
| 2114 | if posit.rest_of:
|
| 2115 | r = posit.rest_of
|
| 2116 | self.var_checker.Check(Id.KW_Var, r.name,
|
| 2117 | r.blame_tok)
|
| 2118 |
|
| 2119 | named = sig.named
|
| 2120 | if named:
|
| 2121 | for param in named.params:
|
| 2122 | self.var_checker.Check(Id.KW_Var, param.name,
|
| 2123 | param.blame_tok)
|
| 2124 | if named.rest_of:
|
| 2125 | r = named.rest_of
|
| 2126 | self.var_checker.Check(Id.KW_Var, r.name,
|
| 2127 | r.blame_tok)
|
| 2128 |
|
| 2129 | if sig.block_param:
|
| 2130 | b = sig.block_param
|
| 2131 | self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)
|
| 2132 |
|
| 2133 | self._SetNext()
|
| 2134 | node.body = self.ParseBraceGroup()
|
| 2135 | # No redirects for YSH procs (only at call site)
|
| 2136 |
|
| 2137 | return node
|
| 2138 |
|
| 2139 | def ParseYshFunc(self):
|
| 2140 | # type: () -> Func
|
| 2141 | """
|
| 2142 | ysh_func: (
|
| 2143 | Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
|
| 2144 | )
|
| 2145 | Looking at KW_Func
|
| 2146 | """
|
| 2147 | node = Func.CreateNull(alloc_lists=True)
|
| 2148 |
|
| 2149 | keyword_tok = word_.AsKeywordToken(self.cur_word)
|
| 2150 | node.keyword = keyword_tok
|
| 2151 |
|
| 2152 | with ctx_VarChecker(self.var_checker, keyword_tok):
|
| 2153 | self.w_parser.ParseFunc(node)
|
| 2154 |
|
| 2155 | posit = node.positional
|
| 2156 | if posit:
|
| 2157 | for param in posit.params:
|
| 2158 | self.var_checker.Check(Id.KW_Var, param.name,
|
| 2159 | param.blame_tok)
|
| 2160 | if posit.rest_of:
|
| 2161 | r = posit.rest_of
|
| 2162 | self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)
|
| 2163 |
|
| 2164 | named = node.named
|
| 2165 | if named:
|
| 2166 | for param in named.params:
|
| 2167 | self.var_checker.Check(Id.KW_Var, param.name,
|
| 2168 | param.blame_tok)
|
| 2169 | if named.rest_of:
|
| 2170 | r = named.rest_of
|
| 2171 | self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)
|
| 2172 |
|
| 2173 | self._SetNext()
|
| 2174 | with ctx_CmdMode(self, cmd_mode_e.Func):
|
| 2175 | node.body = self.ParseBraceGroup()
|
| 2176 |
|
| 2177 | return node
|
| 2178 |
|
| 2179 | def ParseCoproc(self):
|
| 2180 | # type: () -> command_t
|
| 2181 | """
|
| 2182 | TODO: command.Coproc?
|
| 2183 | """
|
| 2184 | raise NotImplementedError()
|
| 2185 |
|
| 2186 | def ParseSubshell(self):
|
| 2187 | # type: () -> command.Subshell
|
| 2188 | """
|
| 2189 | subshell : '(' compound_list ')'
|
| 2190 |
|
| 2191 | Looking at Op_LParen
|
| 2192 | """
|
| 2193 | left = word_.AsOperatorToken(self.cur_word)
|
| 2194 | self._SetNext() # skip past (
|
| 2195 |
|
| 2196 | # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
|
| 2197 | # translation stack, we want to delay it.
|
| 2198 |
|
| 2199 | self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
|
| 2200 |
|
| 2201 | c_list = self._ParseCommandList()
|
| 2202 | if len(c_list.children) == 1:
|
| 2203 | child = c_list.children[0]
|
| 2204 | else:
|
| 2205 | child = c_list
|
| 2206 |
|
| 2207 | ate = self._Eat(Id.Right_Subshell)
|
| 2208 | right = word_.AsOperatorToken(ate)
|
| 2209 |
|
| 2210 | return command.Subshell(left, child, right, None) # no redirects yet
|
| 2211 |
|
| 2212 | def ParseDBracket(self):
|
| 2213 | # type: () -> command.DBracket
|
| 2214 | """Pass the underlying word parser off to the boolean expression
|
| 2215 | parser."""
|
| 2216 | left = word_.AsKeywordToken(self.cur_word)
|
| 2217 | # TODO: Test interactive. Without closing ]], you should get > prompt
|
| 2218 | # (PS2)
|
| 2219 |
|
| 2220 | self._SetNext() # skip [[
|
| 2221 | b_parser = bool_parse.BoolParser(self.w_parser)
|
| 2222 | bnode, right = b_parser.Parse() # May raise
|
| 2223 | return command.DBracket(left, bnode, right, None) # no redirects yet
|
| 2224 |
|
| 2225 | def ParseDParen(self):
|
| 2226 | # type: () -> command.DParen
|
| 2227 | left = word_.AsOperatorToken(self.cur_word)
|
| 2228 |
|
| 2229 | self._SetNext() # skip ((
|
| 2230 | anode, right = self.w_parser.ReadDParen()
|
| 2231 | assert anode is not None
|
| 2232 |
|
| 2233 | return command.DParen(left, anode, right, None) # no redirects yet
|
| 2234 |
|
    def ParseCommand(self):
        # type: () -> command_t
        """
        command          : simple_command
                         | compound_command   # OSH edit: io_redirect* folded in
                         | function_def
                         | ksh_function_def

                         # YSH extensions
                         | proc NAME ...
                         | const ...
                         | var ...
                         | setglobal ...
                         | setref ...
                         | setvar ...
                         | _ EXPR
                         | = EXPR
                         ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1
        """
        if self._AtSecondaryKeyword():
            # do/done/then etc. cannot BEGIN a command
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle. Code
            # inside procs should be YSH, full stop. That means ysh:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                # 2024-02: This avoids bad syntax errors if you type YSH code
                # into OSH
                # proc p (x) { echo hi } would actually be parsed as a
                # command.Simple! Shell compatibility: quote 'proc'
                p_die("proc is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func():
                return self.ParseYshFunc()
            else:
                # Same reasoning as above, for 'proc'
                p_die("func is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
            p_die("const can't be inside proc or func. Use var instead.",
                  loc.Word(self.cur_word))

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Register each declared name with the static checker
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name, lhs.left)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.KW_Call, Id.Lit_Equals):
            # = 42 + a[i]
            # call mylist->append('x')

            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            # ensured by Kind.Word
            cur_word = cast(CompoundWord, self.cur_word)

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION! (Doesn't even need parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    # NOTE(review): tok.tval is used here while other call sites
                    # use lexer.TokenVal(tok) -- confirm they are equivalent.
                    if (match.IsValidVarName(tok.tval) and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals):
                        assert tok.id == Id.Lit_Chars, tok

                        if len(self.hay_attrs_stack
                               ) and self.hay_attrs_stack[-1]:
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here. Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(
                                None,
                                [NameType(tok, lexer.TokenVal(tok), None)],
                                enode)
                        else:
                            # Bare assignment outside a Hay attrs block is an error
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy
|
| 2393 |
|
    def ParsePipeline(self):
        # type: () -> command_t
        """
        pipeline         : Bang? command ( '|' newline_ok command )* ;

        Returns the bare command when there is no '!' and no pipe operator.
        """
        negated = None  # type: Optional[Token]

        self._GetWord()
        if self.c_id == Id.KW_Bang:
            # '!' negates the exit status of the whole pipeline
            negated = word_.AsKeywordToken(self.cur_word)
            self._SetNext()

        child = self.ParseCommand()
        assert child is not None

        children = [child]

        self._GetWord()
        if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
            if negated is not None:
                # '! cmd' is still a Pipeline node, with no operators
                node = command.Pipeline(negated, children, [])
                return node
            else:
                return child  # no pipeline

        # | or |&
        ops = []  # type: List[Token]
        while True:
            op = word_.AsOperatorToken(self.cur_word)
            ops.append(op)

            self._SetNext()  # skip past Id.Op_Pipe or Id.Op_PipeAmp
            self._NewlineOk()

            child = self.ParseCommand()
            children.append(child)

            self._GetWord()
            if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
                break

        return command.Pipeline(negated, children, ops)
|
| 2436 |
|
| 2437 | def ParseAndOr(self):
|
| 2438 | # type: () -> command_t
|
| 2439 | self._GetWord()
|
| 2440 | if self.c_id == Id.Word_Compound:
|
| 2441 | first_word_tok = word_.LiteralToken(self.cur_word)
|
| 2442 | if first_word_tok is not None and first_word_tok.id == Id.Lit_TDot:
|
| 2443 | # We got '...', so parse in multiline mode
|
| 2444 | self._SetNext()
|
| 2445 | with word_.ctx_Multiline(self.w_parser):
|
| 2446 | return self._ParseAndOr()
|
| 2447 |
|
| 2448 | # Parse in normal mode, not multiline
|
| 2449 | return self._ParseAndOr()
|
| 2450 |
|
| 2451 | def _ParseAndOr(self):
|
| 2452 | # type: () -> command_t
|
| 2453 | """
|
| 2454 | and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
|
| 2455 | | pipeline
|
| 2456 |
|
| 2457 | Note that it is left recursive and left associative. We parse it
|
| 2458 | iteratively with a token of lookahead.
|
| 2459 | """
|
| 2460 | child = self.ParsePipeline()
|
| 2461 | assert child is not None
|
| 2462 |
|
| 2463 | self._GetWord()
|
| 2464 | if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
|
| 2465 | return child
|
| 2466 |
|
| 2467 | ops = [] # type: List[Token]
|
| 2468 | children = [child]
|
| 2469 |
|
| 2470 | while True:
|
| 2471 | ops.append(word_.AsOperatorToken(self.cur_word))
|
| 2472 |
|
| 2473 | self._SetNext() # skip past || &&
|
| 2474 | self._NewlineOk()
|
| 2475 |
|
| 2476 | child = self.ParsePipeline()
|
| 2477 | children.append(child)
|
| 2478 |
|
| 2479 | self._GetWord()
|
| 2480 | if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
|
| 2481 | break
|
| 2482 |
|
| 2483 | return command.AndOr(children, ops)
|
| 2484 |
|
| 2485 | # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
|
| 2486 |
|
| 2487 | # At the top level, we execute after every line, e.g. to
|
| 2488 | # - process alias (a form of dynamic parsing)
|
| 2489 | # - process 'exit', because invalid syntax might appear after it
|
| 2490 |
|
| 2491 | # On the other hand, for a while loop body, we parse the whole thing at once,
|
| 2492 | # and then execute it. We don't want to parse it over and over again!
|
| 2493 |
|
| 2494 | # COMPARE
|
| 2495 | # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
|
| 2496 | # command_term : and_or (trailer and_or)* ; # CHILDREN
|
| 2497 |
|
    def _ParseCommandLine(self):
        # type: () -> command_t
        """
        command_line     : and_or (sync_op and_or)* trailer? ;
        trailer          : sync_op newline_ok
                         | NEWLINES;
        sync_op          : '&' | ';';

        NOTE: This rule causes LL(k > 1) behavior. We would have to peek to see if
        there is another command word after the sync op.

        But it's easier to express imperatively. Do the following in a loop:
        1. ParseAndOr
        2. Peek.
           a. If there's a newline, then return. (We're only parsing a single
              line.)
           b. If there's a sync_op, process it. Then look for a newline and
              return. Otherwise, parse another AndOr.
        """
        # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
        # I don't think we should add anything else here; otherwise it will be
        # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
        END_LIST = [Id.Op_Newline, Id.Eof_Real]

        children = []  # type: List[command_t]
        done = False
        while not done:
            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Wrap in a Sentence node to record whether it was ';' or '&'
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:
                    done = True
                # otherwise: another command follows the sync op on this line

            elif self.c_id in END_LIST:
                done = True

            else:
                # e.g. echo a(b)
                p_die('Invalid word while parsing command line',
                      loc.Word(self.cur_word))

            children.append(child)

        # Simplify the AST.
        if len(children) > 1:
            return command.CommandList(children)
        else:
            return children[0]
|
| 2552 |
|
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term     : and_or (trailer and_or)* ;
        trailer          : sync_op newline_ok
                         | NEWLINES;
        sync_op          : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different. (HOW? Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline. It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words". But do/done/then do not BEGIN
            # commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:  # \n EOF
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Record the ';' or '&' in a Sentence node
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going. There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
|
| 2632 |
|
| 2633 | def _ParseCommandList(self):
|
| 2634 | # type: () -> command.CommandList
|
| 2635 | """
|
| 2636 | command_list : newline_ok command_term trailer? ;
|
| 2637 |
|
| 2638 | This one is called by all the compound commands. It's basically a command
|
| 2639 | block.
|
| 2640 |
|
| 2641 | NOTE: Rather than translating the CFG directly, the code follows a style
|
| 2642 | more like this: more like this: (and_or trailer)+. It makes capture
|
| 2643 | easier.
|
| 2644 | """
|
| 2645 | self._NewlineOk()
|
| 2646 | return self._ParseCommandTerm()
|
| 2647 |
|
    def ParseLogicalLine(self):
        # type: () -> Optional[command_t]
        """Parse a single line for main_loop.

        A wrapper around _ParseCommandLine().  Similar but not identical to
        _ParseCommandList() and ParseCommandSub().

        Returns:
          The parsed command, or None on Eof_Real (the main loop then checks
          for pending here docs).

        Raises:
          ParseError
        """
        self._NewlineOk()
        self._GetWord()
        if self.c_id == Id.Eof_Real:
            return None  # main loop checks for here docs
        node = self._ParseCommandLine()
        return node
|
| 2664 |
|
| 2665 | def ParseInteractiveLine(self):
|
| 2666 | # type: () -> parse_result_t
|
| 2667 | """Parse a single line for Interactive main_loop.
|
| 2668 |
|
| 2669 | Different from ParseLogicalLine because newlines are handled differently.
|
| 2670 |
|
| 2671 | Raises:
|
| 2672 | ParseError
|
| 2673 | """
|
| 2674 | self._GetWord()
|
| 2675 | if self.c_id == Id.Op_Newline:
|
| 2676 | return parse_result.EmptyLine
|
| 2677 | if self.c_id == Id.Eof_Real:
|
| 2678 | return parse_result.Eof
|
| 2679 |
|
| 2680 | node = self._ParseCommandLine()
|
| 2681 | return parse_result.Node(node)
|
| 2682 |
|
| 2683 | def ParseCommandSub(self):
|
| 2684 | # type: () -> command_t
|
| 2685 | """Parse $(echo hi) and `echo hi` for word_parse.py.
|
| 2686 |
|
| 2687 | They can have multiple lines, like this: echo $( echo one echo
|
| 2688 | two )
|
| 2689 | """
|
| 2690 | self._NewlineOk()
|
| 2691 |
|
| 2692 | self._GetWord()
|
| 2693 | if self.c_kind == Kind.Eof: # e.g. $()
|
| 2694 | return command.NoOp
|
| 2695 |
|
| 2696 | c_list = self._ParseCommandTerm()
|
| 2697 | if len(c_list.children) == 1:
|
| 2698 | return c_list.children[0]
|
| 2699 | else:
|
| 2700 | return c_list
|
| 2701 |
|
| 2702 | def CheckForPendingHereDocs(self):
|
| 2703 | # type: () -> None
|
| 2704 | # NOTE: This happens when there is no newline at the end of a file, like
|
| 2705 | # osh -c 'cat <<EOF'
|
| 2706 | if len(self.pending_here_docs):
|
| 2707 | node = self.pending_here_docs[0] # Just show the first one?
|
| 2708 | h = cast(redir_param.HereDoc, node.arg)
|
| 2709 | p_die('Unterminated here doc began here', loc.Word(h.here_begin))
|