| 1 | #!/usr/bin/env python
|
| 2 | # Copyright 2016 Andy Chu. All rights reserved.
|
| 3 | # Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 | # you may not use this file except in compliance with the License.
|
| 5 | # You may obtain a copy of the License at
|
| 6 | #
|
| 7 | # http://www.apache.org/licenses/LICENSE-2.0
|
| 8 | from __future__ import print_function
|
| 9 | """
|
| 10 | cmd_parse.py - Parse high level shell commands.
|
| 11 | """
|
| 12 |
|
| 13 | from asdl import const
|
| 14 |
|
| 15 | from core import braces
|
| 16 | from core import word
|
| 17 | from core import util
|
| 18 |
|
| 19 | from osh.meta import ast, Id, Kind, types
|
| 20 | from osh.lex import VAR_NAME_RE
|
| 21 | from osh.bool_parse import BoolParser
|
| 22 |
|
# Module-level shorthands for the logger and the enum namespaces used below.
log = util.log
lex_mode_e = types.lex_mode_e
command_e, word_e, assign_op_e = ast.command_e, ast.word_e, ast.assign_op_e
|
| 28 |
|
| 29 |
|
| 30 | class CommandParser(object):
|
| 31 | """
|
| 32 | Args:
|
| 33 | word_parse: to get a stream of words
|
| 34 | lexer: for lookahead in function def, PushHint of ()
|
| 35 | line_reader: for here doc
|
| 36 | """
|
def __init__(self, w_parser, lexer, line_reader, arena):
  """Store the collaborators and put the parser in its initial state.

  Args:
    w_parser: word parser; the stream of words comes from its ReadWord()
    lexer: receives PushHint() calls (e.g. for the ')' of a case pattern)
    line_reader: supplies the lines that make up here doc bodies
    arena: handed to the word parser that is created for here doc bodies
  """
  self.arena = arena
  self.line_reader = line_reader
  self.lexer = lexer
  self.w_parser = w_parser

  self.Reset()
|
| 44 |
|
def Reset(self):
  """Discard all accumulated state and rewind the cursor."""
  # Errors and completion info collected while parsing.
  self.completion_stack = []
  self.error_stack = []

  # Here docs whose bodies have not been read yet.
  self.pending_here_docs = []

  # Cursor state; the word, kind and id are filled in by _Peek().
  self.next_lex_mode = lex_mode_e.OUTER
  self.cur_word = None  # current word
  self.c_id = Id.Undefined_Tok
  self.c_kind = Kind.Undefined
|
| 56 |
|
def Error(self):
  """Return the list of errors recorded so far (the live list, not a copy)."""
  errors = self.error_stack
  return errors
|
| 59 |
|
def _BadWord(self, msg, w):
  """Record an error that is about one specific word.

  Args:
    msg: format string with a single %s token; it is filled in with w
    w: the offending Word, which is also attached as the error's location
  """
  location = {'word': w}
  self.AddErrorContext(msg, w, **location)
|
| 68 |
|
def AddErrorContext(self, msg, *args, **kwargs):
  """Build a util.ParseError from the arguments and push it on the stack."""
  self.error_stack.append(util.ParseError(msg, *args, **kwargs))
|
| 72 |
|
def GetCompletionState(self):
  """Return the completion stack (the live list, not a copy)."""
  state = self.completion_stack
  return state
|
| 75 |
|
def _MaybeReadHereDocs(self):
  """Read the body of every here doc that is still pending.

  For each pending here doc, lines are pulled from the line reader until one
  equals the delimiter.  The collected lines become the node's body: parsed
  as a word when the here doc is subject to expansion, otherwise kept as
  literal parts.  The pending list is emptied on success.

  Returns:
    True on success (also when nothing was pending), False after recording
    an error.
  """
  for here_doc in self.pending_here_docs:
    # If op is <<-, strip off all leading tabs (NOT spaces).
    # NOTE: Could do this at runtime to preserve the LST.
    strip_tabs = (here_doc.op_id == Id.Redir_DLessDash)

    body_lines = []
    while True:
      line_id, line = self.line_reader.GetLine()

      if not line:  # EOF
        # An unterminated here doc is just a warning in bash.  We make it
        # fatal because we want to be strict, and because it causes problems
        # reporting other errors.
        # Attribute it to the << in <<EOF for now.
        self.AddErrorContext('Unterminated here doc',
                             span_id=here_doc.spids[0])
        return False

      if strip_tabs:
        line = line.lstrip('\t')
      if line.rstrip() == here_doc.here_end:
        break

      body_lines.append((line_id, line))

    if here_doc.do_expansion:
      # NOTE: All lines are parsed at once, instead of line-by-line, because
      # of cases like this:
      # cat <<EOF
      # 1 $(echo 2
      # echo 3) 4
      # EOF
      from osh import parse_lib  # Avoid circular import
      body_parser = parse_lib.MakeWordParserForHereDoc(body_lines, self.arena)
      body = body_parser.ReadHereDocBody()
      if not body:
        self.AddErrorContext(
            'Error reading here doc body: %s', body_parser.Error())
        return False
    else:
      # Each line is a single span.  TODO: Add span_id to token.
      tokens = [
          ast.token(Id.Lit_Chars, text, const.NO_INTEGER)
          for _, text in body_lines]
      body = ast.CompoundWord([ast.LiteralPart(t) for t in tokens])

    here_doc.body = body
    here_doc.was_filled = True

  # No .clear() until Python 3.3.
  del self.pending_here_docs[:]

  return True
|
| 133 |
|
def _Next(self, lex_mode=lex_mode_e.OUTER):
  """Mark the current word as consumed.

  Nothing is read here; this only records the lex mode that the next _Peek()
  should use when it reads a word.
  """
  self.next_lex_mode = lex_mode
|
| 137 |
|
def Peek(self):
  """Public method for REPL: the current word, or None if _Peek() failed."""
  return self.cur_word if self._Peek() else None
|
| 143 |
|
def _Peek(self):
  """Make sure the cursor (cur_word, c_kind, c_id) is up to date.

  A new word is only read when _Next() has been called since the last read.

  Returns:
    True for success and False on error.  Error examples: bad command sub
    word, or unterminated quoted string, etc.
  """
  if self.next_lex_mode == lex_mode_e.NONE:
    return True  # the current word hasn't been consumed yet

  new_word = self.w_parser.ReadWord(self.next_lex_mode)
  if new_word is None:
    self.error_stack.extend(self.w_parser.Error())
    return False

  # Here docs only happen in command mode, so other kinds of newlines don't
  # count.
  is_newline = (new_word.tag == word_e.TokenWord and
                new_word.token.id == Id.Op_Newline)
  if is_newline and not self._MaybeReadHereDocs():
    return False

  self.cur_word = new_word
  self.c_kind = word.CommandKind(new_word)
  self.c_id = word.CommandId(new_word)
  self.next_lex_mode = lex_mode_e.NONE
  return True
|
| 170 |
|
def _Eat(self, c_id):
  """Consume the current word if it has the given id.

  Args:
    c_id: either EKeyword.* or a token type like Id.Right_Subshell.
      TODO: Rationalize / type check this.

  Returns:
    True if the word matched and was consumed.  False if reading a word
    failed, or if the id didn't match (in which case an error is recorded).
  """
  if not self._Peek():
    return False

  if self.c_id == c_id:
    self._Next()
    return True

  # TODO: It would be nicer to print the word type, right now we get a number
  self.AddErrorContext(
      "Expected word type %s, got %s", c_id, self.cur_word,
      word=self.cur_word)
  return False
|
| 188 |
|
def _NewlineOk(self):
  """Skip over one newline if there is one, leaving the cursor on what follows.

  Returns:
    False if reading a word failed, True otherwise.
  """
  if not self._Peek():
    return False
  if self.c_id != Id.Op_Newline:
    return True  # nothing to skip

  self._Next()
  return bool(self._Peek())
|
| 198 |
|
def ParseRedirect(self):
  """Parse one redirect: the operator at the cursor and the word after it.

  Here doc operators (<< and <<-) produce an ast.HereDoc that is also queued
  in pending_here_docs, so that its body gets read on the next newline.  All
  other operators produce an ast.Redir.

  Returns:
    The node, or None after an error.
  """
  if not self._Peek():
    return None
  assert self.c_kind == Kind.Redir, self.cur_word

  op_token = self.cur_word.token
  left_spid = op_token.span_id

  # For now only supporting single digit descriptor
  leading = op_token.val[0]
  fd = int(leading) if leading.isdigit() else const.NO_INTEGER

  if self.c_id in (Id.Redir_DLess, Id.Redir_DLessDash):  # here doc
    node = ast.HereDoc()
    node.op_id = self.c_id
    node.body = None  # not read yet
    node.fd = fd
    node.was_filled = False
    node.spids.append(left_spid)
    self._Next()

    if not self._Peek():
      return None
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded. Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, here_end, quoted = word.StaticEval(self.cur_word)
    node.here_end = here_end
    if not ok:
      self._BadWord('Error evaluating here doc delimiter: %s', self.cur_word)
      return None
    node.do_expansion = not quoted
    self._Next()

    self.pending_here_docs.append(node)  # will be filled on next newline.
    return node

  node = ast.Redir()
  node.op_id = self.c_id
  node.fd = fd
  node.spids.append(left_spid)
  self._Next()

  if not self._Peek():
    return None
  if self.c_kind != Kind.Word:
    self.AddErrorContext(
        'Expected word after redirect operator', word=self.cur_word)
    return None

  # Use the tilde-detected version of the word if there is one.
  node.arg_word = word.TildeDetect(self.cur_word) or self.cur_word
  self._Next()

  return node
|
| 260 |
|
def _ParseRedirectList(self):
  """Collect the redirects at the cursor, if any.

  This is used for blocks only, not commands.

  Returns:
    A (possibly empty) list of redirect nodes, or None on error.
  """
  redirects = []
  while self._Peek():
    # This prediction needs to ONLY accept redirect operators.  Should we
    # make them a separate token kind?
    if self.c_kind != Kind.Redir:
      return redirects

    node = self.ParseRedirect()
    if not node:
      return None
    redirects.append(node)
    self._Next()

  return None  # _Peek() failed
|
| 283 |
|
def _ScanSimpleCommand(self):
  """First pass: Split into redirects and words.

  Scanning stops at the first word that is neither a redirect operator nor of
  Kind.Word.

  Returns:
    A (redirects, words) tuple, or None on error.
  """
  redirects, words = [], []
  while self._Peek():
    if self.c_kind == Kind.Redir:
      node = self.ParseRedirect()
      if not node:  # e.g. EOF
        return None
      redirects.append(node)
    elif self.c_kind == Kind.Word:
      words.append(self.cur_word)
    else:
      return redirects, words
    self._Next()

  return None  # _Peek() failed
|
| 302 |
|
def _SplitSimpleCommandPrefix(self, words):
  """Second pass of SimpleCommand parsing: look for assignment words.

  Only the leading run of assignment-like words counts as the prefix;
  everything from the first other word onward is the suffix.

  Returns:
    (prefix_bindings, suffix_words), where each binding is a tuple
    (name, op, unevaluated value word, left span id).
  """
  prefix_bindings = []
  for w in words:
    left_spid = word.LeftMostSpanForWord(w)

    kov = word.LooksLikeAssignment(w)
    if not kov:
      break  # this word and the rest are the suffix

    name, op, rhs = kov
    # If a tilde is detected we get back a new unevaluated word with a
    # TildeSubPart; otherwise keep the original (also unevaluated) word.
    rhs = word.TildeDetect(rhs) or rhs
    prefix_bindings.append((name, op, rhs, left_spid))

  suffix_words = words[len(prefix_bindings):]
  return prefix_bindings, suffix_words
|
| 332 |
|
def _MakeSimpleCommand(self, prefix_bindings, suffix_words, redirects):
  """Build an ast.SimpleCommand from the pieces of a scanned command.

  Args:
    prefix_bindings: (name, op, value word, left span id) tuples, which
      become the command's environment bindings
    suffix_words: the command words; they get brace and tilde detection
    redirects: redirect nodes to attach

  Returns:
    The node, or None after recording an error.
  """
  # FOO=(1 2 3) ls is not allowed
  for _, _, rhs, _ in prefix_bindings:
    if word.HasArrayPart(rhs):
      self.AddErrorContext(
          'Unexpected array literal in binding: %s', rhs, word=rhs)
      return None

  # echo FOO=(1 2 3) is not allowed
  # NOTE: Other checks can be inserted here.  Can resolve builtins,
  # functions, aliases, static PATH, etc.
  for w in suffix_words:
    kov = word.LooksLikeAssignment(w)
    if not kov:
      continue
    _, _, rhs = kov
    if word.HasArrayPart(rhs):
      self.AddErrorContext('Unexpected array literal: %s', rhs, word=w)
      return None

  # NOTE: In bash, {~bob,~jane}/src works, even though ~ isn't the leading
  # character of the initial word.
  # However, this means we must do tilde detection AFTER brace EXPANSION, not
  # just after brace DETECTION like we're doing here.
  # The BracedWordTree instances have to be expanded into CompoundWord
  # instances for the tilde detection to work.
  braced = braces.BraceDetectAll(suffix_words)
  final_words = word.TildeDetectAll(braced)

  node = ast.SimpleCommand()
  node.words = final_words
  node.redirects = redirects

  for name, op, val, left_spid in prefix_bindings:
    if op != assign_op_e.Equal:
      # NOTE: Using spid of RHS for now, since we don't have one for op.
      self.AddErrorContext('Expected = in environment binding, got +=',
                           word=val)
      return None
    env_pair = ast.env_pair(name, val)
    env_pair.spids.append(left_spid)
    node.more_env.append(env_pair)

  return node
|
| 374 |
|
def _MakeAssignment(self, assign_kw, suffix_words):
  """Build an ast.Assignment for a keyword assignment like 'declare -r x=1'.

  Args:
    assign_kw: id of the assignment keyword
    suffix_words: the keyword word followed by its flags and arguments

  Returns:
    The node, or None after recording an error.
  """
  # First parse flags, e.g. -r -x -a -A.  None of the flags have arguments.
  # Index 0 is the keyword itself, so start at 1.
  flags = []
  num_consumed = 1
  for w in suffix_words[1:]:
    ok, static_val, quoted = word.StaticEval(w)
    # Stop at the first word that can't be statically evaluated or that isn't
    # a flag; the rest are args.
    if not ok or quoted or not static_val.startswith('-'):
      break
    flags.append(static_val)
    num_consumed += 1

  # Now parse bindings or variable names
  assignments = []
  for w in suffix_words[num_consumed:]:
    left_spid = word.LeftMostSpanForWord(w)
    kov = word.LooksLikeAssignment(w)
    if kov:
      name, op, rhs = kov
      # Prefer the unevaluated word with TildeSubPart if a tilde was
      # detected; otherwise keep the original unevaluated word.
      rhs = word.TildeDetect(rhs) or rhs
    else:
      # In aboriginal in variables/sources: export_if_blank does export "$1".
      # We should allow that.
      # Parse this differently then?  dynamic-export?  It sets global
      # variables.
      ok, name, quoted = word.StaticEval(w)
      if not ok or quoted:
        self.AddErrorContext(
            'Variable names must be constant strings, got %s', w, word=w)
        return None

      if not VAR_NAME_RE.match(name):
        self.AddErrorContext('Invalid variable name %r', name, word=w)
        return None

      # No value is equivalent to ''
      op, rhs = assign_op_e.Equal, None

    assignments.append((name, op, rhs, left_spid))

  # TODO: Also make with LhsIndexedName
  pairs = []
  for lhs, op, rhs, spid in assignments:
    pair = ast.assign_pair(ast.LhsName(lhs), op, rhs)
    pair.spids.append(spid)
    pairs.append(pair)

  return ast.Assignment(assign_kw, flags, pairs)
|
| 439 |
|
# (keyword id, first flag) combinations for which an assignment keyword is
# parsed like an ordinary command rather than as an assignment.
_ASSIGN_COMMANDS = {
    # -f: function defs, -F: function names, -p: print
    (Id.Assign_Declare, '-f'), (Id.Assign_Typeset, '-f'),
    (Id.Assign_Declare, '-F'), (Id.Assign_Typeset, '-F'),
    (Id.Assign_Declare, '-p'), (Id.Assign_Typeset, '-p'),

    (Id.Assign_Local, '-p'),
    (Id.Assign_Readonly, '-p'),
    # Hm 'export -p' is more like a command.  But we're parsing it
    # dynamically now because of some wrappers.
    # Maybe we could change this.
    #(Id.Assign_Export, '-p'),
}
# Flags to parse like assignments: -a -r -x (and maybe -i)
|
| 458 |
|
def ParseSimpleCommand(self):
  """Parse a simple command: assignment, control flow, or a word command.

  Fixed transcription of the POSIX grammar (TODO: port to grammar/Shell.g)

  io_file        : '<'       filename
                 | LESSAND   filename
                   ...

  io_here        : DLESS     here_end
                 | DLESSDASH here_end

  redirect       : IO_NUMBER (io_redirect | io_here)

  prefix_part    : ASSIGNMENT_WORD | redirect
  cmd_part       : WORD | redirect

  assign_kw      : Declare | Export | Local | Readonly

  # Without any words it is parsed as a command, not an assignment
  assign_listing : assign_kw

  # Now we have something to do (might be changing assignment flags too)
  # NOTE: any prefixes should be a warning, but they are allowed in shell.
  assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

  # an external command, a function call, or a builtin -- a "word_command"
  word_command   : prefix_part* cmd_part+

  simple_command : assign_listing
                 | assignment
                 | word_command

  Simple imperative algorithm:

  1) Read a list of words and redirects.  Append them to separate lists.
  2) Look for the first non-assignment word.  If it's declare, etc., then
     keep parsing words AND assign words.  Otherwise, just parse words.
  3) If there are no non-assignment words, then it's a global assignment.

  { redirects, global assignments } OR
  { redirects, prefix_bindings, words } OR
  { redirects, ERROR_prefix_bindings, keyword, assignments, words }

  THEN CHECK that prefix bindings don't have any array literal parts!
  Global assignments and keyword assignments can have them of course.
  Well, actually EXPORT shouldn't have them either -- WARNING

  3 cases we want to warn: prefix_bindings for assignment, and array literal
  in prefix bindings, or export

  A command can be an assignment word, word, or redirect on its own.

      ls
      >out.txt

      >out.txt FOO=bar   # this touches the file

  Or any sequence:
      ls foo bar
      <in.txt ls foo bar >out.txt
      <in.txt ls >out.txt foo bar

  Or add one or more environment bindings:
      VAR=val env
      >out.txt VAR=val env

  here_end vs filename is a matter of whether we test that it's quoted.  e.g.
  <<EOF vs <<'EOF'.

  Returns:
    A SimpleCommand, Assignment or ControlFlow node, or None on error.
  """
  result = self._ScanSimpleCommand()
  if not result:
    return None
  redirects, words = result

  if not words:  # e.g.  >out.txt  # redirect without words
    node = ast.SimpleCommand()
    node.redirects = redirects
    return node

  prefix_bindings, suffix_words = self._SplitSimpleCommandPrefix(words)

  if not suffix_words:  # ONE=1 TWO=2  (with no other words)
    if redirects:
      # Point at the first binding.
      first_spid = prefix_bindings[0][3]
      self.AddErrorContext('Got redirects in global assignment',
                           span_id=first_spid)
      return None

    pairs = []
    for lhs, op, rhs, spid in prefix_bindings:
      pair = ast.assign_pair(ast.LhsName(lhs), op, rhs)
      pair.spids.append(spid)
      pairs.append(pair)

    node = ast.Assignment(Id.Assign_None, [], pairs)
    # There is no keyword spid to skip past, so use the first word.
    node.spids.append(word.LeftMostSpanForWord(words[0]))
    return node

  kind, kw_token = word.KeywordToken(suffix_words[0])

  if kind == Kind.Assign:
    # Here we StaticEval suffix_words[1] to see if it's a command like
    # 'typeset -p'.  Then it becomes a SimpleCommand node instead of an
    # Assignment.  Note we're not handling duplicate flags like 'typeset
    # -pf'.  I see this in bashdb (bash debugger) but it can just be changed
    # to 'typeset -p -f'.
    is_command = False
    if len(suffix_words) > 1:
      ok, val, _ = word.StaticEval(suffix_words[1])
      is_command = bool(ok and (kw_token.id, val) in self._ASSIGN_COMMANDS)

    if is_command:  # declare -f, declare -p, typeset -p, etc.
      return self._MakeSimpleCommand(prefix_bindings, suffix_words, redirects)

    # declare str='', declare -a array=()
    if redirects:
      # Attach the error location to the keyword.
      self.AddErrorContext('Got redirects in assignment', token=kw_token)
      return None

    if prefix_bindings:  # FOO=bar local spam=eggs not allowed
      # Use the location of the first value.  TODO: Use the whole word
      # before splitting.
      first_val = prefix_bindings[0][2]
      self.AddErrorContext(
          'Invalid prefix bindings in assignment: %s', prefix_bindings,
          word=first_val)
      return None

    node = self._MakeAssignment(kw_token.id, suffix_words)
    if not node:
      return None
    node.spids.append(kw_token.span_id)
    return node

  if kind == Kind.ControlFlow:
    if redirects:
      self.AddErrorContext('Got redirects in control flow: %s', redirects)
      return None

    if prefix_bindings:  # prefix bindings before a control flow keyword
      # Use the location of the first value.  TODO: Use the whole word before
      # splitting.
      first_val = prefix_bindings[0][2]
      self.AddErrorContext(
          'Invalid prefix bindings in control flow: %s', prefix_bindings,
          word=first_val)
      return None

    # The keyword takes at most one argument.
    if len(suffix_words) > 2:
      # Underline the extra word.
      self.AddErrorContext(
          'Unexpected argument to %r', kw_token.val, word=suffix_words[2])
      return None
    arg_word = suffix_words[1] if len(suffix_words) == 2 else None

    # Attach the token for errors.  (Assignment may not need it.)
    return ast.ControlFlow(kw_token, arg_word)

  return self._MakeSimpleCommand(prefix_bindings, suffix_words, redirects)
|
| 628 |
|
def ParseBraceGroup(self):
  """
  brace_group : LBrace command_list RBrace ;

  Returns an ast.BraceGroup whose single spid is that of the word at the
  cursor on entry, or None on error.
  """
  left_spid = word.LeftMostSpanForWord(self.cur_word)
  if not self._Eat(Id.Lit_LBrace):
    return None

  body = self.ParseCommandList()
  # The span of the closing brace is not needed.
  if not (body and self._Eat(Id.Lit_RBrace)):
    return None

  node = ast.BraceGroup(body.children)
  node.spids.append(left_spid)
  return node
|
| 646 |
|
def ParseDoGroup(self):
  """
  Used by ForEach, ForExpr, While, Until.  Should this be a Do node?

  do_group : Do command_list Done ; /* Apply rule 6 */

  Returns an ast.DoGroup carrying the spids of 'do' and 'done', or None on
  error.
  """
  if not self._Eat(Id.KW_Do):
    return None
  # _Eat() doesn't read the next word, so cur_word is still the 'do'.
  do_spid = word.LeftMostSpanForWord(self.cur_word)

  body = self.ParseCommandList()  # could be anything
  if not (body and self._Eat(Id.KW_Done)):
    return None
  # Likewise, cur_word is still the 'done'.
  done_spid = word.LeftMostSpanForWord(self.cur_word)

  node = ast.DoGroup(body.children)
  node.spids.extend((do_spid, done_spid))
  return node
|
| 665 |
|
def ParseForWords(self):
  """
  for_words        : WORD* for_sep
                   ;
  for_sep          : ';' newline_ok
                   | NEWLINES
                   ;

  Returns:
    (words, semi_spid), or None on error.  semi_spid is the span id of the
    terminating semi-colon (so that it can be removed), or const.NO_INTEGER
    if the list was terminated by a newline.
  """
  words = []
  while self._Peek():
    if self.c_id == Id.Op_Semi:
      semi_spid = self.cur_word.token.span_id  # TokenWord
      self._Next()
      if not self._NewlineOk():
        return None
      return words, semi_spid

    if self.c_id == Id.Op_Newline:
      self._Next()
      return words, const.NO_INTEGER

    if self.cur_word.tag != word_e.CompoundWord:
      # TODO: Can we also show a pointer to the 'for' keyword?
      self.AddErrorContext('Invalid word in for loop', word=self.cur_word)
      return None

    words.append(self.cur_word)
    self._Next()

  return None  # _Peek() failed
|
| 696 |
|
def _ParseForExprLoop(self):
  """
  for (( init; cond; update )) for_sep? do_group

  The node comes from the word parser's ReadForExpression(); the do group
  is attached as its body.  Returns None on error.
  """
  node = self.w_parser.ReadForExpression()
  if not node:
    self.error_stack.extend(self.w_parser.Error())
    self.AddErrorContext("Parsing for expression failed")
    return None
  self._Next()

  if not self._Peek():
    return None

  separator = self.c_id
  if separator == Id.Op_Semi:
    self._Next()
    if not self._NewlineOk():
      return None
  elif separator == Id.Op_Newline:
    self._Next()
  elif separator != Id.KW_Do:  # missing semicolon/newline allowed before do
    self.AddErrorContext(
        'Unexpected token after for expression: %s', self.cur_word,
        word=self.cur_word)
    return None

  do_group = self.ParseDoGroup()
  if not do_group:
    return None

  node.body = do_group
  return node
|
| 728 |
|
def _ParseForEachLoop(self):
  """Parse a for-each loop, starting at the loop variable name.

  Handles 'for x in WORDS; do ... done' as well as the implicit forms
  'for x; do ...' and 'for x do ...', for which do_arg_iter is set.

  Returns:
    An ast.ForEach node whose spids are (in_spid, semi_spid), or None after
    an error.
  """
  node = ast.ForEach()
  node.do_arg_iter = False

  ok, iter_name, quoted = word.StaticEval(self.cur_word)
  if not ok or quoted:
    self.AddErrorContext(
        "Invalid for loop variable", word=self.cur_word)
    return None
  if not VAR_NAME_RE.match(iter_name):
    self.AddErrorContext(
        "Invalid for loop variable name", word=self.cur_word)
    return None
  node.iter_name = iter_name
  self._Next()  # skip past name

  if not self._NewlineOk(): return None

  in_spid = const.NO_INTEGER
  semi_spid = const.NO_INTEGER

  if not self._Peek(): return None
  if self.c_id == Id.KW_In:
    self._Next()  # skip in

    # _Next() doesn't read a word, so cur_word is still the 'in' keyword.
    # NOTE(review): the + 1 presumably selects the span right after 'in' --
    # confirm.
    in_spid = word.LeftMostSpanForWord(self.cur_word) + 1
    x = self.ParseForWords()
    if x is None:
      return None
    # ParseForWords() always yields a list here; an empty list of words is
    # OK, so no further check is needed.
    iter_words, semi_spid = x
    words2 = braces.BraceDetectAll(iter_words)
    words3 = word.TildeDetectAll(words2)
    node.iter_words = words3

  elif self.c_id == Id.Op_Semi:
    node.do_arg_iter = True  # implicit for loop
    self._Next()

  elif self.c_id == Id.KW_Do:
    node.do_arg_iter = True  # implicit for loop
    # do not advance

  else:
    self.AddErrorContext("Unexpected word in for loop: %s", self.cur_word,
                         word=self.cur_word)
    return None

  node.spids.extend((in_spid, semi_spid))

  body_node = self.ParseDoGroup()
  if not body_node: return None

  node.body = body_node
  return node
|
| 786 |
|
def ParseFor(self):
  """
  for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
             | For '((' ... TODO

  Dispatches on the word after 'for': '((' starts a for-expression loop,
  anything else is treated as the variable of a for-each loop.
  """
  if not self._Eat(Id.KW_For):
    return None
  if not self._Peek():
    return None

  if self.c_id == Id.Op_DLeftParen:
    return self._ParseForExprLoop()
  return self._ParseForEachLoop()
|
| 801 |
|
def ParseWhile(self):
  """
  while_clause : While command_list do_group ;

  Returns an ast.While, or None if the condition or the body fails to parse.
  """
  self._Next()  # skip while

  cond = self.ParseCommandList()
  # The body is only attempted once the condition parsed.
  body = self.ParseDoGroup() if cond else None
  if not body:
    return None

  return ast.While(cond.children, body)
|
| 815 |
|
def ParseUntil(self):
  """
  until_clause : Until command_list do_group ;

  Returns an ast.Until, or None if the condition or the body fails to parse.
  """
  self._Next()  # skip until

  cond = self.ParseCommandList()
  # The body is only attempted once the condition parsed.
  body = self.ParseDoGroup() if cond else None
  if not body:
    return None

  return ast.Until(cond.children, body)
|
| 829 |
|
def ParseCaseItem(self):
  """
  case_item: '('? pattern ('|' pattern)* ')'
             newline_ok command_term? trailer? ;

  Returns an ast.case_arm whose spids are (left, rparen, dsemi, last), or
  None on error.
  """
  # Ask the lexer to report the next ')' as the end of a case pattern.
  self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

  left_spid = word.LeftMostSpanForWord(self.cur_word)
  if self.c_id == Id.Op_LParen:
    self._Next()

  # One or more patterns separated by |
  pat_words = []
  more_patterns = True
  while more_patterns:
    if not self._Peek():
      return None
    pat_words.append(self.cur_word)
    self._Next()

    if not self._Peek():
      return None
    more_patterns = (self.c_id == Id.Op_Pipe)
    if more_patterns:
      self._Next()

  rparen_spid = word.LeftMostSpanForWord(self.cur_word)
  if not (self._Eat(Id.Right_CasePat) and self._NewlineOk()):
    return None

  # The action may be empty: the arm is closed right away by ;; or esac.
  if self.c_id in (Id.Op_DSemi, Id.KW_Esac):
    action_children = []
  else:
    c_list = self.ParseCommandTerm()
    if not c_list:
      return None
    action_children = c_list.children

  dsemi_spid = const.NO_INTEGER
  last_spid = const.NO_INTEGER
  if not self._Peek():
    return None
  if self.c_id == Id.KW_Esac:
    # Not consumed here; only its location is recorded.
    last_spid = word.LeftMostSpanForWord(self.cur_word)
  elif self.c_id == Id.Op_DSemi:
    dsemi_spid = word.LeftMostSpanForWord(self.cur_word)
    self._Next()
  else:
    self.AddErrorContext('Expected DSEMI or ESAC, got %s', self.cur_word,
                         word=self.cur_word)
    return None

  if not self._NewlineOk():
    return None

  arm = ast.case_arm(pat_words, action_children)
  arm.spids.extend((left_spid, rparen_spid, dsemi_spid, last_spid))
  return arm
|
| 882 |
|
def ParseCaseList(self, arms):
  """
  case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;

  Args:
    arms: list that the parsed case arms are appended to

  Returns:
    True on success, None on error.
  """
  if not self._Peek():
    return None

  # A case item begins with a command word or (, and esac ends the list.
  while (self.c_id != Id.KW_Esac and
         (self.c_kind == Kind.Word or self.c_id == Id.Op_LParen)):
    arm = self.ParseCaseItem()
    if not arm:
      return None
    arms.append(arm)

    # Now look for DSEMI or ESAC
    if not self._Peek():
      return None

  return True
|
| 903 |
|
def ParseCase(self):
  """
  case_clause : Case WORD newline_ok in newline_ok case_list? Esac ;

  Returns an ast.Case whose spids are (case, in, esac), or None on error.
  """
  case_node = ast.Case()

  case_spid = word.LeftMostSpanForWord(self.cur_word)
  self._Next()  # skip case

  if not self._Peek():
    return None
  case_node.to_match = self.cur_word
  self._Next()

  if not self._NewlineOk():
    return None
  in_spid = word.LeftMostSpanForWord(self.cur_word)
  if not (self._Eat(Id.KW_In) and self._NewlineOk()):
    return None

  if self.c_id != Id.KW_Esac:  # the case list is not empty
    # TODO: should it return a list of nodes, and extend?
    if not self.ParseCaseList(case_node.arms):
      self.AddErrorContext("ParseCase: error parsing case list")
      return None
    if not self._Peek():
      return None

  esac_spid = word.LeftMostSpanForWord(self.cur_word)
  if not self._Eat(Id.KW_Esac):
    return None
  self._Next()  # _Eat() has already advanced; this is a no-op

  case_node.spids.extend((case_spid, in_spid, esac_spid))
  return case_node
|
| 935 |
|
def _ParseElifElse(self, if_node):
    """Parse the elif/else tail of an if statement into `if_node`.

    else_part: (Elif command_list Then command_list)* Else command_list ;

    Each 'elif' adds an if_arm to if_node.arms.  An 'else' sets
    if_node.else_action.  The else span id (const.NO_INTEGER when there is no
    'else') is appended to if_node.spids.

    Returns:
      True on success, None on a parse error.
    """
    arms = if_node.arms

    self._Peek()
    while self.c_id == Id.KW_Elif:
        elif_spid = word.LeftMostSpanForWord(self.cur_word)
        self._Next()  # consume 'elif'

        condition = self.ParseCommandList()
        if not condition:
            return None

        then_spid = word.LeftMostSpanForWord(self.cur_word)
        if not self._Eat(Id.KW_Then):
            return None

        action = self.ParseCommandList()
        if not action:
            return None

        elif_arm = ast.if_arm(condition.children, action.children)
        elif_arm.spids.extend((elif_spid, then_spid))
        arms.append(elif_arm)

    else_spid = const.NO_INTEGER
    if self.c_id == Id.KW_Else:
        else_spid = word.LeftMostSpanForWord(self.cur_word)
        self._Next()  # consume 'else'
        else_body = self.ParseCommandList()
        if not else_body:
            return None
        if_node.else_action = else_body.children

    if_node.spids.append(else_spid)
    return True
|
| 972 |
|
def ParseIf(self):
    """Parse an if statement; called with the cursor on the 'if' keyword.

    if_clause : If command_list Then command_list else_part? Fi ;

    Returns:
      An ast.If node, or None on a parse error.  Its spids end up as
      (else spid or const.NO_INTEGER, fi spid).
    """
    node = ast.If()
    self._Next()  # consume 'if'

    condition = self.ParseCommandList()
    if not condition:
        return None

    then_spid = word.LeftMostSpanForWord(self.cur_word)
    if not self._Eat(Id.KW_Then):
        return None

    action = self.ParseCommandList()
    if not action:
        return None

    first_arm = ast.if_arm(condition.children, action.children)
    # No span is recorded for the leading 'if' itself.
    first_arm.spids.extend((const.NO_INTEGER, then_spid))
    node.arms.append(first_arm)

    if self.c_id not in (Id.KW_Elif, Id.KW_Else):
        node.spids.append(const.NO_INTEGER)  # placeholder: no else spid
    elif not self._ParseElifElse(node):
        return None

    fi_spid = word.LeftMostSpanForWord(self.cur_word)
    if not self._Eat(Id.KW_Fi):
        return None

    node.spids.append(fi_spid)
    return node
|
| 1004 |
|
def ParseTime(self):
    """Parse a 'time' block; called with the cursor on the 'time' keyword.

    Bash's help gives the syntax as:

        time [-p] pipeline

    Returns:
      An ast.TimeBlock wrapping the parsed pipeline, or None on a parse error.
    """
    self._Next()  # consume 'time'

    timed = self.ParsePipeline()
    return ast.TimeBlock(timed) if timed else None
|
| 1016 |
|
def ParseCompoundCommand(self):
    """Dispatch on the current token id to the matching compound-command parser.

    compound_command : brace_group
                     | subshell
                     | for_clause
                     | while_clause
                     | until_clause
                     | if_clause
                     | case_clause
                     | time_clause
                     | [[ BoolExpr ]]
                     | (( ArithExpr ))
                     ;

    Returns:
      Whatever the selected parser returns, or None (after recording an error)
      when the current token does not start a compound command.
    """
    # Checked in order with ==, first match wins.
    #
    # Example of a redirect on (( that is observable:
    # $ (( $(echo one 1>&2; echo 2) > 0 )) 2> out.txt
    dispatch = (
        (Id.Lit_LBrace, self.ParseBraceGroup),
        (Id.Op_LParen, self.ParseSubshell),
        (Id.KW_For, self.ParseFor),
        (Id.KW_While, self.ParseWhile),
        (Id.KW_Until, self.ParseUntil),
        (Id.KW_If, self.ParseIf),
        (Id.KW_Case, self.ParseCase),
        (Id.KW_Time, self.ParseTime),
        (Id.KW_DLeftBracket, self.ParseDBracket),
        (Id.Op_DLeftParen, self.ParseDParen),
    )
    for start_id, parse in dispatch:
        if self.c_id == start_id:
            return parse()

    self.AddErrorContext(
        "Expected a compound command (e.g. for while if case), got %s",
        self.cur_word, word=self.cur_word)
    return None
|
| 1062 |
|
def ParseFunctionBody(self, func):
    """Parse a function body and its trailing redirects into `func`.

    function_body    : compound_command io_redirect* ; /* Apply rule 9 */

    Args:
      func: node whose `body` and `redirects` attributes are assigned.  Neither
        is touched unless both parts parse.

    Returns:
      True on success, None on a parse error.
    """
    compound = self.ParseCompoundCommand()
    if not compound:
        return None

    redirect_list = self._ParseRedirectList()
    # An empty list is a valid result, so only None signals failure.
    if redirect_list is None:
        return None

    func.body, func.redirects = compound, redirect_list
    return True
|
| 1076 |
|
def ParseFunctionDef(self):
    """Parse a POSIX-style function definition: name() body.

    function_header  : fname '(' ')'
    function_def     : function_header newline_ok function_body ;

    Precondition: Looking at the function name.

    NOTE: There is an ambiguity with:

        function foo ( echo hi ) and
        function foo () ( echo hi )

    Bash only accepts the latter, though it doesn't really follow a grammar.

    Returns:
      An ast.FuncDef with spids (left spid, after-name spid), or None on a
      parse error.
    """
    left_spid = word.LeftMostSpanForWord(self.cur_word)

    ok, name = word.AsFuncName(self.cur_word)
    if not ok:
        self.AddErrorContext('Invalid function name', word=self.cur_word)
        return None
    self._Next()  # consume the function name

    if not self._Peek():
        return None
    # The caller only gets here after looking ahead to a '('.
    assert self.c_id == Id.Op_LParen, self.cur_word

    # Have the lexer hand back the matching ')' as Right_FuncDef.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_FuncDef)
    self._Next()

    if not self._Eat(Id.Right_FuncDef):
        return None
    after_name_spid = word.LeftMostSpanForWord(self.cur_word) + 1

    if not self._NewlineOk():
        return None

    func_node = ast.FuncDef()
    func_node.name = name
    if not self.ParseFunctionBody(func_node):
        return None

    func_node.spids.extend((left_spid, after_name_spid))
    return func_node
|
| 1121 |
|
def ParseKshFunctionDef(self):
    """Parse a ksh-style function definition introduced by 'function'.

    ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body

    Called with the cursor on the 'function' keyword.

    Returns:
      An ast.FuncDef with spids (left spid, after-name spid), or None on a
      parse error.
    """
    left_spid = word.LeftMostSpanForWord(self.cur_word)

    self._Next()  # skip past 'function'

    if not self._Peek(): return None
    ok, name = word.AsFuncName(self.cur_word)
    if not ok:
        # Pass word= as ParseFunctionDef does, so the error carries the
        # offending word and not just its text.
        self.AddErrorContext("Invalid function name: %r", self.cur_word,
                             word=self.cur_word)
        return None
    after_name_spid = word.LeftMostSpanForWord(self.cur_word) + 1
    self._Next()  # skip past the function name

    if not self._Peek(): return None
    if self.c_id == Id.Op_LParen:
        # Optional '()': have the lexer hand back ')' as Right_FuncDef.
        self.lexer.PushHint(Id.Op_RParen, Id.Right_FuncDef)
        self._Next()
        if not self._Eat(Id.Right_FuncDef): return None
        # Change it: after )
        after_name_spid = word.LeftMostSpanForWord(self.cur_word) + 1

    if not self._NewlineOk(): return None

    func = ast.FuncDef()
    func.name = name

    if not self.ParseFunctionBody(func):
        return None

    func.spids.append(left_spid)
    func.spids.append(after_name_spid)
    return func
|
| 1157 |
|
def ParseCoproc(self):
    """Placeholder for coproc parsing.

    TODO: not implemented yet.

    Raises:
      NotImplementedError: always.
    """
    raise NotImplementedError()
|
| 1163 |
|
def ParseSubshell(self):
    """Parse a subshell, ( command_list ); called with the cursor on the '('.

    Returns:
      An ast.Subshell with spids (left paren, right paren), or None on a parse
      error.
    """
    left_spid = word.LeftMostSpanForWord(self.cur_word)
    self._Next()  # consume (

    # Ensure that something like $( (cd / && pwd) ) works.  If ) is already on
    # the translation stack, we want to delay it.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)

    commands = self.ParseCommandList()
    if not commands:
        return None

    # Optimization: a lone child is used directly instead of its one-element
    # CommandList.
    kids = commands.children
    subshell = ast.Subshell(kids[0] if len(kids) == 1 else commands)

    right_spid = word.LeftMostSpanForWord(self.cur_word)
    if not self._Eat(Id.Right_Subshell):
        return None

    subshell.spids.extend((left_spid, right_spid))
    return subshell
|
| 1189 |
|
def ParseDBracket(self):
    """Parse [[ ... ]] by handing the word parser to a BoolParser.

    Returns:
      An ast.DBracket wrapping the boolean expression, or None on error.  On
      error the BoolParser's error stack is copied onto this parser's, followed
      by a context entry pointing at the [[ word.
    """
    start_word = self.cur_word  # kept for error reporting
    # TODO: Test interactive.  Without closing ]], you should get > prompt
    # (PS2)

    self._Next()  # consume [[
    bool_parser = BoolParser(self.w_parser)
    expr = bool_parser.Parse()
    if expr:
        return ast.DBracket(expr)

    self.error_stack.extend(bool_parser.Error())
    self.AddErrorContext("Error parsing [[", word=start_word)
    return None
|
| 1207 |
|
def ParseDParen(self):
    """Parse (( ... )) by delegating to the word parser's ReadDParen().

    Returns:
      An ast.DParen wrapping the arithmetic expression, or None on error.  On
      error the word parser's error stack is copied onto this parser's,
      followed by a context entry pointing at the (( word.
    """
    start_word = self.cur_word  # kept for error reporting
    self._Next()  # consume ((

    arith = self.w_parser.ReadDParen()
    if arith:
        return ast.DParen(arith)

    self.error_stack.extend(self.w_parser.Error())
    self.AddErrorContext("Error parsing ((", word=start_word)
    return None
|
| 1221 |
|
def ParseCommand(self):
    """Parse one command, choosing the parser from the current word.

    command          : simple_command
                     | compound_command io_redirect*
                     | function_def
                     | ksh_function_def
                     ;

    Returns:
      The node produced by the selected parser, or None on a parse error.
    """
    if not self._Peek():
        return None

    if self.c_id == Id.KW_Function:
        return self.ParseKshFunctionDef()

    compound_starts = (
        Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen, Id.Lit_LBrace,
        Id.KW_For, Id.KW_While, Id.KW_Until, Id.KW_If, Id.KW_Case, Id.KW_Time)
    if self.c_id in compound_starts:
        compound = self.ParseCompoundCommand()
        if not compound:
            return None
        # TimeBlock is the only compound command without redirects.
        if compound.tag != command_e.TimeBlock:
            redirect_list = self._ParseRedirectList()
            if redirect_list is None:
                return None
            compound.redirects = redirect_list
        return compound

    # NOTE: I added this to fix cases in parse-errors.test.sh, but it doesn't
    # work because Lit_RBrace is in END_LIST below.

    # TODO: KW_Do is also invalid here.
    if self.c_id == Id.Lit_RBrace:
        self.AddErrorContext('Unexpected }', word=self.cur_word)
        return None

    if self.c_kind == Kind.Redir:  # a leading redirect
        return self.ParseSimpleCommand()

    if self.c_kind == Kind.Word:
        # A word directly followed by '(' is either an array assignment or a
        # function definition.
        if self.w_parser.LookAhead() == Id.Op_LParen:
            if word.LooksLikeAssignment(self.cur_word):
                return self.ParseSimpleCommand()  # f=(a b c)
            return self.ParseFunctionDef()  # f() { echo; }

        return self.ParseSimpleCommand()  # echo foo

    self.AddErrorContext(
        "ParseCommand: Expected to parse a command, got %s", self.cur_word,
        word=self.cur_word)
    return None
|
| 1272 |
|
def ParsePipeline(self):
    """Parse a pipeline, optionally negated with a leading '!'.

    pipeline         : Bang? command ( '|' newline_ok command )* ;

    Returns:
      - the command node itself when there is a single un-negated command,
      - an ast.Pipeline otherwise; when at least one pipe operator was seen its
        stderr_indices lists the zero-based positions of the |& operators,
      - None on a parse error.
    """
    pipe_ops = (Id.Op_Pipe, Id.Op_PipeAmp)

    if not self._Peek():
        return None
    negated = self.c_id == Id.KW_Bang
    if negated:
        self._Next()  # consume !

    first = self.ParseCommand()
    if not first:
        return None
    children = [first]

    if not self._Peek():
        return None
    if self.c_id not in pipe_ops:
        # No pipe: only wrap the command when it was negated.
        return ast.Pipeline(children, negated) if negated else first

    stderr_indices = []
    while True:
        # The cursor is on a pipe operator here.  Its zero-based position is
        # the number of commands parsed so far, minus one.
        if self.c_id == Id.Op_PipeAmp:
            stderr_indices.append(len(children) - 1)

        self._Next()  # consume | or |&
        if not self._NewlineOk():
            return None

        command = self.ParseCommand()
        if not command:
            self.AddErrorContext('Error parsing command after pipe')
            # TODO: Return partial pipeline here?  All signatures should (ok,
            # node).  Only the completion uses the node when ok is False.
            return None
        children.append(command)

        if not self._Peek():
            return None
        if self.c_id not in pipe_ops:
            break

    pipeline = ast.Pipeline(children, negated)
    pipeline.stderr_indices = stderr_indices
    return pipeline
|
| 1329 |
|
def ParseAndOr(self):
    """Parse pipelines joined by && and ||.

    and_or           : and_or ( AND_IF | OR_IF ) newline_ok pipeline
                     | pipeline

    The rule is left recursive and left associative; it is parsed iteratively
    with one token of lookahead.

    Returns:
      The lone pipeline node when no && / || follows it, otherwise an
      ast.AndOr built from the operator ids and the children.  None on a parse
      error.
    """
    connectors = (Id.Op_DPipe, Id.Op_DAmp)

    first = self.ParsePipeline()
    if not first:
        return None

    if not self._Peek():
        return None
    if self.c_id not in connectors:
        return first

    ops = []
    children = [first]
    while self.c_id in connectors:
        ops.append(self.c_id)
        self._Next()  # consume || or &&

        if not self._NewlineOk():
            return None

        operand = self.ParsePipeline()
        if not operand:
            return None
        children.append(operand)

        if not self._Peek():
            return None

    return ast.AndOr(ops, children)
|
| 1367 |
|
def ParseCommandLine(self):
    """Parse a single line of commands.

    NOTE: This is only called in InteractiveLoop.  Oh crap I need to really
    read and execute a line at a time then?

    BUG: sleep 1 & sleep 1 & doesn't work here, when written in REPL.  But it
    does work with '-c', because that calls ParseFile and not ParseCommandLine
    over and over.

    TODO: Get rid of ParseFile and stuff?  Shouldn't be used for -c and so
    forth.  Just have an ExecuteLoop for now.  But you still need
    ParseCommandList, for internal nodes.

    command_line     : and_or (sync_op and_or)* trailer? ;
    trailer          : sync_op newline_ok
                     | NEWLINES;
    sync_op          : '&' | ';';

    This rule causes LL(k > 1) behavior.  We would have to peek to see if there
    is another command word after the sync op.

    But it's easier to express imperatively.  Do the following in a loop:
    1. ParseAndOr
    2. Peek.
       a. If there's a newline, then return.  (We're only parsing a single
          line.)
       b. If there's a sync_op, process it.  Then look for a newline and
          return.  Otherwise, parse another AndOr.

    COMPARE
    command_line     : and_or (sync_op and_or)* trailer? ;   # TOP LEVEL
    command_term     : and_or (trailer and_or)* ;            # CHILDREN

    I think you should be able to factor these out.

    Returns:
      An ast.CommandList of the parsed children, or None on a parse error.
    """
    line_enders = (Id.Op_Newline, Id.Eof_Real)

    children = []
    while True:
        child = self.ParseAndOr()
        if not child:
            return None

        if not self._Peek():
            return None

        if self.c_id in (Id.Op_Semi, Id.Op_Amp):
            # Keep the ; or & token with the command it terminates.
            child = ast.Sentence(child, self.cur_word.token)
            self._Next()

            if not self._Peek():
                return None
            finished = self.c_id in line_enders
        elif self.c_id in line_enders:
            finished = True
        else:
            self.AddErrorContext(
                'ParseCommandLine: Unexpected token %s', self.cur_word)
            return None

        children.append(child)
        if finished:
            break

    return ast.CommandList(children)
|
def ParseCommandTerm(self):
    """Parse a run of and_or commands separated by ';', '&' or newlines.

    command_term     : and_or (trailer and_or)* ;
    trailer          : sync_op newline_ok
                     | NEWLINES;
    sync_op          : '&' | ';';

    This is handled in imperative style, like ParseCommandLine.
    Called by ParseCommandList for all blocks, and also for ParseCaseItem,
    which is slightly different.  (HOW?  Is it the DSEMI?)

    Returns:
      An ast.CommandList of the parsed children, or None on a parse error.
    """
    # Ids that end the whole command term.
    list_enders = (
        Id.Eof_Real, Id.Eof_RParen, Id.Eof_Backtick, Id.Right_Subshell,
        Id.Lit_RBrace, Id.Op_DSemi)

    # Most keywords are valid "first words".  But do/done/then etc. do not
    # BEGIN commands, so they stop the loop instead.
    non_starters = (
        Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
        Id.KW_Esac)

    # NOTE: This is similar to ParseCommandLine, except there is a lot of stuff
    # about here docs.  Here docs are inherently line-oriented.
    #
    # - Why aren't we checking the list enders in ParseCommandLine?
    #   - Because you will never be inside $() at the top level.
    #   - We also know it will end in a newline.  It can't end in "fi"!
    #   - example: if true; then { echo hi; } fi
    # - Why aren't we doing 'for c in children' too?

    children = []
    while True:
        if not self._Peek():
            return None
        if self.c_id in non_starters:
            break

        child = self.ParseAndOr()
        if not child:
            self.AddErrorContext('Error parsing AndOr in ParseCommandTerm')
            return None

        if not self._Peek():
            return None

        if self.c_id == Id.Op_Newline:
            self._Next()
            if not self._Peek():
                return None
            at_end = self.c_id in list_enders

        elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
            # Keep the ; or & token with the command it terminates.
            child = ast.Sentence(child, self.cur_word.token)
            self._Next()

            if not self._Peek():
                return None
            if self.c_id == Id.Op_Newline:
                self._Next()  # skip over newline
                if not self._Peek():
                    return None
            # Test if we should keep going.  There might be another command
            # after the semi (and optional newline).
            at_end = self.c_id in list_enders

        else:
            # Either a list ender, or something like "} done", "fi fi",
            # ") fi", which is OK and handled on the next iteration.
            at_end = self.c_id in list_enders

        children.append(child)
        if at_end:
            break

    if not self._Peek():
        return None

    return ast.CommandList(children)
|
| 1515 |
|
def ParseCommandList(self):
    """Parse a command block: optional newlines, then a command term.

    command_list     : newline_ok command_term trailer? ;

    This one is called by all the compound commands.  It's basically a command
    block.

    NOTE: Rather than translating the CFG directly, the code follows a style
    more like this: (and_or trailer)+.  It makes capture easier.

    Returns:
      The node from ParseCommandTerm(), or None on a parse error.
    """
    if not self._NewlineOk():
        return None

    term = self.ParseCommandTerm()
    if term is None:
        return None
    # Failure is signalled with None, never with False.
    assert term is not False
    return term
|
| 1533 |
|
def ParseWholeFile(self):
    """Entry point for main() in non-interactive shell.

    Very similar to ParseCommandList, but we allow empty files.

    TODO: This should be turned into a Parse and Execute loop, freeing arenas
    if they don't contain functions.

    Returns:
      ast.NoOp() when the cursor is at EOF after leading newlines, otherwise
      the node from ParseCommandTerm().  None on a parse error.
    """
    if not self._NewlineOk():
        return None

    # Nothing but newlines: return an empty node to execute.
    if self.c_kind == Kind.Eof:
        return ast.NoOp()

    # This calls ParseAndOr(), but I think it should be a loop that calls
    # ParseCommandLine(), like oil.InteractiveLoop.
    term = self.ParseCommandTerm()
    if term is None:
        return None
    # Failure is signalled with None, never with False.
    assert term is not False
    return term
|