| 1 | #!/usr/bin/env python | 
| 2 | # Copyright 2016 Andy Chu. All rights reserved. | 
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); | 
| 4 | # you may not use this file except in compliance with the License. | 
| 5 | # You may obtain a copy of the License at | 
| 6 | # | 
| 7 | #   http://www.apache.org/licenses/LICENSE-2.0 | 
| 8 | from __future__ import print_function | 
| 9 | """ | 
| 10 | cmd_parse.py - Parse high level shell commands. | 
| 11 | """ | 
| 12 |  | 
| 13 | from asdl import const | 
| 14 |  | 
| 15 | from core import braces | 
| 16 | from core import word | 
| 17 | from core import util | 
| 18 |  | 
| 19 | from osh.meta import ast, Id, Kind, types | 
| 20 | from osh.lex import VAR_NAME_RE | 
| 21 | from osh.bool_parse import BoolParser | 
| 22 |  | 
| 23 | log = util.log | 
| 24 | command_e = ast.command_e | 
| 25 | word_e = ast.word_e | 
| 26 | assign_op_e = ast.assign_op_e | 
| 27 | lex_mode_e = types.lex_mode_e | 
| 28 |  | 
| 29 |  | 
| 30 | class CommandParser(object): | 
| 31 | """ | 
| 32 | Args: | 
| 33 | word_parse: to get a stream of words | 
| 34 | lexer: for lookahead in function def, PushHint of () | 
| 35 | line_reader: for here doc | 
| 36 | """ | 
| 37 | def __init__(self, w_parser, lexer, line_reader, arena): | 
| 38 | self.w_parser = w_parser  # for normal parsing | 
| 39 | self.lexer = lexer  # for fast lookahead to (, for function defs | 
| 40 | self.line_reader = line_reader  # for here docs | 
| 41 | self.arena = arena | 
| 42 |  | 
| 43 | self.Reset() | 
| 44 |  | 
| 45 | def Reset(self): | 
| 46 | self.error_stack = [] | 
| 47 | self.completion_stack = [] | 
| 48 |  | 
| 49 | # Cursor state set by _Peek() | 
| 50 | self.next_lex_mode = lex_mode_e.OUTER | 
| 51 | self.cur_word = None  # current word | 
| 52 | self.c_kind = Kind.Undefined | 
| 53 | self.c_id = Id.Undefined_Tok | 
| 54 |  | 
| 55 | self.pending_here_docs = [] | 
| 56 |  | 
| 57 | def Error(self): | 
| 58 | return self.error_stack | 
| 59 |  | 
| 60 | def _BadWord(self, msg, w): | 
| 61 | """Helper function for errors involving a word. | 
| 62 |  | 
| 63 | Args: | 
| 64 | msg: format string with a single %s token | 
| 65 | w: Word | 
| 66 | """ | 
| 67 | self.AddErrorContext(msg, w, word=w) | 
| 68 |  | 
| 69 | def AddErrorContext(self, msg, *args, **kwargs): | 
| 70 | err = util.ParseError(msg, *args, **kwargs) | 
| 71 | self.error_stack.append(err) | 
| 72 |  | 
| 73 | def GetCompletionState(self): | 
| 74 | return self.completion_stack | 
| 75 |  | 
| 76 | def _MaybeReadHereDocs(self): | 
| 77 | for h in self.pending_here_docs: | 
| 78 | lines = [] | 
| 79 | #log('HERE %r' % h.here_end) | 
| 80 | while True: | 
| 81 | # If op is <<-, strip off all leading tabs (NOT spaces). | 
| 82 | # (in C++, just bump the start?) | 
| 83 | line_id, line = self.line_reader.GetLine() | 
| 84 |  | 
| 85 | #print("LINE %r %r" % (line, h.here_end)) | 
| 86 | if not line:  # EOF | 
| 87 | # An unterminated here doc is just a warning in bash.  We make it | 
| 88 | # fatal because we want to be strict, and because it causes problems | 
| 89 | # reporting other errors. | 
| 90 | # Attribute it to the << in <<EOF for now. | 
| 91 | self.AddErrorContext('Unterminated here doc', span_id=h.spids[0]) | 
| 92 | return False | 
| 93 |  | 
| 94 | # NOTE: Could do this runtime to preserve LST. | 
| 95 | if h.op_id == Id.Redir_DLessDash: | 
| 96 | line = line.lstrip('\t') | 
| 97 | if line.rstrip() == h.here_end: | 
| 98 | break | 
| 99 |  | 
| 100 | lines.append((line_id, line)) | 
| 101 |  | 
| 102 | parts = [] | 
| 103 | if h.do_expansion: | 
| 104 | # NOTE: We read all lines at once, instead of doing it line-by-line, | 
| 105 | # because of cases like this: | 
| 106 | # cat <<EOF | 
| 107 | # 1 $(echo 2 | 
| 108 | # echo 3) 4 | 
| 109 | # EOF | 
| 110 |  | 
| 111 | from osh import parse_lib  # Avoid circular import | 
| 112 | w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena) | 
| 113 | word = w_parser.ReadHereDocBody() | 
| 114 | if not word: | 
| 115 | self.AddErrorContext( | 
| 116 | 'Error reading here doc body: %s', w_parser.Error()) | 
| 117 | return False | 
| 118 | h.body = word | 
| 119 | h.was_filled = True | 
| 120 | else: | 
| 121 | # Each line is a single span.  TODO: Add span_id to token. | 
| 122 | tokens = [ | 
| 123 | ast.token(Id.Lit_Chars, line, const.NO_INTEGER) | 
| 124 | for _, line in lines] | 
| 125 | parts = [ast.LiteralPart(t) for t in tokens] | 
| 126 | h.body = ast.CompoundWord(parts) | 
| 127 | h.was_filled = True | 
| 128 |  | 
| 129 | # No .clear() until Python 3.3. | 
| 130 | del self.pending_here_docs[:] | 
| 131 |  | 
| 132 | return True | 
| 133 |  | 
| 134 | def _Next(self, lex_mode=lex_mode_e.OUTER): | 
| 135 | """Helper method.""" | 
| 136 | self.next_lex_mode = lex_mode | 
| 137 |  | 
| 138 | def Peek(self): | 
| 139 | """Public method for REPL.""" | 
| 140 | if not self._Peek(): | 
| 141 | return None | 
| 142 | return self.cur_word | 
| 143 |  | 
| 144 | def _Peek(self): | 
| 145 | """Helper method. | 
| 146 |  | 
| 147 | Returns True for success and False on error.  Error examples: bad command | 
| 148 | sub word, or unterminated quoted string, etc. | 
| 149 | """ | 
| 150 | if self.next_lex_mode != lex_mode_e.NONE: | 
| 151 | w = self.w_parser.ReadWord(self.next_lex_mode) | 
| 152 | if w is None: | 
| 153 | error_stack = self.w_parser.Error() | 
| 154 | self.error_stack.extend(error_stack) | 
| 155 | return False | 
| 156 |  | 
| 157 | # Here docs only happen in command mode, so other kinds of newlines don't | 
| 158 | # count. | 
| 159 | if w.tag == word_e.TokenWord and w.token.id == Id.Op_Newline: | 
| 160 | if not self._MaybeReadHereDocs(): | 
| 161 | return False | 
| 162 |  | 
| 163 | self.cur_word = w | 
| 164 |  | 
| 165 | self.c_kind = word.CommandKind(self.cur_word) | 
| 166 | self.c_id = word.CommandId(self.cur_word) | 
| 167 | self.next_lex_mode = lex_mode_e.NONE | 
| 168 | #print('_Peek', self.cur_word) | 
| 169 | return True | 
| 170 |  | 
| 171 | def _Eat(self, c_id): | 
| 172 | """Consume a word of a type.  If it doesn't match, return False. | 
| 173 |  | 
| 174 | Args: | 
| 175 | c_id: either EKeyword.* or a token type like Id.Right_Subshell. | 
| 176 | TODO: Rationalize / type check this. | 
| 177 | """ | 
| 178 | if not self._Peek(): | 
| 179 | return False | 
| 180 | # TODO: It would be nicer to print the word type, right now we get a number | 
| 181 | if self.c_id != c_id: | 
| 182 | self.AddErrorContext( | 
| 183 | "Expected word type %s, got %s", c_id, self.cur_word, | 
| 184 | word=self.cur_word) | 
| 185 | return False | 
| 186 | self._Next() | 
| 187 | return True | 
| 188 |  | 
| 189 | def _NewlineOk(self): | 
| 190 | """Check for optional newline and consume it.""" | 
| 191 | if not self._Peek(): | 
| 192 | return False | 
| 193 | if self.c_id == Id.Op_Newline: | 
| 194 | self._Next() | 
| 195 | if not self._Peek(): | 
| 196 | return False | 
| 197 | return True | 
| 198 |  | 
| 199 | def ParseRedirect(self): | 
| 200 | """ | 
| 201 | Problem: You don't know which kind of redir_node to instantiate before | 
| 202 | this?  You could stuff them all in one node, and then have a switch() on | 
| 203 | the type. | 
| 204 |  | 
| 205 | You need different types. | 
| 206 | """ | 
| 207 | if not self._Peek(): return None | 
| 208 | assert self.c_kind == Kind.Redir, self.cur_word | 
| 209 |  | 
| 210 | left_spid = self.cur_word.token.span_id | 
| 211 |  | 
| 212 | # For now only supporting single digit descriptor | 
| 213 | first_char = self.cur_word.token.val[0] | 
| 214 | if first_char.isdigit(): | 
| 215 | fd = int(first_char) | 
| 216 | else: | 
| 217 | fd = const.NO_INTEGER | 
| 218 |  | 
| 219 | if self.c_id in (Id.Redir_DLess, Id.Redir_DLessDash):  # here doc | 
| 220 | node = ast.HereDoc() | 
| 221 | node.op_id = self.c_id | 
| 222 | node.body = None  # not read yet | 
| 223 | node.fd = fd | 
| 224 | node.was_filled = False | 
| 225 | node.spids.append(left_spid) | 
| 226 | self._Next() | 
| 227 |  | 
| 228 | if not self._Peek(): return None | 
| 229 | # "If any character in word is quoted, the delimiter shall be formed by | 
| 230 | # performing quote removal on word, and the here-document lines shall not | 
| 231 | # be expanded. Otherwise, the delimiter shall be the word itself." | 
| 232 | # NOTE: \EOF counts, or even E\OF | 
| 233 | ok, node.here_end, quoted = word.StaticEval(self.cur_word) | 
| 234 | if not ok: | 
| 235 | self._BadWord('Error evaluating here doc delimiter: %s', self.cur_word) | 
| 236 | return None | 
| 237 | node.do_expansion = not quoted | 
| 238 | self._Next() | 
| 239 |  | 
| 240 | self.pending_here_docs.append(node)  # will be filled on next newline. | 
| 241 |  | 
| 242 | else: | 
| 243 | node = ast.Redir() | 
| 244 | node.op_id = self.c_id | 
| 245 | node.fd = fd | 
| 246 | node.spids.append(left_spid) | 
| 247 | self._Next() | 
| 248 |  | 
| 249 | if not self._Peek(): return None | 
| 250 | if self.c_kind != Kind.Word: | 
| 251 | self.AddErrorContext( | 
| 252 | 'Expected word after redirect operator', word=self.cur_word) | 
| 253 | return None | 
| 254 |  | 
| 255 | new_word = word.TildeDetect(self.cur_word) | 
| 256 | node.arg_word = new_word or self.cur_word | 
| 257 | self._Next() | 
| 258 |  | 
| 259 | return node | 
| 260 |  | 
| 261 | def _ParseRedirectList(self): | 
| 262 | """Try parsing any redirects at the cursor. | 
| 263 |  | 
| 264 | This is used for blocks only, not commands. | 
| 265 |  | 
| 266 | Return None on error. | 
| 267 | """ | 
| 268 | redirects = [] | 
| 269 | while True: | 
| 270 | if not self._Peek(): return None | 
| 271 |  | 
| 272 | # This prediction needs to ONLY accept redirect operators.  Should we | 
| 273 | # make them a separate TokeNkind? | 
| 274 | if self.c_kind != Kind.Redir: | 
| 275 | break | 
| 276 |  | 
| 277 | node = self.ParseRedirect() | 
| 278 | if not node: | 
| 279 | return None | 
| 280 | redirects.append(node) | 
| 281 | self._Next() | 
| 282 | return redirects | 
| 283 |  | 
| 284 | def _ScanSimpleCommand(self): | 
| 285 | """First pass: Split into redirects and words.""" | 
| 286 | redirects = [] | 
| 287 | words = [] | 
| 288 | while True: | 
| 289 | if not self._Peek(): return None | 
| 290 | if self.c_kind == Kind.Redir: | 
| 291 | node = self.ParseRedirect() | 
| 292 | if not node: return None  # e.g. EOF | 
| 293 | redirects.append(node) | 
| 294 |  | 
| 295 | elif self.c_kind == Kind.Word: | 
| 296 | words.append(self.cur_word) | 
| 297 |  | 
| 298 | else: | 
| 299 | break | 
| 300 | self._Next() | 
| 301 | return redirects, words | 
| 302 |  | 
| 303 | def _SplitSimpleCommandPrefix(self, words): | 
| 304 | """ | 
| 305 | Second pass of SimpleCommand parsing: look for assignment words. | 
| 306 | """ | 
| 307 | prefix_bindings = [] | 
| 308 | suffix_words = [] | 
| 309 |  | 
| 310 | done_prefix = False | 
| 311 | for w in words: | 
| 312 | if done_prefix: | 
| 313 | suffix_words.append(w) | 
| 314 | continue | 
| 315 |  | 
| 316 | left_spid = word.LeftMostSpanForWord(w) | 
| 317 |  | 
| 318 | kov = word.LooksLikeAssignment(w) | 
| 319 | if kov: | 
| 320 | k, op, v = kov | 
| 321 | t = word.TildeDetect(v) | 
| 322 | if t: | 
| 323 | # t is an unevaluated word with TildeSubPart | 
| 324 | prefix_bindings.append((k, op, t, left_spid)) | 
| 325 | else: | 
| 326 | prefix_bindings.append((k, op, v, left_spid))  # v is unevaluated word | 
| 327 | else: | 
| 328 | done_prefix = True | 
| 329 | suffix_words.append(w) | 
| 330 |  | 
| 331 | return prefix_bindings, suffix_words | 
| 332 |  | 
| 333 | def _MakeSimpleCommand(self, prefix_bindings, suffix_words, redirects): | 
| 334 | # FOO=(1 2 3) ls is not allowed | 
| 335 | for k, _, v, _ in prefix_bindings: | 
| 336 | if word.HasArrayPart(v): | 
| 337 | self.AddErrorContext( | 
| 338 | 'Unexpected array literal in binding: %s', v, word=v) | 
| 339 | return None | 
| 340 |  | 
| 341 | # echo FOO=(1 2 3) is not allowed | 
| 342 | # NOTE: Other checks can be inserted here.  Can resolve builtins, | 
| 343 | # functions, aliases, static PATH, etc. | 
| 344 | for w in suffix_words: | 
| 345 | kov = word.LooksLikeAssignment(w) | 
| 346 | if kov: | 
| 347 | _, _, v = kov | 
| 348 | if word.HasArrayPart(v): | 
| 349 | self.AddErrorContext('Unexpected array literal: %s', v, word=w) | 
| 350 | return None | 
| 351 |  | 
| 352 | # NOTE: # In bash, {~bob,~jane}/src works, even though ~ isn't the leading | 
| 353 | # character of the initial word. | 
| 354 | # However, this means we must do tilde detection AFTER brace EXPANSION, not | 
| 355 | # just after brace DETECTION like we're doing here. | 
| 356 | # The BracedWordTree instances have to be expanded into CompoundWord | 
| 357 | # instances for the tilde detection to work. | 
| 358 | words2 = braces.BraceDetectAll(suffix_words) | 
| 359 | words3 = word.TildeDetectAll(words2) | 
| 360 |  | 
| 361 | node = ast.SimpleCommand() | 
| 362 | node.words = words3 | 
| 363 | node.redirects = redirects | 
| 364 | for name, op, val, left_spid in prefix_bindings: | 
| 365 | if op != assign_op_e.Equal: | 
| 366 | # NOTE: Using spid of RHS for now, since we don't have one for op. | 
| 367 | self.AddErrorContext('Expected = in environment binding, got +=', | 
| 368 | word=val) | 
| 369 | return None | 
| 370 | pair = ast.env_pair(name, val) | 
| 371 | pair.spids.append(left_spid) | 
| 372 | node.more_env.append(pair) | 
| 373 | return node | 
| 374 |  | 
| 375 | def _MakeAssignment(self, assign_kw, suffix_words): | 
| 376 | # First parse flags, e.g. -r -x -a -A.  None of the flags have arguments. | 
| 377 | flags = [] | 
| 378 | n = len(suffix_words) | 
| 379 | i = 1 | 
| 380 | while i < n: | 
| 381 | w = suffix_words[i] | 
| 382 | ok, static_val, quoted = word.StaticEval(w) | 
| 383 | if not ok or quoted: | 
| 384 | break  # can't statically evaluate | 
| 385 |  | 
| 386 | if static_val.startswith('-'): | 
| 387 | flags.append(static_val) | 
| 388 | else: | 
| 389 | break  # not a flag, rest are args | 
| 390 | i += 1 | 
| 391 |  | 
| 392 | # Now parse bindings or variable names | 
| 393 | assignments = [] | 
| 394 | while i < n: | 
| 395 | w = suffix_words[i] | 
| 396 | left_spid = word.LeftMostSpanForWord(w) | 
| 397 | kov = word.LooksLikeAssignment(w) | 
| 398 | if kov: | 
| 399 | k, op, v = kov | 
| 400 | t = word.TildeDetect(v) | 
| 401 | if t: | 
| 402 | # t is an unevaluated word with TildeSubPart | 
| 403 | a = (k, op, t, left_spid) | 
| 404 | else: | 
| 405 | a = (k, op, v, left_spid)  # v is unevaluated word | 
| 406 | else: | 
| 407 | # In aboriginal in variables/sources: export_if_blank does export "$1". | 
| 408 | # We should allow that. | 
| 409 |  | 
| 410 | # Parse this differently then? | 
| 411 | # dynamic-export? | 
| 412 | # It sets global variables. | 
| 413 | ok, static_val, quoted = word.StaticEval(w) | 
| 414 | if not ok or quoted: | 
| 415 | self.AddErrorContext( | 
| 416 | 'Variable names must be constant strings, got %s', w, word=w) | 
| 417 | return None | 
| 418 |  | 
| 419 | # No value is equivalent to '' | 
| 420 | m = VAR_NAME_RE.match(static_val) | 
| 421 | if not m: | 
| 422 | self.AddErrorContext('Invalid variable name %r', static_val, word=w) | 
| 423 | return None | 
| 424 | a = (static_val, assign_op_e.Equal, None, left_spid) | 
| 425 |  | 
| 426 | assignments.append(a) | 
| 427 | i += 1 | 
| 428 |  | 
| 429 | # TODO: Also make with LhsIndexedName | 
| 430 | pairs = [] | 
| 431 | for lhs, op, rhs, spid in assignments: | 
| 432 | p = ast.assign_pair(ast.LhsName(lhs), op, rhs) | 
| 433 | p.spids.append(spid) | 
| 434 | pairs.append(p) | 
| 435 |  | 
| 436 | node = ast.Assignment(assign_kw, flags, pairs) | 
| 437 |  | 
| 438 | return node | 
| 439 |  | 
| 440 | # Flags that indicate an assignment should be parsed like a command. | 
| 441 | _ASSIGN_COMMANDS = set([ | 
| 442 | (Id.Assign_Declare, '-f'),  # function defs | 
| 443 | (Id.Assign_Declare, '-F'),  # function names | 
| 444 | (Id.Assign_Declare, '-p'),  # print | 
| 445 |  | 
| 446 | (Id.Assign_Typeset, '-f'), | 
| 447 | (Id.Assign_Typeset, '-F'), | 
| 448 | (Id.Assign_Typeset, '-p'), | 
| 449 |  | 
| 450 | (Id.Assign_Local, '-p'), | 
| 451 | (Id.Assign_Readonly, '-p'), | 
| 452 | # Hm 'export -p' is more like a command.  But we're parsing it | 
| 453 | # dynamically now because of some wrappers. | 
| 454 | # Maybe we could change this. | 
| 455 | #(Id.Assign_Export, '-p'), | 
| 456 | ]) | 
| 457 | # Flags to parse like assignments: -a -r -x (and maybe -i) | 
| 458 |  | 
| 459 | def ParseSimpleCommand(self): | 
| 460 | """ | 
| 461 | Fixed transcription of the POSIX grammar (TODO: port to grammar/Shell.g) | 
| 462 |  | 
| 463 | io_file        : '<'       filename | 
| 464 | | LESSAND   filename | 
| 465 | ... | 
| 466 |  | 
| 467 | io_here        : DLESS     here_end | 
| 468 | | DLESSDASH here_end | 
| 469 |  | 
| 470 | redirect       : IO_NUMBER (io_redirect | io_here) | 
| 471 |  | 
| 472 | prefix_part    : ASSIGNMENT_WORD | redirect | 
| 473 | cmd_part       : WORD | redirect | 
| 474 |  | 
| 475 | assign_kw      : Declare | Export | Local | Readonly | 
| 476 |  | 
| 477 | # Without any words it is parsed as a command, not an assignment | 
| 478 | assign_listing : assign_kw | 
| 479 |  | 
| 480 | # Now we have something to do (might be changing assignment flags too) | 
| 481 | # NOTE: any prefixes should be a warning, but they are allowed in shell. | 
| 482 | assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+ | 
| 483 |  | 
| 484 | # an external command, a function call, or a builtin -- a "word_command" | 
| 485 | word_command   : prefix_part* cmd_part+ | 
| 486 |  | 
| 487 | simple_command : assign_listing | 
| 488 | | assignment | 
| 489 | | proc_command | 
| 490 |  | 
| 491 | Simple imperative algorithm: | 
| 492 |  | 
| 493 | 1) Read a list of words and redirects.  Append them to separate lists. | 
| 494 | 2) Look for the first non-assignment word.  If it's declare, etc., then | 
| 495 | keep parsing words AND assign words.  Otherwise, just parse words. | 
| 496 | 3) If there are no non-assignment words, then it's a global assignment. | 
| 497 |  | 
| 498 | { redirects, global assignments } OR | 
| 499 | { redirects, prefix_bindings, words } OR | 
| 500 | { redirects, ERROR_prefix_bindings, keyword, assignments, words } | 
| 501 |  | 
| 502 | THEN CHECK that prefix bindings don't have any array literal parts! | 
| 503 | global assignment and keyword assignments can have the of course. | 
| 504 | well actually EXPORT shouldn't have them either -- WARNING | 
| 505 |  | 
| 506 | 3 cases we want to warn: prefix_bindings for assignment, and array literal | 
| 507 | in prefix bindings, or export | 
| 508 |  | 
| 509 | A command can be an assignment word, word, or redirect on its own. | 
| 510 |  | 
| 511 | ls | 
| 512 | >out.txt | 
| 513 |  | 
| 514 | >out.txt FOO=bar   # this touches the file | 
| 515 |  | 
| 516 | Or any sequence: | 
| 517 | ls foo bar | 
| 518 | <in.txt ls foo bar >out.txt | 
| 519 | <in.txt ls >out.txt foo bar | 
| 520 |  | 
| 521 | Or add one or more environment bindings: | 
| 522 | VAR=val env | 
| 523 | >out.txt VAR=val env | 
| 524 |  | 
| 525 | here_end vs filename is a matter of whether we test that it's quoted.  e.g. | 
| 526 | <<EOF vs <<'EOF'. | 
| 527 | """ | 
| 528 | result = self._ScanSimpleCommand() | 
| 529 | if not result: return None | 
| 530 | redirects, words = result | 
| 531 |  | 
| 532 | if not words:  # e.g.  >out.txt  # redirect without words | 
| 533 | node = ast.SimpleCommand() | 
| 534 | node.redirects = redirects | 
| 535 | return node | 
| 536 |  | 
| 537 | prefix_bindings, suffix_words = self._SplitSimpleCommandPrefix(words) | 
| 538 |  | 
| 539 | if not suffix_words:  # ONE=1 TWO=2  (with no other words) | 
| 540 | if redirects: | 
| 541 | binding1 = prefix_bindings[0] | 
| 542 | _, _, _, spid = binding1 | 
| 543 | self.AddErrorContext('Got redirects in global assignment', | 
| 544 | span_id=spid) | 
| 545 | return None | 
| 546 |  | 
| 547 | pairs = [] | 
| 548 | for lhs, op, rhs, spid in prefix_bindings: | 
| 549 | p = ast.assign_pair(ast.LhsName(lhs), op, rhs) | 
| 550 | p.spids.append(spid) | 
| 551 | pairs.append(p) | 
| 552 |  | 
| 553 | node = ast.Assignment(Id.Assign_None, [], pairs) | 
| 554 | left_spid = word.LeftMostSpanForWord(words[0]) | 
| 555 | node.spids.append(left_spid)  # no keyword spid to skip past | 
| 556 | return node | 
| 557 |  | 
| 558 | kind, kw_token = word.KeywordToken(suffix_words[0]) | 
| 559 |  | 
| 560 | if kind == Kind.Assign: | 
| 561 | # Here we StaticEval suffix_words[1] to see if it's a command like | 
| 562 | # 'typeset -p'.  Then it becomes a SimpleCommand node instead of an | 
| 563 | # Assignment.  Note we're not handling duplicate flags like 'typeset | 
| 564 | # -pf'.  I see this in bashdb (bash debugger) but it can just be changed | 
| 565 | # to 'typeset -p -f'. | 
| 566 | is_command = False | 
| 567 | if len(suffix_words) > 1: | 
| 568 | ok, val, _ = word.StaticEval(suffix_words[1]) | 
| 569 | if ok and (kw_token.id, val) in self._ASSIGN_COMMANDS: | 
| 570 | is_command = True | 
| 571 |  | 
| 572 | if is_command:  # declare -f, declare -p, typeset -p, etc. | 
| 573 | node = self._MakeSimpleCommand(prefix_bindings, suffix_words, | 
| 574 | redirects) | 
| 575 | return node | 
| 576 |  | 
| 577 | else:  # declare str='', declare -a array=() | 
| 578 | if redirects: | 
| 579 | # Attach the error location to the keyword.  It would be more precise | 
| 580 | # to attach it to the | 
| 581 | self.AddErrorContext('Got redirects in assignment', token=kw_token) | 
| 582 | return None | 
| 583 |  | 
| 584 | if prefix_bindings:  # FOO=bar local spam=eggs not allowed | 
| 585 | # Use the location of the first value.  TODO: Use the whole word | 
| 586 | # before splitting. | 
| 587 | _, _, v0, _ = prefix_bindings[0] | 
| 588 | self.AddErrorContext( | 
| 589 | 'Invalid prefix bindings in assignment: %s', prefix_bindings, | 
| 590 | word=v0) | 
| 591 | return None | 
| 592 |  | 
| 593 | node = self._MakeAssignment(kw_token.id, suffix_words) | 
| 594 | if not node: return None | 
| 595 | node.spids.append(kw_token.span_id) | 
| 596 | return node | 
| 597 |  | 
| 598 | elif kind == Kind.ControlFlow: | 
| 599 | if redirects: | 
| 600 | self.AddErrorContext('Got redirects in control flow: %s', redirects) | 
| 601 | return None | 
| 602 |  | 
| 603 | if prefix_bindings:  # FOO=bar local spam=eggs not allowed | 
| 604 | # Use the location of the first value.  TODO: Use the whole word before | 
| 605 | # splitting. | 
| 606 | _, _, v0, _ = prefix_bindings[0] | 
| 607 | self.AddErrorContext( | 
| 608 | 'Invalid prefix bindings in control flow: %s', prefix_bindings, | 
| 609 | word=v0) | 
| 610 | return None | 
| 611 |  | 
| 612 | # Attach the token for errors.  (Assignment may not need it.) | 
| 613 | if len(suffix_words) == 1: | 
| 614 | arg_word = None | 
| 615 | elif len(suffix_words) == 2: | 
| 616 | arg_word = suffix_words[1] | 
| 617 | else: | 
| 618 | # Underline the extra word. | 
| 619 | self.AddErrorContext( | 
| 620 | 'Unexpected argument to %r', kw_token.val, word=suffix_words[2]) | 
| 621 | return None | 
| 622 |  | 
| 623 | return ast.ControlFlow(kw_token, arg_word) | 
| 624 |  | 
| 625 | else: | 
| 626 | node = self._MakeSimpleCommand(prefix_bindings, suffix_words, redirects) | 
| 627 | return node | 
| 628 |  | 
| 629 | def ParseBraceGroup(self): | 
| 630 | """ | 
| 631 | brace_group      : LBrace command_list RBrace ; | 
| 632 | """ | 
| 633 | left_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 634 | if not self._Eat(Id.Lit_LBrace): return None | 
| 635 |  | 
| 636 | c_list = self.ParseCommandList() | 
| 637 | if not c_list: return None | 
| 638 |  | 
| 639 | # Not needed | 
| 640 | #right_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 641 | if not self._Eat(Id.Lit_RBrace): return None | 
| 642 |  | 
| 643 | node = ast.BraceGroup(c_list.children) | 
| 644 | node.spids.append(left_spid) | 
| 645 | return node | 
| 646 |  | 
| 647 | def ParseDoGroup(self): | 
| 648 | """ | 
| 649 | Used by ForEach, ForExpr, While, Until.  Should this be a Do node? | 
| 650 |  | 
| 651 | do_group         : Do command_list Done ;          /* Apply rule 6 */ | 
| 652 | """ | 
| 653 | if not self._Eat(Id.KW_Do): return None | 
| 654 | do_spid = word.LeftMostSpanForWord(self.cur_word)  # after _Eat | 
| 655 |  | 
| 656 | c_list = self.ParseCommandList()  # could be any thing | 
| 657 | if not c_list: return None | 
| 658 |  | 
| 659 | if not self._Eat(Id.KW_Done): return None | 
| 660 | done_spid = word.LeftMostSpanForWord(self.cur_word)  # after _Eat | 
| 661 |  | 
| 662 | node = ast.DoGroup(c_list.children) | 
| 663 | node.spids.extend((do_spid, done_spid)) | 
| 664 | return node | 
| 665 |  | 
| 666 | def ParseForWords(self): | 
| 667 | """ | 
| 668 | for_words        : WORD* for_sep | 
| 669 | ; | 
| 670 | for_sep          : ';' newline_ok | 
| 671 | | NEWLINES | 
| 672 | ; | 
| 673 | """ | 
| 674 | words = [] | 
| 675 | # The span_id of any semi-colon, so we can remove it. | 
| 676 | semi_spid = const.NO_INTEGER | 
| 677 |  | 
| 678 | while True: | 
| 679 | if not self._Peek(): return None | 
| 680 | if self.c_id == Id.Op_Semi: | 
| 681 | semi_spid = self.cur_word.token.span_id  # TokenWord | 
| 682 | self._Next() | 
| 683 | if not self._NewlineOk(): return None | 
| 684 | break | 
| 685 | elif self.c_id == Id.Op_Newline: | 
| 686 | self._Next() | 
| 687 | break | 
| 688 | if self.cur_word.tag != word_e.CompoundWord: | 
| 689 | # TODO: Can we also show a pointer to the 'for' keyword? | 
| 690 | self.AddErrorContext('Invalid word in for loop', word=self.cur_word) | 
| 691 | return None | 
| 692 |  | 
| 693 | words.append(self.cur_word) | 
| 694 | self._Next() | 
| 695 | return words, semi_spid | 
| 696 |  | 
| 697 | def _ParseForExprLoop(self): | 
| 698 | """ | 
| 699 | for (( init; cond; update )) for_sep? do_group | 
| 700 | """ | 
| 701 | node = self.w_parser.ReadForExpression() | 
| 702 | if not node: | 
| 703 | error_stack = self.w_parser.Error() | 
| 704 | self.error_stack.extend(error_stack) | 
| 705 | self.AddErrorContext("Parsing for expression failed") | 
| 706 | return None | 
| 707 | self._Next() | 
| 708 |  | 
| 709 | if not self._Peek(): return None | 
| 710 | if self.c_id == Id.Op_Semi: | 
| 711 | self._Next() | 
| 712 | if not self._NewlineOk(): return None | 
| 713 | elif self.c_id == Id.Op_Newline: | 
| 714 | self._Next() | 
| 715 | elif self.c_id == Id.KW_Do:  # missing semicolon/newline allowed | 
| 716 | pass | 
| 717 | else: | 
| 718 | self.AddErrorContext( | 
| 719 | 'Unexpected token after for expression: %s', self.cur_word, | 
| 720 | word=self.cur_word) | 
| 721 | return None | 
| 722 |  | 
| 723 | body_node = self.ParseDoGroup() | 
| 724 | if not body_node: return None | 
| 725 |  | 
| 726 | node.body = body_node | 
| 727 | return node | 
| 728 |  | 
| 729 | def _ParseForEachLoop(self): | 
| 730 | node = ast.ForEach() | 
| 731 | node.do_arg_iter = False | 
| 732 |  | 
| 733 | ok, iter_name, quoted = word.StaticEval(self.cur_word) | 
| 734 | if not ok or quoted: | 
| 735 | self.AddErrorContext( | 
| 736 | "Invalid for loop variable", word=self.cur_word) | 
| 737 | return None | 
| 738 | if not VAR_NAME_RE.match(iter_name): | 
| 739 | self.AddErrorContext( | 
| 740 | "Invalid for loop variable name", word=self.cur_word) | 
| 741 | return None | 
| 742 | node.iter_name = iter_name | 
| 743 | self._Next()  # skip past name | 
| 744 |  | 
| 745 | if not self._NewlineOk(): return None | 
| 746 |  | 
| 747 | in_spid = const.NO_INTEGER | 
| 748 | semi_spid = const.NO_INTEGER | 
| 749 |  | 
| 750 | if not self._Peek(): return None | 
| 751 | if self.c_id == Id.KW_In: | 
| 752 | self._Next()  # skip in | 
| 753 |  | 
| 754 | in_spid = word.LeftMostSpanForWord(self.cur_word) + 1 | 
| 755 | x = self.ParseForWords() | 
| 756 | if x is None: | 
| 757 | return None | 
| 758 | iter_words, semi_spid = x | 
| 759 | words2 = braces.BraceDetectAll(iter_words) | 
| 760 | words3 = word.TildeDetectAll(words2) | 
| 761 |  | 
| 762 | if iter_words is None:  # empty list of words is OK | 
| 763 | return None | 
| 764 | node.iter_words = words3 | 
| 765 |  | 
| 766 | elif self.c_id == Id.Op_Semi: | 
| 767 | node.do_arg_iter = True  # implicit for loop | 
| 768 | self._Next() | 
| 769 |  | 
| 770 | elif self.c_id == Id.KW_Do: | 
| 771 | node.do_arg_iter = True  # implicit for loop | 
| 772 | # do not advance | 
| 773 |  | 
| 774 | else: | 
| 775 | self.AddErrorContext("Unexpected word in for loop: %s", self.cur_word, | 
| 776 | word=self.cur_word) | 
| 777 | return None | 
| 778 |  | 
| 779 | node.spids.extend((in_spid, semi_spid)) | 
| 780 |  | 
| 781 | body_node = self.ParseDoGroup() | 
| 782 | if not body_node: return None | 
| 783 |  | 
| 784 | node.body = body_node | 
| 785 | return node | 
| 786 |  | 
| 787 | def ParseFor(self): | 
| 788 | """ | 
| 789 | for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ; | 
| 790 | | For '((' ... TODO | 
| 791 | """ | 
| 792 | if not self._Eat(Id.KW_For): return None | 
| 793 |  | 
| 794 | if not self._Peek(): return None | 
| 795 | if self.c_id == Id.Op_DLeftParen: | 
| 796 | node = self._ParseForExprLoop() | 
| 797 | else: | 
| 798 | node = self._ParseForEachLoop() | 
| 799 |  | 
| 800 | return node | 
| 801 |  | 
| 802 | def ParseWhile(self): | 
| 803 | """ | 
| 804 | while_clause     : While command_list do_group ; | 
| 805 | """ | 
| 806 | self._Next()  # skip while | 
| 807 |  | 
| 808 | cond_node = self.ParseCommandList() | 
| 809 | if not cond_node: return None | 
| 810 |  | 
| 811 | body_node = self.ParseDoGroup() | 
| 812 | if not body_node: return None | 
| 813 |  | 
| 814 | return ast.While(cond_node.children, body_node) | 
| 815 |  | 
| 816 | def ParseUntil(self): | 
| 817 | """ | 
| 818 | until_clause     : Until command_list do_group ; | 
| 819 | """ | 
| 820 | self._Next()  # skip until | 
| 821 |  | 
| 822 | cond_node = self.ParseCommandList() | 
| 823 | if not cond_node: return None | 
| 824 |  | 
| 825 | body_node = self.ParseDoGroup() | 
| 826 | if not body_node: return None | 
| 827 |  | 
| 828 | return ast.Until(cond_node.children, body_node) | 
| 829 |  | 
| 830 | def ParseCaseItem(self): | 
| 831 | """ | 
| 832 | case_item: '('? pattern ('|' pattern)* ')' | 
| 833 | newline_ok command_term? trailer? ; | 
| 834 | """ | 
| 835 | self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat) | 
| 836 |  | 
| 837 | left_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 838 | if self.c_id == Id.Op_LParen: | 
| 839 | self._Next() | 
| 840 |  | 
| 841 | pat_words = [] | 
| 842 | while True: | 
| 843 | if not self._Peek(): return None | 
| 844 | pat_words.append(self.cur_word) | 
| 845 | self._Next() | 
| 846 |  | 
| 847 | if not self._Peek(): return None | 
| 848 | if self.c_id == Id.Op_Pipe: | 
| 849 | self._Next() | 
| 850 | else: | 
| 851 | break | 
| 852 |  | 
| 853 | rparen_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 854 | if not self._Eat(Id.Right_CasePat): return None | 
| 855 | if not self._NewlineOk(): return None | 
| 856 |  | 
| 857 | if self.c_id not in (Id.Op_DSemi, Id.KW_Esac): | 
| 858 | c_list = self.ParseCommandTerm() | 
| 859 | if not c_list: return None | 
| 860 | action_children = c_list.children | 
| 861 | else: | 
| 862 | action_children = [] | 
| 863 |  | 
| 864 | dsemi_spid = const.NO_INTEGER | 
| 865 | last_spid = const.NO_INTEGER | 
| 866 | if not self._Peek(): return None | 
| 867 | if self.c_id == Id.KW_Esac: | 
| 868 | last_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 869 | elif self.c_id == Id.Op_DSemi: | 
| 870 | dsemi_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 871 | self._Next() | 
| 872 | else: | 
| 873 | self.AddErrorContext('Expected DSEMI or ESAC, got %s', self.cur_word, | 
| 874 | word=self.cur_word) | 
| 875 | return None | 
| 876 |  | 
| 877 | if not self._NewlineOk(): return None | 
| 878 |  | 
| 879 | arm = ast.case_arm(pat_words, action_children) | 
| 880 | arm.spids.extend((left_spid, rparen_spid, dsemi_spid, last_spid)) | 
| 881 | return arm | 
| 882 |  | 
| 883 | def ParseCaseList(self, arms): | 
| 884 | """ | 
| 885 | case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok; | 
| 886 | """ | 
| 887 | if not self._Peek(): return None | 
| 888 |  | 
| 889 | while True: | 
| 890 | # case item begins with a command word or ( | 
| 891 | if self.c_id == Id.KW_Esac: | 
| 892 | break | 
| 893 | if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen: | 
| 894 | break | 
| 895 | arm = self.ParseCaseItem() | 
| 896 | if not arm: return None | 
| 897 |  | 
| 898 | arms.append(arm) | 
| 899 | if not self._Peek(): return None | 
| 900 | # Now look for DSEMI or ESAC | 
| 901 |  | 
| 902 | return True | 
| 903 |  | 
| 904 | def ParseCase(self): | 
| 905 | """ | 
| 906 | case_clause      : Case WORD newline_ok in newline_ok case_list? Esac ; | 
| 907 | """ | 
| 908 | case_node = ast.Case() | 
| 909 |  | 
| 910 | case_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 911 | self._Next()  # skip case | 
| 912 |  | 
| 913 | if not self._Peek(): return None | 
| 914 | case_node.to_match = self.cur_word | 
| 915 | self._Next() | 
| 916 |  | 
| 917 | if not self._NewlineOk(): return None | 
| 918 | in_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 919 | if not self._Eat(Id.KW_In): return None | 
| 920 | if not self._NewlineOk(): return None | 
| 921 |  | 
| 922 | if self.c_id != Id.KW_Esac:  # empty case list | 
| 923 | if not self.ParseCaseList(case_node.arms): | 
| 924 | self.AddErrorContext("ParseCase: error parsing case list") | 
| 925 | return None | 
| 926 | # TODO: should it return a list of nodes, and extend? | 
| 927 | if not self._Peek(): return None | 
| 928 |  | 
| 929 | esac_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 930 | if not self._Eat(Id.KW_Esac): return None | 
| 931 | self._Next() | 
| 932 |  | 
| 933 | case_node.spids.extend((case_spid, in_spid, esac_spid)) | 
| 934 | return case_node | 
| 935 |  | 
| 936 | def _ParseElifElse(self, if_node): | 
| 937 | """ | 
| 938 | else_part: (Elif command_list Then command_list)* Else command_list ; | 
| 939 | """ | 
| 940 | arms = if_node.arms | 
| 941 |  | 
| 942 | self._Peek() | 
| 943 | while self.c_id == Id.KW_Elif: | 
| 944 | elif_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 945 |  | 
| 946 | self._Next()  # skip elif | 
| 947 | cond = self.ParseCommandList() | 
| 948 | if not cond: return None | 
| 949 |  | 
| 950 | then_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 951 | if not self._Eat(Id.KW_Then): return None | 
| 952 |  | 
| 953 | body = self.ParseCommandList() | 
| 954 | if not body: return None | 
| 955 |  | 
| 956 | arm = ast.if_arm(cond.children, body.children) | 
| 957 | arm.spids.extend((elif_spid, then_spid)) | 
| 958 | arms.append(arm) | 
| 959 |  | 
| 960 | if self.c_id == Id.KW_Else: | 
| 961 | else_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 962 | self._Next() | 
| 963 | body = self.ParseCommandList() | 
| 964 | if not body: return None | 
| 965 | if_node.else_action = body.children | 
| 966 | else: | 
| 967 | else_spid = const.NO_INTEGER | 
| 968 |  | 
| 969 | if_node.spids.append(else_spid) | 
| 970 |  | 
| 971 | return True | 
| 972 |  | 
| 973 | def ParseIf(self): | 
| 974 | """ | 
| 975 | if_clause        : If command_list Then command_list else_part? Fi ; | 
| 976 | """ | 
| 977 | if_node = ast.If() | 
| 978 | self._Next()  # skip if | 
| 979 |  | 
| 980 | cond = self.ParseCommandList() | 
| 981 | if not cond: return None | 
| 982 |  | 
| 983 | then_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 984 | if not self._Eat(Id.KW_Then): return None | 
| 985 |  | 
| 986 | body = self.ParseCommandList() | 
| 987 | if not body: return None | 
| 988 |  | 
| 989 | arm = ast.if_arm(cond.children, body.children) | 
| 990 | arm.spids.extend((const.NO_INTEGER, then_spid))  # no if spid at first? | 
| 991 | if_node.arms.append(arm) | 
| 992 |  | 
| 993 | if self.c_id in (Id.KW_Elif, Id.KW_Else): | 
| 994 | if not self._ParseElifElse(if_node): | 
| 995 | return None | 
| 996 | else: | 
| 997 | if_node.spids.append(const.NO_INTEGER)  # no else spid | 
| 998 |  | 
| 999 | fi_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 1000 | if not self._Eat(Id.KW_Fi): return None | 
| 1001 |  | 
| 1002 | if_node.spids.append(fi_spid) | 
| 1003 | return if_node | 
| 1004 |  | 
| 1005 | def ParseTime(self): | 
| 1006 | """ | 
| 1007 | time [-p] pipeline | 
| 1008 |  | 
| 1009 | According to bash help. | 
| 1010 | """ | 
| 1011 | self._Next()  # skip time | 
| 1012 |  | 
| 1013 | pipeline = self.ParsePipeline() | 
| 1014 | if not pipeline: return None | 
| 1015 | return ast.TimeBlock(pipeline) | 
| 1016 |  | 
| 1017 | def ParseCompoundCommand(self): | 
| 1018 | """ | 
| 1019 | compound_command : brace_group | 
| 1020 | | subshell | 
| 1021 | | for_clause | 
| 1022 | | while_clause | 
| 1023 | | until_clause | 
| 1024 | | if_clause | 
| 1025 | | case_clause | 
| 1026 | | time_clause | 
| 1027 | | [[ BoolExpr ]] | 
| 1028 | | (( ArithExpr )) | 
| 1029 | ; | 
| 1030 | """ | 
| 1031 | if self.c_id == Id.Lit_LBrace: | 
| 1032 | return self.ParseBraceGroup() | 
| 1033 | if self.c_id == Id.Op_LParen: | 
| 1034 | return self.ParseSubshell() | 
| 1035 |  | 
| 1036 | if self.c_id == Id.KW_For: | 
| 1037 | return self.ParseFor() | 
| 1038 | if self.c_id == Id.KW_While: | 
| 1039 | return self.ParseWhile() | 
| 1040 | if self.c_id == Id.KW_Until: | 
| 1041 | return self.ParseUntil() | 
| 1042 |  | 
| 1043 | if self.c_id == Id.KW_If: | 
| 1044 | return self.ParseIf() | 
| 1045 | if self.c_id == Id.KW_Case: | 
| 1046 | return self.ParseCase() | 
| 1047 | if self.c_id == Id.KW_Time: | 
| 1048 | return self.ParseTime() | 
| 1049 |  | 
| 1050 | # Example of redirect that is observable: | 
| 1051 | # $ (( $(echo one 1>&2; echo 2) > 0 )) 2> out.txt | 
| 1052 | if self.c_id == Id.KW_DLeftBracket: | 
| 1053 | return self.ParseDBracket() | 
| 1054 |  | 
| 1055 | if self.c_id == Id.Op_DLeftParen: | 
| 1056 | return self.ParseDParen() | 
| 1057 |  | 
| 1058 | self.AddErrorContext( | 
| 1059 | "Expected a compound command (e.g. for while if case), got %s", | 
| 1060 | self.cur_word, word=self.cur_word) | 
| 1061 | return None | 
| 1062 |  | 
| 1063 | def ParseFunctionBody(self, func): | 
| 1064 | """ | 
| 1065 | function_body    : compound_command io_redirect* ; /* Apply rule 9 */ | 
| 1066 | """ | 
| 1067 | body = self.ParseCompoundCommand() | 
| 1068 | if not body: return None | 
| 1069 |  | 
| 1070 | redirects = self._ParseRedirectList() | 
| 1071 | if redirects is None: return None | 
| 1072 |  | 
| 1073 | func.body = body | 
| 1074 | func.redirects = redirects | 
| 1075 | return True | 
| 1076 |  | 
| 1077 | def ParseFunctionDef(self): | 
| 1078 | """ | 
| 1079 | function_header : fname '(' ')' | 
| 1080 | function_def     : function_header newline_ok function_body ; | 
| 1081 |  | 
| 1082 | Precondition: Looking at the function name. | 
| 1083 | Post condition: | 
| 1084 |  | 
| 1085 | NOTE: There is an ambiguity with: | 
| 1086 |  | 
| 1087 | function foo ( echo hi ) and | 
| 1088 | function foo () ( echo hi ) | 
| 1089 |  | 
| 1090 | Bash only accepts the latter, though it doesn't really follow a grammar. | 
| 1091 | """ | 
| 1092 | left_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 1093 |  | 
| 1094 | ok, name = word.AsFuncName(self.cur_word) | 
| 1095 | if not ok: | 
| 1096 | self.AddErrorContext('Invalid function name', word=self.cur_word) | 
| 1097 | return None | 
| 1098 | self._Next()  # skip function name | 
| 1099 |  | 
| 1100 | # Must be true because of lookahead | 
| 1101 | if not self._Peek(): return None | 
| 1102 | assert self.c_id == Id.Op_LParen, self.cur_word | 
| 1103 |  | 
| 1104 | self.lexer.PushHint(Id.Op_RParen, Id.Right_FuncDef) | 
| 1105 | self._Next() | 
| 1106 |  | 
| 1107 | if not self._Eat(Id.Right_FuncDef): return None | 
| 1108 | after_name_spid = word.LeftMostSpanForWord(self.cur_word) + 1 | 
| 1109 |  | 
| 1110 | if not self._NewlineOk(): return None | 
| 1111 |  | 
| 1112 | func = ast.FuncDef() | 
| 1113 | func.name = name | 
| 1114 |  | 
| 1115 | if not self.ParseFunctionBody(func): | 
| 1116 | return None | 
| 1117 |  | 
| 1118 | func.spids.append(left_spid) | 
| 1119 | func.spids.append(after_name_spid) | 
| 1120 | return func | 
| 1121 |  | 
| 1122 | def ParseKshFunctionDef(self): | 
| 1123 | """ | 
| 1124 | ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body | 
| 1125 | """ | 
| 1126 | left_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 1127 |  | 
| 1128 | self._Next()  # skip past 'function' | 
| 1129 |  | 
| 1130 | if not self._Peek(): return None | 
| 1131 | ok, name = word.AsFuncName(self.cur_word) | 
| 1132 | if not ok: | 
| 1133 | self.AddErrorContext("Invalid function name: %r", self.cur_word) | 
| 1134 | return None | 
| 1135 | after_name_spid = word.LeftMostSpanForWord(self.cur_word) + 1 | 
| 1136 | self._Next()  # skip past 'function name | 
| 1137 |  | 
| 1138 | if not self._Peek(): return None | 
| 1139 | if self.c_id == Id.Op_LParen: | 
| 1140 | self.lexer.PushHint(Id.Op_RParen, Id.Right_FuncDef) | 
| 1141 | self._Next() | 
| 1142 | if not self._Eat(Id.Right_FuncDef): return None | 
| 1143 | # Change it: after ) | 
| 1144 | after_name_spid = word.LeftMostSpanForWord(self.cur_word) + 1 | 
| 1145 |  | 
| 1146 | if not self._NewlineOk(): return None | 
| 1147 |  | 
| 1148 | func = ast.FuncDef() | 
| 1149 | func.name = name | 
| 1150 |  | 
| 1151 | if not self.ParseFunctionBody(func): | 
| 1152 | return None | 
| 1153 |  | 
| 1154 | func.spids.append(left_spid) | 
| 1155 | func.spids.append(after_name_spid) | 
| 1156 | return func | 
| 1157 |  | 
| 1158 | def ParseCoproc(self): | 
| 1159 | """ | 
| 1160 | TODO: | 
| 1161 | """ | 
| 1162 | raise NotImplementedError | 
| 1163 |  | 
| 1164 | def ParseSubshell(self): | 
| 1165 | left_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 1166 | self._Next()  # skip past ( | 
| 1167 |  | 
| 1168 | # Ensure that something $( (cd / && pwd) ) works.  If ) is already on the | 
| 1169 | # translation stack, we want to delay it. | 
| 1170 |  | 
| 1171 | #print('ParseSubshell lexer.PushHint ) -> )') | 
| 1172 | self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell) | 
| 1173 |  | 
| 1174 | c_list = self.ParseCommandList() | 
| 1175 | if not c_list: return None | 
| 1176 |  | 
| 1177 | # Remove singleton CommandList as an optimization. | 
| 1178 | if len(c_list.children) == 1: | 
| 1179 | child = c_list.children[0] | 
| 1180 | else: | 
| 1181 | child = c_list | 
| 1182 | node = ast.Subshell(child) | 
| 1183 |  | 
| 1184 | right_spid = word.LeftMostSpanForWord(self.cur_word) | 
| 1185 | if not self._Eat(Id.Right_Subshell): return None | 
| 1186 |  | 
| 1187 | node.spids.extend((left_spid, right_spid)) | 
| 1188 | return node | 
| 1189 |  | 
| 1190 | def ParseDBracket(self): | 
| 1191 | """ | 
| 1192 | Pass the underlying word parser off to the boolean expression parser. | 
| 1193 | """ | 
| 1194 | maybe_error_word = self.cur_word | 
| 1195 | # TODO: Test interactive.  Without closing ]], you should get > prompt | 
| 1196 | # (PS2) | 
| 1197 |  | 
| 1198 | self._Next()  # skip [[ | 
| 1199 | b_parser = BoolParser(self.w_parser) | 
| 1200 | bnode = b_parser.Parse() | 
| 1201 | if not bnode: | 
| 1202 | error_stack = b_parser.Error() | 
| 1203 | self.error_stack.extend(error_stack) | 
| 1204 | self.AddErrorContext("Error parsing [[", word=maybe_error_word) | 
| 1205 | return None | 
| 1206 | return ast.DBracket(bnode) | 
| 1207 |  | 
| 1208 | def ParseDParen(self): | 
| 1209 | maybe_error_word = self.cur_word | 
| 1210 | self._Next()  # skip (( | 
| 1211 | #print('1 ((', self.cur_word) | 
| 1212 | anode = self.w_parser.ReadDParen() | 
| 1213 | if not anode: | 
| 1214 | error_stack = self.w_parser.Error() | 
| 1215 | self.error_stack.extend(error_stack) | 
| 1216 | self.AddErrorContext("Error parsing ((", word=maybe_error_word) | 
| 1217 | return None | 
| 1218 |  | 
| 1219 | #print('2 ((', self.cur_word) | 
| 1220 | return ast.DParen(anode) | 
| 1221 |  | 
| 1222 | def ParseCommand(self): | 
| 1223 | """ | 
| 1224 | command          : simple_command | 
| 1225 | | compound_command io_redirect* | 
| 1226 | | function_def | 
| 1227 | | ksh_function_def | 
| 1228 | ; | 
| 1229 | """ | 
| 1230 | if not self._Peek(): return None | 
| 1231 |  | 
| 1232 | if self.c_id == Id.KW_Function: | 
| 1233 | return self.ParseKshFunctionDef() | 
| 1234 |  | 
| 1235 | if self.c_id in ( | 
| 1236 | Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen, Id.Lit_LBrace, | 
| 1237 | Id.KW_For, Id.KW_While, Id.KW_Until, Id.KW_If, Id.KW_Case, Id.KW_Time): | 
| 1238 | node = self.ParseCompoundCommand() | 
| 1239 | if not node: return None | 
| 1240 | if node.tag != command_e.TimeBlock:  # The only one without redirects | 
| 1241 | redirects = self._ParseRedirectList() | 
| 1242 | if redirects is None: | 
| 1243 | return None | 
| 1244 | node.redirects = redirects | 
| 1245 | return node | 
| 1246 |  | 
| 1247 | # NOTE: I added this to fix cases in parse-errors.test.sh, but it doesn't | 
| 1248 | # work because Lit_RBrace is in END_LIST below. | 
| 1249 |  | 
| 1250 | # TODO: KW_Do is also invalid here. | 
| 1251 | if self.c_id == Id.Lit_RBrace: | 
| 1252 | self.AddErrorContext('Unexpected }', word=self.cur_word) | 
| 1253 | return None | 
| 1254 |  | 
| 1255 | if self.c_kind == Kind.Redir:  # Leading redirect | 
| 1256 | return self.ParseSimpleCommand() | 
| 1257 |  | 
| 1258 | if self.c_kind == Kind.Word: | 
| 1259 | if self.w_parser.LookAhead() == Id.Op_LParen:  # ( | 
| 1260 | kov = word.LooksLikeAssignment(self.cur_word) | 
| 1261 | if kov: | 
| 1262 | return self.ParseSimpleCommand()  # f=(a b c)  # array | 
| 1263 | else: | 
| 1264 | return self.ParseFunctionDef()  # f() { echo; }  # function | 
| 1265 |  | 
| 1266 | return self.ParseSimpleCommand()  # echo foo | 
| 1267 |  | 
| 1268 | self.AddErrorContext( | 
| 1269 | "ParseCommand: Expected to parse a command, got %s", self.cur_word, | 
| 1270 | word=self.cur_word) | 
| 1271 | return None | 
| 1272 |  | 
| 1273 | def ParsePipeline(self): | 
| 1274 | """ | 
| 1275 | pipeline         : Bang? command ( '|' newline_ok command )* ; | 
| 1276 | """ | 
| 1277 | negated = False | 
| 1278 |  | 
| 1279 | if not self._Peek(): return None | 
| 1280 | if self.c_id == Id.KW_Bang: | 
| 1281 | negated = True | 
| 1282 | self._Next() | 
| 1283 |  | 
| 1284 | child = self.ParseCommand() | 
| 1285 | if not child: return None | 
| 1286 |  | 
| 1287 | children = [child] | 
| 1288 |  | 
| 1289 | if not self._Peek(): return None | 
| 1290 | if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp): | 
| 1291 | if negated: | 
| 1292 | node = ast.Pipeline(children, negated) | 
| 1293 | return node | 
| 1294 | else: | 
| 1295 | return child | 
| 1296 |  | 
| 1297 | pipe_index = 0 | 
| 1298 | stderr_indices = [] | 
| 1299 |  | 
| 1300 | if self.c_id == Id.Op_PipeAmp: | 
| 1301 | stderr_indices.append(pipe_index) | 
| 1302 | pipe_index += 1 | 
| 1303 |  | 
| 1304 | while True: | 
| 1305 | self._Next()  # skip past Id.Op_Pipe or Id.Op_PipeAmp | 
| 1306 |  | 
| 1307 | if not self._NewlineOk(): | 
| 1308 | return None | 
| 1309 |  | 
| 1310 | child = self.ParseCommand() | 
| 1311 | if not child: | 
| 1312 | self.AddErrorContext('Error parsing command after pipe') | 
| 1313 | # TODO: Return partial pipeline here?  All signatures should (ok, | 
| 1314 | # node).  Only the completion uses the node when ok is False. | 
| 1315 | return None | 
| 1316 | children.append(child) | 
| 1317 |  | 
| 1318 | if not self._Peek(): return None | 
| 1319 | if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp): | 
| 1320 | break | 
| 1321 |  | 
| 1322 | if self.c_id == Id.Op_PipeAmp: | 
| 1323 | stderr_indices.append(pipe_index) | 
| 1324 | pipe_index += 1 | 
| 1325 |  | 
| 1326 | node = ast.Pipeline(children, negated) | 
| 1327 | node.stderr_indices = stderr_indices | 
| 1328 | return node | 
| 1329 |  | 
| 1330 | def ParseAndOr(self): | 
| 1331 | """ | 
| 1332 | and_or           : and_or ( AND_IF | OR_IF ) newline_ok pipeline | 
| 1333 | | pipeline | 
| 1334 |  | 
| 1335 | Note that it is left recursive and left associative.  We parse it | 
| 1336 | iteratively with a token of lookahead. | 
| 1337 | """ | 
| 1338 | child = self.ParsePipeline() | 
| 1339 | if not child: return None | 
| 1340 |  | 
| 1341 | if not self._Peek(): return None | 
| 1342 | if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp): | 
| 1343 | return child | 
| 1344 |  | 
| 1345 | ops = [] | 
| 1346 | children = [child] | 
| 1347 |  | 
| 1348 | while True: | 
| 1349 | ops.append(self.c_id) | 
| 1350 |  | 
| 1351 | self._Next()  # skip past || && | 
| 1352 |  | 
| 1353 | if not self._NewlineOk(): | 
| 1354 | return None | 
| 1355 |  | 
| 1356 | child = self.ParsePipeline() | 
| 1357 | if not child: return None | 
| 1358 |  | 
| 1359 | children.append(child) | 
| 1360 |  | 
| 1361 | if not self._Peek(): return None | 
| 1362 | if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp): | 
| 1363 | break | 
| 1364 |  | 
| 1365 | node = ast.AndOr(ops, children) | 
| 1366 | return node | 
| 1367 |  | 
| 1368 | def ParseCommandLine(self): | 
| 1369 | """ | 
| 1370 | NOTE: This is only called in InteractiveLoop.  Oh crap I need to really | 
| 1371 | read and execute a line at a time then? | 
| 1372 |  | 
| 1373 | BUG: sleep 1 & sleep 1 &  doesn't work here, when written in REPL.   But it | 
| 1374 | does work with '-c', because that calls ParseFile and not ParseCommandLine | 
| 1375 | over and over. | 
| 1376 |  | 
| 1377 | TODO: Get rid of ParseFile and stuff?  Shouldn't be used for -c and so | 
| 1378 | forth.  Just have an ExecuteLoop for now.  But you still need | 
| 1379 | ParseCommandList, for internal nodes. | 
| 1380 |  | 
| 1381 | command_line     : and_or (sync_op and_or)* trailer? ; | 
| 1382 | trailer          : sync_op newline_ok | 
| 1383 | | NEWLINES; | 
| 1384 | sync_op          : '&' | ';'; | 
| 1385 |  | 
| 1386 | This rule causes LL(k > 1) behavior.  We would have to peek to see if there | 
| 1387 | is another command word after the sync op. | 
| 1388 |  | 
| 1389 | But it's easier to express imperatively.  Do the following in a loop: | 
| 1390 | 1. ParseAndOr | 
| 1391 | 2. Peek. | 
| 1392 | a. If there's a newline, then return.  (We're only parsing a single | 
| 1393 | line.) | 
| 1394 | b. If there's a sync_op, process it.  Then look for a newline and | 
| 1395 | return.  Otherwise, parse another AndOr. | 
| 1396 |  | 
| 1397 | COMPARE | 
| 1398 | command_line     : and_or (sync_op and_or)* trailer? ;   # TOP LEVEL | 
| 1399 | command_term     : and_or (trailer and_or)* ;            # CHILDREN | 
| 1400 |  | 
| 1401 | I think you should be able to factor these out. | 
| 1402 | """ | 
| 1403 | children = [] | 
| 1404 | done = False | 
| 1405 | while not done: | 
| 1406 | child = self.ParseAndOr() | 
| 1407 | if not child: return None | 
| 1408 |  | 
| 1409 | if not self._Peek(): return None | 
| 1410 | if self.c_id in (Id.Op_Semi, Id.Op_Amp):  # also Id.Op_Amp. | 
| 1411 | child = ast.Sentence(child, self.cur_word.token) | 
| 1412 | self._Next() | 
| 1413 |  | 
| 1414 | if not self._Peek(): return None | 
| 1415 | if self.c_id in (Id.Op_Newline, Id.Eof_Real): | 
| 1416 | done = True | 
| 1417 |  | 
| 1418 | elif self.c_id == Id.Op_Newline: | 
| 1419 | done = True | 
| 1420 |  | 
| 1421 | elif self.c_id == Id.Eof_Real: | 
| 1422 | done = True | 
| 1423 |  | 
| 1424 | else: | 
| 1425 | self.AddErrorContext( | 
| 1426 | 'ParseCommandLine: Unexpected token %s', self.cur_word) | 
| 1427 | return None | 
| 1428 |  | 
| 1429 | children.append(child) | 
| 1430 |  | 
| 1431 | return ast.CommandList(children) | 
| 1432 |  | 
| 1433 | def ParseCommandTerm(self): | 
| 1434 | """" | 
| 1435 | command_term     : and_or (trailer and_or)* ; | 
| 1436 | trailer          : sync_op newline_ok | 
| 1437 | | NEWLINES; | 
| 1438 | sync_op          : '&' | ';'; | 
| 1439 |  | 
| 1440 | This is handled in imperative style, like ParseCommandLine. | 
| 1441 | Called by ParseCommandList for all blocks, and also for ParseCaseItem, | 
| 1442 | which is slightly different.  (HOW?  Is it the DSEMI?) | 
| 1443 |  | 
| 1444 | Returns: | 
| 1445 | ast.command | 
| 1446 | """ | 
| 1447 | # Word types that will end the command term. | 
| 1448 | END_LIST = ( | 
| 1449 | Id.Eof_Real, Id.Eof_RParen, Id.Eof_Backtick, Id.Right_Subshell, | 
| 1450 | Id.Lit_RBrace, Id.Op_DSemi) | 
| 1451 |  | 
| 1452 | # NOTE: This is similar to ParseCommandLine, except there is a lot of stuff | 
| 1453 | # about here docs.  Here docs are inherently line-oriented. | 
| 1454 | # | 
| 1455 | # - Why aren't we doing END_LIST in ParseCommandLine? | 
| 1456 | #   - Because you will never be inside $() at the top level. | 
| 1457 | #   - We also know it will end in a newline.  It can't end in "fi"! | 
| 1458 | #   - example: if true; then { echo hi; } fi | 
| 1459 | # - Why aren't we doing 'for c in children' too? | 
| 1460 |  | 
| 1461 | children = [] | 
| 1462 | done = False | 
| 1463 | while not done: | 
| 1464 | if not self._Peek(): return None | 
| 1465 | #print('====> ParseCommandTerm word', self.cur_word) | 
| 1466 |  | 
| 1467 | # Most keywords are valid "first words".  But do/done/then do not BEGIN | 
| 1468 | # commands, so they are not valid. | 
| 1469 | if self.c_id in ( | 
| 1470 | Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else, | 
| 1471 | Id.KW_Esac): | 
| 1472 | break | 
| 1473 |  | 
| 1474 | child = self.ParseAndOr() | 
| 1475 | if not child: | 
| 1476 | self.AddErrorContext('Error parsing AndOr in ParseCommandTerm') | 
| 1477 | return None | 
| 1478 |  | 
| 1479 | if not self._Peek(): return None | 
| 1480 | if self.c_id == Id.Op_Newline: | 
| 1481 | self._Next() | 
| 1482 |  | 
| 1483 | if not self._Peek(): return None | 
| 1484 | if self.c_id in END_LIST: | 
| 1485 | done = True | 
| 1486 |  | 
| 1487 | elif self.c_id in (Id.Op_Semi, Id.Op_Amp): | 
| 1488 | child = ast.Sentence(child, self.cur_word.token) | 
| 1489 | self._Next() | 
| 1490 |  | 
| 1491 | if not self._Peek(): return None | 
| 1492 | if self.c_id == Id.Op_Newline: | 
| 1493 | self._Next()  # skip over newline | 
| 1494 |  | 
| 1495 | # Test if we should keep going.  There might be another command after | 
| 1496 | # the semi and newline. | 
| 1497 | if not self._Peek(): return None | 
| 1498 | if self.c_id in END_LIST: | 
| 1499 | done = True | 
| 1500 |  | 
| 1501 | elif self.c_id in END_LIST:  # ; EOF | 
| 1502 | done = True | 
| 1503 |  | 
| 1504 | elif self.c_id in END_LIST:  # EOF | 
| 1505 | done = True | 
| 1506 |  | 
| 1507 | else: | 
| 1508 | pass  # e.g. "} done", "fi fi", ") fi", etc. is OK | 
| 1509 |  | 
| 1510 | children.append(child) | 
| 1511 |  | 
| 1512 | if not self._Peek(): return None | 
| 1513 |  | 
| 1514 | return ast.CommandList(children) | 
| 1515 |  | 
| 1516 | def ParseCommandList(self): | 
| 1517 | """ | 
| 1518 | command_list     : newline_ok command_term trailer? ; | 
| 1519 |  | 
| 1520 | This one is called by all the compound commands.  It's basically a command | 
| 1521 | block. | 
| 1522 |  | 
| 1523 | NOTE: Rather than translating the CFG directly, the code follows a style | 
| 1524 | more like this: more like this: (and_or trailer)+.  It makes capture | 
| 1525 | easier. | 
| 1526 | """ | 
| 1527 | if not self._NewlineOk(): return None | 
| 1528 |  | 
| 1529 | node = self.ParseCommandTerm() | 
| 1530 | if node is None: return None | 
| 1531 | assert node is not False | 
| 1532 | return node | 
| 1533 |  | 
| 1534 | def ParseWholeFile(self): | 
| 1535 | """Entry point for main() in non-interactive shell. | 
| 1536 |  | 
| 1537 | Very similar to ParseCommandList, but we allow empty files. | 
| 1538 |  | 
| 1539 | TODO: This should be turned into a Parse and Execute loop, freeing arenas | 
| 1540 | if they don't contain functions. | 
| 1541 | """ | 
| 1542 | if not self._NewlineOk(): return None | 
| 1543 |  | 
| 1544 | #print('ParseFile', self.c_kind, self.cur_word) | 
| 1545 | # An empty node to execute | 
| 1546 | if self.c_kind == Kind.Eof: | 
| 1547 | return ast.NoOp() | 
| 1548 |  | 
| 1549 | # This calls ParseAndOr(), but I think it should be a loop that calls | 
| 1550 | # ParseCommandLine(), like oil.InteractiveLoop. | 
| 1551 | node = self.ParseCommandTerm() | 
| 1552 | if node is None: return None | 
| 1553 | assert node is not False | 
| 1554 |  | 
| 1555 | return node |