| 1 | """
|
| 2 | parse_lib.py - Consolidate various parser instantiations here.
|
| 3 | """
|
| 4 |
|
| 5 | from _devbuild.gen.id_kind_asdl import Id_t
|
| 6 | from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
|
| 7 | ArgList, Proc, Func, command, pat_t)
|
| 8 | from _devbuild.gen.types_asdl import lex_mode_e
|
| 9 | from _devbuild.gen import grammar_nt
|
| 10 |
|
| 11 | from asdl import format as fmt
|
| 12 | from core import state
|
| 13 | from frontend import lexer
|
| 14 | from frontend import reader
|
| 15 | from osh import tdop
|
| 16 | from osh import arith_parse
|
| 17 | from osh import cmd_parse
|
| 18 | from osh import word_parse
|
| 19 | from mycpp import mylib
|
| 20 | from mycpp.mylib import log
|
| 21 | from ysh import expr_parse
|
| 22 | from ysh import expr_to_ast
|
| 23 | from ysh.expr_parse import ctx_PNodeAllocator
|
| 24 |
|
| 25 | _ = log
|
| 26 |
|
| 27 | from typing import Any, List, Tuple, Dict, TYPE_CHECKING
|
| 28 | if TYPE_CHECKING:
|
| 29 | from core.alloc import Arena
|
| 30 | from core.util import _DebugFile
|
| 31 | from core import optview
|
| 32 | from frontend.lexer import Lexer
|
| 33 | from frontend.reader import _Reader
|
| 34 | from osh.tdop import TdopParser
|
| 35 | from osh.word_parse import WordParser
|
| 36 | from osh.cmd_parse import CommandParser
|
| 37 | from pgen2.grammar import Grammar
|
| 38 |
|
| 39 |
|
class _BaseTrail(object):
    """Base class has members, but no-ops for methods.

    ParseContext holds one of these by default; a real Trail can be
    installed via ParseContext.Init_Trail().
    """

    def __init__(self):
        # type: () -> None
        # word from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = [] # type: List[CompoundWord]
        self.redirects = [] # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list,
        # but I ran into the "case classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation. Used for variable name
        # completion. Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        # $ echo $\
        # f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more than
        # one line!
        self.tokens = [] # type: List[Token]

        # words INSIDE an alias expansion, kept separate from self.words
        self.alias_words = [] # type: List[CompoundWord]
        # Set by ctx_Alias while words from an alias expansion are parsed.
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None
            """Dump collected words/redirects/tokens/alias_words for debugging."""

            # note: could cast DebugFile to IO[str] instead of ignoring?
            debug_f.writeln(' words:')
            for w in self.words:
                fmt.PrettyPrint(w, f=debug_f) # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' redirects:')
            for r in self.redirects:
                fmt.PrettyPrint(r, f=debug_f) # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' tokens:')
            for p in self.tokens:
                fmt.PrettyPrint(p, f=debug_f) # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' alias_words:')
            for w in self.alias_words:
                fmt.PrettyPrint(w, f=debug_f) # type: ignore
            debug_f.writeln('')

    def __repr__(self):
        # type: () -> str
        return '<Trail %s %s %s %s>' % (self.words, self.redirects,
                                        self.tokens, self.alias_words)
|
| 117 |
|
| 118 |
|
class ctx_Alias(object):
    """Marks the Trail while words from an alias expansion are parsed.

    CommandParser enters this context so the trail captures only the FIRST
    word of the expansion, e.g. 'ls' for

        alias ll='ls -l'

    While the flag is set, SetLatestWords and AppendToken stay inactive: we
    don't need the other tokens from 'ls -l', and keeping them would also
    probably cause bugs in history expansion — 'echo !1' should refer to the
    first word the user typed, not the first word after alias expansion.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        self.trail = trail
        trail._expanding_alias = True

    def __enter__(self):
        # type: () -> None
        return None

    def __exit__(self, exc_type, exc_value, exc_tb):
        # type: (Any, Any, Any) -> None
        # Restore normal trail recording.
        self.trail._expanding_alias = False
|
| 147 |
|
| 148 |
|
class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        # Empty all four lists in place (list identity is preserved).
        del self.tokens[:]
        del self.alias_words[:]
        del self.words[:]
        del self.redirects[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if self._expanding_alias:
            # Words inside an alias expansion are saved separately.
            self.alias_words = words
        else:
            self.words = words
            self.redirects = redirects

    def AppendToken(self, token):
        # type: (Token) -> None
        # Tokens inside an alias expansion are deliberately dropped.
        if not self._expanding_alias:
            self.tokens.append(token)
|
| 181 |
|
| 182 |
|
if TYPE_CHECKING:
    # Aliases currently being expanded, as (str, int) pairs.
    # NOTE(review): the exact meaning of the int isn't visible in this file;
    # presumably an expansion position — confirm against osh/cmd_parse.py.
    AliasesInFlight = List[Tuple[str, int]]
|
| 185 |
|
| 186 |
|
class ParseContext(object):
    """Context shared between the mutually recursive Command and Word parsers.

    In contrast, STATE is stored in the CommandParser and WordParser
    instances.

    Owns the arena, parse options, alias table, and the YSH expression
    grammar, and acts as a factory for the various parser objects below.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena  # used when creating StringLineReader instances
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.
        if ysh_grammar:
            self.tr = expr_to_ast.Transformer(ysh_grammar)
        else: # hack for unit tests, which pass None
            self.tr = None

        if mylib.PYTHON:
            # Parse tree printer for the 'if 0:' debug blocks below.
            if self.tr:
                self.p_printer = self.tr.p_printer
            else:
                self.p_printer = None

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail() # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        """Install a Trail that records parser state (completion/history)."""
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper function.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't perform
        better.
        """
        # Take Arena from LineReader
        line_lexer = lexer.LineLexer(line_reader.arena)
        return lexer.Lexer(line_lexer, line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        """Make a CommandParser using this context's parse options.

        emit_comp_dummy: emit a special sentinel token before EOF, for the
        completion machinery.
        """
        lx = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lx.EmitCompDummy() # A special token before EOF!

        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, self.parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        """Make a CommandParser with Oil/YSH option defaults.

        Unlike MakeOshParser, this ignores self.parse_opts and uses
        state.MakeOilOpts() instead.
        """
        lx = self.MakeLexer(line_reader)
        parse_opts = state.MakeOilOpts()
        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        """Make a fresh WordParser (with its own Lexer) for here doc bodies."""
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        """Make a WordParser that shares an existing Lexer."""
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str):
        # type: (str) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        w_parser = word_parse.WordParser(self, lx, line_reader)
        w_parser.Init(lex_mode_e.Arith) # Special initialization
        a_parser = tdop.TdopParser(arith_parse.Spec(), w_parser,
                                   self.parse_opts)
        return a_parser

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        # NOTE(review): the 'lexer' param shadows the module import of the same
        # name; renaming it would change the keyword-argument interface.
        w_parser = word_parse.WordParser(self, lexer, line_reader)
        c_parser = cmd_parse.CommandParser(self,
                                           self.parse_opts,
                                           w_parser,
                                           lexer,
                                           line_reader,
                                           eof_id=eof_id)
        return c_parser

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        """Make a fresh YSH expression parser using self.ysh_grammar."""
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.VarDecl, Token]
        """ var mylist = [1, 2, 3] """
        e_parser = self._YshParser()
        # PNodes are only valid inside this context.
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_var_decl)

            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
            ast_node.keyword = kw_token # VarDecl didn't fill this in

        return ast_node, last_token

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.Mutation, Token]
        """ setvar d['a'] += 1 """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_mutation)
            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)
            ast_node = self.tr.MakeMutation(pnode)
            ast_node.keyword = kw_token # Mutation didn't fill this in

        return ast_node, last_token

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """ json write (x, foo=1) and assert [42 === x] """

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)

            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)

            # Mutates 'out' in place; the right paren/bracket is recorded too.
            self.tr.ProcCallArgs(pnode, out)
            out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """if (x > 0) { ...

        }, while, etc.
        """

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)
            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

        return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern and
        the LBrace token at the start of the case arm body.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

        return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """proc f(x, y, @args) {"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_proc)

            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)

            # Fills in the signature; the caller owns the rest of 'out'.
            out.sig = self.tr.Proc(pnode)

        return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """ func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_func)

            if 0: # flip to 1 to debug the parse tree
                self.p_printer.Print(pnode)

            # Mutates 'out' in place.
            self.tr.YshFunc(pnode, out)
        return last_token
|
| 401 |
|
| 402 |
|
| 403 | # Another parser instantiation:
|
| 404 | # - For Array Literal in word_parse.py WordParser:
|
| 405 | # w_parser = WordParser(self.lexer, self.line_reader)
|