"""
parse_lib.py - Consolidate various parser instantiations here.
"""

from core import lexer
from core import reader

from osh import lex
from osh import word_parse
from osh import cmd_parse
from osh.meta import Id, IdInstance

# bin/osh should work without compiling fastlex?  But we want all the unit
# tests to run with a known version of it.
try:
  import fastlex
except ImportError:
  fastlex = None


class MatchToken_Slow(object):
  """An abstract matcher that doesn't depend on OSH."""
  def __init__(self, lexer_def):
    self.lexer_def = {}
    for state, pat_list in lexer_def.items():
      self.lexer_def[state] = lexer.CompileAll(pat_list)

  def __call__(self, lex_mode, line, start_pos):
    """Returns (id, end_pos)."""
    # Simulate the EOL handling in re2c.
    if start_pos >= len(line):
      return Id.Eol_Tok, start_pos

    re_list = self.lexer_def[lex_mode]
    matches = []
    for regex, tok_type in re_list:
      m = regex.match(line, start_pos)  # left-anchored
      if m:
        matches.append((m.end(0), tok_type, m.group(0)))
    if not matches:
      raise AssertionError('no match at position %d: %r' % (start_pos, line))
    end_pos, tok_type, tok_val = max(matches, key=lambda m: m[0])
    return tok_type, end_pos


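# A self-contained sketch of the longest-match rule in
# MatchToken_Slow.__call__ above, using only the stdlib (no OSH
# dependencies; pattern and token names are illustrative).  With
# overlapping patterns, the match that consumes the most characters wins,
# which is what re2c does natively:
#
#   import re
#   pairs = [(re.compile(r'if'), 'KEYWORD'), (re.compile(r'[a-z]+'), 'NAME')]
#   matches = []
#   for regex, tok_type in pairs:
#     m = regex.match('ifs', 0)
#     if m:
#       matches.append((m.end(0), tok_type))
#   end_pos, tok_type = max(matches, key=lambda m: m[0])
#   assert (tok_type, end_pos) == ('NAME', 3)  # [a-z]+ beats the 'if' keyword

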
def MatchToken_Fast(lex_mode, line, start_pos):
  """Returns (id, end_pos)."""
  tok_type, end_pos = fastlex.MatchToken(lex_mode.enum_id, line, start_pos)
  # IMPORTANT: We're reusing Id instances here.  Ids are very common, so this
  # saves memory.
  return IdInstance(tok_type), end_pos


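# A generic sketch of the interning pattern behind IdInstance (illustrative
# only -- the Id(i) constructor here is hypothetical, not this repo's API):
#
#   _INSTANCES = {}
#   def Intern(i):
#     if i not in _INSTANCES:
#       _INSTANCES[i] = Id(i)  # at most one object per integer id
#     return _INSTANCES[i]
#
# Every token of the same kind then shares one Id object instead of
# allocating a new one per token.

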
def _MakeMatcher():
  # NOTE: Could have an environment variable to control this for speed?
  # (sketch below)
  #return MatchToken_Slow(lex.LEXER_DEF)

  if fastlex:
    return MatchToken_Fast
  else:
    return MatchToken_Slow(lex.LEXER_DEF)


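# A sketch of the environment variable idea in _MakeMatcher (the variable
# name is hypothetical; nothing reads it today):
#
#   import os
#   def _MakeMatcher():
#     if os.getenv('OSH_PY_LEX'):  # force the pure-Python matcher
#       return MatchToken_Slow(lex.LEXER_DEF)
#     return MatchToken_Fast if fastlex else MatchToken_Slow(lex.LEXER_DEF)

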
def InitLexer(s, arena):
  """For tests only."""
  match_func = _MakeMatcher()
  line_lexer = lexer.LineLexer(match_func, '', arena)
  line_reader = reader.StringLineReader(s, arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  return line_reader, lx


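# Typical test usage (how the arena is constructed varies by test harness
# and is elided here):
#
#   line_reader, lx = InitLexer('echo hi\n', arena)
#   # ... then drive lx token by token and assert on the returned Ids.

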
# New API:
# - MakeParser(reader, arena) - for top level, 'source'
#   - eval: MakeParser(StringLineReader(), arena)
#   - source: MakeParser(FileLineReader(), arena)
# - MakeParserForCommandSub(reader, lexer) -- arena is inside lexer/reader
# - MakeParserForCompletion(code_str)  # no arena?  no errors?
# - MakeWordParserForHereDoc(lines, arena)  # arena is lost
#   - although you want to AddLine
#   - line_id = arena.AddLine()


# NOTE:
# - Does it make sense to create ParseState objects?  They have no
#   dependencies -- just pure data.  Or just recreate them every time?  One
#   issue is that you need somewhere to store the side effects -- errors for
#   parsers, and the actual values for the evaluators/executors.

def MakeParser(line_reader, arena):
  """Top level parser."""
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  w_parser = word_parse.WordParser(lx, line_reader)
  c_parser = cmd_parse.CommandParser(w_parser, lx, line_reader, arena)
  return w_parser, c_parser


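# Hedged usage sketch for the 'eval' case in the New API notes above.
# Arena construction is elided, and ParseWholeFile is assumed to be the
# CommandParser entry point (treat the method name as an assumption):
#
#   line_reader = reader.StringLineReader('echo hi\n', arena)
#   w_parser, c_parser = MakeParser(line_reader, arena)
#   node = c_parser.ParseWholeFile()

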
# TODO: We could reuse w_parser with Reset() each time.  That's what the REPL
# does.  (See the sketch after this function.)
# But LineLexer and Lexer are also stateful!  So that might not be worth it.
# Hm the REPL only does line_reader.Reset()?
#
# NOTE: It probably needs to take a VirtualLineReader for $PS1, $PS2, ...
# values.
def MakeParserForCompletion(code_str, arena):
  """Parser for partial lines."""
  # NOTE: We don't need to use an arena here?  Or we need a "scratch arena"
  # that doesn't interfere with the rest of the program.
  line_reader = reader.StringLineReader(code_str, arena)
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)  # AtEnd() is true
  lx = lexer.Lexer(line_lexer, line_reader)
  w_parser = word_parse.WordParser(lx, line_reader)
  c_parser = cmd_parse.CommandParser(w_parser, lx, line_reader, arena)
  return w_parser, c_parser


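# A sketch of the Reset() reuse idea from the TODO above (the Reset()
# methods shown are assumptions about the REPL loop, not code in this
# file):
#
#   line_reader.Reset()  # what the REPL appears to do today
#   w_parser.Reset()     # would also be needed to safely reuse the parser

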
def MakeWordParserForHereDoc(lines, arena):
  line_reader = reader.VirtualLineReader(lines, arena)
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  return word_parse.WordParser(lx, line_reader)


def MakeWordParserForPlugin(code_str, arena):
  line_reader = reader.StringLineReader(code_str, arena)
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  return word_parse.WordParser(lx, line_reader)


def MakeParserForCommandSub(line_reader, lexer):
  """To parse a command sub, we want fresh word parser state.

  It's a new instance based on the same lexer and arena.
  """
  arena = line_reader.arena
  w_parser = word_parse.WordParser(lexer, line_reader)
  c_parser = cmd_parse.CommandParser(w_parser, lexer, line_reader, arena)
  return c_parser


# Another parser instantiation:
# - For Array Literal in word_parse.py WordParser:
#   w_parser = WordParser(self.lexer, self.line_reader)
|