"""
parse_lib.py - Consolidate various parser instantiations here.
"""

from core import lexer
from core import reader

from osh import lex
from osh import word_parse
from osh import cmd_parse
from osh.meta import Id, IdInstance

# bin/osh should work without compiling fastlex?  But we want all the unit
# tests to run with a known version of it.
try:
  import fastlex
except ImportError:
  fastlex = None


class MatchToken_Slow(object):
  """An abstract matcher that doesn't depend on OSH."""
  def __init__(self, lexer_def):
    self.lexer_def = {}
    for state, pat_list in lexer_def.items():
      self.lexer_def[state] = lexer.CompileAll(pat_list)

  def __call__(self, lex_mode, line, start_pos):
    """Returns (id, end_pos)."""
    # Simulate the EOL handling in re2c.
    if start_pos >= len(line):
      return Id.Eol_Tok, start_pos

    re_list = self.lexer_def[lex_mode]
    matches = []
    for regex, tok_type in re_list:
      m = regex.match(line, start_pos)  # left-anchored
      if m:
        matches.append((m.end(0), tok_type, m.group(0)))
    if not matches:
      raise AssertionError('no match at position %d: %r' % (start_pos, line))
    end_pos, tok_type, tok_val = max(matches, key=lambda m: m[0])
    return tok_type, end_pos


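# A self-contained sketch of the longest-match rule in
# MatchToken_Slow.__call__ above, using only the stdlib (no OSH
# dependencies; pattern and token names are illustrative).  With
# overlapping patterns, the match that consumes the most characters wins,
# which is what re2c does natively:
#
#   import re
#   pairs = [(re.compile(r'if'), 'KEYWORD'), (re.compile(r'[a-z]+'), 'NAME')]
#   matches = []
#   for regex, tok_type in pairs:
#     m = regex.match('ifs', 0)
#     if m:
#       matches.append((m.end(0), tok_type))
#   end_pos, tok_type = max(matches, key=lambda m: m[0])
#   assert (tok_type, end_pos) == ('NAME', 3)  # [a-z]+ beats the 'if' keyword

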
def MatchToken_Fast(lex_mode, line, start_pos):
  """Returns (id, end_pos)."""
  tok_type, end_pos = fastlex.MatchToken(lex_mode.enum_id, line, start_pos)
  # IMPORTANT: We're reusing Id instances here.  Ids are very common, so this
  # saves memory.
  return IdInstance(tok_type), end_pos


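# A generic sketch of the interning pattern behind IdInstance (illustrative
# only -- the Id(i) constructor here is hypothetical, not this repo's API):
#
#   _INSTANCES = {}
#   def Intern(i):
#     if i not in _INSTANCES:
#       _INSTANCES[i] = Id(i)  # at most one object per integer id
#     return _INSTANCES[i]
#
# Every token of the same kind then shares one Id object instead of
# allocating a new one per token.

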
def _MakeMatcher():
  # NOTE: Could have an environment variable to control this for speed?
  # (sketch below)
  #return MatchToken_Slow(lex.LEXER_DEF)

  if fastlex:
    return MatchToken_Fast
  else:
    return MatchToken_Slow(lex.LEXER_DEF)


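# A sketch of the environment variable idea in _MakeMatcher (the variable
# name is hypothetical; nothing reads it today):
#
#   import os
#   def _MakeMatcher():
#     if os.getenv('OSH_PY_LEX'):  # force the pure-Python matcher
#       return MatchToken_Slow(lex.LEXER_DEF)
#     return MatchToken_Fast if fastlex else MatchToken_Slow(lex.LEXER_DEF)

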
def InitLexer(s, arena):
  """For tests only."""
  match_func = _MakeMatcher()
  line_lexer = lexer.LineLexer(match_func, '', arena)
  line_reader = reader.StringLineReader(s, arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  return line_reader, lx


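# Typical test usage (how the arena is constructed varies by test harness
# and is elided here):
#
#   line_reader, lx = InitLexer('echo hi\n', arena)
#   # ... then drive lx token by token and assert on the returned Ids.

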
# New API:
# - MakeParser(reader, arena) - for top level, 'source'
#   - eval: MakeParser(StringLineReader(), arena)
#   - source: MakeParser(FileLineReader(), arena)
# - MakeParserForCommandSub(reader, lexer) -- arena is inside lexer/reader
# - MakeParserForCompletion(code_str)  # no arena?  no errors?
# - MakeWordParserForHereDoc(lines, arena)  # arena is lost
#   - although you want to AddLine
#   - line_id = arena.AddLine()


# NOTE:
# - Does it make sense to create ParseState objects?  They have no
#   dependencies -- just pure data.  Or just recreate them every time?  One
#   issue is that you need somewhere to store the side effects -- errors for
#   parsers, and the actual values for the evaluators/executors.

def MakeParser(line_reader, arena):
  """Top level parser."""
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  w_parser = word_parse.WordParser(lx, line_reader)
  c_parser = cmd_parse.CommandParser(w_parser, lx, line_reader, arena)
  return w_parser, c_parser


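# Hedged usage sketch for the 'eval' case in the New API notes above.
# Arena construction is elided, and ParseWholeFile is assumed to be the
# CommandParser entry point (treat the method name as an assumption):
#
#   line_reader = reader.StringLineReader('echo hi\n', arena)
#   w_parser, c_parser = MakeParser(line_reader, arena)
#   node = c_parser.ParseWholeFile()

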
# TODO: We could reuse w_parser with Reset() each time.  That's what the REPL
# does.  (See the sketch after this function.)
# But LineLexer and Lexer are also stateful!  So that might not be worth it.
# Hm the REPL only does line_reader.Reset()?
#
# NOTE: It probably needs to take a VirtualLineReader for $PS1, $PS2, ...
# values.
def MakeParserForCompletion(code_str, arena):
  """Parser for partial lines."""
  # NOTE: We don't need to use an arena here?  Or we need a "scratch arena"
  # that doesn't interfere with the rest of the program.
  line_reader = reader.StringLineReader(code_str, arena)
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)  # AtEnd() is true
  lx = lexer.Lexer(line_lexer, line_reader)
  w_parser = word_parse.WordParser(lx, line_reader)
  c_parser = cmd_parse.CommandParser(w_parser, lx, line_reader, arena)
  return w_parser, c_parser


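# A sketch of the Reset() reuse idea from the TODO above (the Reset()
# methods shown are assumptions about the REPL loop, not code in this
# file):
#
#   line_reader.Reset()  # what the REPL appears to do today
#   w_parser.Reset()     # would also be needed to safely reuse the parser

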
def MakeWordParserForHereDoc(lines, arena):
  line_reader = reader.VirtualLineReader(lines, arena)
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  return word_parse.WordParser(lx, line_reader)


def MakeWordParserForPlugin(code_str, arena):
  line_reader = reader.StringLineReader(code_str, arena)
  line_lexer = lexer.LineLexer(_MakeMatcher(), '', arena)
  lx = lexer.Lexer(line_lexer, line_reader)
  return word_parse.WordParser(lx, line_reader)


def MakeParserForCommandSub(line_reader, lexer):
  """To parse a command sub, we want fresh word parser state.

  It's a new instance based on the same lexer and arena.
  """
  arena = line_reader.arena
  w_parser = word_parse.WordParser(lexer, line_reader)
  c_parser = cmd_parse.CommandParser(w_parser, lexer, line_reader, arena)
  return c_parser


# Another parser instantiation:
# - For Array Literal in word_parse.py WordParser:
#   w_parser = WordParser(self.lexer, self.line_reader)
|