core/alloc.py

OILS / core / alloc.py View on Github | oilshell.org

301 lines, 127 significant

1	"""
2	alloc.py - strategies for managing SourceLine and Token
3
4	"""
5
6	from _devbuild.gen.syntax_asdl import source_t, Token, SourceLine
7	from asdl import runtime
8	from mycpp.mylib import log
9
10	from typing import List, Any
11
12	_ = log
13
14
def SnipCodeBlock(left, right, lines):
    # type: (Token, Token, List[SourceLine]) -> str
    """Extract the source text strictly between two one-character tokens.

    The delimiters themselves (e.g. { and }) are left out of the result.
    Used for Hay evaluation; Arena.SnipCodeString() is the INCLUSIVE
    counterpart.

    Args:
      left: opening token; its length must be 1
      right: closing token; its length must be 1
      lines: the lines to search for left.line and right.line

    Returns:
      (left.col + 1) spaces of padding followed by the snipped text.  When
      both tokens are on the same line and that line is not in `lines`, only
      the padding is returned.

    Raises:
      AssertionError: if a token is not 1 character long, or if the tokens are
      on different lines and one of those lines is not found in `lines`.
    """
    assert left.length == 1, "{ expected"
    assert right.length == 1, "} expected"

    # First column after the opening token
    body_start = left.col + left.length

    # Leading spaces stand in for everything up to and including the left
    # token, so column numbers in the snippet aren't off.
    chunks = [' ' * (left.col + 1)]  # type: List[str]

    if left.line == right.line:
        # Both delimiters share a line: take the slice between them.
        for src_line in lines:
            if src_line == left.line:
                chunks.append(src_line.content[body_start:right.col])
        return ''.join(chunks)

    seen_left = False
    seen_right = False
    for src_line in lines:
        if src_line == left.line:
            seen_left = True
            # Everything after the left token
            chunks.append(src_line.content[body_start:])
        elif src_line == right.line:
            seen_right = True
            # Everything before the right token; nothing past it is wanted
            chunks.append(src_line.content[:right.col])
            break
        elif seen_left:
            # A whole line in between the two delimiters
            chunks.append(src_line.content)

    assert seen_left, "Couldn't find left token"
    assert seen_right, "Couldn't find right token"
    return ''.join(chunks)
69
70
class ctx_SourceCode(object):
    """Context manager that keeps a source_t on the arena's source stack.

    The source is pushed when the object is CONSTRUCTED, not on __enter__, and
    popped again when the `with` block exits.
    """

    def __init__(self, arena, src):
        # type: (Arena, source_t) -> None
        self.arena = arena
        self.arena.PushSource(src)

    def __enter__(self):
        # type: () -> None
        # Nothing to do: the push already happened in the constructor.
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Undo the constructor's push.  Nothing is returned, so an exception
        # raised inside the `with` block is not suppressed.
        self.arena.PopSource()
85
86
class Arena(object):
    """Manages source_t, SourceLine, and Token instances."""

    def __init__(self, save_tokens=False):
        # type: (bool) -> None

        # Stack of sources.  AddLine() attaches the top entry to each new
        # line, so one instance is reused by many lines.
        self.source_instances = []  # type: List[source_t]

        # All lines that haven't been discarded.  For LST formatting.
        self.lines_list = []  # type: List[SourceLine]

        # Number of tokens handed out; also the next span ID to assign.
        self.num_tokens = 0

        # Indexed by span_id.  Only appended to while save_tokens is set.
        self.tokens = []  # type: List[Token]
        self.save_tokens = save_tokens

    def SaveTokens(self):
        # type: () -> None
        """Turn on token retention for subsequent NewToken() calls.

        Used by --tool X.  Do we need LosslessArena?
        """
        self.save_tokens = True

    def PushSource(self, src):
        # type: (source_t) -> None
        """Make `src` the source that new lines are attributed to."""
        self.source_instances.append(src)

    def PopSource(self):
        # type: () -> None
        """Drop the most recently pushed source."""
        self.source_instances.pop()

    def AddLine(self, line, line_num):
        # type: (str, int) -> SourceLine
        """Record a physical line and return the SourceLine created for it.

        The line number is 1-based.  The line is attributed to the source on
        top of the stack, so a source must have been pushed first.
        """
        current_src = self.source_instances[-1]
        new_line = SourceLine(line_num, line, current_src)
        self.lines_list.append(new_line)
        return new_line

    def DiscardLines(self):
        # type: () -> None
        """Remove references to the lines we've accumulated.

        - This makes the linear search in SnipCodeString() shorter.
        - It removes the ARENA's references to all lines.  The TOKENS still
          reference some lines.
        """
        # Clear in place rather than rebinding the attribute
        del self.lines_list[:]

    def SaveLinesAndDiscard(self, left, right):
        # type: (Token, Token) -> List[SourceLine]
        """Return the lines from left.line through right.line, then discard.

        The returned list includes both end lines.  Afterwards the arena's own
        line list is emptied via DiscardLines().

        Why?
        - In between { }, we want to preserve lines not pointed to by a token,
          e.g. comment lines.
        - But we don't want to save all lines in an interactive shell:
            echo 1
            echo 2
            ...
            echo 500000
            echo 500001

        The lines should be freed after execution takes place.
        """
        kept = []  # type: List[SourceLine]
        in_range = False
        for src_line in self.lines_list:
            if src_line == left.line:
                in_range = True

            # These lines are PERMANENT, and never deleted.  What if you
            # overwrite a function name?  You might want to save those in the
            # function record ITSELF.
            #
            # This is for INLINE hay blocks that can be evaluated at any
            # point.  In contrast, parse_hay(other_file) uses ParseWholeFile,
            # and we could save all lines.

            # TODO: consider creating a new Arena for each CommandParser?  Or
            # rename it to 'BackingLines' or something.

            # TODO: We should mutate li.line_id here so it's the index into
            # saved_lines?
            if in_range:
                kept.append(src_line)

            # Stop at the right token's line, whether or not the left token's
            # line was seen before it.
            if src_line == right.line:
                break

        self.DiscardLines()
        return kept

    def SnipCodeString(self, left, right):
        # type: (Token, Token) -> str
        """Return the code string between left and right tokens, INCLUSIVE.

        Used for ALIAS expansion, which happens in the PARSER.

        The argument to aliases can span multiple lines, like this:

        $ myalias '1 2 3'

        Raises:
          AssertionError: if the line of either token is not in lines_list.
        """
        # One past the last column of the right token
        end_col = right.col + right.length

        if left.line == right.line:
            for src_line in self.lines_list:
                if src_line == left.line:
                    return src_line.content[left.col:end_col]
            # Not found: the scan below finds nothing either, and its
            # assertion reports the failure.

        chunks = []  # type: List[str]
        seen_left = False
        seen_right = False
        for src_line in self.lines_list:
            if src_line == left.line:
                seen_left = True
                # From the left token to the end of its line
                chunks.append(src_line.content[left.col:])
            elif src_line == right.line:
                seen_right = True
                # From the start of the line through the right token
                chunks.append(src_line.content[:end_col])
                break
            elif seen_left:
                # A whole line in between
                chunks.append(src_line.content)

        assert seen_left, "Couldn't find left token"
        assert seen_right, "Couldn't find right token"
        return ''.join(chunks)

    def NewToken(self, id_, col, length, src_line, val):
        # type: (int, int, int, SourceLine, str) -> Token
        """Create a Token with the next span ID, saving it if save_tokens."""
        # spids are just array indices
        new_id = self.num_tokens
        self.num_tokens = new_id + 1

        result = Token(id_, col, length, new_id, src_line, val)
        if self.save_tokens:
            self.tokens.append(result)
        return result

    def UnreadOne(self):
        # type: () -> None
        """Reuse the last span ID."""
        if self.save_tokens:
            # Drop the saved token that held that ID
            self.tokens.pop()
        self.num_tokens -= 1

    def GetToken(self, span_id):
        # type: (int) -> Token
        """Return the saved token with the given span ID.

        Only tokens created while save_tokens was set can be looked up.
        """
        assert span_id != runtime.NO_SPID, span_id
        num_saved = len(self.tokens)
        assert span_id < num_saved, \
            'Span ID out of range: %d is greater than %d' % (span_id, num_saved)
        return self.tokens[span_id]

    def LastSpanId(self):
        # type: () -> int
        """Return one past the last span ID (counts SAVED tokens only)."""
        return len(self.tokens)
270
271
class LosslessArena(Arena):
    """Placeholder (TODO): an arena that maintains the lossless invariant.

    Intended for
      --tool fmt
      --tool ysh-ify

    It should retain all SourceLine and Token instances.

    Open question: somehow disallow re-parsing?  Is that equivalent to
    ctx_SourceCode()?

    Currently adds nothing to Arena.
    """
    pass
285
286
class DynamicArena(Arena):
    """Placeholder: the arena for the batch and interactive shell.

    TODO:
    - Test that SourceLine and Token are GC'd

    However, it should support:
    - SnipCodeString() for aliases
    - SnipCodeBlock() for Hay

    Neither of those is necessary in the LosslessArena?  We might have
    different utilities there.

    Currently adds nothing to Arena.
    """
    pass