OILS / core / alloc.py View on Github | oilshell.org

301 lines, 127 significant
1"""
2alloc.py - strategies for managing SourceLine and Token
3
4"""
5
6from _devbuild.gen.syntax_asdl import source_t, Token, SourceLine
7from asdl import runtime
8from mycpp.mylib import log
9
10from typing import List, Any
11
12_ = log
13
14
def SnipCodeBlock(left, right, lines):
    # type: (Token, Token, List[SourceLine]) -> str
    """Return the source text strictly between two brace tokens.

    The { and } tokens themselves are EXCLUDED from the result.  The result
    is prefixed with (left.col + 1) spaces so that column numbers inside the
    snippet line up with the original first line.

    Used for Hay evaluation.  Similar to Arena.SnipCodeString(), which is
    INCLUSIVE.

    Args:
      left: the opening token; must have length 1
      right: the closing token; must have length 1
      lines: the lines to search for left.line and right.line

    When both tokens are on the same line and that line is not in 'lines',
    only the padding is returned.  When they are on different lines, both
    lines must be found, or an assertion fails.
    """
    assert left.length == 1, "{ expected"
    assert right.length == 1, "} expected"

    # Offset of the first character after the opening token
    body_start = left.col + left.length

    # Leading spaces keep column numbers from being off
    chunks = [' ' * (left.col + 1)]  # type: List[str]

    if left.line == right.line:
        # Both tokens on one line: take the slice between them
        for src_line in lines:
            if src_line == left.line:
                chunks.append(src_line.content[body_start:right.col])
        return ''.join(chunks)

    found_left = False
    found_right = False
    for src_line in lines:
        if src_line == left.line:
            # Everything after the left token
            found_left = True
            chunks.append(src_line.content[body_start:])

        elif src_line == right.line:
            # Everything before the right token, then we're done
            found_right = True
            chunks.append(src_line.content[:right.col])
            break

        elif found_left:
            # A whole line in between the two tokens
            chunks.append(src_line.content)

    assert found_left, "Couldn't find left token"
    assert found_right, "Couldn't find right token"
    return ''.join(chunks)
69
70
class ctx_SourceCode(object):
    """Context manager that brackets a region with PushSource/PopSource.

    The source is pushed in the constructor, not in __enter__, and popped
    when the 'with' block exits.
    """

    def __init__(self, arena, src):
        # type: (Arena, source_t) -> None
        self.arena = arena
        self.arena.PushSource(src)

    def __enter__(self):
        # type: () -> None
        """Nothing to do; the source was already pushed in __init__."""
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        """Pop the source pushed in __init__.  Exception info is ignored."""
        self.arena.PopSource()
85
86
class Arena(object):
    """Manages source_t, SourceLine, Token.

    - A stack of sources: PushSource() / PopSource()
    - The lines added since the last discard: AddLine() / DiscardLines()
    - Token creation, and optional retention by span ID: NewToken() /
      GetToken()
    """

    def __init__(self, save_tokens=False):
        # type: (bool) -> None

        # Stack of sources.  AddLine() attaches the top entry to each new
        # line, so one instance is shared by many lines.
        self.source_instances = []  # type: List[source_t]

        # All lines that haven't been discarded.  For LST formatting.
        self.lines_list = []  # type: List[SourceLine]

        # When true, NewToken() also appends each token to self.tokens
        self.save_tokens = save_tokens

        # indexed by span_id
        self.tokens = []  # type: List[Token]

        # The next span ID that NewToken() will hand out
        self.num_tokens = 0

    def SaveTokens(self):
        # type: () -> None
        """Start retaining the tokens created by NewToken().

        Used by --tool X.  Do we need LosslessArena?
        """
        self.save_tokens = True

    def PushSource(self, src):
        # type: (source_t) -> None
        """Make src the source attached to lines added from now on."""
        self.source_instances.append(src)

    def PopSource(self):
        # type: () -> None
        """Drop the most recently pushed source."""
        self.source_instances.pop()

    def AddLine(self, line, line_num):
        # type: (str, int) -> SourceLine
        """Save a physical line and return the SourceLine created for it.

        The line is associated with the source on top of the stack, so a
        source must have been pushed.

        The line number is 1-based.
        """
        current_src = self.source_instances[-1]
        src_line = SourceLine(line_num, line, current_src)
        self.lines_list.append(src_line)
        return src_line

    def DiscardLines(self):
        # type: () -> None
        """Remove references to lines we've accumulated.

        - This makes the linear search in SnipCodeString() shorter.
        - It removes the ARENA's references to all lines.  The TOKENS still
          reference some lines.
        """
        # Clear in place, so the list object itself stays the same
        del self.lines_list[:]

    def SaveLinesAndDiscard(self, left, right):
        # type: (Token, Token) -> List[SourceLine]
        """Save the lines between two tokens, e.g. for { and }

        Returns the lines from left.line through right.line, inclusive, and
        then discards ALL accumulated lines.  The scan stops at right.line
        even when left.line hasn't been seen; nothing is saved in that case.

        Why?
        - In between { }, we want to preserve lines not pointed to by a
          token, e.g. comment lines.
        - But we don't want to save all lines in an interactive shell:
            echo 1
            echo 2
            ...
            echo 500000
            echo 500001

        The lines should be freed after execution takes place.
        """
        kept = []  # type: List[SourceLine]
        in_range = False
        for src_line in self.lines_list:
            if src_line == left.line:
                in_range = True

            # These lines are PERMANENT, and never deleted.  What if you
            # overwrite a function name?  You might want to save those in the
            # function record ITSELF.
            #
            # This is for INLINE hay blocks that can be evaluated at any
            # point.  In contrast, parse_hay(other_file) uses ParseWholeFile,
            # and we could save all lines.

            # TODO: consider creating a new Arena for each CommandParser?  Or
            # rename it to 'BackingLines' or something.

            # TODO: We should mutate li.line_id here so it's the index into
            # saved_lines?
            if in_range:
                kept.append(src_line)

            if src_line == right.line:
                break

        self.DiscardLines()
        return kept

    def SnipCodeString(self, left, right):
        # type: (Token, Token) -> str
        """Return the code string between left and right tokens, INCLUSIVE.

        Used for ALIAS expansion, which happens in the PARSER.

        The argument to aliases can span multiple lines, like this:

        $ myalias '1
        2
        3'

        Both token lines must still be in self.lines_list, or an assertion
        fails.
        """
        if left.line == right.line:
            end = right.col + right.length
            for src_line in self.lines_list:
                if src_line == left.line:
                    return src_line.content[left.col:end]
            # Not found: fall through.  The loop below can't match either, so
            # the found_left assertion reports it.

        chunks = []  # type: List[str]
        found_left = False
        found_right = False
        for src_line in self.lines_list:
            if src_line == left.line:
                # The left token and everything after it
                found_left = True
                chunks.append(src_line.content[left.col:])

            elif src_line == right.line:
                # Everything up to the end of the right token, then stop
                found_right = True
                chunks.append(src_line.content[:right.col + right.length])
                break

            elif found_left:
                # A whole line in between the two tokens
                chunks.append(src_line.content)

        assert found_left, "Couldn't find left token"
        assert found_right, "Couldn't find right token"
        return ''.join(chunks)

    def NewToken(self, id_, col, length, src_line, val):
        # type: (int, int, int, SourceLine, str) -> Token
        """Create a Token with the next span ID; retain it if save_tokens."""
        span_id = self.num_tokens  # spids are just array indices
        self.num_tokens += 1

        new_tok = Token(id_, col, length, span_id, src_line, val)
        if self.save_tokens:
            self.tokens.append(new_tok)
        return new_tok

    def UnreadOne(self):
        # type: () -> None
        """Reuse the last span ID."""
        if self.save_tokens:
            self.tokens.pop()
        self.num_tokens -= 1

    def GetToken(self, span_id):
        # type: (int) -> Token
        """Return the retained token with the given span ID."""
        assert span_id != runtime.NO_SPID, span_id
        assert span_id < len(self.tokens), \
            'Span ID out of range: %d is greater than %d' % (span_id, len(self.tokens))
        return self.tokens[span_id]

    def LastSpanId(self):
        # type: () -> int
        """Return one past the last span ID.

        This is the number of RETAINED tokens, i.e. len(self.tokens).
        """
        return len(self.tokens)
270
271
class LosslessArena(Arena):
    """Placeholder: currently adds nothing to Arena.

    TODO:

    Has lossless invariant, for
      --tool fmt
      --tool ysh-ify

    Retains all SourceLine and Token

    Somehow disallow re-parsing?  Is that equivalent to ctx_SourceCode()?
    """
    pass
285
286
class DynamicArena(Arena):
    """Placeholder: currently adds nothing to Arena.

    For batch and interactive shell

    TODO:
    - Test that SourceLine and Token are GC'd

    However, it should support:
    - SnipCodeString() for aliases
    - SnipCodeBlock() for Hay

    Neither of those is necessary in the LosslessArena?  We might have
    different utilities there.
    """
    pass