OILS / mycpp / pass_state.py View on Github | oilshell.org

356 lines, 181 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7from collections import defaultdict
8
9from mycpp.util import join_name, log, SymbolPath
10
11from typing import Optional
12
13_ = log
14
15
16class Virtual(object):
17 """
18 See unit test for example usage.
19 """
20
21 def __init__(self) -> None:
22 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
23 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
24 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath, str]]] = {}
25 self.has_vtable: dict[SymbolPath, bool] = {}
26 self.can_reorder_fields: dict[SymbolPath, bool] = {}
27
28 # _Executor -> vm::_Executor
29 self.base_class_unique: dict[str, SymbolPath] = {}
30
31 # These are called on the Forward Declare pass
32 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
33 #log('OnMethod %s %s', class_name, method_name)
34
35 # __init__ and so forth don't count
36 if method_name.startswith('__') and method_name.endswith('__'):
37 return
38
39 self.methods[class_name].append(method_name)
40
41 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
42 if len(base_class) > 1:
43 # Hack for
44 #
45 # class _Executor: pass
46 # versus
47 # class MyExecutor(vm._Executor): pass
48 base_key = base_class[-1]
49
50 # Fail if we have two base classes in different namespaces with the same
51 # name.
52 if base_key in self.base_class_unique:
53 # Make sure we don't have collisions
54 assert self.base_class_unique[base_key] == base_class or base_class in self.subclasses[self.base_class_unique[base_key]], base_class
55 else:
56 self.base_class_unique[base_key] = base_class
57
58 else:
59 base_key = base_class
60
61 self.subclasses[base_class].append(subclass)
62
63 def Calculate(self) -> None:
64 """
65 Call this after the forward declare pass.
66
67 TODO: Are there bugs based on conflicting class names?
68 """
69 for base_class, subclasses in self.subclasses.items():
70 self.can_reorder_fields[base_class] = False
71
72 for subclass in subclasses:
73 self.can_reorder_fields[subclass] = False
74
75 b_methods = self.methods[base_class]
76 s_methods = self.methods[subclass]
77 overlapping = set(b_methods) & set(s_methods)
78 for method in overlapping:
79 self.virtuals[(base_class, method)] = None
80 self.virtuals[(subclass, method)] = (base_class, method)
81 if overlapping:
82 self.has_vtable[base_class] = True
83 self.has_vtable[subclass] = True
84
85 # These is called on the Decl pass
86 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
87 return (class_name, method_name) in self.virtuals
88
89 def HasVTable(self, class_name: SymbolPath) -> bool:
90 return class_name in self.has_vtable
91
92 def CanReorderFields(self, class_name: SymbolPath) -> bool:
93 if class_name in self.can_reorder_fields:
94 return self.can_reorder_fields[class_name]
95 else:
96 return True # by default they can be reordered
97
98
99class Fact(object):
100 """
101 An abstract fact. These can be used to build up datalog programs.
102 """
103 def __init__(self) -> None:
104 pass
105
106 def name(self) -> str:
107 raise NotImplementedError()
108
109 def Generate(self, func: str, statement: int) -> str:
110 raise NotImplementedError()
111
112
113class ControlFlowGraph(object):
114 """
115 A simple control-flow graph.
116
117 Every statement in the program is represented as a node in a graph with
118 unique a numeric ID. Control flow is represented as directed edges through
119 the graph. Loops can introduce back-edges. Every node in the graph will
120 satisfy at least one of the following conditions:
121
122 - Its indegree is at least one.
123
124 - Its outdegree is at least one.
125
126 For simple linear graphs all you need is the AddStatement method. For more
127 complex flows there is a set of context managers below to help simplify
128 construction.
129
130 - For branches-like statements (e.g. if- and try- statements) use
131 CfgBranchContext. It will take care of the details associated with
132 stitching the different branches to statements in the next statement.
133
134 - For loops, use CfgLoopContext. It will take care of adding back-edges
135 and connecting break statements to any statements that proceed the
136 loop.
137
138 - CfgBlockContext can be used for simple cases where you just want to
139 track the beginning and end of a sequence of statements.
140
141 Statements can carry annotations called facts, which are used as inputs to
142 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
143 To annotate a statement, use the AddFact method with any object that
144 implements the Fact interface.
145
146 See the unit tests in pass_state_test.py and the mycpp phase in
147 control_flow_pass.py for detailed examples of usage.
148 """
149
150 def __init__(self) -> None:
151 self.statement_counter: int = 0
152 self.edges: set[tuple[int, int]] = set({})
153 self.block_stack: list[int] = []
154 self.predecessors: set[int] = set({})
155 self.deadends: set[int] = set({})
156
157 # order doesn't actually matter here, but sets require elements to be
158 # hashable
159 self.facts: dict[int, list[Fact]] = defaultdict(list)
160
161 def AddEdge(self, pred: int, succ: int) -> None:
162 """
163 Add a directed edge from pred to succ. If pred is a deadend, its
164 non-deadends will be used instead.
165 """
166 if pred in self.deadends:
167 for w in [u for (u, v) in self.edges if v == pred]:
168 self.AddEdge(w, succ)
169 else:
170 self.edges.add((pred, succ))
171
172 def AddDeadend(self, statement: int):
173 """
174 Mark a statement as a dead-end (e.g. return or continue).
175 """
176 self.deadends.add(statement)
177
178 def AddStatement(self) -> int:
179 """
180 Add a new statement and return its ID.
181 """
182 if len(self.predecessors) == 0:
183 if len(self.block_stack):
184 self.predecessors.add(self.block_stack[-1])
185 else:
186 self.predecessors.add(self.statement_counter)
187
188 self.statement_counter += 1
189 for pred in self.predecessors:
190 self.AddEdge(pred, self.statement_counter)
191
192 self.predecessors = set({})
193
194 if len(self.block_stack):
195 self.block_stack[-1] = self.statement_counter
196
197 return self.statement_counter
198
199 def AddFact(self, statement: int, fact: Fact) -> None:
200 """
201 Annotate a statement with a fact.
202 """
203 self.facts[statement].append(fact)
204
205 def _PushBlock(self, begin: Optional[int] = None) -> int:
206 """
207 Start a block at the given statement ID. If a beginning statement isn't
208 provided one will be created and its ID will be returend.
209
210 Direct use of this function is discouraged. Consider using one of the
211 block context managers below instead.
212 """
213 if begin is None:
214 begin = self.AddStatement()
215 else:
216 self.predecessors.add(begin)
217
218 self.block_stack.append(begin)
219 return begin
220
221 def _PopBlock(self) -> int:
222 """
223 Pop a block from the top of the stack and return the ID of the block's
224 last statement.
225
226 Direct use of this function is discouraged. Consider using one of the
227 block context managers below instead.
228 """
229 assert len(self.block_stack)
230 last = self.block_stack.pop()
231 if len(self.block_stack) and last not in self.deadends:
232 self.block_stack[-1] = last
233
234 return last
235
236
237class CfgBlockContext(object):
238 """
239 Context manager to make dealing with things like with-statements easier.
240 """
241 def __init__(self, cfg: ControlFlowGraph, begin: Optional[int] = None) -> None:
242 self.cfg = cfg
243 if cfg is None:
244 return
245
246 self.entry = self.cfg._PushBlock(begin)
247 self.exit = self.entry
248
249 def __enter__(self) -> None:
250 return self if self.cfg else None
251
252 def __exit__(self, *args) -> None:
253 if not self.cfg:
254 return
255
256 self.exit = self.cfg._PopBlock()
257
258
259class CfgBranchContext(object):
260 """
261 Context manager to make dealing with if-else blocks easier.
262 """
263 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
264 self.cfg = cfg
265 self.entry = branch_point
266 self.exit = self.entry
267 if cfg is None:
268 return
269
270 self.arms = []
271 self.pushed = False
272
273 def AddBranch(self, entry: Optional[int] = None):
274 if not self.cfg:
275 return CfgBranchContext(None, None)
276
277 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
278 self.cfg._PushBlock(self.arms[-1].entry)
279 self.arms[-1].pushed = True
280 return self.arms[-1]
281
282 def __enter__(self) -> None:
283 return self
284
285 def __exit__(self, *args) -> None:
286 if not self.cfg:
287 return
288
289 if self.pushed:
290 self.exit = self.cfg._PopBlock()
291
292 for arm in self.arms:
293 if arm.exit not in self.cfg.deadends:
294 self.cfg.predecessors.add(arm.exit)
295
296
297
298class CfgLoopContext(object):
299 """
300 Context manager to make dealing with loops easier.
301 """
302 def __init__(self, cfg: ControlFlowGraph) -> None:
303 self.cfg = cfg
304 self.breaks = set({})
305 if cfg is None:
306 return
307
308 self.entry = self.cfg._PushBlock()
309 self.exit = self.entry
310
311 def AddBreak(self, statement: int) -> None:
312 assert self.cfg
313 self.breaks.add(statement)
314 self.cfg.AddDeadend(statement)
315
316 def AddContinue(self, statement: int) -> None:
317 self.cfg.AddEdge(statement, self.entry)
318 self.cfg.AddDeadend(statement)
319
320 def __enter__(self) -> None:
321 return self if self.cfg else None
322
323 def __exit__(self, *args) -> None:
324 if not self.cfg:
325 return
326
327 self.exit = self.cfg._PopBlock()
328 self.cfg.AddEdge(self.exit, self.entry)
329 for pred in self.cfg.predecessors:
330 self.cfg.AddEdge(pred, self.entry)
331
332 # If we had any breaks, arm the predecessor set with the current
333 # statement and the break statements.
334 if len(self.breaks):
335 if len(self.cfg.block_stack):
336 self.cfg.predecessors.add(self.cfg.block_stack[-1])
337 else:
338 self.cfg.predecessors.add(self.cfg.statement_counter)
339
340 for b in self.breaks:
341 self.cfg.deadends.remove(b)
342 self.cfg.predecessors.add(b)
343
344
345def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph], facts_dir='_tmp/mycpp-facts') -> None:
346 """
347 Dump the given control flow graphs and associated facts into the given
348 directory as text files that can be consumed by datalog.
349 """
350 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
351 os.makedirs(facts_dir, exist_ok=True)
352 with open(edge_facts, 'w') as cfg_f:
353 for func, cfg in sorted(cfgs.items()):
354 joined = join_name(func, delim='.')
355 for (u, v) in sorted(cfg.edges):
356 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))