OILS / mycpp / pass_state.py View on Github | oilshell.org

387 lines, 205 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7from collections import defaultdict
8
9from mycpp.util import join_name, log, SymbolPath
10
11from typing import Optional
12
13_ = log
14
15
16class Virtual(object):
17 """Calculate which C++ methods need the virtual keyword.
18
19 See unit test for example usage.
20 """
21
22 def __init__(self) -> None:
23 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
24 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
25 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
26 str]]] = {}
27 self.has_vtable: dict[SymbolPath, bool] = {}
28 self.can_reorder_fields: dict[SymbolPath, bool] = {}
29
30 # _Executor -> vm::_Executor
31 self.base_class_unique: dict[str, SymbolPath] = {}
32
33 # These are called on the Forward Declare pass
34 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
35 #log('OnMethod %s %s', class_name, method_name)
36
37 # __init__ and so forth don't count
38 if method_name.startswith('__') and method_name.endswith('__'):
39 return
40
41 self.methods[class_name].append(method_name)
42
43 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
44 if len(base_class) > 1:
45 # Hack for
46 #
47 # class _Executor: pass
48 # versus
49 # class MyExecutor(vm._Executor): pass
50 base_key = base_class[-1]
51
52 # Fail if we have two base classes in different namespaces with the same
53 # name.
54 if base_key in self.base_class_unique:
55 # Make sure we don't have collisions
56 assert (self.base_class_unique[base_key] == base_class or
57 base_class
58 in self.subclasses[self.base_class_unique[base_key]]
59 ), base_class
60 else:
61 self.base_class_unique[base_key] = base_class
62
63 else:
64 base_key = base_class
65
66 self.subclasses[base_class].append(subclass)
67
68 def Calculate(self) -> None:
69 """Call this after the forward declare pass."""
70 for base_class, subclasses in self.subclasses.items():
71 self.can_reorder_fields[base_class] = False
72
73 for subclass in subclasses:
74 self.can_reorder_fields[subclass] = False
75
76 b_methods = self.methods[base_class]
77 s_methods = self.methods[subclass]
78 overlapping = set(b_methods) & set(s_methods)
79 for method in overlapping:
80 self.virtuals[(base_class, method)] = None
81 self.virtuals[(subclass, method)] = (base_class, method)
82 if overlapping:
83 self.has_vtable[base_class] = True
84 self.has_vtable[subclass] = True
85
86 # These is called on the Decl pass
87 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
88 return (class_name, method_name) in self.virtuals
89
90 def HasVTable(self, class_name: SymbolPath) -> bool:
91 return class_name in self.has_vtable
92
93 def CanReorderFields(self, class_name: SymbolPath) -> bool:
94 if class_name in self.can_reorder_fields:
95 return self.can_reorder_fields[class_name]
96 else:
97 return True # by default they can be reordered
98
99
100class Fact(object):
101 """
102 An abstract fact. These can be used to build up datalog programs.
103 """
104
105 def __init__(self) -> None:
106 pass
107
108 def name(self) -> str:
109 raise NotImplementedError()
110
111 def Generate(self, func: str, statement: int) -> str:
112 raise NotImplementedError()
113
114
115class FunctionCall(Fact):
116 def __init__(self, callee: str) -> None:
117 self.callee = callee
118
119 def name(self) -> str:
120 return 'call'
121
122 def Generate(self, func: str, statement: int) -> str:
123 return '{}\t{}\t{}\n'.format(func, statement, self.callee)
124
125
126class ControlFlowGraph(object):
127 """
128 A simple control-flow graph.
129
130 Every statement in the program is represented as a node in a graph with
131 unique a numeric ID. Control flow is represented as directed edges through
132 the graph. Loops can introduce back-edges. Every node in the graph will
133 satisfy at least one of the following conditions:
134
135 - Its indegree is at least one.
136
137 - Its outdegree is at least one.
138
139 For simple linear graphs all you need is the AddStatement method. For more
140 complex flows there is a set of context managers below to help simplify
141 construction.
142
143 - For branches-like statements (e.g. if- and try- statements) use
144 CfgBranchContext. It will take care of the details associated with
145 stitching the different branches to statements in the next statement.
146
147 - For loops, use CfgLoopContext. It will take care of adding back-edges
148 and connecting break statements to any statements that proceed the
149 loop.
150
151 - CfgBlockContext can be used for simple cases where you just want to
152 track the beginning and end of a sequence of statements.
153
154 Statements can carry annotations called facts, which are used as inputs to
155 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
156 To annotate a statement, use the AddFact method with any object that
157 implements the Fact interface.
158
159 See the unit tests in pass_state_test.py and the mycpp phase in
160 control_flow_pass.py for detailed examples of usage.
161 """
162
163 def __init__(self) -> None:
164 self.statement_counter: int = 0
165 self.edges: set[tuple[int, int]] = set({})
166 self.block_stack: list[int] = []
167 self.predecessors: set[int] = set({})
168 self.deadends: set[int] = set({})
169
170 # order doesn't actually matter here, but sets require elements to be
171 # hashable
172 self.facts: dict[int, list[Fact]] = defaultdict(list)
173
174 def AddEdge(self, pred: int, succ: int) -> None:
175 """
176 Add a directed edge from pred to succ. If pred is a deadend, its
177 non-deadends will be used instead.
178 """
179 if pred in self.deadends:
180 for w in [u for (u, v) in self.edges if v == pred]:
181 self.AddEdge(w, succ)
182 else:
183 self.edges.add((pred, succ))
184
185 def AddDeadend(self, statement: int):
186 """
187 Mark a statement as a dead-end (e.g. return or continue).
188 """
189 self.deadends.add(statement)
190
191 def AddStatement(self) -> int:
192 """
193 Add a new statement and return its ID.
194 """
195 if len(self.predecessors) == 0:
196 if len(self.block_stack):
197 self.predecessors.add(self.block_stack[-1])
198 else:
199 self.predecessors.add(self.statement_counter)
200
201 self.statement_counter += 1
202 for pred in self.predecessors:
203 self.AddEdge(pred, self.statement_counter)
204
205 self.predecessors = set({})
206
207 if len(self.block_stack):
208 self.block_stack[-1] = self.statement_counter
209
210 return self.statement_counter
211
212 def AddFact(self, statement: int, fact: Fact) -> None:
213 """
214 Annotate a statement with a fact.
215 """
216 self.facts[statement].append(fact)
217
218 def _PushBlock(self, begin: Optional[int] = None) -> int:
219 """
220 Start a block at the given statement ID. If a beginning statement isn't
221 provided one will be created and its ID will be returend.
222
223 Direct use of this function is discouraged. Consider using one of the
224 block context managers below instead.
225 """
226 if begin is None:
227 begin = self.AddStatement()
228 else:
229 self.predecessors.add(begin)
230
231 self.block_stack.append(begin)
232 return begin
233
234 def _PopBlock(self) -> int:
235 """
236 Pop a block from the top of the stack and return the ID of the block's
237 last statement.
238
239 Direct use of this function is discouraged. Consider using one of the
240 block context managers below instead.
241 """
242 assert len(self.block_stack)
243 last = self.block_stack.pop()
244 if len(self.block_stack) and last not in self.deadends:
245 self.block_stack[-1] = last
246
247 return last
248
249
250class CfgBlockContext(object):
251 """
252 Context manager to make dealing with things like with-statements easier.
253 """
254
255 def __init__(self,
256 cfg: ControlFlowGraph,
257 begin: Optional[int] = None) -> None:
258 self.cfg = cfg
259 if cfg is None:
260 return
261
262 self.entry = self.cfg._PushBlock(begin)
263 self.exit = self.entry
264
265 def __enter__(self) -> None:
266 return self if self.cfg else None
267
268 def __exit__(self, *args) -> None:
269 if not self.cfg:
270 return
271
272 self.exit = self.cfg._PopBlock()
273
274
275class CfgBranchContext(object):
276 """
277 Context manager to make dealing with if-else blocks easier.
278 """
279
280 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
281 self.cfg = cfg
282 self.entry = branch_point
283 self.exit = self.entry
284 if cfg is None:
285 return
286
287 self.arms = []
288 self.pushed = False
289
290 def AddBranch(self, entry: Optional[int] = None):
291 if not self.cfg:
292 return CfgBranchContext(None, None)
293
294 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
295 self.cfg._PushBlock(self.arms[-1].entry)
296 self.arms[-1].pushed = True
297 return self.arms[-1]
298
299 def __enter__(self) -> None:
300 return self
301
302 def __exit__(self, *args) -> None:
303 if not self.cfg:
304 return
305
306 if self.pushed:
307 self.exit = self.cfg._PopBlock()
308
309 for arm in self.arms:
310 if arm.exit not in self.cfg.deadends:
311 self.cfg.predecessors.add(arm.exit)
312
313
314class CfgLoopContext(object):
315 """
316 Context manager to make dealing with loops easier.
317 """
318
319 def __init__(self, cfg: ControlFlowGraph, entry: Optional[int] = None) -> None:
320 self.cfg = cfg
321 self.breaks = set({})
322 if cfg is None:
323 return
324
325 self.entry = self.cfg._PushBlock(entry)
326 self.exit = self.entry
327
328 def AddBreak(self, statement: int) -> None:
329 assert self.cfg
330 self.breaks.add(statement)
331 self.cfg.AddDeadend(statement)
332
333 def AddContinue(self, statement: int) -> None:
334 self.cfg.AddEdge(statement, self.entry)
335 self.cfg.AddDeadend(statement)
336
337 def __enter__(self) -> None:
338 return self if self.cfg else None
339
340 def __exit__(self, *args) -> None:
341 if not self.cfg:
342 return
343
344 self.exit = self.cfg._PopBlock()
345 self.cfg.AddEdge(self.exit, self.entry)
346 for pred in self.cfg.predecessors:
347 self.cfg.AddEdge(pred, self.entry)
348
349 # If we had any breaks, arm the predecessor set with the current
350 # statement and the break statements.
351 if len(self.breaks):
352 if len(self.cfg.block_stack):
353 self.cfg.predecessors.add(self.cfg.block_stack[-1])
354 else:
355 self.cfg.predecessors.add(self.cfg.statement_counter)
356
357 for b in self.breaks:
358 self.cfg.deadends.remove(b)
359 self.cfg.predecessors.add(b)
360
361
362def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
363 facts_dir='_tmp/mycpp-facts') -> None:
364 """
365 Dump the given control flow graphs and associated facts into the given
366 directory as text files that can be consumed by datalog.
367 """
368 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
369 fact_files = {}
370 os.makedirs(facts_dir, exist_ok=True)
371 with open(edge_facts, 'w') as cfg_f:
372 for func, cfg in sorted(cfgs.items()):
373 joined = join_name(func, delim='.')
374 for (u, v) in sorted(cfg.edges):
375 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
376
377 for statement, facts in sorted(cfg.facts.items()):
378 for fact in facts: # already sorted temporally
379 fact_f = fact_files.get(fact.name())
380 if not fact_f:
381 fact_f = open('{}/{}.facts'.format(facts_dir, fact.name()), 'w')
382 fact_files[fact.name()] = fact_f
383
384 fact_f.write(fact.Generate(joined, statement))
385
386 for f in fact_files.values():
387 f.close()