1 | """
|
2 | pass_state.py
|
3 | """
|
4 | from __future__ import print_function
|
5 |
|
6 | import os
|
7 | from collections import defaultdict
|
8 |
|
9 | from mycpp.util import join_name, log, SymbolPath
|
10 |
|
11 | from typing import Optional
|
12 |
|
13 | _ = log
|
14 |
|
15 |
|
16 | class Virtual(object):
|
17 | """Calculate which C++ methods need the virtual keyword.
|
18 |
|
19 | See unit test for example usage.
|
20 | """
|
21 |
|
22 | def __init__(self) -> None:
|
23 | self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
|
24 | self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
|
25 | self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
|
26 | str]]] = {}
|
27 | self.has_vtable: dict[SymbolPath, bool] = {}
|
28 | self.can_reorder_fields: dict[SymbolPath, bool] = {}
|
29 |
|
30 | # _Executor -> vm::_Executor
|
31 | self.base_class_unique: dict[str, SymbolPath] = {}
|
32 |
|
33 | # These are called on the Forward Declare pass
|
34 | def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
|
35 | #log('OnMethod %s %s', class_name, method_name)
|
36 |
|
37 | # __init__ and so forth don't count
|
38 | if method_name.startswith('__') and method_name.endswith('__'):
|
39 | return
|
40 |
|
41 | self.methods[class_name].append(method_name)
|
42 |
|
43 | def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
|
44 | if len(base_class) > 1:
|
45 | # Hack for
|
46 | #
|
47 | # class _Executor: pass
|
48 | # versus
|
49 | # class MyExecutor(vm._Executor): pass
|
50 | base_key = base_class[-1]
|
51 |
|
52 | # Fail if we have two base classes in different namespaces with the same
|
53 | # name.
|
54 | if base_key in self.base_class_unique:
|
55 | # Make sure we don't have collisions
|
56 | assert (self.base_class_unique[base_key] == base_class or
|
57 | base_class
|
58 | in self.subclasses[self.base_class_unique[base_key]]
|
59 | ), base_class
|
60 | else:
|
61 | self.base_class_unique[base_key] = base_class
|
62 |
|
63 | else:
|
64 | base_key = base_class
|
65 |
|
66 | self.subclasses[base_class].append(subclass)
|
67 |
|
68 | def Calculate(self) -> None:
|
69 | """Call this after the forward declare pass."""
|
70 | for base_class, subclasses in self.subclasses.items():
|
71 | self.can_reorder_fields[base_class] = False
|
72 |
|
73 | for subclass in subclasses:
|
74 | self.can_reorder_fields[subclass] = False
|
75 |
|
76 | b_methods = self.methods[base_class]
|
77 | s_methods = self.methods[subclass]
|
78 | overlapping = set(b_methods) & set(s_methods)
|
79 | for method in overlapping:
|
80 | self.virtuals[(base_class, method)] = None
|
81 | self.virtuals[(subclass, method)] = (base_class, method)
|
82 | if overlapping:
|
83 | self.has_vtable[base_class] = True
|
84 | self.has_vtable[subclass] = True
|
85 |
|
86 | # These is called on the Decl pass
|
87 | def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
|
88 | return (class_name, method_name) in self.virtuals
|
89 |
|
90 | def HasVTable(self, class_name: SymbolPath) -> bool:
|
91 | return class_name in self.has_vtable
|
92 |
|
93 | def CanReorderFields(self, class_name: SymbolPath) -> bool:
|
94 | if class_name in self.can_reorder_fields:
|
95 | return self.can_reorder_fields[class_name]
|
96 | else:
|
97 | return True # by default they can be reordered
|
98 |
|
99 |
|
100 | class Fact(object):
|
101 | """
|
102 | An abstract fact. These can be used to build up datalog programs.
|
103 | """
|
104 |
|
105 | def __init__(self) -> None:
|
106 | pass
|
107 |
|
108 | def name(self) -> str:
|
109 | raise NotImplementedError()
|
110 |
|
111 | def Generate(self, func: str, statement: int) -> str:
|
112 | raise NotImplementedError()
|
113 |
|
114 |
|
115 | class FunctionCall(Fact):
|
116 | def __init__(self, callee: str) -> None:
|
117 | self.callee = callee
|
118 |
|
119 | def name(self) -> str:
|
120 | return 'call'
|
121 |
|
122 | def Generate(self, func: str, statement: int) -> str:
|
123 | return '{}\t{}\t{}\n'.format(func, statement, self.callee)
|
124 |
|
125 |
|
126 | class ControlFlowGraph(object):
|
127 | """
|
128 | A simple control-flow graph.
|
129 |
|
130 | Every statement in the program is represented as a node in a graph with
|
131 | unique a numeric ID. Control flow is represented as directed edges through
|
132 | the graph. Loops can introduce back-edges. Every node in the graph will
|
133 | satisfy at least one of the following conditions:
|
134 |
|
135 | - Its indegree is at least one.
|
136 |
|
137 | - Its outdegree is at least one.
|
138 |
|
139 | For simple linear graphs all you need is the AddStatement method. For more
|
140 | complex flows there is a set of context managers below to help simplify
|
141 | construction.
|
142 |
|
143 | - For branches-like statements (e.g. if- and try- statements) use
|
144 | CfgBranchContext. It will take care of the details associated with
|
145 | stitching the different branches to statements in the next statement.
|
146 |
|
147 | - For loops, use CfgLoopContext. It will take care of adding back-edges
|
148 | and connecting break statements to any statements that proceed the
|
149 | loop.
|
150 |
|
151 | - CfgBlockContext can be used for simple cases where you just want to
|
152 | track the beginning and end of a sequence of statements.
|
153 |
|
154 | Statements can carry annotations called facts, which are used as inputs to
|
155 | datalog programs to perform dataflow diffrent kinds of dataflow analyses.
|
156 | To annotate a statement, use the AddFact method with any object that
|
157 | implements the Fact interface.
|
158 |
|
159 | See the unit tests in pass_state_test.py and the mycpp phase in
|
160 | control_flow_pass.py for detailed examples of usage.
|
161 | """
|
162 |
|
163 | def __init__(self) -> None:
|
164 | self.statement_counter: int = 0
|
165 | self.edges: set[tuple[int, int]] = set({})
|
166 | self.block_stack: list[int] = []
|
167 | self.predecessors: set[int] = set({})
|
168 | self.deadends: set[int] = set({})
|
169 |
|
170 | # order doesn't actually matter here, but sets require elements to be
|
171 | # hashable
|
172 | self.facts: dict[int, list[Fact]] = defaultdict(list)
|
173 |
|
174 | def AddEdge(self, pred: int, succ: int) -> None:
|
175 | """
|
176 | Add a directed edge from pred to succ. If pred is a deadend, its
|
177 | non-deadends will be used instead.
|
178 | """
|
179 | if pred in self.deadends:
|
180 | for w in [u for (u, v) in self.edges if v == pred]:
|
181 | self.AddEdge(w, succ)
|
182 | else:
|
183 | self.edges.add((pred, succ))
|
184 |
|
185 | def AddDeadend(self, statement: int):
|
186 | """
|
187 | Mark a statement as a dead-end (e.g. return or continue).
|
188 | """
|
189 | self.deadends.add(statement)
|
190 |
|
191 | def AddStatement(self) -> int:
|
192 | """
|
193 | Add a new statement and return its ID.
|
194 | """
|
195 | if len(self.predecessors) == 0:
|
196 | if len(self.block_stack):
|
197 | self.predecessors.add(self.block_stack[-1])
|
198 | else:
|
199 | self.predecessors.add(self.statement_counter)
|
200 |
|
201 | self.statement_counter += 1
|
202 | for pred in self.predecessors:
|
203 | self.AddEdge(pred, self.statement_counter)
|
204 |
|
205 | self.predecessors = set({})
|
206 |
|
207 | if len(self.block_stack):
|
208 | self.block_stack[-1] = self.statement_counter
|
209 |
|
210 | return self.statement_counter
|
211 |
|
212 | def AddFact(self, statement: int, fact: Fact) -> None:
|
213 | """
|
214 | Annotate a statement with a fact.
|
215 | """
|
216 | self.facts[statement].append(fact)
|
217 |
|
218 | def _PushBlock(self, begin: Optional[int] = None) -> int:
|
219 | """
|
220 | Start a block at the given statement ID. If a beginning statement isn't
|
221 | provided one will be created and its ID will be returend.
|
222 |
|
223 | Direct use of this function is discouraged. Consider using one of the
|
224 | block context managers below instead.
|
225 | """
|
226 | if begin is None:
|
227 | begin = self.AddStatement()
|
228 | else:
|
229 | self.predecessors.add(begin)
|
230 |
|
231 | self.block_stack.append(begin)
|
232 | return begin
|
233 |
|
234 | def _PopBlock(self) -> int:
|
235 | """
|
236 | Pop a block from the top of the stack and return the ID of the block's
|
237 | last statement.
|
238 |
|
239 | Direct use of this function is discouraged. Consider using one of the
|
240 | block context managers below instead.
|
241 | """
|
242 | assert len(self.block_stack)
|
243 | last = self.block_stack.pop()
|
244 | if len(self.block_stack) and last not in self.deadends:
|
245 | self.block_stack[-1] = last
|
246 |
|
247 | return last
|
248 |
|
249 |
|
250 | class CfgBlockContext(object):
|
251 | """
|
252 | Context manager to make dealing with things like with-statements easier.
|
253 | """
|
254 |
|
255 | def __init__(self,
|
256 | cfg: ControlFlowGraph,
|
257 | begin: Optional[int] = None) -> None:
|
258 | self.cfg = cfg
|
259 | if cfg is None:
|
260 | return
|
261 |
|
262 | self.entry = self.cfg._PushBlock(begin)
|
263 | self.exit = self.entry
|
264 |
|
265 | def __enter__(self) -> None:
|
266 | return self if self.cfg else None
|
267 |
|
268 | def __exit__(self, *args) -> None:
|
269 | if not self.cfg:
|
270 | return
|
271 |
|
272 | self.exit = self.cfg._PopBlock()
|
273 |
|
274 |
|
275 | class CfgBranchContext(object):
|
276 | """
|
277 | Context manager to make dealing with if-else blocks easier.
|
278 | """
|
279 |
|
280 | def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
|
281 | self.cfg = cfg
|
282 | self.entry = branch_point
|
283 | self.exit = self.entry
|
284 | if cfg is None:
|
285 | return
|
286 |
|
287 | self.arms = []
|
288 | self.pushed = False
|
289 |
|
290 | def AddBranch(self, entry: Optional[int] = None):
|
291 | if not self.cfg:
|
292 | return CfgBranchContext(None, None)
|
293 |
|
294 | self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
|
295 | self.cfg._PushBlock(self.arms[-1].entry)
|
296 | self.arms[-1].pushed = True
|
297 | return self.arms[-1]
|
298 |
|
299 | def __enter__(self) -> None:
|
300 | return self
|
301 |
|
302 | def __exit__(self, *args) -> None:
|
303 | if not self.cfg:
|
304 | return
|
305 |
|
306 | if self.pushed:
|
307 | self.exit = self.cfg._PopBlock()
|
308 |
|
309 | for arm in self.arms:
|
310 | if arm.exit not in self.cfg.deadends:
|
311 | self.cfg.predecessors.add(arm.exit)
|
312 |
|
313 |
|
314 | class CfgLoopContext(object):
|
315 | """
|
316 | Context manager to make dealing with loops easier.
|
317 | """
|
318 |
|
319 | def __init__(self, cfg: ControlFlowGraph, entry: Optional[int] = None) -> None:
|
320 | self.cfg = cfg
|
321 | self.breaks = set({})
|
322 | if cfg is None:
|
323 | return
|
324 |
|
325 | self.entry = self.cfg._PushBlock(entry)
|
326 | self.exit = self.entry
|
327 |
|
328 | def AddBreak(self, statement: int) -> None:
|
329 | assert self.cfg
|
330 | self.breaks.add(statement)
|
331 | self.cfg.AddDeadend(statement)
|
332 |
|
333 | def AddContinue(self, statement: int) -> None:
|
334 | self.cfg.AddEdge(statement, self.entry)
|
335 | self.cfg.AddDeadend(statement)
|
336 |
|
337 | def __enter__(self) -> None:
|
338 | return self if self.cfg else None
|
339 |
|
340 | def __exit__(self, *args) -> None:
|
341 | if not self.cfg:
|
342 | return
|
343 |
|
344 | self.exit = self.cfg._PopBlock()
|
345 | self.cfg.AddEdge(self.exit, self.entry)
|
346 | for pred in self.cfg.predecessors:
|
347 | self.cfg.AddEdge(pred, self.entry)
|
348 |
|
349 | # If we had any breaks, arm the predecessor set with the current
|
350 | # statement and the break statements.
|
351 | if len(self.breaks):
|
352 | if len(self.cfg.block_stack):
|
353 | self.cfg.predecessors.add(self.cfg.block_stack[-1])
|
354 | else:
|
355 | self.cfg.predecessors.add(self.cfg.statement_counter)
|
356 |
|
357 | for b in self.breaks:
|
358 | self.cfg.deadends.remove(b)
|
359 | self.cfg.predecessors.add(b)
|
360 |
|
361 |
|
362 | def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
|
363 | facts_dir='_tmp/mycpp-facts') -> None:
|
364 | """
|
365 | Dump the given control flow graphs and associated facts into the given
|
366 | directory as text files that can be consumed by datalog.
|
367 | """
|
368 | edge_facts = '{}/cf_edge.facts'.format(facts_dir)
|
369 | fact_files = {}
|
370 | os.makedirs(facts_dir, exist_ok=True)
|
371 | with open(edge_facts, 'w') as cfg_f:
|
372 | for func, cfg in sorted(cfgs.items()):
|
373 | joined = join_name(func, delim='.')
|
374 | for (u, v) in sorted(cfg.edges):
|
375 | cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
|
376 |
|
377 | for statement, facts in sorted(cfg.facts.items()):
|
378 | for fact in facts: # already sorted temporally
|
379 | fact_f = fact_files.get(fact.name())
|
380 | if not fact_f:
|
381 | fact_f = open('{}/{}.facts'.format(facts_dir, fact.name()), 'w')
|
382 | fact_files[fact.name()] = fact_f
|
383 |
|
384 | fact_f.write(fact.Generate(joined, statement))
|
385 |
|
386 | for f in fact_files.values():
|
387 | f.close()
|