OILS / mycpp / pass_state.py View on Github | oilshell.org

562 lines, 298 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7import re
8import subprocess
9from collections import defaultdict
10
11from mypy.types import Type
12from mypy.nodes import Expression
13
14from mycpp.util import join_name, log, split_py_name, SymbolPath
15
16from typing import Optional
17
18_ = log
19
20
21class ModuleMember(object):
22 """
23 A member of a Python module.
24
25 e.g. core.state.Mem => core::state::Mem
26 """
27
28 def __init__(self, module_path: SymbolPath, member: str) -> None:
29 self.module_path = module_path
30 self.member = member
31
32
33class StaticObjectMember(object):
34 """
35 A static member of an object. Usually a a method like an alternative constructor.
36
37 e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
38 """
39
40 def __init__(self, base_type_name: SymbolPath, member: str) -> None:
41 self.base_type_name = base_type_name
42 self.member = member
43
44
45class HeapObjectMember(object):
46 """
47 A member of a heap-allocated object.
48
49 e.g foo.empty() => foo->empty()
50 """
51
52 def __init__(self, object_expr: Expression, object_type: Type,
53 member: str) -> None:
54 self.object_expr = object_expr
55 self.object_type = object_type
56 self.member = member
57
58
59class StackObjectMember(object):
60 """
61 A member of a stack-allocated object.
62
63 e.g foo.empty() => foo.empty()
64 """
65
66 def __init__(self, object_expr: Expression, object_type: Type,
67 member: str) -> None:
68 self.ojbect_expr = object_expr
69 self.object_type = object_type
70 self.member = member
71
72
73class Virtual(object):
74 """Calculate which C++ methods need the virtual keyword.
75
76 See unit test for example usage.
77 """
78
79 def __init__(self) -> None:
80 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
81 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
82 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
83 str]]] = {}
84 self.has_vtable: dict[SymbolPath, bool] = {}
85 self.can_reorder_fields: dict[SymbolPath, bool] = {}
86
87 # _Executor -> vm::_Executor
88 self.base_class_unique: dict[str, SymbolPath] = {}
89
90 # These are called on the Forward Declare pass
91 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
92 #log('OnMethod %s %s', class_name, method_name)
93
94 # __init__ and so forth don't count
95 if method_name.startswith('__') and method_name.endswith('__'):
96 return
97
98 self.methods[class_name].append(method_name)
99
100 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
101 if len(base_class) > 1:
102 # Hack for
103 #
104 # class _Executor: pass
105 # versus
106 # class MyExecutor(vm._Executor): pass
107 base_key = base_class[-1]
108
109 # Fail if we have two base classes in different namespaces with the same
110 # name.
111 if base_key in self.base_class_unique:
112 # Make sure we don't have collisions
113 assert (self.base_class_unique[base_key] == base_class or
114 base_class
115 in self.subclasses[self.base_class_unique[base_key]]
116 ), base_class
117 else:
118 self.base_class_unique[base_key] = base_class
119
120 else:
121 base_key = base_class
122
123 self.subclasses[base_class].append(subclass)
124
125 def Calculate(self) -> None:
126 """Call this after the forward declare pass."""
127 for base_class, subclasses in self.subclasses.items():
128 self.can_reorder_fields[base_class] = False
129
130 for subclass in subclasses:
131 self.can_reorder_fields[subclass] = False
132
133 b_methods = self.methods[base_class]
134 s_methods = self.methods[subclass]
135 overlapping = set(b_methods) & set(s_methods)
136 for method in overlapping:
137 self.virtuals[(base_class, method)] = None
138 self.virtuals[(subclass, method)] = (base_class, method)
139 if overlapping:
140 self.has_vtable[base_class] = True
141 self.has_vtable[subclass] = True
142
143 # These is called on the Decl pass
144 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
145 return (class_name, method_name) in self.virtuals
146
147 def HasVTable(self, class_name: SymbolPath) -> bool:
148 return class_name in self.has_vtable
149
150 def CanReorderFields(self, class_name: SymbolPath) -> bool:
151 if class_name in self.can_reorder_fields:
152 return self.can_reorder_fields[class_name]
153 else:
154 return True # by default they can be reordered
155
156
157def SymbolPathToReference(func: str, p: SymbolPath) -> str:
158 if len(p) > 1:
159 return '$ObjectMember({}, {})'.format(join_name(p[:-1], delim='.'), p[-1])
160
161 return '$LocalVariable({}, {})'.format(func, p[0])
162
163
164class Fact(object):
165 """
166 An abstract fact. These can be used to build up datalog programs.
167 """
168
169 def __init__(self) -> None:
170 pass
171
172 def name(self) -> str:
173 raise NotImplementedError()
174
175 def Generate(self, func: str, statement: int) -> str:
176 raise NotImplementedError()
177
178
179class FunctionCall(Fact):
180
181 def __init__(self, callee: str) -> None:
182 self.callee = callee
183
184 def name(self) -> str:
185 return 'call'
186
187 def Generate(self, func: str, statement: int) -> str:
188 return '{}\t{}\t{}\n'.format(func, statement, self.callee)
189
190
191class Definition(Fact):
192 """
193 The definition of a variable. This corresponds to an allocation.
194 """
195
196 def __init__(self, ref: SymbolPath, obj: str) -> None:
197 self.ref = ref
198 self.obj = obj
199
200 def name(self) -> str:
201 return 'assign'
202
203 def Generate(self, func: str, statement: int) -> str:
204 return '{}\t{}\t{}\t{}\n'.format(func, statement,
205 SymbolPathToReference(func, self.ref),
206 self.obj)
207
208
209class Assignment(Fact):
210 """
211 The assignment of one variable or object member to another.
212 """
213
214 def __init__(self, lhs: SymbolPath, rhs: SymbolPath) -> None:
215 self.lhs = lhs
216 self.rhs = rhs
217
218 def name(self) -> str:
219 return 'assign'
220
221 def Generate(self, func: str, statement: int) -> str:
222 return '{}\t{}\t{}\t$Ref({})\n'.format(func, statement,
223 SymbolPathToReference(func, self.lhs),
224 SymbolPathToReference(func, self.rhs))
225
226
227class Use(Fact):
228 """
229 The use of a reference.
230 """
231
232 def __init__(self, ref: SymbolPath) -> None:
233 self.ref = ref
234
235 def name(self) -> str:
236 return 'use'
237
238 def Generate(self, func: str, statement: int) -> str:
239 return '{}\t{}\t{}\n'.format(func, statement,
240 SymbolPathToReference(func, self.ref))
241
242
243class ControlFlowGraph(object):
244 """
245 A simple control-flow graph.
246
247 Every statement in the program is represented as a node in a graph with
248 unique a numeric ID. Control flow is represented as directed edges through
249 the graph. Loops can introduce back-edges. Every node in the graph will
250 satisfy at least one of the following conditions:
251
252 - Its indegree is at least one.
253
254 - Its outdegree is at least one.
255
256 For simple linear graphs all you need is the AddStatement method. For more
257 complex flows there is a set of context managers below to help simplify
258 construction.
259
260 - For branches-like statements (e.g. if- and try- statements) use
261 CfgBranchContext. It will take care of the details associated with
262 stitching the different branches to statements in the next statement.
263
264 - For loops, use CfgLoopContext. It will take care of adding back-edges
265 and connecting break statements to any statements that proceed the
266 loop.
267
268 - CfgBlockContext can be used for simple cases where you just want to
269 track the beginning and end of a sequence of statements.
270
271 Statements can carry annotations called facts, which are used as inputs to
272 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
273 To annotate a statement, use the AddFact method with any object that
274 implements the Fact interface.
275
276 See the unit tests in pass_state_test.py and the mycpp phase in
277 control_flow_pass.py for detailed examples of usage.
278 """
279
280 def __init__(self) -> None:
281 self.statement_counter: int = 0
282 self.edges: set[tuple[int, int]] = set({})
283 self.block_stack: list[int] = []
284 self.predecessors: set[int] = set({})
285 self.deadends: set[int] = set({})
286
287 # order doesn't actually matter here, but sets require elements to be
288 # hashable
289 self.facts: dict[int, list[Fact]] = defaultdict(list)
290
291 def AddEdge(self, pred: int, succ: int) -> None:
292 """
293 Add a directed edge from pred to succ. If pred is a deadend, its
294 non-deadends will be used instead.
295 """
296 if pred in self.deadends:
297 for w in [u for (u, v) in self.edges if v == pred]:
298 self.AddEdge(w, succ)
299 else:
300 self.edges.add((pred, succ))
301
302 def AddDeadend(self, statement: int):
303 """
304 Mark a statement as a dead-end (e.g. return or continue).
305 """
306 self.deadends.add(statement)
307
308 def AddStatement(self) -> int:
309 """
310 Add a new statement and return its ID.
311 """
312 if len(self.predecessors) == 0:
313 if len(self.block_stack):
314 self.predecessors.add(self.block_stack[-1])
315 else:
316 self.predecessors.add(self.statement_counter)
317
318 self.statement_counter += 1
319 for pred in self.predecessors:
320 self.AddEdge(pred, self.statement_counter)
321
322 self.predecessors = set({})
323
324 if len(self.block_stack):
325 self.block_stack[-1] = self.statement_counter
326
327 return self.statement_counter
328
329 def AddFact(self, statement: int, fact: Fact) -> None:
330 """
331 Annotate a statement with a fact.
332 """
333 self.facts[statement].append(fact)
334
335 def _PushBlock(self, begin: Optional[int] = None) -> int:
336 """
337 Start a block at the given statement ID. If a beginning statement isn't
338 provided one will be created and its ID will be returend.
339
340 Direct use of this function is discouraged. Consider using one of the
341 block context managers below instead.
342 """
343 if begin is None:
344 begin = self.AddStatement()
345 else:
346 self.predecessors.add(begin)
347
348 self.block_stack.append(begin)
349 return begin
350
351 def _PopBlock(self) -> int:
352 """
353 Pop a block from the top of the stack and return the ID of the block's
354 last statement.
355
356 Direct use of this function is discouraged. Consider using one of the
357 block context managers below instead.
358 """
359 assert len(self.block_stack)
360 last = self.block_stack.pop()
361 if len(self.block_stack) and last not in self.deadends:
362 self.block_stack[-1] = last
363
364 return last
365
366
367class CfgBlockContext(object):
368 """
369 Context manager to make dealing with things like with-statements easier.
370 """
371
372 def __init__(self,
373 cfg: ControlFlowGraph,
374 begin: Optional[int] = None) -> None:
375 self.cfg = cfg
376 if cfg is None:
377 return
378
379 self.entry = self.cfg._PushBlock(begin)
380 self.exit = self.entry
381
382 def __enter__(self) -> None:
383 return self if self.cfg else None
384
385 def __exit__(self, *args) -> None:
386 if not self.cfg:
387 return
388
389 self.exit = self.cfg._PopBlock()
390
391
392class CfgBranchContext(object):
393 """
394 Context manager to make dealing with if-else blocks easier.
395 """
396
397 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
398 self.cfg = cfg
399 self.entry = branch_point
400 self.exit = self.entry
401 if cfg is None:
402 return
403
404 self.arms = []
405 self.pushed = False
406
407 def AddBranch(self, entry: Optional[int] = None):
408 if not self.cfg:
409 return CfgBranchContext(None, None)
410
411 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
412 self.cfg._PushBlock(self.arms[-1].entry)
413 self.arms[-1].pushed = True
414 return self.arms[-1]
415
416 def __enter__(self) -> None:
417 return self
418
419 def __exit__(self, *args) -> None:
420 if not self.cfg:
421 return
422
423 if self.pushed:
424 self.exit = self.cfg._PopBlock()
425
426 for arm in self.arms:
427 if arm.exit not in self.cfg.deadends:
428 self.cfg.predecessors.add(arm.exit)
429
430
431class CfgLoopContext(object):
432 """
433 Context manager to make dealing with loops easier.
434 """
435
436 def __init__(self,
437 cfg: ControlFlowGraph,
438 entry: Optional[int] = None) -> None:
439 self.cfg = cfg
440 self.breaks = set({})
441 if cfg is None:
442 return
443
444 self.entry = self.cfg._PushBlock(entry)
445 self.exit = self.entry
446
447 def AddBreak(self, statement: int) -> None:
448 assert self.cfg
449 self.breaks.add(statement)
450 self.cfg.AddDeadend(statement)
451
452 def AddContinue(self, statement: int) -> None:
453 self.cfg.AddEdge(statement, self.entry)
454 self.cfg.AddDeadend(statement)
455
456 def __enter__(self) -> None:
457 return self if self.cfg else None
458
459 def __exit__(self, *args) -> None:
460 if not self.cfg:
461 return
462
463 self.exit = self.cfg._PopBlock()
464 self.cfg.AddEdge(self.exit, self.entry)
465 for pred in self.cfg.predecessors:
466 self.cfg.AddEdge(pred, self.entry)
467
468 # If we had any breaks, arm the predecessor set with the current
469 # statement and the break statements.
470 if len(self.breaks):
471 if len(self.cfg.block_stack):
472 self.cfg.predecessors.add(self.cfg.block_stack[-1])
473 else:
474 self.cfg.predecessors.add(self.cfg.statement_counter)
475
476 for b in self.breaks:
477 self.cfg.deadends.remove(b)
478 self.cfg.predecessors.add(b)
479
480
481class StackRoots(object):
482 """
483 Output of the souffle stack roots solver.
484 """
485
486 def __init__(self) -> None:
487 self.root_tuples: set[tuple(SymbolPath, SymbolPath)] = set({})
488
489
490 def needs_root(self, func: SymbolPath, reference: SymbolPath) -> bool:
491 """
492 Returns true if the given reference should have a stack root.
493 """
494 return (func, reference) in self.root_tuples
495
496
497def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
498 facts_dir='_tmp/mycpp-facts') -> None:
499 """
500 Dump the given control flow graphs and associated facts into the given
501 directory as text files that can be consumed by datalog.
502 """
503 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
504 fact_files = {}
505 os.makedirs(facts_dir, exist_ok=True)
506 with open(edge_facts, 'w') as cfg_f:
507 for func, cfg in sorted(cfgs.items()):
508 joined = join_name(func, delim='.')
509 for (u, v) in sorted(cfg.edges):
510 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
511
512 for statement, facts in sorted(cfg.facts.items()):
513 for fact in facts: # already sorted temporally
514 fact_f = fact_files.get(fact.name())
515 if not fact_f:
516 fact_f = open(
517 '{}/{}.facts'.format(facts_dir, fact.name()), 'w')
518 fact_files[fact.name()] = fact_f
519
520 fact_f.write(fact.Generate(joined, statement))
521
522 for f in fact_files.values():
523 f.close()
524
525
526def ComputeStackRoots(cfgs: dict[str, ControlFlowGraph],
527 facts_dir:str = '_tmp/mycpp-facts',
528 souffle_output_dir:str = '_tmp') -> StackRoots:
529 """
530 Run the the souffle stack roots solver and translate its output in a format
531 that can be queried by cppgen_pass.
532 """
533 DumpControlFlowGraphs(cfgs, facts_dir=facts_dir)
534 subprocess.check_call(
535 [
536 '_bin/datalog/dataflow',
537 '-F', facts_dir,
538 '-D', souffle_output_dir,
539 ]
540 )
541
542 roots = StackRoots()
543 with open('{}/stack_root_vars.tsv'.format(souffle_output_dir), 'r') as roots_f:
544 pat = re.compile(r'\$(.*)\((.*), (.*)\)')
545 for line in roots_f:
546 function, ref = line.split('\t')
547 m = pat.match(ref)
548 assert m.group(1) in ('LocalVariable', 'ObjectMember')
549 if m.group(1) == 'LocalVariable':
550 _, ref_func, var_name = m.groups()
551 assert ref_func == function
552 roots.root_tuples.add((split_py_name(function), (var_name,)))
553
554 if m.group(1) == 'ObjectMember':
555 _, base_obj, member_name = m.groups()
556 roots.root_tuples.add((split_py_name(function),
557 split_py_name(base_obj) + (member_name,)))
558
559 for t in roots.root_tuples:
560 print(t)
561
562 return roots