OILS / mycpp / pass_state.py View on Github | oilshell.org

580 lines, 310 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7import re
8import subprocess
9from collections import defaultdict
10
11from mypy.types import Type
12from mypy.nodes import Expression
13
14from mycpp.util import join_name, log, split_py_name, SymbolPath
15
16from typing import Optional
17
18_ = log
19
20
21class ModuleMember(object):
22 """
23 A member of a Python module.
24
25 e.g. core.state.Mem => core::state::Mem
26 """
27
28 def __init__(self, module_path: SymbolPath, member: str) -> None:
29 self.module_path = module_path
30 self.member = member
31
32
33class StaticObjectMember(object):
34 """
35 A static member of an object. Usually a a method like an alternative constructor.
36
37 e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
38 """
39
40 def __init__(self, base_type_name: SymbolPath, member: str) -> None:
41 self.base_type_name = base_type_name
42 self.member = member
43
44
45class HeapObjectMember(object):
46 """
47 A member of a heap-allocated object.
48
49 e.g foo.empty() => foo->empty()
50 """
51
52 def __init__(self, object_expr: Expression, object_type: Type,
53 member: str) -> None:
54 self.object_expr = object_expr
55 self.object_type = object_type
56 self.member = member
57
58
59class StackObjectMember(object):
60 """
61 A member of a stack-allocated object.
62
63 e.g foo.empty() => foo.empty()
64 """
65
66 def __init__(self, object_expr: Expression, object_type: Type,
67 member: str) -> None:
68 self.ojbect_expr = object_expr
69 self.object_type = object_type
70 self.member = member
71
72
73class Virtual(object):
74 """Calculate which C++ methods need the virtual keyword.
75
76 See unit test for example usage.
77 """
78
79 def __init__(self) -> None:
80 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
81 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
82 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
83 str]]] = {}
84 self.has_vtable: dict[SymbolPath, bool] = {}
85 self.can_reorder_fields: dict[SymbolPath, bool] = {}
86
87 # _Executor -> vm::_Executor
88 self.base_class_unique: dict[str, SymbolPath] = {}
89
90 # These are called on the Forward Declare pass
91 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
92 #log('OnMethod %s %s', class_name, method_name)
93
94 # __init__ and so forth don't count
95 if method_name.startswith('__') and method_name.endswith('__'):
96 return
97
98 self.methods[class_name].append(method_name)
99
100 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
101 if len(base_class) > 1:
102 # Hack for
103 #
104 # class _Executor: pass
105 # versus
106 # class MyExecutor(vm._Executor): pass
107 base_key = base_class[-1]
108
109 # Fail if we have two base classes in different namespaces with the same
110 # name.
111 if base_key in self.base_class_unique:
112 # Make sure we don't have collisions
113 assert (self.base_class_unique[base_key] == base_class or
114 base_class
115 in self.subclasses[self.base_class_unique[base_key]]
116 ), base_class
117 else:
118 self.base_class_unique[base_key] = base_class
119
120 else:
121 base_key = base_class
122
123 self.subclasses[base_class].append(subclass)
124
125 def Calculate(self) -> None:
126 """Call this after the forward declare pass."""
127 for base_class, subclasses in self.subclasses.items():
128 self.can_reorder_fields[base_class] = False
129
130 for subclass in subclasses:
131 self.can_reorder_fields[subclass] = False
132
133 b_methods = self.methods[base_class]
134 s_methods = self.methods[subclass]
135 overlapping = set(b_methods) & set(s_methods)
136 for method in overlapping:
137 self.virtuals[(base_class, method)] = None
138 self.virtuals[(subclass, method)] = (base_class, method)
139 if overlapping:
140 self.has_vtable[base_class] = True
141 self.has_vtable[subclass] = True
142
143 # These is called on the Decl pass
144 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
145 return (class_name, method_name) in self.virtuals
146
147 def HasVTable(self, class_name: SymbolPath) -> bool:
148 return class_name in self.has_vtable
149
150 def CanReorderFields(self, class_name: SymbolPath) -> bool:
151 if class_name in self.can_reorder_fields:
152 return self.can_reorder_fields[class_name]
153 else:
154 return True # by default they can be reordered
155
156
157def SymbolPathToReference(func: str, p: SymbolPath) -> str:
158 if len(p) > 1:
159 return '$ObjectMember({}, {})'.format(join_name(p[:-1], delim='.'),
160 p[-1])
161
162 return '$LocalVariable({}, {})'.format(func, p[0])
163
164
165class Fact(object):
166 """
167 An abstract fact. These can be used to build up datalog programs.
168 """
169
170 def __init__(self) -> None:
171 pass
172
173 def name(self) -> str:
174 raise NotImplementedError()
175
176 def Generate(self, func: str, statement: int) -> str:
177 raise NotImplementedError()
178
179
180class FunctionCall(Fact):
181
182 def __init__(self, callee: str) -> None:
183 self.callee = callee
184
185 def name(self) -> str:
186 return 'call'
187
188 def Generate(self, func: str, statement: int) -> str:
189 return '{}\t{}\t{}\n'.format(func, statement, self.callee)
190
191
192class Definition(Fact):
193 """
194 The definition of a variable. This corresponds to an allocation.
195 """
196
197 def __init__(self, ref: SymbolPath, obj: str) -> None:
198 self.ref = ref
199 self.obj = obj
200
201 def name(self) -> str:
202 return 'assign'
203
204 def Generate(self, func: str, statement: int) -> str:
205 return '{}\t{}\t{}\t{}\n'.format(func, statement,
206 SymbolPathToReference(func, self.ref),
207 self.obj)
208
209
210class Assignment(Fact):
211 """
212 The assignment of one variable or object member to another.
213 """
214
215 def __init__(self, lhs: SymbolPath, rhs: SymbolPath) -> None:
216 self.lhs = lhs
217 self.rhs = rhs
218
219 def name(self) -> str:
220 return 'assign'
221
222 def Generate(self, func: str, statement: int) -> str:
223 return '{}\t{}\t{}\t$Ref({})\n'.format(
224 func, statement, SymbolPathToReference(func, self.lhs),
225 SymbolPathToReference(func, self.rhs))
226
227
228class Use(Fact):
229 """
230 The use of a reference.
231 """
232
233 def __init__(self, ref: SymbolPath) -> None:
234 self.ref = ref
235
236 def name(self) -> str:
237 return 'use'
238
239 def Generate(self, func: str, statement: int) -> str:
240 return '{}\t{}\t{}\n'.format(func, statement,
241 SymbolPathToReference(func, self.ref))
242
243
244class Bind(Fact):
245 """
246 Binding a reference to a function parameter.
247 """
248
249 def __init__(self, ref: SymbolPath, callee: SymbolPath,
250 param: str) -> None:
251 self.ref = ref
252 self.callee = callee
253 self.param = param
254
255 def name(self) -> str:
256 return 'bind'
257
258 def Generate(self, func: str, statement: int) -> str:
259 return '{}\t{}\t{}\t{}\t{}\n'.format(
260 func, statement, SymbolPathToReference(func, self.ref),
261 join_name(self.callee, delim='.'), self.param)
262
263
264class ControlFlowGraph(object):
265 """
266 A simple control-flow graph.
267
268 Every statement in the program is represented as a node in a graph with
269 unique a numeric ID. Control flow is represented as directed edges through
270 the graph. Loops can introduce back-edges. Every node in the graph will
271 satisfy at least one of the following conditions:
272
273 - Its indegree is at least one.
274
275 - Its outdegree is at least one.
276
277 For simple linear graphs all you need is the AddStatement method. For more
278 complex flows there is a set of context managers below to help simplify
279 construction.
280
281 - For branches-like statements (e.g. if- and try- statements) use
282 CfgBranchContext. It will take care of the details associated with
283 stitching the different branches to statements in the next statement.
284
285 - For loops, use CfgLoopContext. It will take care of adding back-edges
286 and connecting break statements to any statements that proceed the
287 loop.
288
289 - CfgBlockContext can be used for simple cases where you just want to
290 track the beginning and end of a sequence of statements.
291
292 Statements can carry annotations called facts, which are used as inputs to
293 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
294 To annotate a statement, use the AddFact method with any object that
295 implements the Fact interface.
296
297 See the unit tests in pass_state_test.py and the mycpp phase in
298 control_flow_pass.py for detailed examples of usage.
299 """
300
301 def __init__(self) -> None:
302 self.statement_counter: int = 0
303 self.edges: set[tuple[int, int]] = set({})
304 self.block_stack: list[int] = []
305 self.predecessors: set[int] = set({})
306 self.deadends: set[int] = set({})
307
308 # order doesn't actually matter here, but sets require elements to be
309 # hashable
310 self.facts: dict[int, list[Fact]] = defaultdict(list)
311
312 def AddEdge(self, pred: int, succ: int) -> None:
313 """
314 Add a directed edge from pred to succ. If pred is a deadend, its
315 non-deadends will be used instead.
316 """
317 if pred in self.deadends:
318 for w in [u for (u, v) in self.edges if v == pred]:
319 self.AddEdge(w, succ)
320 else:
321 self.edges.add((pred, succ))
322
323 def AddDeadend(self, statement: int):
324 """
325 Mark a statement as a dead-end (e.g. return or continue).
326 """
327 self.deadends.add(statement)
328
329 def AddStatement(self) -> int:
330 """
331 Add a new statement and return its ID.
332 """
333 if len(self.predecessors) == 0:
334 if len(self.block_stack):
335 self.predecessors.add(self.block_stack[-1])
336 else:
337 self.predecessors.add(self.statement_counter)
338
339 self.statement_counter += 1
340 for pred in self.predecessors:
341 self.AddEdge(pred, self.statement_counter)
342
343 self.predecessors = set({})
344
345 if len(self.block_stack):
346 self.block_stack[-1] = self.statement_counter
347
348 return self.statement_counter
349
350 def AddFact(self, statement: int, fact: Fact) -> None:
351 """
352 Annotate a statement with a fact.
353 """
354 self.facts[statement].append(fact)
355
356 def _PushBlock(self, begin: Optional[int] = None) -> int:
357 """
358 Start a block at the given statement ID. If a beginning statement isn't
359 provided one will be created and its ID will be returend.
360
361 Direct use of this function is discouraged. Consider using one of the
362 block context managers below instead.
363 """
364 if begin is None:
365 begin = self.AddStatement()
366 else:
367 self.predecessors.add(begin)
368
369 self.block_stack.append(begin)
370 return begin
371
372 def _PopBlock(self) -> int:
373 """
374 Pop a block from the top of the stack and return the ID of the block's
375 last statement.
376
377 Direct use of this function is discouraged. Consider using one of the
378 block context managers below instead.
379 """
380 assert len(self.block_stack)
381 last = self.block_stack.pop()
382 if len(self.block_stack) and last not in self.deadends:
383 self.block_stack[-1] = last
384
385 return last
386
387
388class CfgBlockContext(object):
389 """
390 Context manager to make dealing with things like with-statements easier.
391 """
392
393 def __init__(self,
394 cfg: ControlFlowGraph,
395 begin: Optional[int] = None) -> None:
396 self.cfg = cfg
397 if cfg is None:
398 return
399
400 self.entry = self.cfg._PushBlock(begin)
401 self.exit = self.entry
402
403 def __enter__(self) -> None:
404 return self if self.cfg else None
405
406 def __exit__(self, *args) -> None:
407 if not self.cfg:
408 return
409
410 self.exit = self.cfg._PopBlock()
411
412
413class CfgBranchContext(object):
414 """
415 Context manager to make dealing with if-else blocks easier.
416 """
417
418 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
419 self.cfg = cfg
420 self.entry = branch_point
421 self.exit = self.entry
422 if cfg is None:
423 return
424
425 self.arms = []
426 self.pushed = False
427
428 def AddBranch(self, entry: Optional[int] = None):
429 if not self.cfg:
430 return CfgBranchContext(None, None)
431
432 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
433 self.cfg._PushBlock(self.arms[-1].entry)
434 self.arms[-1].pushed = True
435 return self.arms[-1]
436
437 def __enter__(self) -> None:
438 return self
439
440 def __exit__(self, *args) -> None:
441 if not self.cfg:
442 return
443
444 if self.pushed:
445 self.exit = self.cfg._PopBlock()
446
447 for arm in self.arms:
448 if arm.exit not in self.cfg.deadends:
449 self.cfg.predecessors.add(arm.exit)
450
451
452class CfgLoopContext(object):
453 """
454 Context manager to make dealing with loops easier.
455 """
456
457 def __init__(self,
458 cfg: ControlFlowGraph,
459 entry: Optional[int] = None) -> None:
460 self.cfg = cfg
461 self.breaks = set({})
462 if cfg is None:
463 return
464
465 self.entry = self.cfg._PushBlock(entry)
466 self.exit = self.entry
467
468 def AddBreak(self, statement: int) -> None:
469 assert self.cfg
470 self.breaks.add(statement)
471 self.cfg.AddDeadend(statement)
472
473 def AddContinue(self, statement: int) -> None:
474 self.cfg.AddEdge(statement, self.entry)
475 self.cfg.AddDeadend(statement)
476
477 def __enter__(self) -> None:
478 return self if self.cfg else None
479
480 def __exit__(self, *args) -> None:
481 if not self.cfg:
482 return
483
484 self.exit = self.cfg._PopBlock()
485 self.cfg.AddEdge(self.exit, self.entry)
486 for pred in self.cfg.predecessors:
487 self.cfg.AddEdge(pred, self.entry)
488
489 # If we had any breaks, arm the predecessor set with the current
490 # statement and the break statements.
491 if len(self.breaks):
492 if len(self.cfg.block_stack):
493 self.cfg.predecessors.add(self.cfg.block_stack[-1])
494 else:
495 self.cfg.predecessors.add(self.cfg.statement_counter)
496
497 for b in self.breaks:
498 self.cfg.deadends.remove(b)
499 self.cfg.predecessors.add(b)
500
501
502class StackRoots(object):
503 """
504 Output of the souffle stack roots solver.
505 """
506
507 def __init__(self, tuples: set[tuple[SymbolPath, SymbolPath]]) -> None:
508 self.root_tuples = tuples
509
510 def needs_root(self, func: SymbolPath, reference: SymbolPath) -> bool:
511 """
512 Returns true if the given reference should have a stack root.
513 """
514 return (func, reference) in self.root_tuples
515
516
517def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
518 facts_dir='_tmp/mycpp-facts') -> None:
519 """
520 Dump the given control flow graphs and associated facts into the given
521 directory as text files that can be consumed by datalog.
522 """
523 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
524 fact_files = {}
525 os.makedirs(facts_dir, exist_ok=True)
526 with open(edge_facts, 'w') as cfg_f:
527 for func, cfg in sorted(cfgs.items()):
528 joined = join_name(func, delim='.')
529 for (u, v) in sorted(cfg.edges):
530 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
531
532 for statement, facts in sorted(cfg.facts.items()):
533 for fact in facts: # already sorted temporally
534 fact_f = fact_files.get(fact.name())
535 if not fact_f:
536 fact_f = open(
537 '{}/{}.facts'.format(facts_dir, fact.name()), 'w')
538 fact_files[fact.name()] = fact_f
539
540 fact_f.write(fact.Generate(joined, statement))
541
542 for f in fact_files.values():
543 f.close()
544
545
546def ComputeStackRoots(cfgs: dict[str, ControlFlowGraph],
547 facts_dir: str = '_tmp/mycpp-facts',
548 souffle_output_dir: str = '_tmp') -> StackRoots:
549 """
550 Run the the souffle stack roots solver and translate its output in a format
551 that can be queried by cppgen_pass.
552 """
553 DumpControlFlowGraphs(cfgs, facts_dir=facts_dir)
554 subprocess.check_call([
555 '_bin/datalog/dataflow',
556 '-F',
557 facts_dir,
558 '-D',
559 souffle_output_dir,
560 ])
561
562 tuples: set[tuple[SymbolPath, SymbolPath]] = set({})
563 with open('{}/stack_root_vars.tsv'.format(souffle_output_dir),
564 'r') as roots_f:
565 pat = re.compile(r'\$(.*)\((.*), (.*)\)')
566 for line in roots_f:
567 function, ref = line.split('\t')
568 m = pat.match(ref)
569 assert m.group(1) in ('LocalVariable', 'ObjectMember')
570 if m.group(1) == 'LocalVariable':
571 _, ref_func, var_name = m.groups()
572 assert ref_func == function
573 tuples.add((split_py_name(function), (var_name, )))
574
575 if m.group(1) == 'ObjectMember':
576 _, base_obj, member_name = m.groups()
577 tuples.add((split_py_name(function),
578 split_py_name(base_obj) + (member_name, )))
579
580 return StackRoots(tuples)