OILS / mycpp / pass_state.py View on Github | oilshell.org

505 lines, 262 significant
1"""
2pass_state.py
3"""
4from __future__ import print_function
5
6import os
7from collections import defaultdict
8
9from mypy.types import Type
10from mypy.nodes import Expression
11
12from mycpp.util import join_name, log, SymbolPath
13
14from typing import Optional
15
16_ = log
17
18
19class ModuleMember(object):
20 """
21 A member of a Python module.
22
23 e.g. core.state.Mem => core::state::Mem
24 """
25
26 def __init__(self, module_path: SymbolPath, member: str) -> None:
27 self.module_path = module_path
28 self.member = member
29
30
31class StaticObjectMember(object):
32 """
33 A static member of an object. Usually a a method like an alternative constructor.
34
35 e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
36 """
37
38 def __init__(self, base_type_name: SymbolPath, member: str) -> None:
39 self.base_type_name = base_type_name
40 self.member = member
41
42
43class HeapObjectMember(object):
44 """
45 A member of a heap-allocated object.
46
47 e.g foo.empty() => foo->empty()
48 """
49
50 def __init__(self, object_expr: Expression, object_type: Type,
51 member: str) -> None:
52 self.object_expr = object_expr
53 self.object_type = object_type
54 self.member = member
55
56
57class StackObjectMember(object):
58 """
59 A member of a stack-allocated object.
60
61 e.g foo.empty() => foo.empty()
62 """
63
64 def __init__(self, object_expr: Expression, object_type: Type,
65 member: str) -> None:
66 self.ojbect_expr = object_expr
67 self.object_type = object_type
68 self.member = member
69
70
71class Virtual(object):
72 """Calculate which C++ methods need the virtual keyword.
73
74 See unit test for example usage.
75 """
76
77 def __init__(self) -> None:
78 self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
79 self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
80 self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath,
81 str]]] = {}
82 self.has_vtable: dict[SymbolPath, bool] = {}
83 self.can_reorder_fields: dict[SymbolPath, bool] = {}
84
85 # _Executor -> vm::_Executor
86 self.base_class_unique: dict[str, SymbolPath] = {}
87
88 # These are called on the Forward Declare pass
89 def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
90 #log('OnMethod %s %s', class_name, method_name)
91
92 # __init__ and so forth don't count
93 if method_name.startswith('__') and method_name.endswith('__'):
94 return
95
96 self.methods[class_name].append(method_name)
97
98 def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
99 if len(base_class) > 1:
100 # Hack for
101 #
102 # class _Executor: pass
103 # versus
104 # class MyExecutor(vm._Executor): pass
105 base_key = base_class[-1]
106
107 # Fail if we have two base classes in different namespaces with the same
108 # name.
109 if base_key in self.base_class_unique:
110 # Make sure we don't have collisions
111 assert (self.base_class_unique[base_key] == base_class or
112 base_class
113 in self.subclasses[self.base_class_unique[base_key]]
114 ), base_class
115 else:
116 self.base_class_unique[base_key] = base_class
117
118 else:
119 base_key = base_class
120
121 self.subclasses[base_class].append(subclass)
122
123 def Calculate(self) -> None:
124 """Call this after the forward declare pass."""
125 for base_class, subclasses in self.subclasses.items():
126 self.can_reorder_fields[base_class] = False
127
128 for subclass in subclasses:
129 self.can_reorder_fields[subclass] = False
130
131 b_methods = self.methods[base_class]
132 s_methods = self.methods[subclass]
133 overlapping = set(b_methods) & set(s_methods)
134 for method in overlapping:
135 self.virtuals[(base_class, method)] = None
136 self.virtuals[(subclass, method)] = (base_class, method)
137 if overlapping:
138 self.has_vtable[base_class] = True
139 self.has_vtable[subclass] = True
140
141 # These is called on the Decl pass
142 def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
143 return (class_name, method_name) in self.virtuals
144
145 def HasVTable(self, class_name: SymbolPath) -> bool:
146 return class_name in self.has_vtable
147
148 def CanReorderFields(self, class_name: SymbolPath) -> bool:
149 if class_name in self.can_reorder_fields:
150 return self.can_reorder_fields[class_name]
151 else:
152 return True # by default they can be reordered
153
154
155def SymbolPathToReference(func: str, p: SymbolPath) -> str:
156 if len(p) > 1:
157 return '$ObjectMember({}, {})'.format(join_name(p[:-1], delim='.'), p[-1])
158
159 return '$LocalVariable({}, {})'.format(func, p[0])
160
161
162class Fact(object):
163 """
164 An abstract fact. These can be used to build up datalog programs.
165 """
166
167 def __init__(self) -> None:
168 pass
169
170 def name(self) -> str:
171 raise NotImplementedError()
172
173 def Generate(self, func: str, statement: int) -> str:
174 raise NotImplementedError()
175
176
177class FunctionCall(Fact):
178
179 def __init__(self, callee: str) -> None:
180 self.callee = callee
181
182 def name(self) -> str:
183 return 'call'
184
185 def Generate(self, func: str, statement: int) -> str:
186 return '{}\t{}\t{}\n'.format(func, statement, self.callee)
187
188
189class Definition(Fact):
190 """
191 The definition of a variable. This corresponds to an allocation.
192 """
193
194 def __init__(self, ref: SymbolPath, obj: str) -> None:
195 self.ref = ref
196 self.obj = obj
197
198 def name(self) -> str:
199 return 'assign'
200
201 def Generate(self, func: str, statement: int) -> str:
202 return '{}\t{}\t{}\t{}\n'.format(func, statement,
203 SymbolPathToReference(func, self.ref),
204 self.obj)
205
206
207class Assignment(Fact):
208 """
209 The assignment of one variable or object member to another.
210 """
211
212 def __init__(self, lhs: SymbolPath, rhs: SymbolPath) -> None:
213 self.lhs = lhs
214 self.rhs = rhs
215
216 def name(self) -> str:
217 return 'assign'
218
219 def Generate(self, func: str, statement: int) -> str:
220 return '{}\t{}\t{}\t$Ref({})\n'.format(func, statement,
221 SymbolPathToReference(func, self.lhs),
222 SymbolPathToReference(func, self.rhs))
223
224
225class Use(Fact):
226 """
227 The use of a reference.
228 """
229
230 def __init__(self, ref: SymbolPath) -> None:
231 self.ref = ref
232
233 def name(self) -> str:
234 return 'use'
235
236 def Generate(self, func: str, statement: int) -> str:
237 return '{}\t{}\t{}\n'.format(func, statement,
238 SymbolPathToReference(func, self.ref))
239
240
241class ControlFlowGraph(object):
242 """
243 A simple control-flow graph.
244
245 Every statement in the program is represented as a node in a graph with
246 unique a numeric ID. Control flow is represented as directed edges through
247 the graph. Loops can introduce back-edges. Every node in the graph will
248 satisfy at least one of the following conditions:
249
250 - Its indegree is at least one.
251
252 - Its outdegree is at least one.
253
254 For simple linear graphs all you need is the AddStatement method. For more
255 complex flows there is a set of context managers below to help simplify
256 construction.
257
258 - For branches-like statements (e.g. if- and try- statements) use
259 CfgBranchContext. It will take care of the details associated with
260 stitching the different branches to statements in the next statement.
261
262 - For loops, use CfgLoopContext. It will take care of adding back-edges
263 and connecting break statements to any statements that proceed the
264 loop.
265
266 - CfgBlockContext can be used for simple cases where you just want to
267 track the beginning and end of a sequence of statements.
268
269 Statements can carry annotations called facts, which are used as inputs to
270 datalog programs to perform dataflow diffrent kinds of dataflow analyses.
271 To annotate a statement, use the AddFact method with any object that
272 implements the Fact interface.
273
274 See the unit tests in pass_state_test.py and the mycpp phase in
275 control_flow_pass.py for detailed examples of usage.
276 """
277
278 def __init__(self) -> None:
279 self.statement_counter: int = 0
280 self.edges: set[tuple[int, int]] = set({})
281 self.block_stack: list[int] = []
282 self.predecessors: set[int] = set({})
283 self.deadends: set[int] = set({})
284
285 # order doesn't actually matter here, but sets require elements to be
286 # hashable
287 self.facts: dict[int, list[Fact]] = defaultdict(list)
288
289 def AddEdge(self, pred: int, succ: int) -> None:
290 """
291 Add a directed edge from pred to succ. If pred is a deadend, its
292 non-deadends will be used instead.
293 """
294 if pred in self.deadends:
295 for w in [u for (u, v) in self.edges if v == pred]:
296 self.AddEdge(w, succ)
297 else:
298 self.edges.add((pred, succ))
299
300 def AddDeadend(self, statement: int):
301 """
302 Mark a statement as a dead-end (e.g. return or continue).
303 """
304 self.deadends.add(statement)
305
306 def AddStatement(self) -> int:
307 """
308 Add a new statement and return its ID.
309 """
310 if len(self.predecessors) == 0:
311 if len(self.block_stack):
312 self.predecessors.add(self.block_stack[-1])
313 else:
314 self.predecessors.add(self.statement_counter)
315
316 self.statement_counter += 1
317 for pred in self.predecessors:
318 self.AddEdge(pred, self.statement_counter)
319
320 self.predecessors = set({})
321
322 if len(self.block_stack):
323 self.block_stack[-1] = self.statement_counter
324
325 return self.statement_counter
326
327 def AddFact(self, statement: int, fact: Fact) -> None:
328 """
329 Annotate a statement with a fact.
330 """
331 self.facts[statement].append(fact)
332
333 def _PushBlock(self, begin: Optional[int] = None) -> int:
334 """
335 Start a block at the given statement ID. If a beginning statement isn't
336 provided one will be created and its ID will be returend.
337
338 Direct use of this function is discouraged. Consider using one of the
339 block context managers below instead.
340 """
341 if begin is None:
342 begin = self.AddStatement()
343 else:
344 self.predecessors.add(begin)
345
346 self.block_stack.append(begin)
347 return begin
348
349 def _PopBlock(self) -> int:
350 """
351 Pop a block from the top of the stack and return the ID of the block's
352 last statement.
353
354 Direct use of this function is discouraged. Consider using one of the
355 block context managers below instead.
356 """
357 assert len(self.block_stack)
358 last = self.block_stack.pop()
359 if len(self.block_stack) and last not in self.deadends:
360 self.block_stack[-1] = last
361
362 return last
363
364
365class CfgBlockContext(object):
366 """
367 Context manager to make dealing with things like with-statements easier.
368 """
369
370 def __init__(self,
371 cfg: ControlFlowGraph,
372 begin: Optional[int] = None) -> None:
373 self.cfg = cfg
374 if cfg is None:
375 return
376
377 self.entry = self.cfg._PushBlock(begin)
378 self.exit = self.entry
379
380 def __enter__(self) -> None:
381 return self if self.cfg else None
382
383 def __exit__(self, *args) -> None:
384 if not self.cfg:
385 return
386
387 self.exit = self.cfg._PopBlock()
388
389
390class CfgBranchContext(object):
391 """
392 Context manager to make dealing with if-else blocks easier.
393 """
394
395 def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
396 self.cfg = cfg
397 self.entry = branch_point
398 self.exit = self.entry
399 if cfg is None:
400 return
401
402 self.arms = []
403 self.pushed = False
404
405 def AddBranch(self, entry: Optional[int] = None):
406 if not self.cfg:
407 return CfgBranchContext(None, None)
408
409 self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
410 self.cfg._PushBlock(self.arms[-1].entry)
411 self.arms[-1].pushed = True
412 return self.arms[-1]
413
414 def __enter__(self) -> None:
415 return self
416
417 def __exit__(self, *args) -> None:
418 if not self.cfg:
419 return
420
421 if self.pushed:
422 self.exit = self.cfg._PopBlock()
423
424 for arm in self.arms:
425 if arm.exit not in self.cfg.deadends:
426 self.cfg.predecessors.add(arm.exit)
427
428
429class CfgLoopContext(object):
430 """
431 Context manager to make dealing with loops easier.
432 """
433
434 def __init__(self,
435 cfg: ControlFlowGraph,
436 entry: Optional[int] = None) -> None:
437 self.cfg = cfg
438 self.breaks = set({})
439 if cfg is None:
440 return
441
442 self.entry = self.cfg._PushBlock(entry)
443 self.exit = self.entry
444
445 def AddBreak(self, statement: int) -> None:
446 assert self.cfg
447 self.breaks.add(statement)
448 self.cfg.AddDeadend(statement)
449
450 def AddContinue(self, statement: int) -> None:
451 self.cfg.AddEdge(statement, self.entry)
452 self.cfg.AddDeadend(statement)
453
454 def __enter__(self) -> None:
455 return self if self.cfg else None
456
457 def __exit__(self, *args) -> None:
458 if not self.cfg:
459 return
460
461 self.exit = self.cfg._PopBlock()
462 self.cfg.AddEdge(self.exit, self.entry)
463 for pred in self.cfg.predecessors:
464 self.cfg.AddEdge(pred, self.entry)
465
466 # If we had any breaks, arm the predecessor set with the current
467 # statement and the break statements.
468 if len(self.breaks):
469 if len(self.cfg.block_stack):
470 self.cfg.predecessors.add(self.cfg.block_stack[-1])
471 else:
472 self.cfg.predecessors.add(self.cfg.statement_counter)
473
474 for b in self.breaks:
475 self.cfg.deadends.remove(b)
476 self.cfg.predecessors.add(b)
477
478
479def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph],
480 facts_dir='_tmp/mycpp-facts') -> None:
481 """
482 Dump the given control flow graphs and associated facts into the given
483 directory as text files that can be consumed by datalog.
484 """
485 edge_facts = '{}/cf_edge.facts'.format(facts_dir)
486 fact_files = {}
487 os.makedirs(facts_dir, exist_ok=True)
488 with open(edge_facts, 'w') as cfg_f:
489 for func, cfg in sorted(cfgs.items()):
490 joined = join_name(func, delim='.')
491 for (u, v) in sorted(cfg.edges):
492 cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
493
494 for statement, facts in sorted(cfg.facts.items()):
495 for fact in facts: # already sorted temporally
496 fact_f = fact_files.get(fact.name())
497 if not fact_f:
498 fact_f = open(
499 '{}/{}.facts'.format(facts_dir, fact.name()), 'w')
500 fact_files[fact.name()] = fact_f
501
502 fact_f.write(fact.Generate(joined, statement))
503
504 for f in fact_files.values():
505 f.close()