| 1 | """
|
| 2 | pass_state.py
|
| 3 | """
|
| 4 | from __future__ import print_function
|
| 5 |
|
| 6 | import os
|
| 7 | from collections import defaultdict
|
| 8 |
|
| 9 | from mycpp.util import join_name, log, SymbolPath
|
| 10 |
|
| 11 | from typing import Optional
|
| 12 |
|
| 13 | _ = log
|
| 14 |
|
| 15 |
|
| 16 | class Virtual(object):
|
| 17 | """
|
| 18 | See unit test for example usage.
|
| 19 | """
|
| 20 |
|
| 21 | def __init__(self) -> None:
|
| 22 | self.methods: dict[SymbolPath, list[str]] = defaultdict(list)
|
| 23 | self.subclasses: dict[SymbolPath, list[tuple[str]]] = defaultdict(list)
|
| 24 | self.virtuals: dict[tuple[SymbolPath, str], Optional[tuple[SymbolPath, str]]] = {}
|
| 25 | self.has_vtable: dict[SymbolPath, bool] = {}
|
| 26 | self.can_reorder_fields: dict[SymbolPath, bool] = {}
|
| 27 |
|
| 28 | # _Executor -> vm::_Executor
|
| 29 | self.base_class_unique: dict[str, SymbolPath] = {}
|
| 30 |
|
| 31 | # These are called on the Forward Declare pass
|
| 32 | def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
|
| 33 | #log('OnMethod %s %s', class_name, method_name)
|
| 34 |
|
| 35 | # __init__ and so forth don't count
|
| 36 | if method_name.startswith('__') and method_name.endswith('__'):
|
| 37 | return
|
| 38 |
|
| 39 | self.methods[class_name].append(method_name)
|
| 40 |
|
| 41 | def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
|
| 42 | if len(base_class) > 1:
|
| 43 | # Hack for
|
| 44 | #
|
| 45 | # class _Executor: pass
|
| 46 | # versus
|
| 47 | # class MyExecutor(vm._Executor): pass
|
| 48 | base_key = base_class[-1]
|
| 49 |
|
| 50 | # Fail if we have two base classes in different namespaces with the same
|
| 51 | # name.
|
| 52 | if base_key in self.base_class_unique:
|
| 53 | # Make sure we don't have collisions
|
| 54 | assert self.base_class_unique[base_key] == base_class or base_class in self.subclasses[self.base_class_unique[base_key]], base_class
|
| 55 | else:
|
| 56 | self.base_class_unique[base_key] = base_class
|
| 57 |
|
| 58 | else:
|
| 59 | base_key = base_class
|
| 60 |
|
| 61 | self.subclasses[base_class].append(subclass)
|
| 62 |
|
| 63 | def Calculate(self) -> None:
|
| 64 | """
|
| 65 | Call this after the forward declare pass.
|
| 66 |
|
| 67 | TODO: Are there bugs based on conflicting class names?
|
| 68 | """
|
| 69 | for base_class, subclasses in self.subclasses.items():
|
| 70 | self.can_reorder_fields[base_class] = False
|
| 71 |
|
| 72 | for subclass in subclasses:
|
| 73 | self.can_reorder_fields[subclass] = False
|
| 74 |
|
| 75 | b_methods = self.methods[base_class]
|
| 76 | s_methods = self.methods[subclass]
|
| 77 | overlapping = set(b_methods) & set(s_methods)
|
| 78 | for method in overlapping:
|
| 79 | self.virtuals[(base_class, method)] = None
|
| 80 | self.virtuals[(subclass, method)] = (base_class, method)
|
| 81 | if overlapping:
|
| 82 | self.has_vtable[base_class] = True
|
| 83 | self.has_vtable[subclass] = True
|
| 84 |
|
| 85 | # These is called on the Decl pass
|
| 86 | def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
|
| 87 | return (class_name, method_name) in self.virtuals
|
| 88 |
|
| 89 | def HasVTable(self, class_name: SymbolPath) -> bool:
|
| 90 | return class_name in self.has_vtable
|
| 91 |
|
| 92 | def CanReorderFields(self, class_name: SymbolPath) -> bool:
|
| 93 | if class_name in self.can_reorder_fields:
|
| 94 | return self.can_reorder_fields[class_name]
|
| 95 | else:
|
| 96 | return True # by default they can be reordered
|
| 97 |
|
| 98 |
|
| 99 | class Fact(object):
|
| 100 | """
|
| 101 | An abstract fact. These can be used to build up datalog programs.
|
| 102 | """
|
| 103 | def __init__(self) -> None:
|
| 104 | pass
|
| 105 |
|
| 106 | def name(self) -> str:
|
| 107 | raise NotImplementedError()
|
| 108 |
|
| 109 | def Generate(self, func: str, statement: int) -> str:
|
| 110 | raise NotImplementedError()
|
| 111 |
|
| 112 |
|
| 113 | class ControlFlowGraph(object):
|
| 114 | """
|
| 115 | A simple control-flow graph.
|
| 116 |
|
| 117 | Every statement in the program is represented as a node in a graph with
|
| 118 | unique a numeric ID. Control flow is represented as directed edges through
|
| 119 | the graph. Loops can introduce back-edges. Every node in the graph will
|
| 120 | satisfy at least one of the following conditions:
|
| 121 |
|
| 122 | - Its indegree is at least one.
|
| 123 |
|
| 124 | - Its outdegree is at least one.
|
| 125 |
|
| 126 | For simple linear graphs all you need is the AddStatement method. For more
|
| 127 | complex flows there is a set of context managers below to help simplify
|
| 128 | construction.
|
| 129 |
|
| 130 | - For branches-like statements (e.g. if- and try- statements) use
|
| 131 | CfgBranchContext. It will take care of the details associated with
|
| 132 | stitching the different branches to statements in the next statement.
|
| 133 |
|
| 134 | - For loops, use CfgLoopContext. It will take care of adding back-edges
|
| 135 | and connecting break statements to any statements that proceed the
|
| 136 | loop.
|
| 137 |
|
| 138 | - CfgBlockContext can be used for simple cases where you just want to
|
| 139 | track the beginning and end of a sequence of statements.
|
| 140 |
|
| 141 | Statements can carry annotations called facts, which are used as inputs to
|
| 142 | datalog programs to perform dataflow diffrent kinds of dataflow analyses.
|
| 143 | To annotate a statement, use the AddFact method with any object that
|
| 144 | implements the Fact interface.
|
| 145 |
|
| 146 | See the unit tests in pass_state_test.py and the mycpp phase in
|
| 147 | control_flow_pass.py for detailed examples of usage.
|
| 148 | """
|
| 149 |
|
| 150 | def __init__(self) -> None:
|
| 151 | self.statement_counter: int = 0
|
| 152 | self.edges: set[tuple[int, int]] = set({})
|
| 153 | self.block_stack: list[int] = []
|
| 154 | self.predecessors: set[int] = set({})
|
| 155 | self.deadends: set[int] = set({})
|
| 156 |
|
| 157 | # order doesn't actually matter here, but sets require elements to be
|
| 158 | # hashable
|
| 159 | self.facts: dict[int, list[Fact]] = defaultdict(list)
|
| 160 |
|
| 161 | def AddEdge(self, pred: int, succ: int) -> None:
|
| 162 | """
|
| 163 | Add a directed edge from pred to succ. If pred is a deadend, its
|
| 164 | non-deadends will be used instead.
|
| 165 | """
|
| 166 | if pred in self.deadends:
|
| 167 | for w in [u for (u, v) in self.edges if v == pred]:
|
| 168 | self.AddEdge(w, succ)
|
| 169 | else:
|
| 170 | self.edges.add((pred, succ))
|
| 171 |
|
| 172 | def AddDeadend(self, statement: int):
|
| 173 | """
|
| 174 | Mark a statement as a dead-end (e.g. return or continue).
|
| 175 | """
|
| 176 | self.deadends.add(statement)
|
| 177 |
|
| 178 | def AddStatement(self) -> int:
|
| 179 | """
|
| 180 | Add a new statement and return its ID.
|
| 181 | """
|
| 182 | if len(self.predecessors) == 0:
|
| 183 | if len(self.block_stack):
|
| 184 | self.predecessors.add(self.block_stack[-1])
|
| 185 | else:
|
| 186 | self.predecessors.add(self.statement_counter)
|
| 187 |
|
| 188 | self.statement_counter += 1
|
| 189 | for pred in self.predecessors:
|
| 190 | self.AddEdge(pred, self.statement_counter)
|
| 191 |
|
| 192 | self.predecessors = set({})
|
| 193 |
|
| 194 | if len(self.block_stack):
|
| 195 | self.block_stack[-1] = self.statement_counter
|
| 196 |
|
| 197 | return self.statement_counter
|
| 198 |
|
| 199 | def AddFact(self, statement: int, fact: Fact) -> None:
|
| 200 | """
|
| 201 | Annotate a statement with a fact.
|
| 202 | """
|
| 203 | self.facts[statement].append(fact)
|
| 204 |
|
| 205 | def _PushBlock(self, begin: Optional[int] = None) -> int:
|
| 206 | """
|
| 207 | Start a block at the given statement ID. If a beginning statement isn't
|
| 208 | provided one will be created and its ID will be returend.
|
| 209 |
|
| 210 | Direct use of this function is discouraged. Consider using one of the
|
| 211 | block context managers below instead.
|
| 212 | """
|
| 213 | if begin is None:
|
| 214 | begin = self.AddStatement()
|
| 215 | else:
|
| 216 | self.predecessors.add(begin)
|
| 217 |
|
| 218 | self.block_stack.append(begin)
|
| 219 | return begin
|
| 220 |
|
| 221 | def _PopBlock(self) -> int:
|
| 222 | """
|
| 223 | Pop a block from the top of the stack and return the ID of the block's
|
| 224 | last statement.
|
| 225 |
|
| 226 | Direct use of this function is discouraged. Consider using one of the
|
| 227 | block context managers below instead.
|
| 228 | """
|
| 229 | assert len(self.block_stack)
|
| 230 | last = self.block_stack.pop()
|
| 231 | if len(self.block_stack) and last not in self.deadends:
|
| 232 | self.block_stack[-1] = last
|
| 233 |
|
| 234 | return last
|
| 235 |
|
| 236 |
|
| 237 | class CfgBlockContext(object):
|
| 238 | """
|
| 239 | Context manager to make dealing with things like with-statements easier.
|
| 240 | """
|
| 241 | def __init__(self, cfg: ControlFlowGraph, begin: Optional[int] = None) -> None:
|
| 242 | self.cfg = cfg
|
| 243 | if cfg is None:
|
| 244 | return
|
| 245 |
|
| 246 | self.entry = self.cfg._PushBlock(begin)
|
| 247 | self.exit = self.entry
|
| 248 |
|
| 249 | def __enter__(self) -> None:
|
| 250 | return self if self.cfg else None
|
| 251 |
|
| 252 | def __exit__(self, *args) -> None:
|
| 253 | if not self.cfg:
|
| 254 | return
|
| 255 |
|
| 256 | self.exit = self.cfg._PopBlock()
|
| 257 |
|
| 258 |
|
| 259 | class CfgBranchContext(object):
|
| 260 | """
|
| 261 | Context manager to make dealing with if-else blocks easier.
|
| 262 | """
|
| 263 | def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
|
| 264 | self.cfg = cfg
|
| 265 | self.entry = branch_point
|
| 266 | self.exit = self.entry
|
| 267 | if cfg is None:
|
| 268 | return
|
| 269 |
|
| 270 | self.arms = []
|
| 271 | self.pushed = False
|
| 272 |
|
| 273 | def AddBranch(self, entry: Optional[int] = None):
|
| 274 | if not self.cfg:
|
| 275 | return CfgBranchContext(None, None)
|
| 276 |
|
| 277 | self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
|
| 278 | self.cfg._PushBlock(self.arms[-1].entry)
|
| 279 | self.arms[-1].pushed = True
|
| 280 | return self.arms[-1]
|
| 281 |
|
| 282 | def __enter__(self) -> None:
|
| 283 | return self
|
| 284 |
|
| 285 | def __exit__(self, *args) -> None:
|
| 286 | if not self.cfg:
|
| 287 | return
|
| 288 |
|
| 289 | if self.pushed:
|
| 290 | self.exit = self.cfg._PopBlock()
|
| 291 |
|
| 292 | for arm in self.arms:
|
| 293 | if arm.exit not in self.cfg.deadends:
|
| 294 | self.cfg.predecessors.add(arm.exit)
|
| 295 |
|
| 296 |
|
| 297 |
|
| 298 | class CfgLoopContext(object):
|
| 299 | """
|
| 300 | Context manager to make dealing with loops easier.
|
| 301 | """
|
| 302 | def __init__(self, cfg: ControlFlowGraph) -> None:
|
| 303 | self.cfg = cfg
|
| 304 | self.breaks = set({})
|
| 305 | if cfg is None:
|
| 306 | return
|
| 307 |
|
| 308 | self.entry = self.cfg._PushBlock()
|
| 309 | self.exit = self.entry
|
| 310 |
|
| 311 | def AddBreak(self, statement: int) -> None:
|
| 312 | assert self.cfg
|
| 313 | self.breaks.add(statement)
|
| 314 | self.cfg.AddDeadend(statement)
|
| 315 |
|
| 316 | def AddContinue(self, statement: int) -> None:
|
| 317 | self.cfg.AddEdge(statement, self.entry)
|
| 318 | self.cfg.AddDeadend(statement)
|
| 319 |
|
| 320 | def __enter__(self) -> None:
|
| 321 | return self if self.cfg else None
|
| 322 |
|
| 323 | def __exit__(self, *args) -> None:
|
| 324 | if not self.cfg:
|
| 325 | return
|
| 326 |
|
| 327 | self.exit = self.cfg._PopBlock()
|
| 328 | self.cfg.AddEdge(self.exit, self.entry)
|
| 329 | for pred in self.cfg.predecessors:
|
| 330 | self.cfg.AddEdge(pred, self.entry)
|
| 331 |
|
| 332 | # If we had any breaks, arm the predecessor set with the current
|
| 333 | # statement and the break statements.
|
| 334 | if len(self.breaks):
|
| 335 | if len(self.cfg.block_stack):
|
| 336 | self.cfg.predecessors.add(self.cfg.block_stack[-1])
|
| 337 | else:
|
| 338 | self.cfg.predecessors.add(self.cfg.statement_counter)
|
| 339 |
|
| 340 | for b in self.breaks:
|
| 341 | self.cfg.deadends.remove(b)
|
| 342 | self.cfg.predecessors.add(b)
|
| 343 |
|
| 344 |
|
| 345 | def DumpControlFlowGraphs(cfgs: dict[str, ControlFlowGraph], facts_dir='_tmp/mycpp-facts') -> None:
|
| 346 | """
|
| 347 | Dump the given control flow graphs and associated facts into the given
|
| 348 | directory as text files that can be consumed by datalog.
|
| 349 | """
|
| 350 | edge_facts = '{}/cf_edge.facts'.format(facts_dir)
|
| 351 | os.makedirs(facts_dir, exist_ok=True)
|
| 352 | with open(edge_facts, 'w') as cfg_f:
|
| 353 | for func, cfg in sorted(cfgs.items()):
|
| 354 | joined = join_name(func, delim='.')
|
| 355 | for (u, v) in sorted(cfg.edges):
|
| 356 | cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
|