| 1 | from __future__ import print_function
|
| 2 | """
|
| 3 | ovm.py
|
| 4 |
|
| 5 | TODO:
|
| 6 |
|
| 7 | Define a VM that doesn't use exceptions for control flow!
|
| 8 | """
|
| 9 |
|
| 10 | import operator # for + - * / etc.
|
| 11 | import os
|
| 12 | import sys
|
| 13 | import repr as repr_lib
|
| 14 |
|
| 15 | from .pyvm2 import debug1
|
| 16 | from ..lib import dis
|
| 17 |
|
| 18 | # Create a repr that won't overflow.
|
| 19 | repr_obj = repr_lib.Repr()
|
| 20 | repr_obj.maxother = 120
|
| 21 | repper = repr_obj.repr
|
| 22 |
|
| 23 | # Different than log
|
| 24 | def debug(msg, *args):
|
| 25 | if not VERBOSE:
|
| 26 | return
|
| 27 |
|
| 28 | debug1(msg, *args)
|
| 29 |
|
| 30 |
|
| 31 | VERBOSE = False
|
| 32 | #VERBOSE = True
|
| 33 |
|
| 34 |
|
| 35 | def run_code(vm, code):
|
| 36 | """Main entry point.
|
| 37 |
|
| 38 | Used by tests and by execfile.
|
| 39 | """
|
| 40 | frame = vm.make_frame(code)
|
| 41 | val = vm.run_frame(frame)
|
| 42 | vm.check_invariants()
|
| 43 | if os.getenv('VM_SUMMARY'):
|
| 44 | debug1('*** OVM executed for %d ticks', vm.num_ticks)
|
| 45 | # If we return the number of ticks here, the unit tests break.
|
| 46 | return val
|
| 47 |
|
| 48 |
|
| 49 | def run_code_object(code, args, package=None):
|
| 50 | sys.argv = args
|
| 51 | vm = VirtualMachine()
|
| 52 | run_code(vm, code)
|
| 53 |
|
| 54 |
|
| 55 | # Note: handler is a jump target!
|
| 56 | import collections
|
| 57 | Block = collections.namedtuple("Block", "type, handler, level")
|
| 58 |
|
| 59 |
|
| 60 | class Frame(object):
|
| 61 | def __init__(self, f_code, callargs):
|
| 62 | self.f_code = f_code
|
| 63 | self.f_locals = dict(callargs) # Do we need to make a copy?
|
| 64 | self.stack = [] # expression stack
|
| 65 | self.f_lineno = f_code.co_firstlineno
|
| 66 | self.f_lasti = 0
|
| 67 |
|
| 68 | self.block_stack = []
|
| 69 | self.generator = None
|
| 70 |
|
| 71 | def __repr__(self): # pragma: no cover
|
| 72 | return '<Frame at 0x%08x: %r @ %d>' % (
|
| 73 | id(self), self.f_code.co_filename, self.f_lineno
|
| 74 | )
|
| 75 |
|
| 76 | def top(self):
|
| 77 | """Return the value at the top of the stack, with no changes."""
|
| 78 | return self.stack[-1]
|
| 79 |
|
| 80 | def pop(self, i=0):
|
| 81 | """Pop a value from the stack.
|
| 82 |
|
| 83 | Default to the top of the stack, but `i` can be a count from the top
|
| 84 | instead.
|
| 85 |
|
| 86 | """
|
| 87 | return self.stack.pop(-1-i)
|
| 88 |
|
| 89 | def push(self, *vals):
|
| 90 | """Push values onto the value stack."""
|
| 91 | self.stack.extend(vals)
|
| 92 |
|
| 93 | def popn(self, n):
|
| 94 | """Pop a number of values from the value stack.
|
| 95 |
|
| 96 | A list of `n` values is returned, the deepest value first.
|
| 97 | """
|
| 98 | if n:
|
| 99 | ret = self.stack[-n:]
|
| 100 | del self.stack[-n:]
|
| 101 | return ret
|
| 102 | else:
|
| 103 | return []
|
| 104 |
|
| 105 | def peek(self, n):
|
| 106 | """Get a value `n` entries down in the stack, without changing the stack."""
|
| 107 | return self.stack[-n]
|
| 108 |
|
| 109 | def jump(self, offset):
|
| 110 | """Move the bytecode pointer to `offset`, so it will execute next."""
|
| 111 | self.f_lasti = offset
|
| 112 |
|
| 113 | def push_block(self, type, handler=None, level=None):
|
| 114 | """Used for SETUP_{LOOP,EXCEPT,FINALLY,WITH}."""
|
| 115 | if level is None:
|
| 116 | level = len(self.stack)
|
| 117 | self.block_stack.append(Block(type, handler, level))
|
| 118 |
|
| 119 | def pop_block(self):
|
| 120 | return self.block_stack.pop()
|
| 121 |
|
| 122 | def _unwind_block(self, block, vm):
|
| 123 | """
|
| 124 | Args:
|
| 125 | vm: VirtualMachineError to possibly mutate
|
| 126 | """
|
| 127 | while len(self.stack) > block.level:
|
| 128 | self.pop()
|
| 129 |
|
| 130 | def handle_block_stack(self, why, vm):
|
| 131 | """
|
| 132 | After every bytecode that returns why != None, handle everything on the
|
| 133 | block stack.
|
| 134 |
|
| 135 | The block stack and data stack are shuffled for looping, exception
|
| 136 | handling, or returning.
|
| 137 | """
|
| 138 | assert why != 'yield'
|
| 139 |
|
| 140 | block = self.block_stack[-1]
|
| 141 | assert block.type == 'loop', block.type
|
| 142 |
|
| 143 | if why == 'continue':
|
| 144 | self.jump(vm.return_value) # why is it left in the return value?
|
| 145 | return None
|
| 146 |
|
| 147 | self.pop_block()
|
| 148 | self._unwind_block(block, vm)
|
| 149 |
|
| 150 | if why == 'break':
|
| 151 | self.jump(block.handler)
|
| 152 | return None
|
| 153 |
|
| 154 | raise AssertionError('why = %r' % why)
|
| 155 |
|
| 156 | def decode_next_raw(self):
|
| 157 | """
|
| 158 | Parse 1 - 3 bytes of bytecode into an instruction and maybe arguments.
|
| 159 | """
|
| 160 | opcode = ord(self.f_code.co_code[self.f_lasti])
|
| 161 | self.f_lasti += 1
|
| 162 |
|
| 163 | arguments = []
|
| 164 | if opcode >= dis.HAVE_ARGUMENT:
|
| 165 | a1 = self.f_code.co_code[self.f_lasti]
|
| 166 | a2 = self.f_code.co_code[self.f_lasti+1]
|
| 167 | arg = ord(a1) + (ord(a2) << 8)
|
| 168 | self.f_lasti += 2
|
| 169 | else:
|
| 170 | arg = None
|
| 171 | return opcode, arg
|
| 172 |
|
| 173 | def line_number(self):
|
| 174 | """Get the current line number the frame is executing."""
|
| 175 | # We don't keep f_lineno up to date, so calculate it based on the
|
| 176 | # instruction address and the line number table.
|
| 177 | lnotab = self.f_code.co_lnotab
|
| 178 | byte_increments = lnotab[0::2]
|
| 179 | line_increments = lnotab[1::2]
|
| 180 |
|
| 181 | byte_num = 0
|
| 182 | line_num = self.f_code.co_firstlineno
|
| 183 |
|
| 184 | for byte_incr, line_incr in zip(byte_increments, line_increments):
|
| 185 | byte_incr = ord(byte_incr)
|
| 186 | line_incr = ord(line_incr)
|
| 187 |
|
| 188 | byte_num += byte_incr
|
| 189 | if byte_num > self.f_lasti:
|
| 190 | break
|
| 191 | line_num += line_incr
|
| 192 |
|
| 193 | return line_num
|
| 194 |
|
| 195 |
|
| 196 | class VirtualMachineError(Exception):
|
| 197 | """For raising errors in the operation of the VM."""
|
| 198 | pass
|
| 199 |
|
| 200 |
|
| 201 | BINARY_OPERATORS = {
|
| 202 | 'POWER': pow,
|
| 203 | 'MULTIPLY': operator.mul,
|
| 204 | 'DIVIDE': getattr(operator, 'div', lambda x, y: None),
|
| 205 | 'FLOOR_DIVIDE': operator.floordiv,
|
| 206 | 'TRUE_DIVIDE': operator.truediv,
|
| 207 | 'MODULO': operator.mod,
|
| 208 | 'ADD': operator.add,
|
| 209 | 'SUBTRACT': operator.sub,
|
| 210 | 'SUBSCR': operator.getitem,
|
| 211 | 'LSHIFT': operator.lshift,
|
| 212 | 'RSHIFT': operator.rshift,
|
| 213 | 'AND': operator.and_,
|
| 214 | 'XOR': operator.xor,
|
| 215 | 'OR': operator.or_,
|
| 216 | }
|
| 217 |
|
| 218 | COMPARE_OPERATORS = [
|
| 219 | operator.lt,
|
| 220 | operator.le,
|
| 221 | operator.eq,
|
| 222 | operator.ne,
|
| 223 | operator.gt,
|
| 224 | operator.ge,
|
| 225 | lambda x, y: x in y,
|
| 226 | lambda x, y: x not in y,
|
| 227 | lambda x, y: x is y,
|
| 228 | lambda x, y: x is not y,
|
| 229 | lambda x, y: issubclass(x, Exception) and issubclass(x, y),
|
| 230 | ]
|
| 231 |
|
| 232 |
|
| 233 | class VirtualMachine(object):
|
| 234 |
|
| 235 | def __init__(self, verbose=VERBOSE):
|
| 236 | """
|
| 237 | Args:
|
| 238 | subset: turn off bytecodes that OPy doesn't need (e.g. print
|
| 239 | statement, etc.)
|
| 240 | verbose: turn on logging
|
| 241 | """
|
| 242 | self.verbose = verbose
|
| 243 | # some objects define __repr__, which means our debug() logging screws
|
| 244 | # things up! Even though they don't have side effects, this somehow
|
| 245 | # matters.
|
| 246 | self.repr_ok = True
|
| 247 |
|
| 248 | # The call stack of frames.
|
| 249 | self.frames = []
|
| 250 | # The current frame.
|
| 251 | self.frame = None
|
| 252 | self.return_value = None
|
| 253 |
|
| 254 | self.cur_line = None # current line number
|
| 255 | self.num_ticks = 0
|
| 256 |
|
| 257 | def top(self):
|
| 258 | return self.frame.top()
|
| 259 |
|
| 260 | def pop(self, i=0):
|
| 261 | return self.frame.pop(i=i)
|
| 262 |
|
| 263 | def push(self, *vals):
|
| 264 | self.frame.push(*vals)
|
| 265 |
|
| 266 | def popn(self, n):
|
| 267 | return self.frame.popn(n)
|
| 268 |
|
| 269 | def peek(self, n):
|
| 270 | return self.frame.peek(n)
|
| 271 |
|
| 272 | def jump(self, offset):
|
| 273 | self.frame.jump(offset)
|
| 274 |
|
| 275 | # TODO: The frame should only have locals?
|
| 276 | # All globals are constants? No "rebindable" globals. (You can have
|
| 277 | # mutable objects but not rebind them.)
|
| 278 | def make_frame(self, code, callargs={}):
|
| 279 | """
|
| 280 | Called by self.run_code and Function.__call__.
|
| 281 | """
|
| 282 | frame = Frame(code, callargs)
|
| 283 | return frame
|
| 284 |
|
| 285 | def log_tick(self, byteName, arguments, opoffset, linestarts):
|
| 286 | """ Log arguments, block stack, and data stack for each opcode."""
|
| 287 | indent = " " * (len(self.frames)-1)
|
| 288 | stack_rep = repper(self.frame.stack)
|
| 289 | #block_stack_rep = repper(self.frame.block_stack)
|
| 290 | # repr_lib is causing problems
|
| 291 | if self.repr_ok:
|
| 292 | stack_rep = repr(self.frame.stack)
|
| 293 | #block_stack_rep = repr(self.frame.block_stack)
|
| 294 |
|
| 295 | arg_str = ''
|
| 296 | if arguments and self.repr_ok:
|
| 297 | arg_str = ' %r' % (arguments[0],)
|
| 298 |
|
| 299 | # TODO: Should increment
|
| 300 |
|
| 301 | li = linestarts.get(opoffset, None)
|
| 302 | if li is not None and self.cur_line != li:
|
| 303 | self.cur_line = li
|
| 304 |
|
| 305 | debug('%s%d: %s%s (line %s)', indent, opoffset, byteName, arg_str,
|
| 306 | self.cur_line)
|
| 307 | if self.repr_ok:
|
| 308 | debug(' %sval stack: %s', indent, stack_rep)
|
| 309 | #debug(' %sblock stack: %s', indent, block_stack_rep)
|
| 310 | debug('')
|
| 311 |
|
| 312 | # Helpers for run_frame
|
| 313 | def _push_frame(self, frame):
|
| 314 | self.frames.append(frame)
|
| 315 | self.frame = frame
|
| 316 |
|
| 317 | def _pop_frame(self):
|
| 318 | # NOTE: Block(handler) is a jump address.
|
| 319 | #if self.frame.block_stack:
|
| 320 | if False:
|
| 321 | debug1('block stack: %s', self.frame.block_stack)
|
| 322 | raise VirtualMachineError(
|
| 323 | "Block stack still has %d entries" %
|
| 324 | len(self.frame.block_stack))
|
| 325 |
|
| 326 | self.frames.pop()
|
| 327 | if self.frames:
|
| 328 | self.frame = self.frames[-1]
|
| 329 | else:
|
| 330 | self.frame = None
|
| 331 |
|
| 332 | def run_frame(self, frame):
|
| 333 | """Run a frame until it returns or raises an exception.
|
| 334 |
|
| 335 | This function raises GuestException or returns the return value.
|
| 336 |
|
| 337 | Corresponds to PyEval_EvalFrameEx in ceval.c. That returns 'PyObject*
|
| 338 | retval' -- but how does it indicate an exception?
|
| 339 |
|
| 340 | I think retval is NULL, and then
|
| 341 |
|
| 342 | """
|
| 343 | # bytecode offset -> line number
|
| 344 | #print('frame %s ' % frame)
|
| 345 | # NOTE: Also done in Frmae.line_number()
|
| 346 | linestarts = dict(dis.findlinestarts(frame.f_code))
|
| 347 | #print('STARTS %s ' % linestarts)
|
| 348 |
|
| 349 | self._push_frame(frame)
|
| 350 | while True:
|
| 351 | self.num_ticks += 1
|
| 352 | if self.num_ticks == 300:
|
| 353 | raise VirtualMachineError('Too many ticks')
|
| 354 |
|
| 355 | opoffset = self.frame.f_lasti # For logging only
|
| 356 | opcode, arg = self.frame.decode_next_raw()
|
| 357 | inst_name = dis.opname[opcode]
|
| 358 | arguments = []
|
| 359 | if self.verbose:
|
| 360 | self.log_tick(inst_name, arguments, opoffset, linestarts)
|
| 361 | debug1('arg %s', arg)
|
| 362 |
|
| 363 | # When unwinding the block stack, we need to keep track of why we
|
| 364 | # are doing it.
|
| 365 |
|
| 366 | # NOTE: In addition to returning why == 'exception', this can also
|
| 367 | # RAISE GuestException from recursive call via call_function.
|
| 368 |
|
| 369 | why = False
|
| 370 |
|
| 371 | if inst_name == 'POP_TOP':
|
| 372 | self.pop()
|
| 373 |
|
| 374 | elif inst_name == 'LOAD_CONST':
|
| 375 | const = self.frame.f_code.co_consts[arg]
|
| 376 | self.push(const)
|
| 377 |
|
| 378 | elif inst_name == 'STORE_NAME':
|
| 379 | # This is true. NOTE: dis.hasname is a list.
|
| 380 | #debug1('STORE_NAME %d', opcode)
|
| 381 | name = self.frame.f_code.co_names[arg]
|
| 382 | self.frame.f_locals[name] = self.pop()
|
| 383 |
|
| 384 | elif inst_name == 'LOAD_NAME':
|
| 385 | #debug1('NAME arg %d', arg)
|
| 386 |
|
| 387 | name = self.frame.f_code.co_names[arg]
|
| 388 | #debug1('NAME %r', name)
|
| 389 | frame = self.frame
|
| 390 | if name in frame.f_locals:
|
| 391 | val = frame.f_locals[name]
|
| 392 | elif name == 'print': # Special case!
|
| 393 | val = print
|
| 394 | #elif name in frame.f_globals:
|
| 395 | # val = frame.f_globals[name]
|
| 396 | #elif name in frame.f_builtins:
|
| 397 | # val = frame.f_builtins[name]
|
| 398 | else:
|
| 399 | raise NameError("name '%s' is not defined" % name)
|
| 400 | self.push(val)
|
| 401 | # Hack because this is not a global in OVM.
|
| 402 | #if name == 'True':
|
| 403 | # self.push(True)
|
| 404 | #else:
|
| 405 | # self.push(self.frame.f_locals[name])
|
| 406 |
|
| 407 | #
|
| 408 | # Operations
|
| 409 | #
|
| 410 | elif inst_name == 'BINARY_ADD':
|
| 411 | # I guess popn() is slightly faster because you don't decrement.
|
| 412 | # twice? Even in C?
|
| 413 | x, y = self.popn(2)
|
| 414 | self.push(BINARY_OPERATORS['ADD'](x, y))
|
| 415 |
|
| 416 | elif inst_name == 'COMPARE_OP':
|
| 417 | x, y = self.popn(2)
|
| 418 | self.push(COMPARE_OPERATORS[arg](x, y))
|
| 419 |
|
| 420 | #
|
| 421 | # FUNCTIONS / LOOPS
|
| 422 | #
|
| 423 | elif inst_name == 'CALL_FUNCTION':
|
| 424 | # NOTE: There are different bytecodes for
|
| 425 | # CALL_FUNCTION_{VAR,KW,VAR_KW}.
|
| 426 | # I don't thinks we need those in OPy. I think that is for each
|
| 427 | # combination of foo(*args, **kwargs). I guess I do need _VAR
|
| 428 | # for log(msg, *args).
|
| 429 |
|
| 430 | len_kw, len_pos= divmod(arg, 256)
|
| 431 | assert len_kw == 0
|
| 432 | namedargs = {}
|
| 433 | for i in xrange(len_kw):
|
| 434 | key, val = self.popn(2)
|
| 435 | namedargs[key] = val
|
| 436 |
|
| 437 | posargs = self.popn(len_pos)
|
| 438 | #posargs.extend(args) # For extras
|
| 439 |
|
| 440 | func = self.pop() # TODO: assert that it's the print function
|
| 441 |
|
| 442 | do_wrap = False
|
| 443 | if do_wrap:
|
| 444 | # Hm there needs to be a better way of doing this?
|
| 445 | #callargs = inspect.getcallargs(func, *posargs, **namedargs)
|
| 446 | callargs = None
|
| 447 | frame = self.make_frame(func.func_code, callargs)
|
| 448 | retval = self.run_frame(frame)
|
| 449 | else:
|
| 450 | byterun_func = func
|
| 451 | retval = byterun_func(*posargs, **namedargs)
|
| 452 | self.push(retval)
|
| 453 |
|
| 454 | elif inst_name == 'RETURN_VALUE':
|
| 455 | why = 'return'
|
| 456 |
|
| 457 | elif inst_name == 'SETUP_LOOP':
|
| 458 | dest = self.frame.f_lasti + arg # dis.hasjrel
|
| 459 | self.frame.push_block('loop', dest)
|
| 460 |
|
| 461 | elif inst_name == 'POP_BLOCK':
|
| 462 | self.frame.pop_block()
|
| 463 |
|
| 464 | elif inst_name == 'BREAK_LOOP':
|
| 465 | # TODO: Get rid of this; it should be a jump.
|
| 466 | why = 'break'
|
| 467 |
|
| 468 | #
|
| 469 | # JUMPS
|
| 470 | #
|
| 471 | elif inst_name == 'POP_JUMP_IF_FALSE':
|
| 472 | # NOTE: This is a "superinstruction"; it could just be POP, then
|
| 473 | # JUMP_IF_FALSE.
|
| 474 | val = self.pop()
|
| 475 | if not val:
|
| 476 | self.jump(arg)
|
| 477 |
|
| 478 | elif inst_name == 'JUMP_ABSOLUTE':
|
| 479 | self.jump(arg) # dis.hasjabs
|
| 480 |
|
| 481 | elif inst_name == 'JUMP_FORWARD':
|
| 482 | self.jump(self.frame.f_lasti + arg) # dis.hasjrel
|
| 483 |
|
| 484 | #
|
| 485 | # Intentionally ignored imports
|
| 486 | #
|
| 487 | elif inst_name == 'IMPORT_NAME':
|
| 488 | pass
|
| 489 | elif inst_name == 'IMPORT_FROM':
|
| 490 | pass
|
| 491 |
|
| 492 | else:
|
| 493 | raise AssertionError('OVM not handling %r' % inst_name)
|
| 494 |
|
| 495 | while why and frame.block_stack:
|
| 496 | debug('WHY %s', why)
|
| 497 | debug('STACK %s', frame.block_stack)
|
| 498 | why = self.frame.handle_block_stack(why, self)
|
| 499 |
|
| 500 | # TODO: I should be popping and cleaning up blocks here.
|
| 501 | if why:
|
| 502 | break
|
| 503 |
|
| 504 | self._pop_frame()
|
| 505 |
|
| 506 | #debug1('num_ticks: %d' % num_ticks)
|
| 507 | return self.return_value
|
| 508 |
|
| 509 | def check_invariants(self):
|
| 510 | # Check some invariants
|
| 511 | if self.frames: # pragma: no cover
|
| 512 | raise VirtualMachineError("Frames left over!")
|
| 513 | # NOTE: self.frame is None at the end.
|
| 514 | if self.frame and self.frame.stack: # pragma: no cover
|
| 515 | raise VirtualMachineError("Data left on stack! %r" % self.frame.stack)
|