#!/usr/bin/env python2
"""inspect_pyc module

This is a refactor of a recipe from Ned Batchelder's blog.  He has
given me permission to publish this.  You can find the post at the
following URL:

  http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html

You may use this module as a script: "./inspect_pyc.py <PYC_FILE>".

NOTE:
  You can also see bytecode with:

    import os, dis
    dis.dis(os)

  But that doesn't give all the metadata.  It's also nicer than
  tools/dumppyc.py, which came with the 'compiler2' package.
"""
# The docstring must be the first statement to become the module's __doc__;
# a __future__ import is still legal directly after it.
from __future__ import print_function
|
| 21 |
|
| 22 | import marshal, struct, sys, time, types
|
| 23 |
|
| 24 | import consts # this package
|
| 25 |
|
| 26 | from opy.lib import dis
|
| 27 |
|
| 28 |
|
# String repeated once per nesting level to indent nested output.
INDENT = ' '
# Number of bytes rendered on each line of a wrapped hex dump.
MAX_HEX_LEN = 16
# Width to which "name:" labels are left-justified when attributes are printed.
NAME_OFFSET = 20
|
| 32 |
|
| 33 |
|
def to_hexstr(bytes_value, level=0, wrap=False):
    """Render a byte sequence as space-separated two-digit hex values.

    Args:
      bytes_value: sequence whose items are either integers or 1-character
                   strings (the latter are converted with ord())
      level: nesting depth; only affects the output when wrap is true
      wrap: if true, emit MAX_HEX_LEN values per line and prefix every line
            with INDENT * level; otherwise return everything on one line

    Returns:
      The formatted string.
    """
    pad = INDENT * level
    total = len(bytes_value)

    # Items that are already integers format directly; 1-char strings make
    # "%02x" raise TypeError, in which case they are converted with ord().
    try:
        octets = ["%02x" % item for item in bytes_value]
    except TypeError:
        octets = ["%02x" % ord(item) for item in bytes_value]

    rows = [
        " ".join(octets[start:start + MAX_HEX_LEN])
        for start in range(0, total, MAX_HEX_LEN)
    ]

    if wrap:
        return pad + ("\n" + pad).join(rows)
    return " ".join(rows)
|
| 49 |
|
| 50 |
|
def ShowFlags(flags):
    """Format a co_flags value as hex followed by the names of its set bits.

    Args:
      flags: integer bit field

    Returns:
      "0x%05x" of flags, followed (space-separated) by the
      consts.VALUE_TO_NAME entry of every key that shares a bit with flags,
      in ascending key order.  Just the hex string if no key matches.
    """
    table = consts.VALUE_TO_NAME
    pieces = ["0x%05x" % flags]
    pieces.extend(table[mask] for mask in sorted(table) if flags & mask)
    return ' '.join(pieces)
|
| 62 |
|
| 63 |
|
def unpack_pyc(f):
    """Read the header and the marshalled code object from a .pyc file.

    The header is read as 4 bytes of magic number followed by a 4-byte
    modification time; everything after that is handed to marshal.load().

    Args:
      f: binary file object positioned at the start of the .pyc data

    Returns:
      A tuple (magic, unixtime, timestamp, code):
        magic: the first 4 bytes, unmodified
        unixtime: the modification time as an unsigned integer
        timestamp: unixtime formatted with time.asctime() in local time
        code: the object returned by marshal.load()

    Raises:
      struct.error: if fewer than 4 bytes are available for the time field.
    """
    magic = f.read(4)
    # The time field is written little-endian on every platform.  A bare "I"
    # would decode it in the host's native byte order and give a wrong value
    # on big-endian machines, so the byte order is spelled out.
    (unixtime,) = struct.unpack("<I", f.read(4))
    timestamp = time.asctime(time.localtime(unixtime))
    code = marshal.load(f)
    return magic, unixtime, timestamp, code
|
| 70 |
|
| 71 |
|
| 72 | # Enhancements:
|
| 73 | # - Actually print the line of code! That will be very helpful.
|
| 74 |
|
def disassemble(co, indent, f):
    """Write a disassembly of a code object, one instruction per line.

    Copied from the dis module, with two differences: every line starts with
    `indent`, and a constant that is itself a code object is shown by name,
    filename and first line number instead of by repr().

    Args:
      co: code object; co_code is decoded as a 1-byte opcode optionally
          followed by a 2-byte little-endian argument
      indent: indentation to print with
      f: file object that receives the output

    NOTE: byterun/pyobj.py:Frame.decode_next does something very similar.
    """
    def out(*args, **kwargs):
        print(*args, file=f, **kwargs)

    code = co.co_code
    # Indexing a bytearray gives integers whether co_code is a str or a bytes
    # object, so no ord() call is needed.
    raw = bytearray(code)
    labels = dis.findlabels(code)
    linestarts = dict(dis.findlinestarts(co))
    n = len(raw)
    i = 0
    extended_arg = 0
    free = None  # co_cellvars + co_freevars, built the first time it is needed

    while i < n:
        op = raw[i]

        if i in linestarts:
            if i > 0:
                out()  # blank line before each new source line
            prefix = linestarts[i]
        else:
            prefix = ''
        out('%s%4s' % (indent, prefix), end=' ')

        # Jump targets get a special symbol.  The other marker is padded to
        # the same width so that the following columns stay aligned.
        arrow = '>>' if i in labels else '  '

        out(' %s %4r %-20s ' % (arrow, i, dis.opname[op]), end=' ')
        i += 1
        if op >= dis.HAVE_ARGUMENT:
            oparg = raw[i] + raw[i + 1] * 256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                # A plain int literal: a long literal would make every
                # following argument print with a trailing 'L'.
                extended_arg = oparg * 65536

            oparg_str = None

            if op in dis.hasconst:
                const = co.co_consts[oparg]
                if isinstance(const, types.CodeType):
                    # %r prints a memory address, which inhibits diffing
                    oparg_str = '(code object %s %s %s)' % (
                        const.co_name, const.co_filename,
                        const.co_firstlineno)
                else:
                    oparg_str = '(%r)' % (const,)

            elif op in dis.hasname:
                oparg_str = '(%s)' % (co.co_names[oparg],)

            elif op in dis.hasjrel:
                # i already points past this instruction here.
                oparg_str = '(to %r)' % (i + oparg,)

            elif op in dis.haslocal:
                oparg_str = '(%s)' % (co.co_varnames[oparg],)

            elif op in dis.hascompare:
                oparg_str = '(%s)' % (dis.cmp_op[oparg],)

            elif op in dis.hasfree:
                if free is None:
                    free = co.co_cellvars + co.co_freevars
                oparg_str = '(%s)' % (free[oparg],)

            if oparg_str:
                out('%5r %s' % (oparg, oparg_str), end=' ')
            else:
                out('%5r' % oparg, end=' ')

        out()  # end the instruction's line
|
| 155 |
|
| 156 |
|
def ParseOps(code):
    """A lightweight parser.  Does some of what disassemble() does.

    Args:
      code: raw bytecode (e.g. co_code) in which every opcode at or above
            dis.HAVE_ARGUMENT is followed by a 2-byte little-endian argument

    Yields:
      (opname, oparg) for each instruction, in order.  oparg includes any
      value carried over from a preceding EXTENDED_ARG, and is None for
      opcodes that take no argument.

    Raises:
      IndexError: if the bytecode ends in the middle of an argument.
    """
    # Indexing a bytearray gives integers whether `code` is a str or a bytes
    # object, so no ord() call is needed.
    raw = bytearray(code)
    n = len(raw)
    i = 0
    extended_arg = 0

    while i < n:
        op = raw[i]
        i += 1

        if op >= dis.HAVE_ARGUMENT:
            oparg = raw[i] + raw[i + 1] * 256 + extended_arg
            extended_arg = 0
            i += 2
            if op == dis.EXTENDED_ARG:
                extended_arg = oparg * 65536
        else:
            # Without this, an argument-less opcode would report the previous
            # instruction's argument, or hit an unbound local if it came first.
            oparg = None

        yield dis.opname[op], oparg
|
| 177 |
|
| 178 |
|
class Visitor(object):
    """Prints a readable listing of a .pyc file and its code objects to stdout."""

    def __init__(self, dis_bytecode=True, co_name=None):
        """
        Args:
          dis_bytecode: Whether to show disassembly.
          co_name: only print code objects with this exact name

        NOTE(review): the filter is applied again to every nested code object,
        so children of a matching code object whose own name differs are
        listed by index only -- confirm whether they were meant to be printed
        in full.
        """
        self.dis_bytecode = dis_bytecode
        # Name of thing to print
        self.co_name = co_name

    def show_consts(self, consts, level=0):
        """Print each constant with its index, descending into code objects."""
        indent = INDENT * level
        for i, obj in enumerate(consts):
            if isinstance(obj, types.CodeType):
                print("%s%s (code object)" % (indent, i))
                # RECURSIVE CALL.
                self.show_code(obj, level=level+1)
            else:
                print("%s%s %r" % (indent, i, obj))

    def maybe_show_consts(self, consts, level=0):
        """Visit only the code objects among consts, printing nothing itself.

        Called for code objects rejected by the co_name filter, so that the
        search continues in their nested code objects.
        """
        for obj in consts:
            if isinstance(obj, types.CodeType):
                self.show_code(obj, level=level+1)  # RECURSIVE CALL.

    def show_bytecode(self, code, level=0):
        """Print co_code as wrapped hex and, if enabled, its disassembly."""
        indent = INDENT * level
        print(to_hexstr(code.co_code, level, wrap=True))

        if self.dis_bytecode:
            print(indent + "disassembled:")
            disassemble(code, indent, sys.stdout)

    def show_code(self, code, level=0):
        """Print a code object, e.g. metadata, bytecode, and consts."""

        # Filter recursive call
        if self.co_name and code.co_name != self.co_name:
            self.maybe_show_consts(code.co_consts, level=level+1)
            return

        indent = INDENT * level

        for name in dir(code):
            if not name.startswith("co_"):
                continue
            # These two get their own sections below.
            if name in ("co_code", "co_consts"):
                continue
            value = getattr(code, name)
            # The name-specific formats are tested before the generic str
            # check: where co_lnotab is itself a str, testing isinstance()
            # first would print it with repr() and never reach the hex branch.
            if name == "co_flags":
                value = ShowFlags(value)
            elif name == "co_lnotab":
                value = "0x(%s)" % to_hexstr(value)
            elif isinstance(value, str):
                value = repr(value)
            print("%s%s%s" % (indent, (name+":").ljust(NAME_OFFSET), value))

        # Show bytecode FIRST, and then consts.  There is nested bytecode in
        # the consts, so it's a 'top-down' order.
        print("%sco_code" % indent)
        self.show_bytecode(code, level=level+1)

        print("%sco_consts" % indent)
        self.show_consts(code.co_consts, level=level+1)

    def Visit(self, f):
        """Write a readable listing of a .pyc file to stdout.

        Args:
          f: file object for the .pyc file, passed to unpack_pyc()
        """
        magic, unixtime, timestamp, code = unpack_pyc(f)

        magic = "0x(%s)" % to_hexstr(magic)
        print(" ## inspecting pyc file ##")
        print("magic number: %s" % magic)
        print("timestamp: %s (%s)" % (unixtime, timestamp))
        print("code")
        self.show_code(code, level=1)
        print(" ## done inspecting pyc file ##")
|