| 1 | #!/usr/bin/env python2
|
| 2 | """
|
| 3 | oheap2.py
|
| 4 |
|
| 5 | OVM2 stores objects in a data structure called OHeap2. It has the following
|
| 6 | concepts.
|
| 7 |
|
| 8 | - Handles: 4-byte / 32-bit integers that point to Cells.
|
| 9 | - Cells: 16-byte structs with a type tag, in a contiguous array. This array is
|
| 10 | scanned by the GC.
|
| 11 | - Slabs: Variable-length arrays of memory that are owned by a Cell. Each slab
|
| 12 | may be malloc'd individually.
|
| 13 | - In the case of a string, it's opaque to the GC.
|
| 14 | - In the case of a tuple or struct, it must be scanned by the GC.
|
| 15 |
|
| 16 | This is called the "4-16-N design".
|
| 17 |
|
| 18 | Every cell has a type tag, which is simply an integer. Negative integers
|
| 19 | indicate native types implemented in C. Positive integers are for user-defined
|
| 20 | types.
|
| 21 |
|
| 22 | Cells also have an is_slab bit and a length for small. TODO: is_slab should be
|
| 23 | is_small?
|
| 24 |
|
| 25 | Operations on OHeap
|
| 26 | -------------------
|
| 27 |
|
| 28 | 1. Allocate a new cell, possibly resizing
|
| 29 |
|
| 30 | 2. Garbage Collect
|
| 31 | - Walk all the cells. Free unused cells along with their slabs.
|
| 32 |
|
| 33 | 3. Save to file
|
| 34 | Slabs: Native Pointers to Offsets
|
| 35 |
|
| 36 | 3a. Copy an object tree to a fresh oheap? e.g. for IPC. I guess this is like "Save".
|
| 37 | - Walk all the cells and slabs
|
| 38 |
|
| 39 | 4. Load from file into fresh OHeap
|
| 40 | - Patch Slab Offsets to Native Pointers
|
| 41 |
|
| 42 | 4a. Load from file and merge into existing OHeap?
|
| 43 | - Inside a namespace?
|
| 44 | - Import?
|
| 45 | - Patch all cell refs!
|
| 46 |
|
| 47 | 5. Make permanent? Move to another "generation"? OPy classes and bytecode
|
| 48 | perhaps fit in this category.
|
| 49 |
|
| 50 | TODO
|
| 51 | ----
|
| 52 |
|
| 53 | - Bits on handle: whether it's const or not?
|
| 54 | - negative handles could be const?
|
| 55 |
|
| 56 | - Singleton values for None/True/False
|
| 57 | - interning of strings
|
| 58 | - hash tables, and hashes of strings
|
| 59 |
|
| 60 | Introspection
|
| 61 | -------------
|
| 62 |
|
| 63 | These operations should work:
|
| 64 |
|
| 65 | type()
|
| 66 | getattr() -- hm in OVM I don't want to look at base classes
|
| 67 | dir() -- list all attributes. ditto -- don't look at base classes?
|
| 68 | hm except this returns different stuff for modules, classes, and objects
|
| 69 | it looks at superclasses
|
| 70 | isinstance(obj, typ) - check if the type tag of obj points at typ or a superclass
|
| 71 | """
|
| 72 | from __future__ import print_function
|
| 73 |
|
| 74 | import struct
|
| 75 | import sys
|
| 76 | import types
|
| 77 |
|
| 78 | from core.util import log
|
| 79 |
|
| 80 |
|
| 81 | TAG_NONE = -1
|
| 82 | TAG_BOOL = -2
|
| 83 | TAG_INT = -3
|
| 84 | TAG_FLOAT = -4
|
| 85 | TAG_STR = -5
|
| 86 | TAG_TUPLE = -6
|
| 87 | TAG_CODE = -7
|
| 88 |
|
| 89 | MIN_SMALL_INT = -(1 << 63)
|
| 90 | MAX_SMALL_INT = (1 << 63) - 1
|
| 91 |
|
| 92 | MAX_LEN_SMALL_STR = 11 # 12 bytes - 1 for NUL
|
| 93 |
|
| 94 | MAX_LEN_SMALL_TUPLE = 3 # 4 Handles in 12 bytes?
|
| 95 | # We can steal a bit from the tag for the small/big
|
| 96 | # bit.
|
| 97 |
|
| 98 | def u8(i): # 1 byte unsigned
|
| 99 | return struct.pack('B', i)
|
| 100 |
|
| 101 | def i16(i): # 2 bytes
|
| 102 | return struct.pack('h', i)
|
| 103 |
|
| 104 | def i32(i): # 4 bytes
|
| 105 | return struct.pack('i', i)
|
| 106 |
|
| 107 | def i64(i): # 8 bytes (long long)
|
| 108 | return struct.pack('q', i)
|
| 109 |
|
| 110 | def f64(i): # 8 byte double
|
| 111 | return struct.pack('d', i)
|
| 112 |
|
| 113 |
|
| 114 | def Align4(i):
|
| 115 | """Round up to the nearest multiple of 4. See unit tests."""
|
| 116 | return ((i-1) | 3) + 1
|
| 117 |
|
| 118 |
|
| 119 | def Align16(i):
|
| 120 | """Round up to the nearest multiple of 16. See unit tests."""
|
| 121 | return ((i-1) | 15) + 1
|
| 122 |
|
| 123 |
|
| 124 | class Encoder(object):
|
| 125 | """
|
| 126 | Write objects into an OHeap2 structure that can be lazily loaded.
|
| 127 |
|
| 128 | First pass:
|
| 129 | Append to cells and append to slabs
|
| 130 | Second pass:
|
| 131 | Write slabs as bytes, and then patch offsets in cells?
|
| 132 | Write all the cells
|
| 133 | Write the root object at the front of the file? Or do it at the end?
|
| 134 | OHeap writes it at the beginning after
|
| 135 | """
|
| 136 |
|
| 137 | def __init__(self):
|
| 138 | self.chunk = bytearray()
|
| 139 | # An array of cells
|
| 140 | self.cells = []
|
| 141 | # Write all these first? So that the cells can point to them.
|
| 142 | self.slabs = []
|
| 143 |
|
| 144 | def Any(self, obj):
|
| 145 | """
|
| 146 | Encode an object and return its id.
|
| 147 | """
|
| 148 | id_ = len(self.cells)
|
| 149 |
|
| 150 | # TODO: Enforce that None is a singleton. But what about joining OHeaps?
|
| 151 | if isinstance(obj, types.NoneType):
|
| 152 | self.cells.append((TAG_NONE, False, None))
|
| 153 |
|
| 154 | # TODO: Enforce that True/False is a singleton. But what about joining
|
| 155 | # OHeaps?
|
| 156 | elif isinstance(obj, bool):
|
| 157 | self.cells.append((TAG_BOOL, False, obj))
|
| 158 |
|
| 159 | elif isinstance(obj, int):
|
| 160 | i = obj
|
| 161 | if MIN_SMALL_INT < i < MAX_SMALL_INT:
|
| 162 | self.cells.append((TAG_INT, False, i))
|
| 163 | else:
|
| 164 | raise NotImplementedError
|
| 165 |
|
| 166 | elif isinstance(obj, float):
|
| 167 | raise NotImplementedError
|
| 168 |
|
| 169 | # TODO: Identical strings could be "interned" here.
|
| 170 | elif isinstance(obj, str):
|
| 171 | s = obj
|
| 172 | n = len(s)
|
| 173 | if n < MAX_LEN_SMALL_STR:
|
| 174 | self.cells.append((TAG_STR, False, s))
|
| 175 | else:
|
| 176 | # length and they payload
|
| 177 | slab_index = len(self.slabs)
|
| 178 | self.slabs.append((n, s))
|
| 179 | self.cells.append((TAG_STR, True, slab_index))
|
| 180 |
|
| 181 | elif isinstance(obj, tuple):
|
| 182 | t = obj
|
| 183 | refs = []
|
| 184 | for item in t:
|
| 185 | refs.append(self.Any(item)) # Depth-first.
|
| 186 |
|
| 187 | # Compute ID after adding all the children.
|
| 188 | id_ = len(self.cells)
|
| 189 |
|
| 190 | n = len(t)
|
| 191 | if n < MAX_LEN_SMALL_TUPLE:
|
| 192 | # TODO: How do we know how long it is?
|
| 193 | self.cells.append((TAG_TUPLE, False, refs))
|
| 194 | else:
|
| 195 | slab_index = len(self.slabs)
|
| 196 | self.slabs.append((n, refs))
|
| 197 | self.cells.append((TAG_TUPLE, True, slab_index))
|
| 198 |
|
| 199 | elif isinstance(obj, types.CodeType):
|
| 200 | co = obj
|
| 201 | refs = []
|
| 202 |
|
| 203 | # Ints first
|
| 204 | refs.append(self.Any(co.co_argcount))
|
| 205 | refs.append(self.Any(co.co_nlocals))
|
| 206 | refs.append(self.Any(co.co_stacksize))
|
| 207 | refs.append(self.Any(co.co_flags))
|
| 208 | refs.append(self.Any(co.co_firstlineno))
|
| 209 |
|
| 210 | # Strings
|
| 211 | refs.append(self.Any(co.co_name))
|
| 212 | refs.append(self.Any(co.co_filename))
|
| 213 | refs.append(self.Any(co.co_code)) # bytecode
|
| 214 |
|
| 215 | # Tuples
|
| 216 | refs.append(self.Any(co.co_names))
|
| 217 | refs.append(self.Any(co.co_varnames))
|
| 218 | refs.append(self.Any(co.co_consts))
|
| 219 |
|
| 220 | slab_index = len(self.slabs)
|
| 221 | self.slabs.append((len(refs), refs))
|
| 222 | self.cells.append((TAG_CODE, True, slab_index))
|
| 223 |
|
| 224 | else:
|
| 225 | raise AssertionError
|
| 226 |
|
| 227 | return id_
|
| 228 |
|
| 229 | def Write(self, f):
|
| 230 | f.write('OHP2') # magic header
|
| 231 | f.write(i32(0)) # placeholder for cell offset
|
| 232 | f.write(i32(0)) # placeholder for root object. NOTE: We could always
|
| 233 | # fseek(SEEK_END, -4)? But I think we want it to more
|
| 234 | # self-describing.
|
| 235 |
|
| 236 | # Write slabs first, so we know their offsets.
|
| 237 | slab_offsets = []
|
| 238 | pos = 0 # Reader should patch relative to 12
|
| 239 |
|
| 240 | for length, payload in self.slabs:
|
| 241 |
|
| 242 | if isinstance(payload, str):
|
| 243 | slab_offsets.append(pos)
|
| 244 |
|
| 245 | f.write(i32(length)) # length in bytes
|
| 246 | pos += 4
|
| 247 |
|
| 248 | n = len(payload)
|
| 249 | aligned = Align4(n+1) # at least NUL byte for terminator
|
| 250 | f.write(payload)
|
| 251 | f.write('\0' * (aligned - n)) # padding
|
| 252 | pos += aligned
|
| 253 |
|
| 254 | elif isinstance(payload, list): # list of references
|
| 255 | slab_offsets.append(pos)
|
| 256 |
|
| 257 | # length in references. Could be unsigned?
|
| 258 | f.write(i32(length))
|
| 259 |
|
| 260 | # NOTE: There is no GC offset, since all of them are scanned?
|
| 261 | for ref in payload:
|
| 262 | f.write(i32(ref))
|
| 263 | pos += 4
|
| 264 | pos += len(payload) * 4
|
| 265 |
|
| 266 | else:
|
| 267 | raise AssertionError(payload)
|
| 268 |
|
| 269 | log('Slab offsets: %s', slab_offsets)
|
| 270 |
|
| 271 | # Pad out the slabs so that the cells begins at a multiple of 16.
|
| 272 | total_slab_size = Align16(pos) # including pad, but not including header.
|
| 273 | f.write('\0' * (total_slab_size - pos)) # Write the pad
|
| 274 |
|
| 275 | # Encode it into 16 bytes
|
| 276 | for tag, is_slab, val in self.cells:
|
| 277 | #log('%s %s %s', tag, is_slab, val)
|
| 278 |
|
| 279 | # 4 byte tag. This may be patched into a type Handle?
|
| 280 | # Reserve LSB for is_slab? Or highest bit?
|
| 281 | # The tag could be (i >> 1) then?
|
| 282 | f.write(i16(tag))
|
| 283 |
|
| 284 | if tag == TAG_NONE:
|
| 285 | f.write(i16(0))
|
| 286 | f.write(i32(0))
|
| 287 | f.write(i32(0))
|
| 288 | f.write(i32(0))
|
| 289 |
|
| 290 | elif tag == TAG_BOOL:
|
| 291 | f.write(i16(0)) # pad
|
| 292 | f.write(i32(0)) # pad
|
| 293 | f.write(i32(0)) # pad
|
| 294 | f.write(i32(int(val))) # 0 or 1
|
| 295 |
|
| 296 | elif tag == TAG_INT:
|
| 297 | assert not is_slab, val
|
| 298 | f.write(i16(0)) # Padding
|
| 299 | f.write(i32(0)) # Padding
|
| 300 | f.write(i64(val))
|
| 301 |
|
| 302 | elif tag == TAG_FLOAT:
|
| 303 | assert not is_slab, val
|
| 304 | f.write(i16(0)) # Padding
|
| 305 | f.write(i32(0)) # Padding
|
| 306 | f.write(f64(val))
|
| 307 |
|
| 308 | elif tag == TAG_STR:
|
| 309 | if is_slab:
|
| 310 | # For a string, a big/small bit could technically be in the last
|
| 311 | # byte. To reuse NUL terminator. But I want the patching process to
|
| 312 | # be consistent.
|
| 313 | slab_index = val
|
| 314 | offset = slab_offsets[slab_index]
|
| 315 | f.write(u8(1)) # is_slab
|
| 316 | f.write(u8(0)) # length stored in slab
|
| 317 | f.write(i32(0)) # pad
|
| 318 | f.write(i32(0)) # pad
|
| 319 | f.write(i32(offset))
|
| 320 | else:
|
| 321 | n = len(val)
|
| 322 | f.write(u8(0)) # is_slab
|
| 323 | f.write(u8(n)) # length stored here
|
| 324 | f.write(val)
|
| 325 | num_pad = 12 - n # at least one NUL
|
| 326 | f.write('\0' * num_pad)
|
| 327 |
|
| 328 | elif tag == TAG_TUPLE:
|
| 329 | if is_slab:
|
| 330 | slab_index = val
|
| 331 | offset = slab_offsets[slab_index]
|
| 332 | f.write(u8(1)) # is_slab
|
| 333 | f.write(u8(0)) # length stored in slab
|
| 334 | f.write(i32(0)) # pad
|
| 335 | f.write(i32(0)) # pad
|
| 336 | f.write(i32(offset))
|
| 337 | else:
|
| 338 | n = len(val)
|
| 339 | f.write(u8(0)) # is_slab
|
| 340 | f.write(u8(n))
|
| 341 | # TODO: how is length represented?
|
| 342 | for ref in val:
|
| 343 | f.write(i32(ref))
|
| 344 | num_pad = 3 - len(val)
|
| 345 | for i in xrange(num_pad):
|
| 346 | f.write(i32(0))
|
| 347 |
|
| 348 | elif tag == TAG_CODE:
|
| 349 | assert is_slab, val
|
| 350 | slab_index = val
|
| 351 | #log('slab_index %d', slab_index)
|
| 352 | offset = slab_offsets[slab_index]
|
| 353 | f.write(u8(1)) # is_slab
|
| 354 | f.write(u8(0)) # length stored in slab
|
| 355 | f.write(i32(0)) # pad
|
| 356 | f.write(i32(0)) # pad
|
| 357 | f.write(i32(offset))
|
| 358 |
|
| 359 | else:
|
| 360 | raise AssertionError(tag)
|
| 361 |
|
| 362 | log('')
|
| 363 | log('slabs')
|
| 364 | for slab in self.slabs:
|
| 365 | log('\t%r', slab)
|
| 366 |
|
| 367 | log('cells')
|
| 368 | for c in self.cells:
|
| 369 | #log('\t%r', c)
|
| 370 | pass
|
| 371 |
|
| 372 | log('%d slabs in %d bytes', len(self.slabs), total_slab_size)
|
| 373 | log('%d cells in %d bytes', len(self.cells), f.tell() - 12 - total_slab_size)
|
| 374 |
|
| 375 | # Fill in the cell position
|
| 376 | f.seek(4)
|
| 377 | f.write(i32(total_slab_size))
|
| 378 | f.write(i32(len(self.cells)))
|
| 379 |
|
| 380 |
|
| 381 | def Write(co, f):
|
| 382 | print(co)
|
| 383 | enc = Encoder()
|
| 384 | enc.Any(co)
|
| 385 | enc.Write(f)
|
| 386 |
|
| 387 |
|
| 388 | def main(argv):
|
| 389 | chunk = bytearray()
|
| 390 | chunk.extend('hello')
|
| 391 | chunk.append('\0')
|
| 392 |
|
| 393 | print('Hello from oheap2.py')
|
| 394 | print(repr(chunk))
|
| 395 |
|
| 396 |
|
| 397 | if __name__ == '__main__':
|
| 398 | try:
|
| 399 | main(sys.argv)
|
| 400 | except RuntimeError as e:
|
| 401 | print('FATAL: %s' % e, file=sys.stderr)
|
| 402 | sys.exit(1)
|