| 1 | #!/usr/bin/env python2
|
| 2 | """
|
| 3 | asdl_cpp.py
|
| 4 |
|
| 5 | Turn an ASDL schema into C++ code that reads the OHeap V1 format.
|
| 6 |
|
| 7 | TODO:
|
| 8 | - Optional fields
|
| 9 | - in osh, it's only used in two places:
|
| 10 | - arith_expr? for slice length
|
| 11 | - word? for var replace
|
| 12 | - So you're already using pointers, can encode the NULL pointer.
|
| 13 |
|
| 14 | - Change everything to use references instead of pointers? Non-nullable.
|
| 15 | - Unify ClassDefVisitor and MethodBodyVisitor.
|
| 16 | - Whether you need a separate method body should be a flag.
|
| 17 | - offset calculations are duplicated
|
| 18 | - generate a C++ pretty-printer
|
| 19 |
|
| 20 | Technically we don't even need alignment? I guess the reason is to increase
|
| 21 | address space. If 1, then we have 16MiB of code. If 4, then we have 64 MiB.
|
| 22 |
|
| 23 | Everything is decoded on the fly, or is a char*, which I don't think has to be
|
| 24 | aligned (because the natural alignment would be 1 byte anyway.)
|
| 25 | """
|
| 26 | from __future__ import print_function
|
| 27 |
|
| 28 | import sys
|
| 29 |
|
| 30 | from asdl import asdl_ as asdl
|
| 31 | from asdl import front_end
|
| 32 | from asdl import meta
|
| 33 | from asdl import runtime
|
| 34 | from asdl import visitor
|
| 35 | from misc.old import encode
|
| 36 |
|
class ChainOfVisitors:
    """Run several visitors over the same module, in the order given."""

    def __init__(self, *visitors):
        # Stored as the tuple of positional arguments, in call order.
        self.visitors = visitors

    def VisitModule(self, module):
        """Call VisitModule(module) on every wrapped visitor in turn."""
        for each in self.visitors:
            each.VisitModule(module)
|
| 44 |
|
| 45 |
|
| 46 | _BUILTINS = {
|
| 47 | 'string': 'char*', # A read-only string is a char*
|
| 48 | 'int': 'int',
|
| 49 | 'bool': 'bool',
|
| 50 | 'id': 'Id', # Application specific hack for now
|
| 51 | }
|
| 52 |
|
class ForwardDeclareVisitor(visitor.AsdlVisitor):
    """Emit a C++ forward declaration for each compound sum and product.

    An ASDL schema may refer to a type before it is defined; C++ needs the
    class name declared before it is used.
    """

    def VisitCompoundSum(self, sum, name, depth):
        declaration = "class %s_t;" % (name,)
        self.Emit(declaration, depth)

    def VisitProduct(self, product, name, depth):
        declaration = "class %s_t;" % (name,)
        self.Emit(declaration, depth)

    def EmitFooter(self):
        # A blank line after the declarations.
        self.Emit("", 0)
|
| 66 |
|
| 67 |
|
class ClassDefVisitor(visitor.AsdlVisitor):
    """Generate C++ classes and type-safe enums.

    Class definitions are emitted as types are visited.  Accessor bodies that
    need a static_cast<> to another generated class are queued in self.footer
    and written out by EmitFooter().
    """

    # First statement of every array element accessor: turns the element
    # index into the offset handed to Int()/Str()/Ref().  Slot 0 is what the
    # generated *_size() accessor reads, hence index+1.
    # NOTE(review): the 3 is presumably the reference width (see ref_width);
    # it is hard-coded here -- confirm before changing the encoding.
    _ARRAY_OFFSET = 'int a = (index+1) * 3;'

    # Signature of an array element accessor, without the trailing ' {' / ';'.
    _ARRAY_SIGNATURE = (
        'inline const %(ctype)s %(maybe_qual_name)s('
        'const %(pointer_type)s* base, int index) const')

    # Signature of a scalar accessor that must follow a reference.
    _POINTER_SIGNATURE = (
        'inline const %(ctype)s %(maybe_qual_name)s('
        'const %(pointer_type)s* base) const')

    # Complete header of a scalar accessor that decodes in place.
    _SIMPLE_HEADER = 'inline %(ctype)s %(maybe_qual_name)s() const {'

    def __init__(self, f, enc_params, type_lookup, enum_types=None):
        """
        Args:
          f: output file object, handed to visitor.AsdlVisitor.
          enc_params: encoding parameters; ref_width and pointer_type are read.
          type_lookup: mapping from ASDL type name to its definition.
          enum_types: optional collection of C++ type names to treat as enums
            in addition to names ending in '_e'.
        """
        visitor.AsdlVisitor.__init__(self, f)
        self.ref_width = enc_params.ref_width
        self.type_lookup = type_lookup
        self.enum_types = enum_types or {}
        self.pointer_type = enc_params.pointer_type
        self.footer = []  # lines of out-of-line accessor bodies

    def _GetCppType(self, field):
        """Return a string for the C++ name of the type."""
        type_name = field.type

        cpp_type = _BUILTINS.get(type_name)
        if cpp_type is not None:
            return cpp_type

        typ = self.type_lookup[type_name]
        if isinstance(typ, asdl.Sum) and asdl.is_simple(typ):
            # Use the enum instead of the class.
            return "%s_e" % type_name

        # - Pointer for optional type.
        # - ints and strings should generally not be optional?  We don't have
        #   them in osh yet, so leave it out for now.
        if field.opt:
            return "%s_t*" % type_name

        return "%s_t&" % type_name

    def EmitFooter(self):
        """Write the queued out-of-line accessor bodies."""
        for line in self.footer:
            self.f.write(line)

    def _EmitEnum(self, sum, name, depth):
        """Emit 'enum class <name>_e' with one enumerator per variant."""
        # Tags are numbered from 1; zero is reserved.
        enumerators = [
            "%s = %d" % (variant.name, tag)
            for tag, variant in enumerate(sum.types, 1)
        ]

        self.Emit("enum class %s_e : uint8_t {" % name, depth)
        self.Emit(", ".join(enumerators), depth + 1)
        self.Emit("};", depth)
        self.Emit("", depth)

    def VisitSimpleSum(self, sum, name, depth):
        self._EmitEnum(sum, name, depth)

    def VisitCompoundSum(self, sum, name, depth):
        """Emit the tag enum, the base class, then one class per variant."""
        subst = {'name': name}

        self._EmitEnum(sum, name, depth)

        self.Emit("class %(name)s_t : public Obj {" % subst, depth)
        self.Emit(" public:", depth)
        # All sum types have a tag
        self.Emit("%(name)s_e tag() const {" % subst, depth + 1)
        self.Emit("return static_cast<%(name)s_e>(bytes_[0]);" % subst,
                  depth + 2)
        self.Emit("}", depth + 1)
        self.Emit("};", depth)
        self.Emit("", depth)

        # TODO: This should be replaced with a call to the generic
        # self.VisitChildren()
        super_name = "%s_t" % name
        for variant in sum.types:
            self.VisitConstructor(variant, super_name, depth)

        # rudimentary attribute handling
        # NOTE(review): these declarations are emitted after the classes above
        # have been closed, unlike VisitProduct which emits them inside the
        # class -- confirm this placement is intended.
        for field in sum.attributes:
            type_name = str(field.type)
            assert type_name in runtime.BUILTIN_TYPES, type_name
            self.Emit("%s %s;" % (type_name, field.name), depth + 1)

    def VisitConstructor(self, cons, def_name, depth):
        """Emit a class for one variant; variants without fields get none."""
        if not cons.fields:
            return

        self.Emit("class %s : public %s {" % (cons.name, def_name), depth)
        self.Emit(" public:", depth)
        offset = 1  # for the ID
        for f in cons.fields:
            self.VisitField(f, cons.name, offset, depth + 1)
            offset += self.ref_width
        self.Emit("};", depth)
        self.Emit("", depth)

    def VisitProduct(self, product, name, depth):
        """Emit the class for a product type, one accessor per field."""
        class_name = '%s_t' % name

        self.Emit("class %s : public Obj {" % class_name, depth)
        self.Emit(" public:", depth)
        offset = 0
        for f in product.fields:
            self.VisitField(f, class_name, offset, depth + 1)
            offset += self.ref_width

        for field in product.attributes:
            # rudimentary attribute handling
            type_name = str(field.type)
            assert type_name in runtime.BUILTIN_TYPES, type_name
            self.Emit("%s %s;" % (type_name, field.name), depth + 1)
        self.Emit("};", depth)
        self.Emit("", depth)

    def VisitField(self, field, type_name, offset, depth):
        """Emit the accessor(s) for one field of class 'type_name'.

        Even though they are inline, some of them can't be in the class {},
        because static_cast<> requires inheritance relationships to be already
        declared.  We have to print all the classes first, then all the bodies
        that might use static_cast<>.

        http://stackoverflow.com/questions/5808758/why-is-a-static-cast-from-a-pointer-to-base-to-a-pointer-to-derived-invalid

        Args:
          field: ASDL field; its name, seq and opt attributes are read.
          type_name: C++ class name used to qualify out-of-line definitions.
          offset: byte offset of the field within the object.
          depth: indentation depth for lines emitted inside the class.
        """
        ctype = self._GetCppType(field)
        name = field.name
        subst = {
            'ctype': ctype,
            'name': name,
            'pointer_type': self.pointer_type,
            'offset': offset,
            # Unqualified inside "class {}"; out-of-line definitions use a
            # copy of this dict with 'Class::name' instead.
            'maybe_qual_name': name,
        }

        # How the value is decoded.  Order matters: it mirrors the precedence
        # of the checks (int/bool, then enum, then string, else object).
        if ctype in ('bool', 'int'):
            kind = 'int'
        elif ctype.endswith('_e') or ctype in self.enum_types:
            kind = 'enum'
        elif ctype == 'char*':
            kind = 'str'
        else:
            kind = 'obj'

        if field.seq:  # Array/repeated
            # For size accessor, follow the ref, and then it's the first
            # integer.
            self.Emit(
                'inline int %(name)s_size(const %(pointer_type)s* base) const {'
                % subst, depth)
            self.Emit('return Ref(base, %(offset)d).Int(0);' % subst,
                      depth + 1)
            self.Emit('}', depth)

            signature = self._ARRAY_SIGNATURE
            header = signature + ' {'
            prologue = self._ARRAY_OFFSET
            body = {
                'int': 'return Ref(base, %(offset)d).Int(a);',
                'enum': ('return static_cast<const %(ctype)s>('
                         'Ref(base, %(offset)d).Int(a));'),
                'str': 'return Ref(base, %(offset)d).Str(base, a);',
                # This static_cast<> (downcast) causes problems if put within
                # "class {}".
                'obj': ('return static_cast<const %(ctype)s>('
                        'Ref(base, %(offset)d).Ref(base, a));'),
            }[kind]

        else:  # not repeated
            signature = self._POINTER_SIGNATURE
            prologue = None
            if kind == 'int':
                header = self._SIMPLE_HEADER
                body = 'return Int(%(offset)d);'
            elif kind == 'enum':
                header = self._SIMPLE_HEADER
                body = 'return static_cast<const %(ctype)s>(Int(%(offset)d));'
            elif kind == 'str':
                header = signature + ' {'
                body = 'return Str(base, %(offset)d);'
            else:
                header = signature + ' {'
                if field.opt:
                    body = ('return static_cast<const %(ctype)s>('
                            'Optional(base, %(offset)d));')
                else:
                    body = ('return static_cast<const %(ctype)s>('
                            'Ref(base, %(offset)d));')

        if kind == 'obj':
            # Queue the qualified definition (depth 0 for bodies), and write
            # only the prototype inside the class.
            qualified = dict(
                subst, maybe_qual_name='%s::%s' % (type_name, name))
            self.footer.extend(visitor.FormatLines(header % qualified, 0))
            if prologue:
                self.footer.extend(visitor.FormatLines(prologue, 1))
            self.footer.extend(visitor.FormatLines(body % qualified, 1))
            self.footer.append('}\n\n')

            self.Emit((signature + ';') % subst, depth)
        else:
            self.Emit(header % subst, depth)
            if prologue:
                self.Emit(prologue, depth + 1)
            self.Emit(body % subst, depth + 1)
            self.Emit('}', depth)
|
| 290 |
|
| 291 |
|
def main(argv):
    """Command-line entry point: generate C++ for an ASDL schema.

    Usage: <prog> cpp SCHEMA_PATH

    The generated code is written to sys.stdout.

    Args:
      argv: full argument vector; argv[1] is the action and argv[2] the path
        of the ASDL schema.

    Raises:
      RuntimeError: if the action is missing or is not 'cpp', or if the schema
        path is missing.
    """
    if len(argv) < 2:
        raise RuntimeError('Action required')
    action = argv[1]

    # TODO: Also generate a switch/static_cast<> pretty printer in C++!  For
    # debugging.  Might need to detect cycles though.
    if action != 'cpp':
        raise RuntimeError('Invalid action %r' % action)

    # Report a missing path the same way as a missing action, instead of
    # letting an IndexError escape as a traceback.
    if len(argv) < 3:
        raise RuntimeError('Schema path required')
    schema_path = argv[2]

    # NOTE: This import can't be at the top level because osh/asdl_gen.py
    # depends on this gen_cpp.py module.  We should move all the main()
    # functions out of asdl/ and into command line tools.
    # NOTE(review): Id is not referenced below; the import is kept in case
    # importing core.meta has side effects -- confirm and remove if not.
    from core.meta import Id
    app_types = {'id': meta.UserType('id_kind_asdl', 'Id_t')}
    with open(schema_path) as input_f:
        module, type_lookup = front_end.LoadSchema(input_f, app_types)

    # TODO: gen_cpp.py should be a library and the application should add Id?
    # Or we should enable ASDL metaprogramming, and let Id be a metaprogrammed
    # simple sum type.

    f = sys.stdout

    # How do mutation of strings, arrays, etc. work?  Are they like C++
    # containers, or their own?  I think they mirror the oil language
    # semantics.
    # Every node should have a mirror.  MutableObj.  MutableRef (pointer).
    # MutableArithVar -- has std::string.  The mirrors are heap allocated.
    # All the mutable ones should support Dump()/Encode()?
    # You can just write more at the end... don't need to disturb existing
    # nodes?  Rewrite pointers.

    alignment = 4
    enc = encode.Params(alignment)
    d = {'pointer_type': enc.pointer_type}

    # Prelude: the Obj base class that every generated class derives from.
    f.write("""\
#include <cstdint>

class Obj {
 public:
  // Decode a 3 byte integer from little endian
  inline int Int(int n) const;

  inline const Obj& Ref(const %(pointer_type)s* base, int n) const;

  inline const Obj* Optional(const %(pointer_type)s* base, int n) const;

  // NUL-terminated
  inline const char* Str(const %(pointer_type)s* base, int n) const;

 protected:
  uint8_t bytes_[1]; // first is ID; rest are a payload
};

""" % d)

    # Id should be treated as an enum.
    c = ChainOfVisitors(
        ForwardDeclareVisitor(f),
        ClassDefVisitor(f, enc, type_lookup, enum_types=['Id']))
    c.VisitModule(module)

    # Definitions of the Obj decoding helpers declared in the prelude.
    f.write("""\
inline int Obj::Int(int n) const {
  return bytes_[n] + (bytes_[n+1] << 8) + (bytes_[n+2] << 16);
}

inline const Obj& Obj::Ref(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  return reinterpret_cast<const Obj&>(base[offset]);
}

inline const Obj* Obj::Optional(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  if (offset) {
    return reinterpret_cast<const Obj*>(base + offset);
  } else {
    return nullptr;
  }
}

inline const char* Obj::Str(const %(pointer_type)s* base, int n) const {
  int offset = Int(n);
  return reinterpret_cast<const char*>(base + offset);
}
""" % d)
    # uint32_t* and char*/Obj* aren't related, so we need to use
    # reinterpret_cast<>.
    # http://stackoverflow.com/questions/10151834/why-cant-i-static-cast-between-char-and-unsigned-char
|
| 388 |
|
| 389 |
|
# Script entry point: a RuntimeError from main() is reported on stderr as a
# one-line FATAL message and the process exits with status 1.
if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as err:
        sys.stderr.write('FATAL: %s\n' % err)
        sys.exit(1)
|