OILS / asdl / front_end.py View on Github | oilshell.org

514 lines, 295 significant
1"""front_end.py: Lexer and parser for the ASDL schema language."""
2from __future__ import print_function
3
4import re
5
6from asdl import ast
7from asdl.ast import (AST, Use, Module, TypeDecl, Constructor, Field, Sum,
8 SimpleSum, Product)
9from asdl.util import log
10
11_ = log
12
13_KEYWORDS = ['use', 'module', 'generate']
14
15_TOKENS = [
16 ('Keyword', ''),
17 ('Name', ''),
18
19 # For operators, the string matters
20 ('Equals', '='),
21 ('Comma', ','),
22 ('Question', '?'),
23 ('Pipe', '|'),
24 ('Asterisk', '*'),
25 ('LParen', '('),
26 ('RParen', ')'),
27 ('LBrace', '{'),
28 ('RBrace', '}'),
29 ('Percent', '%'),
30
31 # Oil addition for parameterized types.
32 ('LBracket', '['),
33 ('RBracket', ']'),
34
35 # - Start with Dict[string, bool].
36 # - List[string] is an alias for string*
37 #
38 # statically typed: Dict and List
39 # dynamically typed: dict and list
40]
41
42_TOKEN_STR = [name for name, _ in _TOKENS] # integer -> string like LParen
43_TOKEN_INT = {} # string like '(' -> integer
44
45
46class TokenKind(object):
47 """ASDL tokens.
48
49 TokenKind.LBrace = 5, etc.
50 """
51 pass
52
53
54for i, (name, val) in enumerate(_TOKENS):
55 setattr(TokenKind, name, i)
56 _TOKEN_INT[val] = i
57
58
59class Token(object):
60
61 def __init__(self, kind, value, lineno):
62 self.kind = kind
63 self.value = value
64 self.lineno = lineno
65
66
67class ASDLSyntaxError(Exception):
68
69 def __init__(self, msg, lineno=None):
70 self.msg = msg
71 self.lineno = lineno or '<unknown>'
72
73 def __str__(self):
74 return 'Syntax error on line {0.lineno}: {0.msg}'.format(self)
75
76
77def _Tokenize(f):
78 """Tokenize the given buffer.
79
80 Yield Token objects.
81 """
82 for lineno, line in enumerate(f, 1):
83 for m in re.finditer(r'\s*(\w+|--.*|#.*|.)', line.strip()):
84 c = m.group(1)
85 if c in _KEYWORDS:
86 yield Token(TokenKind.Keyword, c, lineno)
87
88 elif c[0].isalpha():
89 yield Token(TokenKind.Name, c, lineno)
90
91 elif c.startswith('--') or c.startswith('#'):
92 # ASDL comments start with --
93 # Added # comments like Python and shell
94 break
95
96 else:
97 # Operators
98 try:
99 op_kind = _TOKEN_INT[c]
100 except KeyError:
101 raise ASDLSyntaxError('Invalid operator %s' % c, lineno)
102 yield Token(op_kind, c, lineno)
103
104
105def _SumIsSimple(variant_list):
106 """Return True if a sum is a simple.
107
108 A sum is simple if its types have no fields, e.g. unaryop = Invert |
109 Not | UAdd | USub
110 """
111 for t in variant_list:
112 if t.fields or t.shared_type:
113 return False
114 return True
115
116
117_CODE_GEN_OPTIONS = [
118 'no_namespace_suffix', # Id.Foo instead of Id_e.Foo
119 'integers', # integer builtin_i instead of strongly typed builtin_e
120 'bit_set', # not implemented: 1 << n instead of n
121
122 # probably don't need this
123 # 'common_synthetic_field:left_tok',
124
125 # Put this type, and transitive closure of types it references, in the
126 # unique "first class variant" namespace, and generate type reflection.
127 'reflect_all_types',
128
129 # Squeeze and Freeze, with the number of bits as a option Hm the headers
130 # here still need type reflection. Probably OK.
131 'mirror_all_types:16',
132]
133
134
135class ASDLParser(object):
136 """Parser for ASDL files.
137
138 Create, then call the parse method on a buffer containing ASDL. This
139 is a simple recursive descent parser that uses _Tokenize for the
140 lexing.
141 """
142
143 def __init__(self):
144 self._tokenizer = None
145 self.cur_token = None
146
147 def parse(self, f):
148 """Parse the ASDL in the file and return an AST with a Module root."""
149 self._tokenizer = _Tokenize(f)
150 self._advance()
151 return self._parse_module()
152
153 def _parse_module(self):
154 """
155 type_decl : NAME (':' NAME) '=' compound_type
156 module : 'module' NAME '{' use* type_decl* '}'
157
158 We added:
159 : for code gen options
160 use for imports
161
162 alloc_members =
163 List
164 | Dict
165 | Struct
166 generate [bit_set]
167
168 -- color::Red, not color_e::Red or color_i::Red
169 color = Red | Green
170 generate [integers, no_sum_suffix]
171 """
172 if not self._at_keyword('module'):
173 raise ASDLSyntaxError(
174 'Expected "module" (found {})'.format(self.cur_token.value),
175 self.cur_token.lineno)
176 self._advance()
177 name = self._match(TokenKind.Name)
178 self._match(TokenKind.LBrace)
179
180 uses = []
181 while self._at_keyword('use'):
182 uses.append(self._parse_use())
183
184 defs = []
185 while self.cur_token.kind == TokenKind.Name:
186 typename = self._advance()
187 self._match(TokenKind.Equals)
188 type_ = self._parse_compound_type()
189 defs.append(TypeDecl(typename, type_))
190
191 self._match(TokenKind.RBrace)
192 return Module(name, uses, defs)
193
194 def _parse_use(self):
195 """Use: 'use' NAME+ '{' NAME+ '}'.
196
197 example: use frontend syntax { Token }
198
199 This means frontend/syntax.asdl.h :: Token
200 """
201 self._advance() # past 'use'
202 module_parts = []
203 while self.cur_token.kind == TokenKind.Name:
204 part = self._advance()
205 module_parts.append(part)
206
207 self._match(TokenKind.LBrace)
208
209 type_names = []
210 while self.cur_token.kind == TokenKind.Name:
211 t = self._advance()
212 type_names.append(t)
213 if self.cur_token.kind == TokenKind.RParen:
214 break
215 elif self.cur_token.kind == TokenKind.Comma:
216 self._advance()
217
218 self._match(TokenKind.RBrace)
219 #print('MOD %s' % module_parts)
220 return Use(module_parts, type_names)
221
222 def _parse_compound_type(self):
223 """Constructor : NAME fields? | NAME '%' NAME # shared variant.
224
225 compound_type : product
226 | constructor ('|' constructor)*
227 """
228 if self.cur_token.kind == TokenKind.LParen:
229 # If we see a (, it's a product
230 return self._parse_product()
231 else:
232 # Otherwise it's a sum. Look for ConstructorId
233 sumlist = []
234 while True:
235 cons_name = self._match(TokenKind.Name)
236
237 shared_type = None
238 fields = None
239 if self.cur_token.kind == TokenKind.LParen:
240 fields = self._parse_fields()
241 elif self.cur_token.kind == TokenKind.Percent:
242 self._advance()
243 shared_type = self._match(TokenKind.Name)
244 else:
245 pass
246
247 cons = Constructor(cons_name, shared_type, fields)
248 sumlist.append(cons)
249
250 if self.cur_token.kind != TokenKind.Pipe:
251 break
252 self._advance()
253 generate = self._parse_optional_generate()
254
255 # Additional validation
256 if generate is not None:
257 for g in generate:
258 if g not in _CODE_GEN_OPTIONS:
259 raise ASDLSyntaxError('Invalid code gen option %r' % g,
260 self.cur_token.lineno)
261
262 if _SumIsSimple(sumlist):
263 return SimpleSum(sumlist, generate)
264 else:
265 return Sum(sumlist, generate)
266
267 def _parse_type_expr(self):
268 """One or two params:
269
270 type_params : '[' type_expr ( ',' type_expr )* ']'
271
272 type_expr : NAME type_params? (''?' | '*')? # allow one suffix
273
274 NAME is validated against Optional, List, Dict afterward
275 """
276 type_name = self._match(TokenKind.Name)
277
278 # Accept Python-like naming!
279 if type_name == 'str':
280 type_name = 'string'
281
282 children = []
283 if self.cur_token.kind == TokenKind.LBracket:
284 self._advance()
285 children.append(self._parse_type_expr())
286 if self.cur_token.kind == TokenKind.Comma:
287 self._advance()
288 children.append(self._parse_type_expr())
289
290 self._match(TokenKind.RBracket)
291
292 if type_name in ('List', 'Optional'):
293 if len(children) != 1:
294 raise ASDLSyntaxError(
295 'Expected 1 type param to {}'.format(type_name),
296 self.cur_token.lineno)
297 elif type_name == 'Dict':
298 if len(children) != 2:
299 raise ASDLSyntaxError(
300 'Expected 2 type params to {}'.format(type_name),
301 self.cur_token.lineno)
302 else:
303 if len(children) != 0:
304 raise ASDLSyntaxError(
305 'Expected zero type params to {}'.format(type_name),
306 self.cur_token.lineno)
307
308 if len(children):
309 typ = ast.ParameterizedType(type_name, children)
310 else:
311 typ = ast.NamedType(type_name)
312
313 if self.cur_token.kind == TokenKind.Asterisk:
314 # string* is equivalent to List[string]
315 typ = ast.ParameterizedType('List', [typ])
316 self._advance()
317
318 elif self.cur_token.kind == TokenKind.Question:
319 # string* is equivalent to Optional[string]
320 typ = ast.ParameterizedType('Optional', [typ])
321 self._advance()
322
323 return typ
324
325 def _parse_fields(self):
326 """fields_inner: type_expr NAME ( ',' type_expr NAME )* ','?
327
328 fields : '(' fields_inner? ')'
329
330 Name Quantifier? should be changed to typename.
331 """
332 fields = []
333 self._match(TokenKind.LParen)
334 while self.cur_token.kind == TokenKind.Name:
335 typ = self._parse_type_expr()
336 field_name = self._match(TokenKind.Name)
337
338 fields.append(Field(typ, field_name))
339
340 if self.cur_token.kind == TokenKind.RParen:
341 break
342 elif self.cur_token.kind == TokenKind.Comma:
343 self._advance()
344
345 self._match(TokenKind.RParen)
346 return fields
347
348 def _parse_list(self):
349 """list_inner: NAME ( ',' NAME )* ','?
350
351 list : '[' list_inner? ']'
352 """
353 generate = []
354 self._match(TokenKind.LBracket)
355 while self.cur_token.kind == TokenKind.Name:
356 name = self._match(TokenKind.Name)
357
358 generate.append(name)
359
360 if self.cur_token.kind == TokenKind.RBracket:
361 break
362 elif self.cur_token.kind == TokenKind.Comma:
363 self._advance()
364
365 self._match(TokenKind.RBracket)
366 return generate
367
368 def _parse_optional_generate(self):
369 """Attributes = 'generate' list."""
370 if self._at_keyword('generate'):
371 self._advance()
372 return self._parse_list()
373 else:
374 return None
375
376 def _parse_product(self):
377 """Product: fields attributes?"""
378 return Product(self._parse_fields())
379
380 def _advance(self):
381 """Return current token; read next token into self.cur_token."""
382 cur_val = None if self.cur_token is None else self.cur_token.value
383 try:
384 self.cur_token = next(self._tokenizer)
385 except StopIteration:
386 self.cur_token = None
387 return cur_val
388
389 def _match(self, kind):
390 """The 'match' primitive of RD parsers.
391
392 * Verifies that the current token is of the given kind (kind can
393 be a tuple, in which the kind must match one of its members).
394 * Returns the value of the current token
395 * Reads in the next token
396
397 Args:
398 kind: A TokenKind, or a tuple of TokenKind
399 """
400 if self.cur_token.kind == kind:
401 value = self.cur_token.value
402 self._advance()
403 return value
404 else:
405 raise ASDLSyntaxError(
406 'Expected token {}, got {}'.format(_TOKEN_STR[kind],
407 self.cur_token.value),
408 self.cur_token.lineno)
409
410 def _at_keyword(self, keyword):
411 return (self.cur_token.kind == TokenKind.Keyword and
412 self.cur_token.value == keyword)
413
414
415_PRIMITIVE_TYPES = [
416 'string',
417 'int',
418 'BigInt',
419 'float',
420 'bool',
421
422 # 'any' is used:
423 # - for value.Obj in the the Oil expression evaluator. We're not doing any
424 # dynamic or static checking now.
425 'any',
426]
427
428
429def _ResolveType(typ, type_lookup):
430 # type: (AST, dict) -> None
431 """Recursively attach a 'resolved' field to AST nodes."""
432 if isinstance(typ, ast.NamedType):
433 if typ.name not in _PRIMITIVE_TYPES:
434 ast_node = type_lookup.get(typ.name)
435 if ast_node is None:
436 raise ASDLSyntaxError("Couldn't find type %r" % typ.name)
437 typ.resolved = ast_node
438
439 elif isinstance(typ, ast.ParameterizedType):
440 for child in typ.children:
441 _ResolveType(child, type_lookup)
442
443 if typ.type_name == 'Optional':
444 child = typ.children[0]
445 if isinstance(child, ast.NamedType):
446 if child.name in _PRIMITIVE_TYPES and child.name != 'string':
447 raise ASDLSyntaxError(
448 'Optional primitive type {} not allowed'.format(
449 child.name))
450
451 if child.resolved and isinstance(child.resolved,
452 ast.SimpleSum):
453 raise ASDLSyntaxError(
454 'Optional simple sum type {} not allowed'.format(
455 child.name))
456
457 else:
458 raise AssertionError()
459
460
461def _ResolveFields(field_ast_nodes, type_lookup):
462 """
463 Args:
464 type_lookup: Populated by name resolution
465 """
466 for field in field_ast_nodes:
467 _ResolveType(field.typ, type_lookup)
468
469
470def _ResolveModule(module, app_types):
471 """Name resolution for NamedType."""
472 # Types that fields are declared with: int, id, word_part, etc.
473 # Fields are NOT declared with Constructor names.
474 type_lookup = dict(app_types)
475
476 # Note: we don't actually load the type, and instead leave that to MyPy /
477 # C++. A consequence of this is TypeNameHeuristic().
478 for u in module.uses:
479 for type_name in u.type_names:
480 type_lookup[type_name] = u # type: ast.Use()
481
482 # NOTE: We need two passes because types can be mutually recursive, e.g.
483 # asdl/arith.asdl.
484
485 # First pass: collect declared types and make entries for them.
486 for d in module.dfns:
487 type_lookup[d.name] = d.value
488
489 # Second pass: add NamedType.resolved field
490 for d in module.dfns:
491 ast_node = d.value
492 if isinstance(ast_node, ast.Product):
493 #log('fields %s', ast_node.fields)
494 _ResolveFields(ast_node.fields, type_lookup)
495
496 elif isinstance(ast_node, ast.Sum):
497 for cons in ast_node.types:
498 _ResolveFields(cons.fields, type_lookup)
499
500 else:
501 raise AssertionError(ast_node)
502
503
504def LoadSchema(f, app_types, verbose=False):
505 """Returns an AST for the schema."""
506 p = ASDLParser()
507 schema_ast = p.parse(f)
508 if verbose:
509 import sys
510 schema_ast.Print(sys.stdout, 0)
511
512 # Make sure all the names are valid
513 _ResolveModule(schema_ast, app_types)
514 return schema_ast