# OILS / frontend / syntax.asdl
# (scraped from the GitHub viewer at oilshell.org)
# 680 lines, 294 significant
# Data types for the Oils AST, aka "Lossless Syntax Tree".
#
# Invariant: the source text can be reconstructed byte-for-byte from this tree.
# The test/lossless.sh suite verifies this.

# We usually try to preserve the physical order of the source in the ASDL
# fields. One exception is the order of redirects:
#
#   echo >out.txt hi
#   # versus
#   echo hi >out.txt

# Unrepresented:
# - let arithmetic (rarely used)
# - coprocesses  # one with arg and one without
# - select block
# - case fallthrough ;& and ;;&

# Possible refactorings:
#
#   # %CompoundWord as first class variant:
#   bool_expr = WordTest %CompoundWord | ...
#
#   # Can DoubleQuoted have a subset of parts compared with CompoundWord?
#   string_part = ...  # subset of word_part
#
# - Distinguish word_t with BracedTree vs. those without? seq_word_t?
module syntax
{
  use core value {
    value
  }

  # More efficient than the List[bool] pattern we've been using
  BoolParamBox = (bool b)
  IntParamBox = (int i)

  # core/main_loop.py
  parse_result = EmptyLine | Eof | Node(command cmd)

  # 'source' represents the location of a line / token.
  source =
    Interactive
  | Headless
  | Unused(str comment)  # completion and history never show parse errors?
  | CFlag
  | Stdin(str comment)

    # TODO: if it's not the main script, it's sourced, and you could provide
    # a chain of locations back to the sourced script!
    # MainFile(str path) or SourcedFile(str path, loc location)
  | MainFile(str path)
  | SourcedFile(str path, loc location)

    # code parsed from a word
    # used for 'eval', 'trap', 'printf', 'complete -W', etc.
  | ArgvWord(str what, loc location)

    # code parsed from the value of a variable
    # used for $PS1 $PROMPT_COMMAND
  | Variable(str var_name, loc location)

    # Point to the original variable reference
  | VarRef(Token orig_tok)

    # alias expansion (location of first word)
  | Alias(str argv0, loc argv0_loc)

    # 2 kinds of reparsing: backticks, and x+1 in a[x+1]=y
    # TODO: use this for eval_unsafe_arith instead of Variable
  | Reparsed(str what, Token left_token, Token right_token)

    # For --location-str
  | Synthetic(str s)

  SourceLine = (int line_num, str content, source src)

  # Ways to make Token 24 bytes:
  # - Introduce WideToken with the wide_tok.tval field, which we use with
  #   lexer.LazyStr()
  #   - It will be 32 bytes rather than 24
  # - loc = Token %Token | WideToken %WideToken
  # - Which tokens need to be big?
  #   - Id.Lit_{Chars,VarLike,...} - word_part.Literal
  #     - SingleQuoted does not store tokens; it stores a string
  #   - Id.Arith_VarLike - arith_expr.Var
  #   - Id.VSub_{DollarName,Number} - SimpleVarSub
  #   - Id.VSub_Name - BracedVarSub
  #   - Id.Expr_Name - expr.Var
  #
  # Instrumenting lexer.LazyVal() with histogram:
  #   22561 Id.Lit_Chars
  #    8366 Id.Lit_VarLike
  #    4681 Id.Lit_Colon
  #     287 Id.Lit_Slash
  #     164 Id.ControlFlow_Break
  #     158 Id.ControlFlow_Exit
  #      29 Id.ControlFlow_Continue
  #       3 Id.Lit_Comma
  #       2 Id.Lit_LBracket
  #       1 Id.Lit_LBrace

  # 36255 total tokens
  # 36255 tokens with LazyVal()
  # 13 unique token IDs
  #
  # This is ONLY word_::_EvalWordPart() -> word_part::Literal. So it does seem
  # a HANDFUL of syntax.asdl nodes can use WideToken, and we can use the TYPE
  # SYSTEM to distinguish them.
  #
  # In other words, Lexer always returns type Token, and then the parser may
  # "upgrade" to type WideToken when it knows it will need to store
  # word_part.Literal, SimpleVarSub, etc. This also means we can INLINE
  # SimpleVarSub %WideToken into word_part_t and expr_t.

  # Possible problem with WideToken: parse_ctx.trail.tokens is List[Token], and
  # WordParser._GetToken() appends to it. But we can just use TokenVal() to
  # create garbage there.

  # Pack id and length into 32 bits with special uint16 type.
  # TODO: This breaks ASDL pretty printing of Id type!

  # We transpose (id, col, length) -> (id, length, col) for C struct packing.

  # Note that ASDL generates:
  #   typedef int Id_t;
  # So uint16 id implies truncation. We happen to know there are less than
  # 2^16 IDs, but it's not checked. Yaks could check it.

  Token = (id id, uint16 length, int col, SourceLine? line, str? tval)

  WideToken = (id id, int length, int col, SourceLine? line, str? tval)

  # Slight ASDL bug: CompoundWord has to be defined before using it as a shared
  # variant. The _product_counter algorithm should be moved into a separate
  # tag-assigning pass, and shared between gen_python.py and gen_cpp.py.
  CompoundWord = (List[word_part] parts)

  # Source location for errors
  loc =
    Missing  # equivalent of runtime.NO_SPID
  | Token %Token
    # Very common case: argv arrays need original location
  | ArgWord %CompoundWord
  | WordPart(word_part p)
  | Word(word w)
  | Arith(arith_expr a)
    # e.g. for errexit blaming
  | Command(command c)
    # the location of a token that's too long
  | TokenTooLong(SourceLine line, id id, int length, int col)

  debug_frame =
    Main(str dollar0)
    # call_loc => BASH_LINENO
    # call_loc may be None with new --source flag?
  | Source(Token? call_tok, str source_name)
    # def_tok => BASH_SOURCE
    # call_loc may be None if invoked via RunFuncForCompletion?
  | Call(Token? call_tok, Token def_tok, str func_name)

  #
  # Shell language
  #

  bracket_op =
    WholeArray(id op_id)  # * or @
  | ArrayIndex(arith_expr expr)

  suffix_op =
    Nullary %Token  # ${x@Q} or ${!prefix@} (which also has prefix_op)
  | Unary(Token op, rhs_word arg_word)  # e.g. ${v:-default}
    # TODO: Implement YSH ${x|html} and ${x %.3f}
  | Static(Token tok, str arg)
  | PatSub(CompoundWord pat, rhs_word replace, id replace_mode, Token slash_tok)
    # begin is optional with ${array::1}
  | Slice(arith_expr? begin, arith_expr? length)

  BracedVarSub = (
      Token left,        # in dynamic ParseVarRef, same as name_tok
      Token token,       # location for the name
      str var_name,      # the name - TODO: remove this, use LazyStr() instead
      Token? prefix_op,  # prefix # or ! operators
      bracket_op? bracket_op,
      suffix_op? suffix_op,
      Token right        # in dynamic ParseVarRef, same as name_tok
  )

  # Variants:
  # - Look at left token ID for $'' c'' vs r'' '' e.g. Id.Left_DollarSingleQuote
  # - And """ and ''' e.g. Id.Left_TDoubleQuote
  DoubleQuoted = (Token left, List[word_part] parts, Token right)

  # Consider making str? sval LAZY, like lexer.LazyStr(tok)
  SingleQuoted = (Token left, str sval, Token right)

  # e.g. Id.VSub_QMark, Id.VSub_DollarName $foo with lexer.LazyStr()
  SimpleVarSub = (Token tok)

  CommandSub = (Token left_token, command child, Token right)

  # - can contain word.BracedTree
  # - no 'Token right' for now, doesn't appear to be used
  ShArrayLiteral = (Token left, List[word] words, Token right)

  # Unevaluated, typed arguments for func and proc.
  # Note that ...arg is expr.Spread.
  ArgList = (
      Token left, List[expr] pos_args,
      Token? semi_tok, List[NamedArg] named_args,
      Token? semi_tok2, expr? block_expr,
      Token right
  )

  AssocPair = (CompoundWord key, CompoundWord value)

  word_part =
    ShArrayLiteral %ShArrayLiteral
  | BashAssocLiteral(Token left, List[AssocPair] pairs, Token right)
  | Literal %Token
    # escaped case is separate so the evaluator doesn't have to check token ID
  | EscapedLiteral(Token token, str ch)
  | SingleQuoted %SingleQuoted
  | DoubleQuoted %DoubleQuoted
    # Could be SimpleVarSub %Token that's VSub_DollarName, but let's not
    # confuse it with the common word_part.Literal
  | SimpleVarSub %SimpleVarSub
  | BracedVarSub %BracedVarSub
  | ZshVarSub (Token left, CompoundWord ignored, Token right)
    # For command sub and process sub: $(...) <(...) >(...)
  | CommandSub %CommandSub
    # ~ or ~bob
  | TildeSub(Token left,  # always the tilde
             Token? name, str? user_name)
  | ArithSub(Token left, arith_expr anode, Token right)
    # {a,b,c}
  | BracedTuple(List[CompoundWord] words)
    # {1..10} or {-5..10..2} or {01..10} (leading zeros matter)
    # {a..f} or {a..f..2} or {a..f..-2}
    # the whole range is one Token,
  | BracedRange(Token blame_tok, id kind, str start, str end, int step)
    # extended globs are parsed statically, unlike globs
  | ExtGlob(Token op, List[CompoundWord] arms, Token right)
    # a regex group is similar to an extended glob part
  | BashRegexGroup(Token left, CompoundWord? child, Token right)

    # YSH word_part extensions

    # @myarray - Id.Lit_Splice (could be optimized to %Token)
  | Splice(Token blame_tok, str var_name)
    # $[d.key], etc.
  | ExprSub(Token left, expr child, Token right)

  # Use cases for Empty: RHS of 'x=', the argument in "${x:-}".
  # The latter is semantically necessary. (See osh/word_parse.py).
  # At runtime: RHS of 'declare x='.
  rhs_word = Empty | Compound %CompoundWord

  word =
    # Returns from WordParser, but not generally stored in LST
    Operator %Token
    # A Compound word can contain any word_part except the Braced*Part.
    # We could model this with another variant type but it incurs runtime
    # overhead and seems like overkill. Note that DoubleQuoted can't
    # contain a SingleQuoted, etc. either.
  | Compound %CompoundWord
    # For word sequences command.Simple, ShArrayLiteral, for_iter.Words
    # Could be its own type
  | BracedTree(List[word_part] parts)
    # For dynamic parsing of test aka [ - the string is already evaluated.
  | String(id id, str s, CompoundWord? blame_loc)

  # Note: the name 'foo' is derived from token value 'foo=' or 'foo+='
  sh_lhs =
    Name(Token left, str name)  # Lit_VarLike foo=
    # TODO: Could be Name %Token
  | IndexedName(Token left, str name, arith_expr index)
  | UnparsedIndex(Token left, str name, str index)  # for translation

  arith_expr =
    VarSub %Token  # e.g. $(( x )) Id.Arith_VarLike
  | Word %CompoundWord  # e.g. $(( 123'456'$y ))

  | UnaryAssign(id op_id, arith_expr child)
  | BinaryAssign(id op_id, arith_expr left, arith_expr right)

  | Unary(id op_id, arith_expr child)
    # TODO: op should be token, e.g. for divide by zero
  | Binary(Token op, arith_expr left, arith_expr right)
  | TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)

  bool_expr =
    WordTest(word w)  # e.g. [[ myword ]]
  | Binary(id op_id, word left, word right)
  | Unary(id op_id, word child)
  | LogicalNot(bool_expr child)
  | LogicalAnd(bool_expr left, bool_expr right)
  | LogicalOr(bool_expr left, bool_expr right)

  redir_loc =
    Fd(int fd) | VarName(str name)

  redir_param =
    Word %CompoundWord
  | HereDoc(word here_begin,  # e.g. EOF or 'EOF'
            Token? here_end_tok,  # Token consisting of the whole line
                                  # It's always filled in AFTER creation, but
                                  # temporarily so optional
            List[word_part] stdin_parts  # one for each line
    )

  Redir = (Token op, redir_loc loc, redir_param arg)

  assign_op = Equal | PlusEqual
  AssignPair = (Token left, sh_lhs lhs, assign_op op, rhs_word rhs)
  # TODO: could put Id.Lit_VarLike foo= into LazyStr() with -1 slice
  EnvPair = (Token left, str name, rhs_word val)

  condition =
    Shell(List[command] commands)  # if false; true; then echo hi; fi
  | YshExpr(expr e)  # if (x > 0) { echo hi }
    # TODO: add more specific blame location

  # Each arm tests one word against multiple words
  # shell:  *.cc|*.h) echo C++ ;;
  # YSH:    *.cc|*.h { echo C++ }
  #
  # Three location tokens:
  # 1. left   - shell has ( or *.cc  ysh has *.cc
  # 2. middle - shell has )          ysh has {
  # 3. right  - shell has optional ;; ysh has required }
  #
  # For YSH typed case, left can be ( and /
  # And case_pat may contain more details
  CaseArm = (
      Token left, pat pattern, Token middle, List[command] action,
      Token? right
  )

  # The argument to match against in a case command
  # In YSH-style case commands we match against an `expr`, but in sh-style case
  # commands we match against a word.
  case_arg =
    Word(word w)
  | YshExpr(expr e)

  EggexFlag = (bool negated, Token flag)

  # canonical_flags can be compared for equality. This is needed to splice
  # eggexes correctly, e.g. / 'abc' @pat ; i /
  Eggex = (
      Token left, re regex, List[EggexFlag] flags, Token? trans_pref,
      str? canonical_flags)

  pat =
    Else
  | Words(List[word] words)
  | YshExprs(List[expr] exprs)
  | Eggex %Eggex

  # Each if arm starts with either an "if" or "elif" keyword
  # In YSH, the then keyword is not used (replaced by braces {})
  IfArm = (
      Token keyword, condition cond, Token? then_kw, List[command] action,
      # then_tok used in ysh-ify
      Token? then_tok)

  for_iter =
    Args  # for x; do echo $x; done  # implicit "$@"
  | Words(List[word] words)  # for x in 'foo' *.py { echo $x }
    # like ShArrayLiteral, but no location for %(
  | YshExpr(expr e, Token blame)  # for x in (mylist) { echo $x }

  BraceGroup = (
      Token left, Token? doc_token, List[command] children,
      List[Redir] redirects, Token right
  )

  Param = (Token blame_tok, str name, TypeExpr? type, expr? default_val)
  RestParam = (Token blame_tok, str name)

  ParamGroup = (List[Param] params, RestParam? rest_of)

  # 'open' is for proc p { }; closed is for proc p () { }
  proc_sig =
    Open
  | Closed(ParamGroup? word, ParamGroup? positional, ParamGroup? named,
           Param? block_param)

  Proc = (Token keyword, Token name, proc_sig sig, command body)

  Func = (
      Token keyword, Token name,
      ParamGroup? positional, ParamGroup? named,
      command body
  )

  # Retain references to lines
  LiteralBlock = (BraceGroup brace_group, List[SourceLine] lines)

  # Represents all these case: s=1  s+=1  s[x]=1  ...
  ParsedAssignment = (Token? left, Token? close, int part_offset, CompoundWord w)

  command =
    NoOp
  | Simple(Token? blame_tok,  # TODO: make required (BracedTuple?)
           List[EnvPair] more_env,
           List[word] words, List[Redir] redirects,
           ArgList? typed_args, LiteralBlock? block,
           # do_fork is semantic, not syntactic
           bool do_fork)
    # This doesn't technically belong in the LST, but it's convenient for
    # execution
  | ExpandedAlias(command child, List[Redir] redirects, List[EnvPair] more_env)
  | Sentence(command child, Token terminator)
    # Represents "bare assignment"
    # Token left is redundant with pairs[0].left
  | ShAssignment(Token left, List[AssignPair] pairs, List[Redir] redirects)
  | Retval(Token keyword, expr val)
  | ControlFlow(Token keyword, word? arg_word)
    # ops are | |&
  | Pipeline(Token? negated, List[command] children, List[Token] ops)
    # ops are && ||
  | AndOr(List[command] children, List[Token] ops)
    # Part of for, while, until (but not if, case, ShFunction). No redirects.
  | DoGroup(Token left, List[command] children, Token right)
    # A brace group is a compound command, with redirects.
  | BraceGroup %BraceGroup
    # Contains a single child, like CommandSub
  | Subshell(Token left, command child, Token right, List[Redir] redirects)
  | DParen(Token left, arith_expr child, Token right, List[Redir] redirects)
  | DBracket(Token left, bool_expr expr, Token right, List[Redir] redirects)
    # up to 3 iteration variables
  | ForEach(Token keyword, List[str] iter_names, for_iter iterable,
            Token? semi_tok, command body, List[Redir] redirects)
    # C-style for loop. Any of the 3 expressions can be omitted.
    # Note: body is required, but only optional here because of initialization
    # order.
  | ForExpr(Token keyword, arith_expr? init, arith_expr? cond,
            arith_expr? update, command? body, List[Redir] redirects)
  | WhileUntil(Token keyword, condition cond, command body, List[Redir] redirects)
  | If(Token if_kw, List[IfArm] arms, Token? else_kw, List[command] else_action,
       Token? fi_kw, List[Redir] redirects)
  | Case(Token case_kw, case_arg to_match, Token arms_start, List[CaseArm] arms,
         Token arms_end, List[Redir] redirects)
    # The keyword is optional in the case of bash-style functions
    # (ie. "foo() { ... }") which do not have one.
  | ShFunction(Token? keyword, Token name_tok, str name, command body)
  | TimeBlock(Token keyword, command pipeline)
    # Some nodes optimize it out as List[command], but we use CommandList for
    # 1. the top level
    # 2. ls ; ls & ls  (same line)
    # 3. CommandSub  # single child that's a CommandList
    # 4. Subshell  # single child that's a CommandList
  | CommandList(List[command] children)

    # YSH command constructs

    # var, const.
    # - Keyword is None for hay blocks
    # - RHS is None, for use with value.Place
    # - TODO: consider using BareDecl
  | VarDecl(Token? keyword, List[NameType] lhs, expr? rhs)

    # this can behave like 'var', can be desugared
  | BareDecl(Token lhs, expr rhs)

    # setvar, maybe 'auto' later
  | Mutation(Token keyword, List[y_lhs] lhs, Token op, expr rhs)
    # = keyword
  | Expr(Token keyword, expr e)
  | Proc %Proc
  | Func %Func

  #
  # Glob representation, for converting ${x//} to extended regexes.
  #

  # Example: *.[ch] is:
  #   GlobOp(<Glob_Star '*'>),
  #   GlobLit(Glob_OtherLiteral, '.'),
  #   CharClass(False, ['ch'])  # from Glob_CleanLiterals token

  glob_part =
    Literal(id id, str s)
  | Operator(id op_id)  # * or ?
  | CharClass(bool negated, List[str] strs)

  # Char classes are opaque for now. If we ever need them:
  # - Collating symbols are [. .]
  # - Equivalence classes are [=

  printf_part =
    Literal %Token
    # flags are 0 hyphen space + #
    # type is 's' for %s, etc.
  | Percent(List[Token] flags, Token? width, Token? precision, Token type)

  #
  # YSH Language
  #
  # Copied and modified from Python-3.7/Parser/Python.asdl !

  expr_context = Load | Store | Del | AugLoad | AugStore | Param

  # Type expressions: Int  List[Int]  Dict[Str, Any]
  # Do we have Func[Int, Int => Int] ? I guess we can parse that into this
  # system.
  TypeExpr = (Token tok, str name, List[TypeExpr] params)

  # LHS bindings in var/const, and eggex
  NameType = (Token left, str name, TypeExpr? typ)

  # TODO: Inline this into GenExp and ListComp? Just use a flag there?
  Comprehension = (List[NameType] lhs, expr iter, expr? cond)

  # Named arguments supplied to call. Token is null for f(; ...named).
  NamedArg = (Token? name, expr value)

  # Subscripts are lists of expressions
  #   a[:i, n]  (we don't have matrices, but we have data frames)
  Subscript = (Token left, expr obj, expr index)

  # Attributes are obj.attr, d->key, name::scope,
  Attribute = (expr obj, Token op, Token attr, str attr_name, expr_context ctx)

  y_lhs =
    Var %Token  # Id.Expr_Name
  | Subscript %Subscript
  | Attribute %Attribute

  place_op =
    # &a[i+1]
    Subscript(Token op, expr index)
    # &d.mykey
  | Attribute(Token op, Token attr)

  expr =
    Var(Token left, str name)  # a variable name to evaluate
    # Constants are typically Null, Bool, Int, Float
    # and also Str for key in {key: 42}
    # But string literals are SingleQuoted or DoubleQuoted
    # Python uses Num(object n), which doesn't respect our "LST" invariant.
  | Const(Token c, value val)

    # read(&x)  json read (&x[0])
  | Place(Token blame_tok, str var_name, place_op* ops)

    # :| one 'two' "$three" |
  | ShArrayLiteral %ShArrayLiteral

    # / d+ ; ignorecase; %python /
  | Eggex %Eggex

    # $name is not an expr, but $? is, e.g. Id.VSub_QMark
  | SimpleVarSub %SimpleVarSub
  | BracedVarSub %BracedVarSub
  | CommandSub %CommandSub
  | SingleQuoted %SingleQuoted
  | DoubleQuoted %DoubleQuoted

  | Literal(expr inner)
  | Lambda(List[NameType] params, expr body)

  | Unary(Token op, expr child)
  | Binary(Token op, expr left, expr right)
    # x < 4 < 3 and (x < 4) < 3
  | Compare(expr left, List[Token] ops, List[expr] comparators)
  | FuncCall(expr func, ArgList args)

    # TODO: Need a representation for method call. We don't just want
    # Attribute() and then Call()

  | IfExp(expr test, expr body, expr orelse)
  | Tuple(Token left, List[expr] elts, expr_context ctx)

  | List(Token left, List[expr] elts, expr_context ctx)
  | Dict(Token left, List[expr] keys, List[expr] values)
    # For the values in {n1, n2}
  | Implicit

  | ListComp(Token left, expr elt, List[Comprehension] generators)
    # not implemented
  | DictComp(Token left, expr key, expr value, List[Comprehension] generators)
  | GeneratorExp(expr elt, List[Comprehension] generators)

    # Ranges are written 1:2, with first class expression syntax. There is no
    # step as in Python. Use range(0, 10, step=2) for that.
  | Range(expr lower, Token op, expr upper)

    # Slices occur within [] only. Unlike ranges, the start/end can be
    # implicit. Like ranges, denote a step with slice(0, 10, step=2).
    #   a[3:]   a[:i]
  | Slice(expr? lower, Token op, expr? upper)

  | Subscript %Subscript
  | Attribute %Attribute

    # Ellipsis is like 'Starred' within Python, which are valid on the LHS in
    # Python for unpacking, and within list literals for splicing.
    # (Starred is NOT used for {k:v, **a}. That used a blank "keys"
    # attribute.)

    # I think we can use { **pairs } like Python
  | Spread(Token left, expr child)

  #
  # Regex Language (Eggex)
  #

  # e.g. alnum digit
  PosixClass = (Token? negated, str name)
  # e.g. d w s
  PerlClass = (Token? negated, str name)

  # Char Sets and Ranges both use Char Codes
  # with u_braced == true : \u{ff}
  # with u_braced == false: \xff  \\  'a'  a  '0'  0
  # ERE doesn't make a distinction, but compiling to Python/PCRE can use it
  CharCode = (Token blame_tok, int i, bool u_braced)
  CharRange = (CharCode start, CharCode end)

  # Note: .NET has && in character classes, making it a recursive language

  class_literal_term =
    PosixClass %PosixClass
  | PerlClass %PerlClass
  | CharRange %CharRange
  | CharCode %CharCode

  | SingleQuoted %SingleQuoted
    # @chars
  | Splice(Token name, str var_name)  # could be Splice %Token

  # evaluated version of class_literal_term (could be in runtime.asdl)
  char_class_term =
    PosixClass %PosixClass
  | PerlClass %PerlClass

  | CharRange %CharRange
    # For [ \x00 \\ ]
  | CharCode %CharCode

  # NOTE: modifier is unused now, can represent L or P
  re_repeat =
    Op %Token  # + * ? or Expr_DecInt for x{3}
  | Range(Token? left, str lower, str upper, Token? right)  # dot{1,2}
    # Haven't implemented the modifier, e.g. x{+ P}
    # | Num(Token times, id modifier)
    # | Range(Token? lower, Token? upper, id modifier)

  re =
    Primitive(Token blame_tok, id id)  # . ^ $ dot %start %end
  | PosixClass %PosixClass
  | PerlClass %PerlClass
    # syntax [ $x \n ]
  | CharClassLiteral(bool negated, List[class_literal_term] terms)
    # evaluated [ 'abc' \n ]
  | CharClass(bool negated, List[char_class_term] terms)

    # @D
  | Splice(Token name, str var_name)  # TODO: Splice %Token

  | SingleQuoted %SingleQuoted

    # Compound:
  | Repeat(re child, re_repeat op)
  | Seq(List[re] children)
  | Alt(List[re] children)

  | Group(re child)
    # convert_func is filled in on evaluation
    # TODO: name and func_name can be expanded to strings
  | Capture(re child, Token? name, Token? func_name)
  | Backtracking(bool negated, Token name, re child)

    # \u{ff} is parsed as this, but SingleQuoted also evaluates to it
  | LiteralChars(Token blame_tok, str s)
}