# OILS / frontend / syntax.asdl
# (scraped from the GitHub viewer at oilshell.org)
# 680 lines, 294 significant
# Data types for the Oils AST, aka "Lossless Syntax Tree".
#
# Invariant: the source text can be reconstructed byte-for-byte from this tree.
# The test/lossless.sh suite verifies this.

# We usually try to preserve the physical order of the source in the ASDL
# fields. One exception is the order of redirects:
#
#   echo >out.txt hi
#   # versus
#   echo hi >out.txt

# Unrepresented:
# - let arithmetic (rarely used)
# - coprocesses  # one with arg and one without
# - select block
# - case fallthrough ;& and ;;&

# Possible refactorings:
#
#   # %CompoundWord as first class variant:
#   bool_expr = WordTest %CompoundWord | ...
#
#   # Can DoubleQuoted have a subset of parts compared with CompoundWord?
#   string_part = ...  # subset of word_part
#
# - Distinguish word_t with BracedTree vs. those without? seq_word_t?
module syntax
{
  use core value {
    value
  }

  # More efficient than the List[bool] pattern we've been using
  BoolParamBox = (bool b)
  IntParamBox = (int i)

  # core/main_loop.py
  parse_result = EmptyLine | Eof | Node(command cmd)

  # 'source' represents the location of a line / token.
  source =
    Interactive
  | Headless
  | Unused(str comment)  # completion and history never show parse errors?
  | CFlag
  | Stdin(str comment)

    # TODO: if it's not the main script, it's sourced, and you could provide
    # a chain of locations back to the sourced script!
    # MainFile(str path) or SourcedFile(str path, loc location)
  | MainFile(str path)
  | SourcedFile(str path, loc location)

    # code parsed from a word
    # used for 'eval', 'trap', 'printf', 'complete -W', etc.
  | ArgvWord(str what, loc location)

    # code parsed from the value of a variable
    # used for $PS1 $PROMPT_COMMAND
  | Variable(str var_name, loc location)

    # Point to the original variable reference
  | VarRef(Token orig_tok)

    # alias expansion (location of first word)
  | Alias(str argv0, loc argv0_loc)

    # 2 kinds of reparsing: backticks, and x+1 in a[x+1]=y
    # TODO: use this for eval_unsafe_arith instead of Variable
  | Reparsed(str what, Token left_token, Token right_token)

    # For --location-str
  | Synthetic(str s)

  SourceLine = (int line_num, str content, source src)

  # Ways to make Token 24 bytes:
  # - Introduce WideToken with the wide_tok.tval field, which we use with
  #   lexer.LazyStr()
  #   - It will be 32 bytes rather than 24
  # - loc = Token %Token | WideToken %WideToken
  # - Which tokens need to be big?
  #   - Id.Lit_{Chars,VarLike,...} - word_part.Literal
  #     - SingleQuoted does not store tokens; it stores a string
  #   - Id.Arith_VarLike - arith_expr.Var
  #   - Id.VSub_{DollarName,Number} - SimpleVarSub
  #   - Id.VSub_Name - BracedVarSub
  #   - Id.Expr_Name - expr.Var
  #
  # Instrumenting lexer.LazyVal() with histogram:
  #   22561 Id.Lit_Chars
  #    8366 Id.Lit_VarLike
  #    4681 Id.Lit_Colon
  #     287 Id.Lit_Slash
  #     164 Id.ControlFlow_Break
  #     158 Id.ControlFlow_Exit
  #      29 Id.ControlFlow_Continue
  #       3 Id.Lit_Comma
  #       2 Id.Lit_LBracket
  #       1 Id.Lit_LBrace

  # 36255 total tokens
  # 36255 tokens with LazyVal()
  # 13 unique token IDs
  #
  # This is ONLY word_::_EvalWordPart() -> word_part::Literal. So it does seem
  # a HANDFUL of syntax.asdl nodes can use WideToken, and we can use the TYPE
  # SYSTEM to distinguish them.
  #
  # In other words, Lexer always returns type Token, and then the parser may
  # "upgrade" to type WideToken when it knows it will need to store
  # word_part.Literal, SimpleVarSub, etc. This also means we can INLINE
  # SimpleVarSub %WideToken into word_part_t and expr_t.

  # Possible problem with WideToken: parse_ctx.trail.tokens is List[Token], and
  # WordParser._GetToken() appends to it. But we can just use TokenVal() to
  # create garbage there.

  # Pack id and length into 32 bits with special uint16 type.
  # TODO: This breaks ASDL pretty printing of Id type!

  # We transpose (id, col, length) -> (id, length, col) for C struct packing.

  # Note that ASDL generates:
  #   typedef int Id_t;
  # So uint16 id implies truncation. We happen to know there are less than
  # 2^16 IDs, but it's not checked. Yaks could check it.

  Token = (id id, uint16 length, int col, SourceLine? line, str? tval)

  WideToken = (id id, int length, int col, SourceLine? line, str? tval)

  # Slight ASDL bug: CompoundWord has to be defined before using it as a shared
  # variant. The _product_counter algorithm should be moved into a separate
  # tag-assigning pass, and shared between gen_python.py and gen_cpp.py.
  CompoundWord = (List[word_part] parts)

  # Source location for errors
  loc =
    Missing  # equivalent of runtime.NO_SPID
  | Token %Token
    # Very common case: argv arrays need original location
  | ArgWord %CompoundWord
  | WordPart(word_part p)
  | Word(word w)
  | Arith(arith_expr a)
    # e.g. for errexit blaming
  | Command(command c)
    # the location of a token that's too long
  | TokenTooLong(SourceLine line, id id, int length, int col)

  debug_frame =
    Main(str dollar0)
    # call_loc => BASH_LINENO
    # call_loc may be None with new --source flag?
  | Source(Token? call_tok, str source_name)
    # def_tok => BASH_SOURCE
    # call_loc may be None if invoked via RunFuncForCompletion?
  | Call(Token? call_tok, Token def_tok, str func_name)

  #
  # Shell language
  #

  bracket_op =
    WholeArray(id op_id)  # * or @
  | ArrayIndex(arith_expr expr)

  suffix_op =
    Nullary %Token  # ${x@Q} or ${!prefix@} (which also has prefix_op)
  | Unary(Token op, rhs_word arg_word)  # e.g. ${v:-default}
    # TODO: Implement YSH ${x|html} and ${x %.3f}
  | Static(Token tok, str arg)
  | PatSub(CompoundWord pat, rhs_word replace, id replace_mode, Token slash_tok)
    # begin is optional with ${array::1}
  | Slice(arith_expr? begin, arith_expr? length)

  BracedVarSub = (
      Token left,        # in dynamic ParseVarRef, same as name_tok
      Token token,       # location for the name
      str var_name,      # the name - TODO: remove this, use LazyStr() instead
      Token? prefix_op,  # prefix # or ! operators
      bracket_op? bracket_op,
      suffix_op? suffix_op,
      Token right        # in dynamic ParseVarRef, same as name_tok
  )

  # Variants:
  # - Look at left token ID for $'' c'' vs r'' '' e.g. Id.Left_DollarSingleQuote
  # - And """ and ''' e.g. Id.Left_TDoubleQuote
  DoubleQuoted = (Token left, List[word_part] parts, Token right)

  # Consider making str? sval LAZY, like lexer.LazyStr(tok)
  SingleQuoted = (Token left, str sval, Token right)

  # e.g. Id.VSub_QMark, Id.VSub_DollarName $foo with lexer.LazyStr()
  SimpleVarSub = (Token tok)

  CommandSub = (Token left_token, command child, Token right)

  # - can contain word.BracedTree
  # - no 'Token right' for now, doesn't appear to be used
  ShArrayLiteral = (Token left, List[word] words, Token right)

  # Unevaluated, typed arguments for func and proc.
  # Note that ...arg is expr.Spread.
  ArgList = (
      Token left, List[expr] pos_args,
      Token? semi_tok, List[NamedArg] named_args,
      Token? semi_tok2, expr? block_expr,
      Token right
  )

  AssocPair = (CompoundWord key, CompoundWord value)

  word_part =
    ShArrayLiteral %ShArrayLiteral
  | BashAssocLiteral(Token left, List[AssocPair] pairs, Token right)
  | Literal %Token
    # escaped case is separate so the evaluator doesn't have to check token ID
  | EscapedLiteral(Token token, str ch)
  | SingleQuoted %SingleQuoted
  | DoubleQuoted %DoubleQuoted
    # Could be SimpleVarSub %Token that's VSub_DollarName, but let's not
    # confuse it with the common word_part.Literal
  | SimpleVarSub %SimpleVarSub
  | BracedVarSub %BracedVarSub
  | ZshVarSub (Token left, CompoundWord ignored, Token right)
    # For command sub and process sub: $(...) <(...) >(...)
  | CommandSub %CommandSub
    # ~ or ~bob
  | TildeSub(Token left,  # always the tilde
             Token? name, str? user_name)
  | ArithSub(Token left, arith_expr anode, Token right)
    # {a,b,c}
  | BracedTuple(List[CompoundWord] words)
    # {1..10} or {-5..10..2} or {01..10} (leading zeros matter)
    # {a..f} or {a..f..2} or {a..f..-2}
    # the whole range is one Token,
  | BracedRange(Token blame_tok, id kind, str start, str end, int step)
    # extended globs are parsed statically, unlike globs
  | ExtGlob(Token op, List[CompoundWord] arms, Token right)
    # a regex group is similar to an extended glob part
  | BashRegexGroup(Token left, CompoundWord? child, Token right)

    # YSH word_part extensions

    # @myarray - Id.Lit_Splice (could be optimized to %Token)
  | Splice(Token blame_tok, str var_name)
    # $[d.key], etc.
  | ExprSub(Token left, expr child, Token right)

  # Use cases for Empty: RHS of 'x=', the argument in "${x:-}".
  # The latter is semantically necessary. (See osh/word_parse.py).
  # At runtime: RHS of 'declare x='.
  rhs_word = Empty | Compound %CompoundWord

  word =
    # Returns from WordParser, but not generally stored in LST
    Operator %Token
    # A Compound word can contain any word_part except the Braced*Part.
    # We could model this with another variant type but it incurs runtime
    # overhead and seems like overkill. Note that DoubleQuoted can't
    # contain a SingleQuoted, etc. either.
  | Compound %CompoundWord
    # For word sequences command.Simple, ShArrayLiteral, for_iter.Words
    # Could be its own type
  | BracedTree(List[word_part] parts)
    # For dynamic parsing of test aka [ - the string is already evaluated.
  | String(id id, str s, CompoundWord? blame_loc)

  # Note: the name 'foo' is derived from token value 'foo=' or 'foo+='
  sh_lhs =
    Name(Token left, str name)  # Lit_VarLike foo=
    # TODO: Could be Name %Token
  | IndexedName(Token left, str name, arith_expr index)
  | UnparsedIndex(Token left, str name, str index)  # for translation

  arith_expr =
    VarSub %Token  # e.g. $(( x )) Id.Arith_VarLike
  | Word %CompoundWord  # e.g. $(( 123'456'$y ))

  | UnaryAssign(id op_id, arith_expr child)
  | BinaryAssign(id op_id, arith_expr left, arith_expr right)

  | Unary(id op_id, arith_expr child)
    # TODO: op should be token, e.g. for divide by zero
  | Binary(Token op, arith_expr left, arith_expr right)
  | TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)

  bool_expr =
    WordTest(word w)  # e.g. [[ myword ]]
  | Binary(id op_id, word left, word right)
  | Unary(id op_id, word child)
  | LogicalNot(bool_expr child)
  | LogicalAnd(bool_expr left, bool_expr right)
  | LogicalOr(bool_expr left, bool_expr right)

  redir_loc =
    Fd(int fd) | VarName(str name)

  redir_param =
    Word %CompoundWord
  | HereDoc(word here_begin,  # e.g. EOF or 'EOF'
            Token? here_end_tok,  # Token consisting of the whole line
                                  # It's always filled in AFTER creation, but
                                  # temporarily so optional
            List[word_part] stdin_parts  # one for each line
    )

  Redir = (Token op, redir_loc loc, redir_param arg)

  assign_op = Equal | PlusEqual
  AssignPair = (Token left, sh_lhs lhs, assign_op op, rhs_word rhs)
  # TODO: could put Id.Lit_VarLike foo= into LazyStr() with -1 slice
  EnvPair = (Token left, str name, rhs_word val)

  condition =
    Shell(List[command] commands)  # if false; true; then echo hi; fi
  | YshExpr(expr e)  # if (x > 0) { echo hi }
    # TODO: add more specific blame location

  # Each arm tests one word against multiple words
  # shell:  *.cc|*.h) echo C++ ;;
  # YSH:    *.cc|*.h { echo C++ }
  #
  # Three location tokens:
  # 1. left   - shell has ( or *.cc  ysh has *.cc
  # 2. middle - shell has )          ysh has {
  # 3. right  - shell has optional ;; ysh has required }
  #
  # For YSH typed case, left can be ( and /
  # And case_pat may contain more details
  CaseArm = (
      Token left, pat pattern, Token middle, List[command] action,
      Token? right
  )

  # The argument to match against in a case command
  # In YSH-style case commands we match against an `expr`, but in sh-style case
  # commands we match against a word.
  case_arg =
    Word(word w)
  | YshExpr(expr e)

  EggexFlag = (bool negated, Token flag)

  # canonical_flags can be compared for equality. This is needed to splice
  # eggexes correctly, e.g. / 'abc' @pat ; i /
  Eggex = (
      Token left, re regex, List[EggexFlag] flags, Token? trans_pref,
      str? canonical_flags)

  pat =
    Else
  | Words(List[word] words)
  | YshExprs(List[expr] exprs)
  | Eggex %Eggex

  # Each if arm starts with either an "if" or "elif" keyword
  # In YSH, the then keyword is not used (replaced by braces {})
  IfArm = (
      Token keyword, condition cond, Token? then_kw, List[command] action,
      # then_tok used in ysh-ify
      Token? then_tok)

  for_iter =
    Args  # for x; do echo $x; done  # implicit "$@"
  | Words(List[word] words)  # for x in 'foo' *.py { echo $x }
    # like ShArrayLiteral, but no location for %(
  | YshExpr(expr e, Token blame)  # for x in (mylist) { echo $x }

  BraceGroup = (
      Token left, Token? doc_token, List[command] children,
      List[Redir] redirects, Token right
  )

  Param = (Token blame_tok, str name, TypeExpr? type, expr? default_val)
  RestParam = (Token blame_tok, str name)

  ParamGroup = (List[Param] params, RestParam? rest_of)

  # 'open' is for proc p { }; closed is for proc p () { }
  proc_sig =
    Open
  | Closed(ParamGroup? word, ParamGroup? positional, ParamGroup? named,
           Param? block_param)

  Proc = (Token keyword, Token name, proc_sig sig, command body)

  Func = (
      Token keyword, Token name,
      ParamGroup? positional, ParamGroup? named,
      command body
  )

  # Retain references to lines
  LiteralBlock = (BraceGroup brace_group, List[SourceLine] lines)

  # Represents all these case: s=1  s+=1  s[x]=1  ...
  ParsedAssignment = (Token? left, Token? close, int part_offset, CompoundWord w)

  command =
    NoOp
  | Simple(Token? blame_tok,  # TODO: make required (BracedTuple?)
           List[EnvPair] more_env,
           List[word] words, List[Redir] redirects,
           ArgList? typed_args, LiteralBlock? block,
           # do_fork is semantic, not syntactic
           bool do_fork)
    # This doesn't technically belong in the LST, but it's convenient for
    # execution
  | ExpandedAlias(command child, List[Redir] redirects, List[EnvPair] more_env)
  | Sentence(command child, Token terminator)
    # Represents "bare assignment"
    # Token left is redundant with pairs[0].left
  | ShAssignment(Token left, List[AssignPair] pairs, List[Redir] redirects)
  | Retval(Token keyword, expr val)
  | ControlFlow(Token keyword, word? arg_word)
    # ops are | |&
  | Pipeline(Token? negated, List[command] children, List[Token] ops)
    # ops are && ||
  | AndOr(List[command] children, List[Token] ops)
    # Part of for, while, until (but not if, case, ShFunction). No redirects.
  | DoGroup(Token left, List[command] children, Token right)
    # A brace group is a compound command, with redirects.
  | BraceGroup %BraceGroup
    # Contains a single child, like CommandSub
  | Subshell(Token left, command child, Token right, List[Redir] redirects)
  | DParen(Token left, arith_expr child, Token right, List[Redir] redirects)
  | DBracket(Token left, bool_expr expr, Token right, List[Redir] redirects)
    # up to 3 iteration variables
  | ForEach(Token keyword, List[str] iter_names, for_iter iterable,
            Token? semi_tok, command body, List[Redir] redirects)
    # C-style for loop. Any of the 3 expressions can be omitted.
    # Note: body is required, but only optional here because of initialization
    # order.
  | ForExpr(Token keyword, arith_expr? init, arith_expr? cond,
            arith_expr? update, command? body, List[Redir] redirects)
  | WhileUntil(Token keyword, condition cond, command body, List[Redir] redirects)
  | If(Token if_kw, List[IfArm] arms, Token? else_kw, List[command] else_action,
       Token? fi_kw, List[Redir] redirects)
  | Case(Token case_kw, case_arg to_match, Token arms_start, List[CaseArm] arms,
         Token arms_end, List[Redir] redirects)
    # The keyword is optional in the case of bash-style functions
    # (ie. "foo() { ... }") which do not have one.
  | ShFunction(Token? keyword, Token name_tok, str name, command body)
  | TimeBlock(Token keyword, command pipeline)
    # Some nodes optimize it out as List[command], but we use CommandList for
    # 1. the top level
    # 2. ls ; ls & ls  (same line)
    # 3. CommandSub  # single child that's a CommandList
    # 4. Subshell  # single child that's a CommandList
  | CommandList(List[command] children)

    # YSH command constructs

    # var, const.
    # - Keyword is None for hay blocks
    # - RHS is None, for use with value.Place
    # - TODO: consider using BareDecl
  | VarDecl(Token? keyword, List[NameType] lhs, expr? rhs)

    # this can behave like 'var', can be desugared
  | BareDecl(Token lhs, expr rhs)

    # setvar, maybe 'auto' later
  | Mutation(Token keyword, List[y_lhs] lhs, Token op, expr rhs)
    # = keyword
  | Expr(Token keyword, expr e)
  | Proc %Proc
  | Func %Func

  #
  # Glob representation, for converting ${x//} to extended regexes.
  #

  # Example: *.[ch] is:
  #   GlobOp(<Glob_Star '*'>),
  #   GlobLit(Glob_OtherLiteral, '.'),
  #   CharClass(False, ['ch'])  # from Glob_CleanLiterals token

  glob_part =
    Literal(id id, str s)
  | Operator(id op_id)  # * or ?
  | CharClass(bool negated, List[str] strs)

  # Char classes are opaque for now. If we ever need them:
  # - Collating symbols are [. .]
  # - Equivalence classes are [=

  printf_part =
    Literal %Token
    # flags are 0 hyphen space + #
    # type is 's' for %s, etc.
  | Percent(List[Token] flags, Token? width, Token? precision, Token type)

  #
  # YSH Language
  #
  # Copied and modified from Python-3.7/Parser/Python.asdl !

  expr_context = Load | Store | Del | AugLoad | AugStore | Param

  # Type expressions: Int  List[Int]  Dict[Str, Any]
  # Do we have Func[Int, Int => Int] ? I guess we can parse that into this
  # system.
  TypeExpr = (Token tok, str name, List[TypeExpr] params)

  # LHS bindings in var/const, and eggex
  NameType = (Token left, str name, TypeExpr? typ)

  # TODO: Inline this into GenExp and ListComp? Just use a flag there?
  Comprehension = (List[NameType] lhs, expr iter, expr? cond)

  # Named arguments supplied to call. Token is null for f(; ...named).
  NamedArg = (Token? name, expr value)

  # Subscripts are lists of expressions
  #   a[:i, n]  (we don't have matrices, but we have data frames)
  Subscript = (Token left, expr obj, expr index)

  # Attributes are obj.attr, d->key, name::scope,
  Attribute = (expr obj, Token op, Token attr, str attr_name, expr_context ctx)

  y_lhs =
    Var %Token  # Id.Expr_Name
  | Subscript %Subscript
  | Attribute %Attribute

  place_op =
    # &a[i+1]
    Subscript(Token op, expr index)
    # &d.mykey
  | Attribute(Token op, Token attr)

  expr =
    Var(Token left, str name)  # a variable name to evaluate
    # Constants are typically Null, Bool, Int, Float
    # and also Str for key in {key: 42}
    # But string literals are SingleQuoted or DoubleQuoted
    # Python uses Num(object n), which doesn't respect our "LST" invariant.
  | Const(Token c, value val)

    # read(&x)  json read (&x[0])
  | Place(Token blame_tok, str var_name, place_op* ops)

    # :| one 'two' "$three" |
  | ShArrayLiteral %ShArrayLiteral

    # / d+ ; ignorecase; %python /
  | Eggex %Eggex

    # $name is not an expr, but $? is, e.g. Id.VSub_QMark
  | SimpleVarSub %SimpleVarSub
  | BracedVarSub %BracedVarSub
  | CommandSub %CommandSub
  | SingleQuoted %SingleQuoted
  | DoubleQuoted %DoubleQuoted

  | Literal(expr inner)
  | Lambda(List[NameType] params, expr body)

  | Unary(Token op, expr child)
  | Binary(Token op, expr left, expr right)
    # x < 4 < 3 and (x < 4) < 3
  | Compare(expr left, List[Token] ops, List[expr] comparators)
  | FuncCall(expr func, ArgList args)

    # TODO: Need a representation for method call. We don't just want
    # Attribute() and then Call()

  | IfExp(expr test, expr body, expr orelse)
  | Tuple(Token left, List[expr] elts, expr_context ctx)

  | List(Token left, List[expr] elts, expr_context ctx)
  | Dict(Token left, List[expr] keys, List[expr] values)
    # For the values in {n1, n2}
  | Implicit

  | ListComp(Token left, expr elt, List[Comprehension] generators)
    # not implemented
  | DictComp(Token left, expr key, expr value, List[Comprehension] generators)
  | GeneratorExp(expr elt, List[Comprehension] generators)

    # Ranges are written 1:2, with first class expression syntax. There is no
    # step as in Python. Use range(0, 10, step=2) for that.
  | Range(expr lower, Token op, expr upper)

    # Slices occur within [] only. Unlike ranges, the start/end can be
    # implicit. Like ranges, denote a step with slice(0, 10, step=2).
    #   a[3:]   a[:i]
  | Slice(expr? lower, Token op, expr? upper)

  | Subscript %Subscript
  | Attribute %Attribute

    # Ellipsis is like 'Starred' within Python, which are valid on the LHS in
    # Python for unpacking, and within list literals for splicing.
    # (Starred is NOT used for {k:v, **a}. That used a blank "keys"
    # attribute.)

    # I think we can use { **pairs } like Python
  | Spread(Token left, expr child)

  #
  # Regex Language (Eggex)
  #

  # e.g. alnum digit
  PosixClass = (Token? negated, str name)
  # e.g. d w s
  PerlClass = (Token? negated, str name)

  # Char Sets and Ranges both use Char Codes
  # with u_braced == true : \u{ff}
  # with u_braced == false: \xff  \\  'a'  a  '0'  0
  # ERE doesn't make a distinction, but compiling to Python/PCRE can use it
  CharCode = (Token blame_tok, int i, bool u_braced)
  CharRange = (CharCode start, CharCode end)

  # Note: .NET has && in character classes, making it a recursive language

  class_literal_term =
    PosixClass %PosixClass
  | PerlClass %PerlClass
  | CharRange %CharRange
  | CharCode %CharCode

  | SingleQuoted %SingleQuoted
    # @chars
  | Splice(Token name, str var_name)  # could be Splice %Token

  # evaluated version of class_literal_term (could be in runtime.asdl)
  char_class_term =
    PosixClass %PosixClass
  | PerlClass %PerlClass

  | CharRange %CharRange
    # For [ \x00 \\ ]
  | CharCode %CharCode

  # NOTE: modifier is unused now, can represent L or P
  re_repeat =
    Op %Token  # + * ? or Expr_DecInt for x{3}
  | Range(Token? left, str lower, str upper, Token? right)  # dot{1,2}
    # Haven't implemented the modifier, e.g. x{+ P}
    # | Num(Token times, id modifier)
    # | Range(Token? lower, Token? upper, id modifier)

  re =
    Primitive(Token blame_tok, id id)  # . ^ $ dot %start %end
  | PosixClass %PosixClass
  | PerlClass %PerlClass
    # syntax [ $x \n ]
  | CharClassLiteral(bool negated, List[class_literal_term] terms)
    # evaluated [ 'abc' \n ]
  | CharClass(bool negated, List[char_class_term] terms)

    # @D
  | Splice(Token name, str var_name)  # TODO: Splice %Token

  | SingleQuoted %SingleQuoted

    # Compound:
  | Repeat(re child, re_repeat op)
  | Seq(List[re] children)
  | Alt(List[re] children)

  | Group(re child)
    # convert_func is filled in on evaluation
    # TODO: name and func_name can be expanded to strings
  | Capture(re child, Token? name, Token? func_name)
  | Backtracking(bool negated, Token name, re child)

    # \u{ff} is parsed as this, but SingleQuoted also evaluates to it
  | LiteralChars(Token blame_tok, str s)
}