1 | # Data types for the Oils AST, aka "Lossless Syntax Tree".
|
2 | #
|
3 | # Invariant: the source text can be reconstructed byte-for-byte from this tree.
|
4 | # The test/lossless.sh suite verifies this.
|
5 |
|
6 | # We usually try to preserve the physical order of the source in the ASDL
|
7 | # fields. One exception is the order of redirects:
|
8 | #
|
9 | # echo >out.txt hi
|
10 | # # versus
|
11 | # echo hi >out.txt
|
12 |
|
13 | # Unrepresented:
|
14 | # - let arithmetic (rarely used)
|
15 | # - coprocesses # one with arg and one without
|
16 | # - select block
|
17 | # - case fallthrough ;& and ;;&
|
18 |
|
19 | # Possible refactorings:
|
20 | #
|
21 | # # %CompoundWord as first class variant:
|
22 | # bool_expr = WordTest %CompoundWord | ...
|
23 | #
|
24 | # # Can DoubleQuoted have a subset of parts compared with CompoundWord?
|
25 | # string_part = ... # subset of word_part
|
26 | #
|
27 | # - Distinguish word_t with BracedTree vs. those without? seq_word_t?
|
28 |
|
29 | module syntax
|
30 | {
|
31 | use core value {
|
32 | value
|
33 | }
|
34 |
|
35 | # More efficient than the List[bool] pattern we've been using
|
36 | BoolParamBox = (bool b)
|
37 | IntParamBox = (int i)
|
38 |
|
39 | # core/main_loop.py
|
40 | parse_result = EmptyLine | Eof | Node(command cmd)
|
41 |
|
42 | # 'source' represents the location of a line / token.
|
43 | source =
|
44 | Interactive
|
45 | | Headless
|
46 | | Unused(str comment) # completion and history never show parse errors?
|
47 | | CFlag
|
48 | | Stdin(str comment)
|
49 |
|
50 | # TODO: if it's not the main script, it's sourced, and you could provide
|
51 | # a chain of locations back to the sourced script!
|
52 | # MainFile(str path) or SourcedFile(str path, loc location)
|
53 | | MainFile(str path)
|
54 | | SourcedFile(str path, loc location)
|
55 |
|
56 | # code parsed from a word
|
57 | # used for 'eval', 'trap', 'printf', 'complete -W', etc.
|
58 | | ArgvWord(str what, loc location)
|
59 |
|
60 | # code parsed from the value of a variable
|
61 | # used for $PS1 $PROMPT_COMMAND
|
62 | | Variable(str var_name, loc location)
|
63 |
|
64 | # Point to the original variable reference
|
65 | | VarRef(Token orig_tok)
|
66 |
|
67 | # alias expansion (location of first word)
|
68 | | Alias(str argv0, loc argv0_loc)
|
69 |
|
70 | # 2 kinds of reparsing: backticks, and x+1 in a[x+1]=y
|
71 | # TODO: use this for eval_unsafe_arith instead of Variable
|
72 | | Reparsed(str what, Token left_token, Token right_token)
|
73 |
|
74 | # For --location-str
|
75 | | Synthetic(str s)
|
76 |
|
77 | SourceLine = (int line_num, str content, source src)
|
78 |
|
79 | # Ways to make Token 24 bytes:
|
80 | # - Introduce WideToken with the wide_tok.tval field, which we use with
|
81 | # lexer.LazyStr()
|
82 | # - It will be 32 bytes rather than 24
|
83 | # - loc = Token %Token | WideToken %WideToken
|
84 | # - Which tokens need to be big?
|
85 | # - Id.Lit_{Chars,VarLike,...} - word_part.Literal
|
86 | # - SingleQuoted does not store tokens; it stores a string
|
87 | # - Id.Arith_VarLike - arith_expr.Var
|
88 | # - Id.VSub_{DollarName,Number} - SimpleVarSub
|
89 | # - Id.VSub_Name - BracedVarSub
|
90 | # - Id.Expr_Name - expr.Var
|
91 | #
|
92 | # Instrumenting lexer.LazyVal() with histogram:
|
93 | # 22561 Id.Lit_Chars
|
94 | # 8366 Id.Lit_VarLike
|
95 | # 4681 Id.Lit_Colon
|
96 | # 287 Id.Lit_Slash
|
97 | # 164 Id.ControlFlow_Break
|
98 | # 158 Id.ControlFlow_Exit
|
99 | # 29 Id.ControlFlow_Continue
|
100 | # 3 Id.Lit_Comma
|
101 | # 2 Id.Lit_LBracket
|
102 | # 1 Id.Lit_LBrace
|
103 |
|
104 | # 36255 total tokens
|
105 | # 36255 tokens with LazyVal()
|
106 | # 13 unique tokens IDs
|
107 | #
|
108 | # This is ONLY word_::_EvalWordPart() -> word_part::Literal. So it does seem
|
109 | # a HANDFUL of syntax.asdl nodes can use WideToken, and we can use the TYPE
|
110 | # SYSTEM to distinguish them.
|
111 | #
|
112 | # In other words, Lexer always returns type Token, and then the parser may
|
113 | # "upgrade" to type WideToken when it knows it will need to store
|
114 | # word_part.Literal, SimpleVarSub, etc. This also means we can INLINE
|
115 | # SimpleVarSub %WideToken into word_part_t and expr_t.
|
116 |
|
117 | # Possible problem with WideToken: parse_ctx.trail.tokens is List[Token], and
|
118 | # WordParser._GetToken() appends to it. But we can just use TokenVal() to
|
119 | # create garbage there.
|
120 |
|
121 | # Pack id and length into 32 bits with special uint16 type.
|
122 | # TODO: This breaks ASDL pretty printing of Id type!
|
123 |
|
124 | # We transpose (id, col, length) -> (id, length, col) for C struct packing.
|
125 |
|
126 | # Note that ASDL generates:
|
127 | # typedef int Id_t;
|
128 | # So uint16 id implies truncation. We happen to know there are less than
|
129 | # 2^16 IDs, but it's not checked. Yaks could check it.
|
130 |
|
131 | Token = (id id, uint16 length, int col, SourceLine? line, str? tval)
|
132 |
|
133 | WideToken = (id id, int length, int col, SourceLine? line, str? tval)
|
134 |
|
135 | # Slight ASDL bug: CompoundWord has to be defined before using it as a shared
|
136 | # variant. The _product_counter algorithm should be moved into a separate
|
137 | # tag-assigning pass, and shared between gen_python.py and gen_cpp.py.
|
138 | CompoundWord = (List[word_part] parts)
|
139 |
|
140 | # Source location for errors
|
141 | loc =
|
142 | Missing # equivalent of runtime.NO_SPID
|
143 | | Token %Token
|
144 | # Very common case: argv arrays need original location
|
145 | | ArgWord %CompoundWord
|
146 | | WordPart(word_part p)
|
147 | | Word(word w)
|
148 | | Arith(arith_expr a)
|
149 | # e.g. for errexit blaming
|
150 | | Command(command c)
|
151 | # the location of a token that's too long
|
152 | | TokenTooLong(SourceLine line, id id, int length, int col)
|
153 |
|
154 | debug_frame =
|
155 | Main(str dollar0)
|
156 | # call_loc => BASH_LINENO
|
157 | # call_loc may be None with new --source flag?
|
158 | | Source(Token? call_tok, str source_name)
|
159 | # def_tok => BASH_SOURCE
|
160 | # call_loc may be None if invoked via RunFuncForCompletion?
|
161 | | Call(Token? call_tok, Token def_tok, str func_name)
|
162 |
|
163 | #
|
164 | # Shell language
|
165 | #
|
166 |
|
167 | bracket_op =
|
168 | WholeArray(id op_id) # * or @
|
169 | | ArrayIndex(arith_expr expr)
|
170 |
|
171 | suffix_op =
|
172 | Nullary %Token # ${x@Q} or ${!prefix@} (which also has prefix_op)
|
173 | | Unary(Token op, rhs_word arg_word) # e.g. ${v:-default}
|
174 | # TODO: Implement YSH ${x|html} and ${x %.3f}
|
175 | | Static(Token tok, str arg)
|
176 | | PatSub(CompoundWord pat, rhs_word replace, id replace_mode, Token slash_tok)
|
177 | # begin is optional with ${array::1}
|
178 | | Slice(arith_expr? begin, arith_expr? length)
|
179 |
|
180 | BracedVarSub = (
|
181 | Token left, # in dynamic ParseVarRef, same as name_tok
|
182 | Token token, # location for the name
|
183 | str var_name, # the name - TODO: remove this, use LazyStr() instead
|
184 | Token? prefix_op, # prefix # or ! operators
|
185 | bracket_op? bracket_op,
|
186 | suffix_op? suffix_op,
|
187 | Token right # in dynamic ParseVarRef, same as name_tok
|
188 | )
|
189 |
|
190 | # Variants:
|
191 | # - Look at left token ID for $'' c'' vs r'' '' e.g. Id.Left_DollarSingleQuote
|
192 | # - And """ and ''' e.g. Id.Left_TDoubleQuote
|
193 | DoubleQuoted = (Token left, List[word_part] parts, Token right)
|
194 |
|
195 | # Consider making str? sval LAZY, like lexer.LazyStr(tok)
|
196 | SingleQuoted = (Token left, str sval, Token right)
|
197 |
|
198 | # e.g. Id.VSub_QMark, Id.VSub_DollarName $foo with lexer.LazyStr()
|
199 | SimpleVarSub = (Token tok)
|
200 |
|
201 | CommandSub = (Token left_token, command child, Token right)
|
202 |
|
203 | # - can contain word.BracedTree
|
204 | # - no 'Token right' for now, doesn't appear to be used
|
205 | ShArrayLiteral = (Token left, List[word] words, Token right)
|
206 |
|
207 | # Unevaluated, typed arguments for func and proc.
|
208 | # Note that ...arg is expr.Spread.
|
209 | ArgList = (
|
210 | Token left, List[expr] pos_args,
|
211 | Token? semi_tok, List[NamedArg] named_args,
|
212 | Token? semi_tok2, expr? block_expr,
|
213 | Token right
|
214 | )
|
215 |
|
216 | AssocPair = (CompoundWord key, CompoundWord value)  # one key/value entry of word_part.BashAssocLiteral
|
217 |
|
218 | word_part =
|
219 | ShArrayLiteral %ShArrayLiteral
|
220 | | BashAssocLiteral(Token left, List[AssocPair] pairs, Token right)
|
221 | | Literal %Token
|
222 | # escaped case is separate so the evaluator doesn't have to check token ID
|
223 | | EscapedLiteral(Token token, str ch)
|
224 | | SingleQuoted %SingleQuoted
|
225 | | DoubleQuoted %DoubleQuoted
|
226 | # Could be SimpleVarSub %Token that's VSub_DollarName, but let's not
|
227 | # confuse it with the common word_part.Literal case
|
228 | | SimpleVarSub %SimpleVarSub
|
229 | | BracedVarSub %BracedVarSub
|
230 | | ZshVarSub (Token left, CompoundWord ignored, Token right)
|
231 | # For command sub and process sub: $(...) <(...) >(...)
|
232 | | CommandSub %CommandSub
|
233 | # ~ or ~bob
|
234 | | TildeSub(Token left, # always the tilde
|
235 | Token? name, str? user_name)
|
236 | | ArithSub(Token left, arith_expr anode, Token right)
|
237 | # {a,b,c}
|
238 | | BracedTuple(List[CompoundWord] words)
|
239 | # {1..10} or {-5..10..2} or {01..10} (leading zeros matter)
|
240 | # {a..f} or {a..f..2} or {a..f..-2}
|
241 | # the whole range is one Token,
|
242 | | BracedRange(Token blame_tok, id kind, str start, str end, int step)
|
243 | # extended globs are parsed statically, unlike globs
|
244 | | ExtGlob(Token op, List[CompoundWord] arms, Token right)
|
245 | # a regex group is similar to an extended glob part
|
246 | | BashRegexGroup(Token left, CompoundWord? child, Token right)
|
247 |
|
248 | # YSH word_part extensions
|
249 |
|
250 | # @myarray - Id.Lit_Splice (could be optimized to %Token)
|
251 | | Splice(Token blame_tok, str var_name)
|
252 | # $[d.key], etc.
|
253 | | ExprSub(Token left, expr child, Token right)
|
254 |
|
255 | # Use cases for Empty: RHS of 'x=', the argument in "${x:-}".
|
256 | # The latter is semantically necessary. (See osh/word_parse.py).
|
257 | # At runtime: RHS of 'declare x='.
|
258 | rhs_word = Empty | Compound %CompoundWord
|
259 |
|
260 | word =
|
261 | # Returns from WordParser, but not generally stored in LST
|
262 | Operator %Token
|
263 | # A Compound word can contain any word_part except the Braced*Part.
|
264 | # We could model this with another variant type but it incurs runtime
|
265 | # overhead and seems like overkill. Note that DoubleQuoted can't
|
266 | # contain a SingleQuoted, etc. either.
|
267 | | Compound %CompoundWord
|
268 | # For word sequences command.Simple, ShArrayLiteral, for_iter.Words
|
269 | # Could be its own type
|
270 | | BracedTree(List[word_part] parts)
|
271 | # For dynamic parsing of test aka [ - the string is already evaluated.
|
272 | | String(id id, str s, CompoundWord? blame_loc)
|
273 |
|
274 | # Note: the name 'foo' is derived from token value 'foo=' or 'foo+='
|
275 | sh_lhs =
|
276 | Name(Token left, str name) # Lit_VarLike foo=
|
277 | # TODO: Could be Name %Token
|
278 | | IndexedName(Token left, str name, arith_expr index)
|
279 | | UnparsedIndex(Token left, str name, str index) # for translation
|
280 |
|
281 | arith_expr =
|
282 | VarSub %Token # e.g. $(( x )) Id.Arith_VarLike
|
283 | | Word %CompoundWord # e.g. $(( 123'456'$y ))
|
284 |
|
285 | | UnaryAssign(id op_id, arith_expr child)
|
286 | | BinaryAssign(id op_id, arith_expr left, arith_expr right)
|
287 |
|
288 | | Unary(id op_id, arith_expr child)
|
289 | # TODO: op should be token, e.g. for divide by zero
|
290 | | Binary(Token op, arith_expr left, arith_expr right)
|
291 | | TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)
|
292 |
|
293 | bool_expr =
|
294 | WordTest(word w) # e.g. [[ myword ]]
|
295 | | Binary(id op_id, word left, word right)
|
296 | | Unary(id op_id, word child)
|
297 | | LogicalNot(bool_expr child)
|
298 | | LogicalAnd(bool_expr left, bool_expr right)
|
299 | | LogicalOr(bool_expr left, bool_expr right)
|
300 |
|
301 | redir_loc =
|
302 | Fd(int fd) | VarName(str name)
|
303 |
|
304 | redir_param =
|
305 | Word %CompoundWord
|
306 | | HereDoc(word here_begin, # e.g. EOF or 'EOF'
|
307 | Token? here_end_tok, # Token consisting of the whole line
|
308 | # It's always filled in AFTER creation, but
|
309 | # temporarily so optional
|
310 | List[word_part] stdin_parts # one for each line
|
311 | )
|
312 |
|
313 | Redir = (Token op, redir_loc loc, redir_param arg)
|
314 |
|
315 | assign_op = Equal | PlusEqual
|
316 | AssignPair = (Token left, sh_lhs lhs, assign_op op, rhs_word rhs)
|
317 | # TODO: could put Id.Lit_VarLike foo= into LazyStr() with -1 slice
|
318 | EnvPair = (Token left, str name, rhs_word val)
|
319 |
|
320 | condition =
|
321 | Shell(List[command] commands) # if false; true; then echo hi; fi
|
322 | | YshExpr(expr e) # if (x > 0) { echo hi }
|
323 | # TODO: add more specific blame location
|
324 |
|
325 | # Each arm tests one word against multiple words
|
326 | # shell: *.cc|*.h) echo C++ ;;
|
327 | # YSH: *.cc|*.h { echo C++ }
|
328 | #
|
329 | # Three location tokens:
|
330 | # 1. left - shell has ( or *.cc ysh has *.cc
|
331 | # 2. middle - shell has ) ysh has {
|
332 | # 3. right - shell has optional ;; ysh has required }
|
333 | #
|
334 | # For YSH typed case, left can be ( and /
|
335 | # And case_pat may contain more details
|
336 | CaseArm = (
|
337 | Token left, pat pattern, Token middle, List[command] action,
|
338 | Token? right
|
339 | )
|
340 |
|
341 | # The argument to match against in a case command
|
342 | # In YSH-style case commands we match against an `expr`, but in sh-style case
|
343 | # commands we match against a word.
|
344 | case_arg =
|
345 | Word(word w)
|
346 | | YshExpr(expr e)
|
347 |
|
348 | EggexFlag = (bool negated, Token flag)  # a flag after ';' in an Eggex, e.g. the i in / 'abc' ; i /
|
349 |
|
350 | # canonical_flags can be compared for equality. This is needed to splice
|
351 | # eggexes correctly, e.g. / 'abc' @pat ; i /
|
352 | Eggex = (
|
353 | Token left, re regex, List[EggexFlag] flags, Token? trans_pref,
|
354 | str? canonical_flags)
|
355 |
|
356 | pat =
|
357 | Else
|
358 | | Words(List[word] words)
|
359 | | YshExprs(List[expr] exprs)
|
360 | | Eggex %Eggex
|
361 |
|
362 | # Each if arm starts with either an "if" or "elif" keyword
|
363 | # In YSH, the then keyword is not used (replaced by braces {})
|
364 | IfArm = (
|
365 | Token keyword, condition cond, Token? then_kw, List[command] action,
|
366 | # then_tok used in ysh-ify
|
367 | Token? then_tok)
|
368 |
|
369 | for_iter =
|
370 | Args # for x; do echo $x; done # implicit "$@"
|
371 | | Words(List[word] words) # for x in 'foo' *.py { echo $x }
|
372 | # like ShArrayLiteral, but no location for %(
|
373 | | YshExpr(expr e, Token blame) # for x in (mylist) { echo $x }
|
374 |
|
375 | BraceGroup = (
|
376 | Token left, Token? doc_token, List[command] children,
|
377 | List[Redir] redirects, Token right
|
378 | )
|
379 |
|
380 | Param = (Token blame_tok, str name, TypeExpr? type, expr? default_val)
|
381 | RestParam = (Token blame_tok, str name)  # used as ParamGroup.rest_of
|
382 |
|
383 | ParamGroup = (List[Param] params, RestParam? rest_of)
|
384 |
|
385 | # 'open' is for proc p { }; closed is for proc p () { }
|
386 | proc_sig =
|
387 | Open
|
388 | | Closed(ParamGroup? word, ParamGroup? positional, ParamGroup? named,
|
389 | Param? block_param)
|
390 |
|
391 | Proc = (Token keyword, Token name, proc_sig sig, command body)
|
392 |
|
393 | Func = (
|
394 | Token keyword, Token name,
|
395 | ParamGroup? positional, ParamGroup? named,
|
396 | command body
|
397 | )
|
398 |
|
399 | # Retain references to lines
|
400 | LiteralBlock = (BraceGroup brace_group, List[SourceLine] lines)
|
401 |
|
402 | # Represents all these case: s=1 s+=1 s[x]=1 ...
|
403 | ParsedAssignment = (Token? left, Token? close, int part_offset, CompoundWord w)
|
404 |
|
405 | command =
|
406 | NoOp
|
407 | | Simple(Token? blame_tok, # TODO: make required (BracedTuple?)
|
408 | List[EnvPair] more_env,
|
409 | List[word] words, List[Redir] redirects,
|
410 | ArgList? typed_args, LiteralBlock? block,
|
411 | # do_fork is semantic, not syntactic
|
412 | bool do_fork)
|
413 | # This doesn't technically belong in the LST, but it's convenient for
|
414 | # execution
|
415 | | ExpandedAlias(command child, List[Redir] redirects, List[EnvPair] more_env)
|
416 | | Sentence(command child, Token terminator)
|
417 | # Represents "bare assignment"
|
418 | # Token left is redundant with pairs[0].left
|
419 | | ShAssignment(Token left, List[AssignPair] pairs, List[Redir] redirects)
|
420 | | Retval(Token keyword, expr val)
|
421 | | ControlFlow(Token keyword, word? arg_word)
|
422 | # ops are | |&
|
423 | | Pipeline(Token? negated, List[command] children, List[Token] ops)
|
424 | # ops are && ||
|
425 | | AndOr(List[command] children, List[Token] ops)
|
426 | # Part of for, while, until (but not if, case, ShFunction). No redirects.
|
427 | | DoGroup(Token left, List[command] children, Token right)
|
428 | # A brace group is a compound command, with redirects.
|
429 | | BraceGroup %BraceGroup
|
430 | # Contains a single child, like CommandSub
|
431 | | Subshell(Token left, command child, Token right, List[Redir] redirects)
|
432 | | DParen(Token left, arith_expr child, Token right, List[Redir] redirects)
|
433 | | DBracket(Token left, bool_expr expr, Token right, List[Redir] redirects)
|
434 | # up to 3 iteration variables
|
435 | | ForEach(Token keyword, List[str] iter_names, for_iter iterable,
|
436 | Token? semi_tok, command body, List[Redir] redirects)
|
437 | # C-style for loop. Any of the 3 expressions can be omitted.
|
438 | # Note: body is required, but only optional here because of initialization
|
439 | # order.
|
440 | | ForExpr(Token keyword, arith_expr? init, arith_expr? cond,
|
441 | arith_expr? update, command? body, List[Redir] redirects)
|
442 | | WhileUntil(Token keyword, condition cond, command body, List[Redir] redirects)
|
443 | | If(Token if_kw, List[IfArm] arms, Token? else_kw, List[command] else_action,
|
444 | Token? fi_kw, List[Redir] redirects)
|
445 | | Case(Token case_kw, case_arg to_match, Token arms_start, List[CaseArm] arms,
|
446 | Token arms_end, List[Redir] redirects)
|
447 | # The keyword is optional in the case of bash-style functions
|
448 | # (ie. "foo() { ... }") which do not have one.
|
449 | | ShFunction(Token? keyword, Token name_tok, str name, command body)
|
450 | | TimeBlock(Token keyword, command pipeline)
|
451 | # Some nodes optimize it out as List[command], but we use CommandList for
|
452 | # 1. the top level
|
453 | # 2. ls ; ls & ls (same line)
|
454 | # 3. CommandSub # single child that's a CommandList
|
455 | # 4. Subshell # single child that's a CommandList
|
456 | | CommandList(List[command] children)
|
457 |
|
458 | # YSH command constructs
|
459 |
|
460 | # var, const.
|
461 | # - Keyword is None for hay blocks
|
462 | # - RHS is None, for use with value.Place
|
463 | # - TODO: consider using BareDecl
|
464 | | VarDecl(Token? keyword, List[NameType] lhs, expr? rhs)
|
465 |
|
466 | # this can behave like 'var', can be desugared
|
467 | | BareDecl(Token lhs, expr rhs)
|
468 |
|
469 | # setvar, maybe 'auto' later
|
470 | | Mutation(Token keyword, List[y_lhs] lhs, Token op, expr rhs)
|
471 | # = keyword
|
472 | | Expr(Token keyword, expr e)
|
473 | | Proc %Proc
|
474 | | Func %Func
|
475 |
|
476 | #
|
477 | # Glob representation, for converting ${x//} to extended regexes.
|
478 | #
|
479 |
|
480 | # Example: *.[ch] is:
|
481 | # GlobOp(<Glob_Star '*'>),
|
482 | # GlobLit(Glob_OtherLiteral, '.'),
|
483 | # CharClass(False, ['ch']) # from Glob_CleanLiterals token
|
484 |
|
485 | glob_part =
|
486 | Literal(id id, str s)
|
487 | | Operator(id op_id) # * or ?
|
488 | | CharClass(bool negated, List[str] strs)
|
489 |
|
490 | # Char classes are opaque for now. If we ever need them:
|
491 | # - Collating symbols are [. .]
|
492 | # - Equivalence classes are [= =]
|
493 |
|
494 | printf_part =
|
495 | Literal %Token
|
496 | # flags are 0 hyphen space + #
|
497 | # type is 's' for %s, etc.
|
498 | | Percent(List[Token] flags, Token? width, Token? precision, Token type)
|
499 |
|
500 | #
|
501 | # YSH Language
|
502 | #
|
503 | # Copied and modified from Python-3.7/Parser/Python.asdl !
|
504 |
|
505 | expr_context = Load | Store | Del | AugLoad | AugStore | Param
|
506 |
|
507 | # Type expressions: Int List[Int] Dict[Str, Any]
|
508 | # Do we have Func[Int, Int => Int] ? I guess we can parse that into this
|
509 | # system.
|
510 | TypeExpr = (Token tok, str name, List[TypeExpr] params)
|
511 |
|
512 | # LHS bindings in var/const, and eggex
|
513 | NameType = (Token left, str name, TypeExpr? typ)
|
514 |
|
515 | # TODO: Inline this into GenExp and ListComp? Just use a flag there?
|
516 | Comprehension = (List[NameType] lhs, expr iter, expr? cond)
|
517 |
|
518 | # Named arguments supplied to call. Token is null for f(; ...named).
|
519 | NamedArg = (Token? name, expr value)
|
520 |
|
521 | # Subscripts are lists of expressions
|
522 | # a[:i, n] (we don't have matrices, but we have data frames)
|
523 | Subscript = (Token left, expr obj, expr index)
|
524 |
|
525 | # Attributes are obj.attr, d->key, name::scope,
|
526 | Attribute = (expr obj, Token op, Token attr, str attr_name, expr_context ctx)
|
527 |
|
528 | y_lhs =
|
529 | Var %Token # Id.Expr_Name
|
530 | | Subscript %Subscript
|
531 | | Attribute %Attribute
|
532 |
|
533 | place_op =
|
534 | # &a[i+1]
|
535 | Subscript(Token op, expr index)
|
536 | # &d.mykey
|
537 | | Attribute(Token op, Token attr)
|
538 |
|
539 | expr =
|
540 | Var(Token left, str name) # a variable name to evaluate
|
541 | # Constants are typically Null, Bool, Int, Float
|
542 | # and also Str for key in {key: 42}
|
543 | # But string literals are SingleQuoted or DoubleQuoted
|
544 | # Python uses Num(object n), which doesn't respect our "LST" invariant.
|
545 | | Const(Token c, value val)
|
546 |
|
547 | # read(&x) json read (&x[0])
|
548 | | Place(Token blame_tok, str var_name, place_op* ops)
|
549 |
|
550 | # :| one 'two' "$three" |
|
551 | | ShArrayLiteral %ShArrayLiteral
|
552 |
|
553 | # / d+ ; ignorecase; %python /
|
554 | | Eggex %Eggex
|
555 |
|
556 | # $name is not an expr, but $? is, e.g. Id.VSub_QMark
|
557 | | SimpleVarSub %SimpleVarSub
|
558 | | BracedVarSub %BracedVarSub
|
559 | | CommandSub %CommandSub
|
560 | | SingleQuoted %SingleQuoted
|
561 | | DoubleQuoted %DoubleQuoted
|
562 |
|
563 | | Literal(expr inner)
|
564 | | Lambda(List[NameType] params, expr body)
|
565 |
|
566 | | Unary(Token op, expr child)
|
567 | | Binary(Token op, expr left, expr right)
|
568 | # x < 4 < 3 and (x < 4) < 3
|
569 | | Compare(expr left, List[Token] ops, List[expr] comparators)
|
570 | | FuncCall(expr func, ArgList args)
|
571 |
|
572 | # TODO: Need a representation for method call. We don't just want
|
573 | # Attribute() and then Call()
|
574 |
|
575 | | IfExp(expr test, expr body, expr orelse)
|
576 | | Tuple(Token left, List[expr] elts, expr_context ctx)
|
577 |
|
578 | | List(Token left, List[expr] elts, expr_context ctx)
|
579 | | Dict(Token left, List[expr] keys, List[expr] values)
|
580 | # For the values in {n1, n2}
|
581 | | Implicit
|
582 |
|
583 | | ListComp(Token left, expr elt, List[Comprehension] generators)
|
584 | # not implemented
|
585 | | DictComp(Token left, expr key, expr value, List[Comprehension] generators)
|
586 | | GeneratorExp(expr elt, List[Comprehension] generators)
|
587 |
|
588 | # Ranges are written 1:2, with first class expression syntax. There is no
|
589 | # step as in Python. Use range(0, 10, step=2) for that.
|
590 | | Range(expr lower, Token op, expr upper)
|
591 |
|
592 | # Slices occur within [] only. Unlike ranges, the start/end can be
|
593 | # implicit. Like ranges, denote a step with slice(0, 10, step=2).
|
594 | # a[3:] a[:i]
|
595 | | Slice(expr? lower, Token op, expr? upper)
|
596 |
|
597 | | Subscript %Subscript
|
598 | | Attribute %Attribute
|
599 |
|
600 | # Ellipsis is like 'Starred' within Python, which are valid on the LHS in
|
601 | # Python for unpacking, and within list literals for splicing.
|
602 | # (Starred is NOT used for {k:v, **a}. That used a blank "keys"
|
603 | # attribute.)
|
604 |
|
605 | # I think we can use { **pairs } like Python
|
606 | | Spread(Token left, expr child)
|
607 |
|
608 | #
|
609 | # Regex Language (Eggex)
|
610 | #
|
611 |
|
612 | # e.g. alnum digit
|
613 | PosixClass = (Token? negated, str name)
|
614 | # e.g. d w s
|
615 | PerlClass = (Token? negated, str name)
|
616 |
|
617 | # Char Sets and Ranges both use Char Codes
|
618 | # with u_braced == true : \u{ff}
|
619 | # with u_braced == false: \xff \\ 'a' a '0' 0
|
620 | # ERE doesn't make a distinction, but compiling to Python/PCRE can use it
|
621 | CharCode = (Token blame_tok, int i, bool u_braced)
|
622 | CharRange = (CharCode start, CharCode end)
|
623 |
|
624 | # Note: .NET has && in character classes, making it a recursive language
|
625 |
|
626 | class_literal_term =
|
627 | PosixClass %PosixClass
|
628 | | PerlClass %PerlClass
|
629 | | CharRange %CharRange
|
630 | | CharCode %CharCode
|
631 |
|
632 | | SingleQuoted %SingleQuoted
|
633 | # @chars
|
634 | | Splice(Token name, str var_name) # could be Splice %Token
|
635 |
|
636 | # evaluated version of class_literal_term (could be in runtime.asdl)
|
637 | char_class_term =
|
638 | PosixClass %PosixClass
|
639 | | PerlClass %PerlClass
|
640 |
|
641 | | CharRange %CharRange
|
642 | # For [ \x00 \\ ]
|
643 | | CharCode %CharCode
|
644 |
|
645 | # NOTE: modifier is unused now, can represent L or P
|
646 | re_repeat =
|
647 | Op %Token # + * ? or Expr_DecInt for x{3}
|
648 | | Range(Token? left, str lower, str upper, Token? right) # dot{1,2}
|
649 | # Haven't implemented the modifier, e.g. x{+ P}
|
650 | # | Num(Token times, id modifier)
|
651 | # | Range(Token? lower, Token? upper, id modifier)
|
652 |
|
653 | re =
|
654 | Primitive(Token blame_tok, id id) # . ^ $ dot %start %end
|
655 | | PosixClass %PosixClass
|
656 | | PerlClass %PerlClass
|
657 | # syntax [ $x \n ]
|
658 | | CharClassLiteral(bool negated, List[class_literal_term] terms)
|
659 | # evaluated [ 'abc' \n ]
|
660 | | CharClass(bool negated, List[char_class_term] terms)
|
661 |
|
662 | # @D
|
663 | | Splice(Token name, str var_name) # TODO: Splice %Token
|
664 |
|
665 | | SingleQuoted %SingleQuoted
|
666 |
|
667 | # Compound:
|
668 | | Repeat(re child, re_repeat op)
|
669 | | Seq(List[re] children)
|
670 | | Alt(List[re] children)
|
671 |
|
672 | | Group(re child)
|
673 | # convert_func is filled in on evaluation
|
674 | # TODO: name and func_name can be expanded to strings
|
675 | | Capture(re child, Token? name, Token? func_name)
|
676 | | Backtracking(bool negated, Token name, re child)
|
677 |
|
678 | # \u{ff} is parsed as this, but SingleQuoted also evaluates to it
|
679 | | LiteralChars(Token blame_tok, str s)
|
680 | }
|