#!/usr/bin/env python2
# Copyright 2016 Andy Chu. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
"""
id_kind_def.py - Id and Kind definitions, used for Token, Word, Nodes, etc.

NOTE: If this changes, the lexer may need to be recompiled with
build/codegen.sh lexer.
"""
from __future__ import print_function

from _devbuild.gen.types_asdl import (bool_arg_type_e, bool_arg_type_t)
#from mycpp.mylib import log

from typing import List, Tuple, Dict, Optional, TYPE_CHECKING
if TYPE_CHECKING:  # avoid circular build deps
    from _devbuild.gen.id_kind_asdl import Id_t, Kind_t


class IdSpec(object):
    """Identifiers that form the "spine" of the shell program
    representation."""

    def __init__(self, kind_lookup, bool_ops):
        # type: (Dict[int, int], Dict[int, bool_arg_type_t]) -> None
        self.id_str2int = {}  # type: Dict[str, int]
        self.kind_str2int = {}  # type: Dict[str, int]

        self.kind_lookup = kind_lookup  # Id int -> Kind int
        self.kind_name_list = []  # type: List[str]
        self.kind_sizes = []  # type: List[int]  # optional stats

        self.lexer_pairs = {}  # type: Dict[int, List[Tuple[bool, str, int]]]
        self.bool_ops = bool_ops  # type: Dict[int, bool_arg_type_t]

        # Incremented on each method call
        # IMPORTANT: 1-based indices match what asdl/gen_python.py does!!!
        self.id_index = 1
        self.kind_index = 1

    def LexerPairs(self, kind):
        # type: (Kind_t) -> List[Tuple[bool, str, Id_t]]
        result = []
        for is_regex, pat, id_ in self.lexer_pairs[kind]:
            result.append((is_regex, pat, id_))
        return result

    def _AddId(self, id_name, kind=None):
        # type: (str, Optional[int]) -> int
        """
        Args:
          id_name: e.g. BoolBinary_Equal
          kind: override autoassignment.  For AddBoolBinaryForBuiltin
        """
        t = self.id_index

        self.id_str2int[id_name] = t

        if kind is None:
            kind = self.kind_index
        self.kind_lookup[t] = kind

        self.id_index += 1  # mutate last
        return t  # the index we used

    def _AddKind(self, kind_name):
        # type: (str) -> None
        self.kind_str2int[kind_name] = self.kind_index
        #log('%s = %d', kind_name, self.kind_index)
        self.kind_index += 1
        self.kind_name_list.append(kind_name)

    def AddKind(self, kind_name, tokens):
        # type: (str, List[str]) -> None
        assert isinstance(tokens, list), tokens

        for name in tokens:
            id_name = '%s_%s' % (kind_name, name)
            self._AddId(id_name)

        # Must be after adding Id
        self._AddKind(kind_name)
        self.kind_sizes.append(len(tokens))  # debug info

    def AddKindPairs(self, kind_name, pairs):
        # type: (str, List[Tuple[str, str]]) -> None
        assert isinstance(pairs, list), pairs

        lexer_pairs = []
        for name, char_pat in pairs:
            id_name = '%s_%s' % (kind_name, name)
            id_int = self._AddId(id_name)
            # After _AddId
            lexer_pairs.append((False, char_pat, id_int))  # Constant

        self.lexer_pairs[self.kind_index] = lexer_pairs

        # Must be after adding Id
        self._AddKind(kind_name)
        self.kind_sizes.append(len(pairs))  # debug info

    def AddBoolKind(
            self,
            kind_name,  # type: str
            arg_type_pairs,  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    ):
        # type: (...) -> None
        """
        Args:
          kind_name: string
          arg_type_pairs: list of (bool_arg_type_e, list of (name, char_pat)) pairs
        """
        lexer_pairs = []
        num_tokens = 0
        for arg_type, pairs in arg_type_pairs:
            #print(arg_type, pairs)

            for name, char_pat in pairs:
                # BoolUnary_f, BoolBinary_eq, BoolBinary_NEqual
                id_name = '%s_%s' % (kind_name, name)
                id_int = self._AddId(id_name)
                self.AddBoolOp(id_int, arg_type)  # register type
                lexer_pairs.append((False, char_pat, id_int))  # constant

            num_tokens += len(pairs)

        self.lexer_pairs[self.kind_index] = lexer_pairs

        # Must do this after _AddId()
        self._AddKind(kind_name)
        self.kind_sizes.append(num_tokens)  # debug info

    def AddBoolBinaryForBuiltin(self, id_name, kind):
        # type: (str, int) -> int
        """For [ = ] [ == ] and [ != ].

        These operators are NOT added to the lexer.  They are "lexed" as
        word.String.
        """
        id_name = 'BoolBinary_%s' % id_name
        id_int = self._AddId(id_name, kind=kind)
        self.AddBoolOp(id_int, bool_arg_type_e.Str)
        return id_int

    def AddBoolOp(self, id_int, arg_type):
        # type: (int, bool_arg_type_t) -> None
        """Associate an Id integer with a bool_arg_type_e."""
        self.bool_ops[id_int] = arg_type

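
# Illustrative sketch (comment only, never executed): how IdSpec hands out
# integers.  Assuming a fresh spec with empty lookup dicts, Ids and Kinds both
# get consecutive 1-based integers, and each AddKind*() call closes out one
# Kind.  _spec below is a hypothetical local, not part of this module:
#
#   _spec = IdSpec({}, {})
#   _spec.AddKind('Word', ['Compound'])   # Id Word_Compound = 1, Kind Word = 1
#   _spec.AddKindPairs('VOp3', [('At', '@'), ('Star', '*')])
#   _spec.id_str2int['VOp3_At']           # => 2
#   _spec.kind_str2int['VOp3']            # => 2
#   _spec.LexerPairs(2)                   # => [(False, '@', 2), (False, '*', 3)]
#
# The (is_regex, pattern, id) tuples are the lexer_pairs entries that
# LexerPairs() returns.
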
def AddKinds(spec):
    # type: (IdSpec) -> None

    # A compound word, in arith context, boolean context, or command context.
    # A['foo'] A["foo"] A[$foo] A["$foo"] A[${foo}] A["${foo}"]
    spec.AddKind('Word', ['Compound'])

    # Token IDs in Kind.Arith are first to make the TDOP precedence table
    # small.
    #
    # NOTE: Could share Op_Pipe, Op_Amp, Op_DAmp, Op_Semi, Op_LParen, etc.
    # Actually all of Arith could be folded into Op, because we are using
    # WordParser._ReadArithWord vs. WordParser._ReadWord.
    spec.AddKindPairs(
        'Arith',
        [
            ('Semi', ';'),  # ternary for loop only
            ('Comma', ','),  # function call and C comma operator
            ('Plus', '+'),
            ('Minus', '-'),
            ('Star', '*'),
            ('Slash', '/'),
            ('Percent', '%'),
            ('DPlus', '++'),
            ('DMinus', '--'),
            ('DStar', '**'),
            ('LParen', '('),
            ('RParen', ')'),  # grouping and function call extension
            ('LBracket', '['),
            ('RBracket', ']'),  # array and assoc array subscript
            ('RBrace', '}'),  # for end of var sub

            # Logical Ops
            ('QMark', '?'),
            ('Colon', ':'),  # Ternary Op: a < b ? 0 : 1
            ('LessEqual', '<='),
            ('Less', '<'),
            ('GreatEqual', '>='),
            ('Great', '>'),
            ('DEqual', '=='),
            ('NEqual', '!='),
            # note: these 3 are not in YSH Expr.  (Could be used in find dialect.)
            ('DAmp', '&&'),
            ('DPipe', '||'),
            ('Bang', '!'),

            # Bitwise ops
            ('DGreat', '>>'),
            ('DLess', '<<'),
            # YSH: ^ is exponent
            ('Amp', '&'),
            ('Pipe', '|'),
            ('Caret', '^'),
            ('Tilde', '~'),
            ('Equal', '='),

            # Augmented Assignment for $(( ))
            # Must match the list in osh/arith_parse.py
            # YSH has **= //= like Python
            ('PlusEqual', '+='),
            ('MinusEqual', '-='),
            ('StarEqual', '*='),
            ('SlashEqual', '/='),
            ('PercentEqual', '%='),
            ('DGreatEqual', '>>='),
            ('DLessEqual', '<<='),
            ('AmpEqual', '&='),
            ('CaretEqual', '^='),
            ('PipeEqual', '|='),
        ])

    spec.AddKind('Eof', ['Real', 'RParen', 'Backtick'])

    spec.AddKind('Undefined', ['Tok'])  # for initial state

    # The Unknown kind is used when we lex something, but it's invalid.
    # Examples:
    #   ${^}
    #   $'\z'  Such bad escape codes are accepted when parse_backslash is on
    #          (default in OSH), so we have to lex them.
    #   (x == y)  should use === or ~==
    spec.AddKind('Unknown', ['Tok', 'Backslash', 'DEqual'])

    spec.AddKind('Eol', ['Tok'])  # no more tokens on line (\0)

    # HereTabs is for stripping tabs on each line of <<-, while preserving the
    # "lossless invariant"
    spec.AddKind('Ignored', ['LineCont', 'Space', 'Comment', 'HereTabs'])

    # Id.WS_Space is for lex_mode_e.ShCommand; Id.Ignored_Space is for
    # lex_mode_e.Arith
    spec.AddKind('WS', ['Space'])

    spec.AddKind(
        'Lit',
        [
            'Chars',
            'VarLike',
            'ArrayLhsOpen',
            'ArrayLhsClose',
            'Splice',  # @func(a, b)
            'AtLBracket',  # @[split(x)]
            'AtLBraceDot',  # @{.myproc arg1} should be builtin_sub
            'Other',
            'EscapedChar',
            'RegexMeta',
            'LBracket',
            'RBracket',  # for assoc array literals, static globs
            'Star',
            'QMark',
            # Either brace expansion or keyword for { and }
            'LBrace',
            'RBrace',
            'Comma',
            'Equals',  # For = f()
            'Dollar',  # detecting 'echo $'
            'DRightBracket',  # the ]] that matches [[, NOT a keyword
            'Tilde',  # tilde expansion
            'Pound',  # for comment or VarOp state
            'TPound',  # for doc comments like ###
            'TDot',  # for multiline commands ...
            'Slash',
            'Percent',  # / # % for patsub, NOT unary op
            'Colon',  # x=foo:~:~root needs tilde expansion
            'Digits',  # for lex_mode_e.Arith
            'At',  # for ${a[@]} in lex_mode_e.Arith, and detecting @[]
            'ArithVarLike',  # for $((var+1)).  Distinct from Lit_VarLike 'var='
            'BadBackslash',  # for "\z", not Id.Unknown_Backslash because it's a
            # syntax error in YSH, but NOT OSH
            'CompDummy',  # A fake Lit_* token to get partial words during
            # completion
        ])

    # For recognizing \` and \" and \\ within backticks.  There's an extra layer
    # of backslash quoting.
    spec.AddKind('Backtick', ['Right', 'Quoted', 'DoubleQuote', 'Other'])

    spec.AddKind('History', ['Op', 'Num', 'Search', 'Other'])

    spec.AddKind(
        'Op',
        [
            'Newline',  # mostly equivalent to SEMI
            'Amp',  # &
            'Pipe',  # |
            'PipeAmp',  # |& -- bash extension for stderr
            'DAmp',  # &&
            'DPipe',  # ||
            'Semi',  # ;
            'DSemi',  # ;; for case
            'LParen',  # For subshell.  Not Kind.Left because it's NOT a WordPart.
            'RParen',  # Default, will be translated to Id.Right_*
            'DLeftParen',
            'DRightParen',

            # for [[ ]] language
            'Less',  # <
            'Great',  # >
            'Bang',  # !

            # YSH [] {}
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
        ])

    # YSH expressions use Kind.Expr and Kind.Arith (further below)
    spec.AddKind(
        'Expr',
        [
            'Reserved',  # <- means nothing but it's reserved now
            'Symbol',  # %foo
            'Name',
            'DecInt',
            'BinInt',
            'OctInt',
            'HexInt',
            'Float',
            'Bang',  # eggex !digit, ![a-z]
            'Dot',
            'DDot',
            'Colon',  # mylist:pop()
            'RArrow',
            'RDArrow',
            'DSlash',  # integer division
            'TEqual',
            'NotDEqual',
            'TildeDEqual',  # === !== ~==
            'At',
            'DoubleAt',  # splice operators
            'Ellipsis',  # for varargs
            'Dollar',  # legacy regex
            'NotTilde',  # !~
            'DTilde',
            'NotDTilde',  # ~~ !~~
            'DStarEqual',  # **=, which bash doesn't have
            'DSlashEqual',  # //=, which bash doesn't have
            'CastedDummy',  # Used for @() $() (words in lex_mode_e.ShCommand)
            # and ${} '' "" (and all other strings)

            # Constants
            'Null',
            'True',
            'False',

            # Keywords are resolved after lexing, but otherwise behave like tokens.
            'And',
            'Or',
            'Not',

            # List comprehensions
            'For',
            'Is',
            'In',
            'If',
            'Else',
            'Func',  # For function literals
            'Capture',
            'As',

            # Tea-specific
            'While',
            'Break',
            'Continue',
            'Return'
        ])

    # For C-escaped strings.
    spec.AddKind(
        'Char',
        [
            'OneChar',
            'Stop',
            'Hex',  # \xff
            'YHex',  # \yff for J8 notation

            # Two variants of Octal: \377, and \0377.
            'Octal3',
            'Octal4',
            'Unicode4',
            'SurrogatePair',  # JSON
            'Unicode8',  # bash
            'UBraced',
            'Pound',  # YSH
            'Literals',
            'AsciiControl',  # \x01-\x1f, what's disallowed in JSON
        ])

    # Regular expression primitives.
    spec.AddKind(
        'Re',
        [
            'Start',  # ^ or %start
            'End',  # $ or %end
            'Dot',  # . or dot
            # Future: %boundary generates \b in Python/Perl, etc.
        ])

    spec.AddKind(
        'Redir',
        [
            'Less',  # < stdin
            'Great',  # > stdout
            'DLess',  # << here doc redirect
            'TLess',  # <<< bash only here string
            'DGreat',  # >> append stdout
            'GreatAnd',  # >& descriptor redirect
            'LessAnd',  # <& descriptor redirect
            'DLessDash',  # <<- here doc redirect for tabs?
            'LessGreat',  # <>
            'Clobber',  # >| POSIX?
            'AndGreat',  # bash &> stdout/stderr to file
            'AndDGreat',  # bash &>> stdout/stderr append to file

            #'GreatPlus',  # >+ is append in YSH
            #'DGreatPlus',  # >>+ is append to string in YSH
        ])

    # NOTE: This is for left/right WORDS only.  (( is not a word, so it
    # doesn't get one.
    spec.AddKind(
        'Left',
        [
            'DoubleQuote',
            'SingleQuote',  # ''
            'DollarSingleQuote',  # $'' for \n escapes
            'RSingleQuote',  # r''
            'USingleQuote',  # u''
            'BSingleQuote',  # b''

            # Multiline versions
            'TDoubleQuote',  # """ """
            'TSingleQuote',  # ''' '''
            'RTSingleQuote',  # r''' '''
            'UTSingleQuote',  # u''' '''
            'BTSingleQuote',  # b''' '''
            'Backtick',  # `
            'DollarParen',  # $(
            'DollarBrace',  # ${
            'DollarDParen',  # $((
            'DollarBracket',  # $[ - synonym for $(( in bash and zsh
            'DollarDoubleQuote',  # $" for bash localized strings
            'ProcSubIn',  # <( )
            'ProcSubOut',  # >( )
            'AtParen',  # @( for split command sub
            'CaretParen',  # ^( for Block literal in expression mode
            'CaretBracket',  # ^[ for Expr literal
            'CaretBrace',  # ^{ for Arglist
            'CaretDoubleQuote',  # ^" for Template
            'ColonPipe',  # :| for word arrays
            'PercentParen',  # legacy %( for word arrays
        ])

    spec.AddKind(
        'Right',
        [
            'DoubleQuote',
            'SingleQuote',
            'Backtick',  # `
            'DollarBrace',  # }
            'DollarDParen',  # )) -- really the second one is a PushHint()
            # ArithSub2 is just Id.Arith_RBracket
            'DollarDoubleQuote',  # "
            'DollarSingleQuote',  # '

            # Disambiguated right parens
            'Subshell',  # )
            'ShFunction',  # )
            'CasePat',  # )
            'ShArrayLiteral',  # )
            'ExtGlob',  # )
            'BlockLiteral',  # } that matches &{ echo hi }
        ])

    spec.AddKind('ExtGlob', ['Comma', 'At', 'Star', 'Plus', 'QMark', 'Bang'])

    # First position of var sub ${
    # Id.VOp2_Pound -- however you can't tell the difference at first!  It
    # could be an op or a name.  So it makes sense to base it on the state.
    # Id.VOp2_At
    # But then you have AS_STAR, or Id.Arith_Star maybe

    spec.AddKind(
        'VSub',
        [
            'DollarName',  # $foo
            'Name',  # 'foo' in ${foo}
            'Number',  # $0 .. $9
            'Bang',  # $!
            'At',  # $@ or [@] for array subscripting
            'Pound',  # $# or ${#var} for length
            'Dollar',  # $$
            'Star',  # $*
            'Hyphen',  # $-
            'QMark',  # $?
            'Dot',  # ${.myproc builtin sub}
        ])

    spec.AddKindPairs('VTest', [
        ('ColonHyphen', ':-'),
        ('Hyphen', '-'),
        ('ColonEquals', ':='),
        ('Equals', '='),
        ('ColonQMark', ':?'),
        ('QMark', '?'),
        ('ColonPlus', ':+'),
        ('Plus', '+'),
    ])

    # Statically parse @P, so @x etc. is an error.
    spec.AddKindPairs(
        'VOp0',
        [
            ('Q', '@Q'),  # ${x@Q} for quoting
            ('E', '@E'),
            ('P', '@P'),  # ${PS1@P} for prompt eval
            ('A', '@A'),
            ('a', '@a'),
        ])

    # String removal ops
    spec.AddKindPairs(
        'VOp1',
        [
            ('Percent', '%'),
            ('DPercent', '%%'),
            ('Pound', '#'),
            ('DPound', '##'),
            # Case ops, in bash.  At least parse them.  Execution might require
            # unicode stuff.
            ('Caret', '^'),
            ('DCaret', '^^'),
            ('Comma', ','),
            ('DComma', ',,'),
        ])

    spec.AddKindPairs(
        'VOpOil',
        [
            ('Pipe', '|'),  # ${x|html}
            ('Space', ' '),  # ${x %.3f}
        ])

    # Not in POSIX, but in Bash
    spec.AddKindPairs(
        'VOp2',
        [
            ('Slash', '/'),  # / for replacement
            ('Colon', ':'),  # : for slicing
            ('LBracket', '['),  # [ for indexing
            ('RBracket', ']'),  # ] for indexing
        ])

    # Can only occur after ${!prefix@}
    spec.AddKindPairs('VOp3', [
        ('At', '@'),
        ('Star', '*'),
    ])

    # This kind is for Node types that are NOT tokens.
    spec.AddKind(
        'Node',
        [
            # Arithmetic nodes
            'PostDPlus',
            'PostDMinus',  # Postfix inc/dec.
            # Prefix inc/dec use Arith_DPlus/Arith_DMinus.
            'UnaryPlus',
            'UnaryMinus',  # +1 and -1, to distinguish from infix.
            # Actually we don't need this because they will be under
            # Expr1/Plus vs Expr2/Plus.
            'NotIn',
            'IsNot',  # For YSH comparisons
        ])

    # NOTE: Not doing AddKindPairs() here because oil will have a different set
    # of keywords.  It will probably have for/in/while/until/case/if/else/elif,
    # and then func/proc.
    spec.AddKind(
        'KW',
        [
            'DLeftBracket',
            'Bang',
            'For',
            'While',
            'Until',
            'Do',
            'Done',
            'In',
            'Case',
            'Esac',
            'If',
            'Fi',
            'Then',
            'Else',
            'Elif',
            'Function',
            'Time',

            # YSH keywords.
            'Const',
            'Var',
            'SetVar',
            'SetGlobal',
            # later: Auto?
            'Call',
            'Proc',
            'Func',

            # builtins, NOT keywords: use, fork, wait, etc.
            # Things that don't affect parsing shouldn't be keywords.
        ])

    # Unlike bash, we parse control flow statically.  They're not
    # dynamically-resolved builtins.
    spec.AddKind('ControlFlow', ['Break', 'Continue', 'Return', 'Exit'])

    # Special Kind for lookahead in the lexer.  It's never seen by anything else.
    spec.AddKind('LookAhead', ['FuncParens'])

    # For parsing globs and converting them to regexes.
    spec.AddKind('Glob', [
        'LBracket',
        'RBracket',
        'Star',
        'QMark',
        'Bang',
        'Caret',
        'EscapedChar',
        'BadBackslash',
        'CleanLiterals',
        'OtherLiteral',
    ])

    # For printf format strings.
    spec.AddKind(
        'Format',
        [
            'EscapedPercent',
            'Percent',  # starts another lexer mode
            'Flag',
            'Num',
            'Dot',
            'Type',
            'Star',
            'Time',
            'Zero',
        ])

    # For parsing prompt strings like PS1.
    spec.AddKind('PS', [
        'Subst',
        'Octal3',
        'LBrace',
        'RBrace',
        'Literals',
        'BadBackslash',
    ])

    spec.AddKind('Range', ['Int', 'Char', 'Dots', 'Other'])

    spec.AddKind(
        'J8',
        [
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
            'Comma',
            'Colon',
            'Null',
            'Bool',
            'Int',  # Number
            'Float',  # Number

            # High level tokens for "" b'' u''
            # We don't distinguish them in the parser, because we parse JSON in
            # the lexer.
            'String',

            # JSON8 and NIL8
            'Identifier',

            # NIL8 only
            'LParen',
            'RParen',
            #'Symbol',
            'Operator',
        ])


# Shared between [[ and test/[.
_UNARY_STR_CHARS = 'zn'  # -z -n
_UNARY_OTHER_CHARS = 'otvR'  # -o is overloaded
_UNARY_PATH_CHARS = 'abcdefghkLprsSuwxOGN'  # -a is overloaded

_BINARY_PATH = ['ef', 'nt', 'ot']
_BINARY_INT = ['eq', 'ne', 'gt', 'ge', 'lt', 'le']


def _Dash(strs):
    # type: (List[str]) -> List[Tuple[str, str]]
    # Gives a list of (token name, string to match) pairs
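    # For example (illustration only):
    #   _Dash(['ef', 'nt'])  =>  [('ef', '-ef'), ('nt', '-nt')]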
    return [(s, '-' + s) for s in strs]


def AddBoolKinds(spec):
    # type: (IdSpec) -> None
    spec.AddBoolKind('BoolUnary', [
        (bool_arg_type_e.Str, _Dash(list(_UNARY_STR_CHARS))),
        (bool_arg_type_e.Other, _Dash(list(_UNARY_OTHER_CHARS))),
        (bool_arg_type_e.Path, _Dash(list(_UNARY_PATH_CHARS))),
    ])

    spec.AddBoolKind('BoolBinary', [
        (bool_arg_type_e.Str, [
            ('GlobEqual', '='),
            ('GlobDEqual', '=='),
            ('GlobNEqual', '!='),
            ('EqualTilde', '=~'),
        ]),
        (bool_arg_type_e.Path, _Dash(_BINARY_PATH)),
        (bool_arg_type_e.Int, _Dash(_BINARY_INT)),
    ])

    Id = spec.id_str2int
    # logical, arity, arg_type
    spec.AddBoolOp(Id['Op_DAmp'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['Op_DPipe'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['KW_Bang'], bool_arg_type_e.Undefined)

    spec.AddBoolOp(Id['Op_Less'], bool_arg_type_e.Str)
    spec.AddBoolOp(Id['Op_Great'], bool_arg_type_e.Str)

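
# Illustrative sketch (comment only, never executed): after AddKinds(spec) and
# AddBoolKinds(spec) have run, bool_ops maps each [[ operator's Id to its
# argument type, e.g.:
#
#   Id = spec.id_str2int
#   spec.bool_ops[Id['BoolUnary_z']]    # => bool_arg_type_e.Str
#   spec.bool_ops[Id['BoolBinary_eq']]  # => bool_arg_type_e.Int
#   spec.bool_ops[Id['Op_DAmp']]        # => bool_arg_type_e.Undefined
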
def SetupTestBuiltin(
        id_spec,  # type: IdSpec
        unary_lookup,  # type: Dict[str, int]
        binary_lookup,  # type: Dict[str, int]
        other_lookup,  # type: Dict[str, int]
):
    # type: (...) -> None
    """Set up tokens for test/[.

    Similar to AddBoolKinds above.  Differences:
    - =~ doesn't exist
    - && -> -a, || -> -o
    - ( ) -> Op_LParen (they don't appear above)
    """
    Id = id_spec.id_str2int
    Kind = id_spec.kind_str2int

    for letter in _UNARY_STR_CHARS + _UNARY_OTHER_CHARS + _UNARY_PATH_CHARS:
        id_name = 'BoolUnary_%s' % letter
        unary_lookup['-' + letter] = Id[id_name]

    for s in _BINARY_PATH + _BINARY_INT:
        id_name = 'BoolBinary_%s' % s
        binary_lookup['-' + s] = Id[id_name]

    # Like the [[ definition above, but without globbing and without =~.

    for id_name, token_str in [('Equal', '='), ('DEqual', '=='),
                               ('NEqual', '!=')]:
        id_int = id_spec.AddBoolBinaryForBuiltin(id_name, Kind['BoolBinary'])

        binary_lookup[token_str] = id_int

    # Some of these names don't quite match, but it keeps the BoolParser simple.
    binary_lookup['<'] = Id['Op_Less']
    binary_lookup['>'] = Id['Op_Great']

    # NOTE: -a and -o are overloaded as unary prefix operators BoolUnary_a and
    # BoolUnary_o.  The parser rather than the tokenizer handles this.
    other_lookup['!'] = Id['KW_Bang']  # like [[ !
    other_lookup['('] = Id['Op_LParen']
    other_lookup[')'] = Id['Op_RParen']

    other_lookup[']'] = Id['Arith_RBracket']  # For closing ]
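

# Illustrative sketch (comment only, never executed): a caller that builds the
# test/[ tables might end up with entries like these; the dict variables here
# are hypothetical placeholders:
#
#   unary_lookup, binary_lookup, other_lookup = {}, {}, {}
#   SetupTestBuiltin(id_spec, unary_lookup, binary_lookup, other_lookup)
#   unary_lookup['-z']    # => Id['BoolUnary_z']
#   binary_lookup['-eq']  # => Id['BoolBinary_eq']
#   binary_lookup['==']   # => Id['BoolBinary_DEqual']  (builtin-only, not lexed)
#   other_lookup['(']     # => Id['Op_LParen']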
|