| 1 | #!/usr/bin/env python2
|
| 2 | # Copyright 2016 Andy Chu. All rights reserved.
|
| 3 | # Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 | # you may not use this file except in compliance with the License.
|
| 5 | # You may obtain a copy of the License at
|
| 6 | #
|
| 7 | # http://www.apache.org/licenses/LICENSE-2.0
|
| 8 | """
|
| 9 | id_kind_def.py - Id and Kind definitions, stored in Token
|
| 10 |
|
| 11 | NOTE: If this file changes, rebuild it with build/py.sh all
|
| 12 | """
|
| 13 | from __future__ import print_function
|
| 14 |
|
| 15 | from _devbuild.gen.types_asdl import (bool_arg_type_e, bool_arg_type_t)
|
| 16 | #from mycpp.mylib import log
|
| 17 |
|
| 18 | from typing import List, Tuple, Dict, Optional, TYPE_CHECKING
|
| 19 | if TYPE_CHECKING: # avoid circular build deps
|
| 20 | from _devbuild.gen.id_kind_asdl import Id_t, Kind_t
|
| 21 |
|
| 22 |
|
class IdSpec(object):
    """Registry that hands out integer Ids and Kinds for shell tokens.

    Ids and Kinds are numbered sequentially in registration order, so the
    order of the Add*() calls determines every integer value.
    """

    def __init__(self, kind_lookup, bool_ops):
        # type: (Dict[int, int], Dict[int, bool_arg_type_t]) -> None
        """
        Args:
          kind_lookup: caller-supplied dict, filled with Id int -> Kind int
          bool_ops: caller-supplied dict, filled with Id int -> bool_arg_type_t
        """
        # Tables owned by the caller; they are mutated in place.
        self.kind_lookup = kind_lookup  # Id int -> Kind int
        self.bool_ops = bool_ops  # type: Dict[int, bool_arg_type_t]

        # Name -> integer tables
        self.id_str2int = {}  # type: Dict[str, int]
        self.kind_str2int = {}  # type: Dict[str, int]

        # Kind names in registration order, and how many Ids each one got
        self.kind_name_list = []  # type: List[str]
        self.kind_sizes = []  # type: List[int]  # optional stats

        # Kind int -> list of (is_regex, pattern, Id int)
        self.lexer_pairs = {}  # type: Dict[int, List[Tuple[bool, str, int]]]

        # Next integers to hand out.
        # IMPORTANT: 1-based indices match what asdl/gen_python.py does!!!
        self.id_index = 1
        self.kind_index = 1

    def LexerPairs(self, kind):
        # type: (Kind_t) -> List[Tuple[bool, str, Id_t]]
        """Return a new list of the (is_regex, pattern, Id) triples for a Kind.

        Raises KeyError if nothing was stored in self.lexer_pairs for 'kind'.
        """
        return [(is_regex, pat, id_)
                for is_regex, pat, id_ in self.lexer_pairs[kind]]

    def _AddId(self, id_name, kind=None):
        # type: (str, Optional[int]) -> int
        """Register one Id and return the integer assigned to it.

        Args:
          id_name: e.g. BoolBinary_Equal
          kind: Kind integer to file the Id under.  When None, the Kind that
                is currently being built (self.kind_index) is used.  The
                override is used by AddBoolBinaryForBuiltin.
        """
        new_id = self.id_index
        owner = self.kind_index if kind is None else kind

        self.id_str2int[id_name] = new_id
        self.kind_lookup[new_id] = owner

        # Advance the counter only after both tables were updated
        self.id_index = new_id + 1
        return new_id

    def _AddKind(self, kind_name):
        # type: (str) -> None
        """Give kind_name the current Kind integer, then advance the counter."""
        index = self.kind_index
        self.kind_str2int[kind_name] = index
        self.kind_index = index + 1
        self.kind_name_list.append(kind_name)

    def AddKind(self, kind_name, tokens):
        # type: (str, List[str]) -> None
        """Register a Kind whose Ids are named '<kind_name>_<token>'.

        Nothing is stored in self.lexer_pairs for this Kind.
        """
        assert isinstance(tokens, list), tokens

        for suffix in tokens:
            self._AddId('%s_%s' % (kind_name, suffix))

        # The Kind counter may only advance once all of its Ids are in,
        # because _AddId() reads it.
        self._AddKind(kind_name)
        self.kind_sizes.append(len(tokens))  # debug info

    def AddKindPairs(self, kind_name, pairs):
        # type: (str, List[Tuple[str, str]]) -> None
        """Register a Kind from (name, constant string) pairs.

        Besides creating the Ids, each pair is recorded in self.lexer_pairs
        as a non-regex (False, string, Id) entry under the new Kind.
        """
        assert isinstance(pairs, list), pairs

        constants = []
        for suffix, literal in pairs:
            new_id = self._AddId('%s_%s' % (kind_name, suffix))
            constants.append((False, literal, new_id))  # False: not a regex

        # Keyed by the Kind integer that _AddKind() is about to assign
        self.lexer_pairs[self.kind_index] = constants

        self._AddKind(kind_name)
        self.kind_sizes.append(len(pairs))  # debug info

    def AddBoolKind(
            self,
            kind_name,  # type: str
            arg_type_pairs,  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    ):
        # type: (...) -> None
        """Register a Kind of boolean operators, grouped by argument type.

        Args:
          kind_name: string, e.g. BoolUnary
          arg_type_pairs: list of (bool_arg_type_e value, pairs), where pairs
                          is a list of (name, constant string) like the
                          argument of AddKindPairs()
        """
        constants = []
        total = 0
        for arg_type, pairs in arg_type_pairs:
            for suffix, literal in pairs:
                # e.g. BoolUnary_f, BoolBinary_eq
                new_id = self._AddId('%s_%s' % (kind_name, suffix))
                self.AddBoolOp(new_id, arg_type)  # remember the operand type
                constants.append((False, literal, new_id))  # not a regex

            total += len(pairs)

        # Keyed by the Kind integer that _AddKind() is about to assign
        self.lexer_pairs[self.kind_index] = constants

        self._AddKind(kind_name)
        self.kind_sizes.append(total)  # debug info

    def AddBoolBinaryForBuiltin(self, id_name, kind):
        # type: (str, int) -> int
        """Register 'BoolBinary_<id_name>' under an existing Kind.

        For [ = ] [ == ] and [ != ].

        These operators are NOT added to the lexer.  They are "lexed" as
        word.String.

        Returns:
          The new Id integer.  Its operand type is bool_arg_type_e.Str.
        """
        full_name = 'BoolBinary_%s' % id_name
        new_id = self._AddId(full_name, kind=kind)
        self.AddBoolOp(new_id, bool_arg_type_e.Str)
        return new_id

    def AddBoolOp(self, id_int, arg_type):
        # type: (int, bool_arg_type_t) -> None
        """Record which bool_arg_type_e an Id integer operates on."""
        self.bool_ops[id_int] = arg_type
|
| 152 |
|
| 153 |
|
def AddKinds(spec):
    # type: (IdSpec) -> None
    """Register the non-boolean token Kinds and their Ids on spec.

    Each call below adds one Kind.  spec.AddKind() takes a list of Id name
    suffixes; spec.AddKindPairs() takes (suffix, constant string) pairs, which
    are also recorded as lexer pairs.

    IdSpec numbers Ids and Kinds sequentially, so the order of the calls and
    the order of the entries within each list determine the integer values.
    Do not reorder them casually.
    """

    # A compound word, in arith context, boolean context, or command context.
    # A['foo'] A["foo"] A[$foo] A["$foo"] A[${foo}] A["${foo}"]
    spec.AddKind('Word', ['Compound'])

    # Token IDs in Kind.Arith are first to make the TDOP precedence table
    # small.
    #
    # NOTE: Could share Op_Pipe, Op_Amp, Op_DAmp, Op_Semi, Op_LParen, etc.
    # Actually all of Arith could be folded into Op, because we are using
    # WordParser._ReadArithWord vs. WordParser._ReadWord.
    spec.AddKindPairs(
        'Arith',
        [
            ('Semi', ';'),  # ternary for loop only
            ('Comma', ','),  # function call and C comma operator
            ('Plus', '+'),
            ('Minus', '-'),
            ('Star', '*'),
            ('Slash', '/'),
            ('Percent', '%'),
            ('DPlus', '++'),
            ('DMinus', '--'),
            ('DStar', '**'),
            ('LParen', '('),
            ('RParen', ')'),  # grouping and function call extension
            ('LBracket', '['),
            ('RBracket', ']'),  # array and assoc array subscript
            ('RBrace', '}'),  # for end of var sub

            # Logical Ops
            ('QMark', '?'),
            ('Colon', ':'),  # Ternary Op: a < b ? 0 : 1
            ('LessEqual', '<='),
            ('Less', '<'),
            ('GreatEqual', '>='),
            ('Great', '>'),
            ('DEqual', '=='),
            ('NEqual', '!='),
            # note: these 3 are not in YSH Expr.  (Could be used in find dialect.)
            ('DAmp', '&&'),
            ('DPipe', '||'),
            ('Bang', '!'),

            # Bitwise ops
            ('DGreat', '>>'),
            ('DLess', '<<'),
            # YSH: ^ is exponent
            ('Amp', '&'),
            ('Pipe', '|'),
            ('Caret', '^'),
            ('Tilde', '~'),
            ('Equal', '='),

            # Augmented Assignment for $(( ))
            # Must match the list in osh/arith_parse.py
            # YSH has **= //= like Python
            ('PlusEqual', '+='),
            ('MinusEqual', '-='),
            ('StarEqual', '*='),
            ('SlashEqual', '/='),
            ('PercentEqual', '%='),
            ('DGreatEqual', '>>='),
            ('DLessEqual', '<<='),
            ('AmpEqual', '&='),
            ('CaretEqual', '^='),
            ('PipeEqual', '|='),
        ])

    spec.AddKind('Eof', ['Real', 'RParen', 'Backtick'])

    spec.AddKind('Undefined', ['Tok'])  # for initial state

    # The Unknown kind is used when we lex something, but it's invalid.
    # Examples:
    #   ${^}
    #   $'\z'  Such bad codes are accepted when parse_backslash is on
    #          (default in OSH), so we have to lex them.
    #   (x == y) should use === or ~==
    spec.AddKind('Unknown', ['Tok', 'Backslash', 'DEqual'])

    spec.AddKind('Eol', ['Tok'])  # no more tokens on line (\0)

    # Ignored_Newline is for J8 lexing to count lines
    spec.AddKind('Ignored', ['LineCont', 'Space', 'Comment', 'Newline'])

    # Id.WS_Space is for lex_mode_e.ShCommand; Id.Ignored_Space is for
    # lex_mode_e.Arith
    spec.AddKind('WS', ['Space'])

    spec.AddKind(
        'Lit',
        [
            'Chars',
            'CharsWithoutPrefix',  # for stripping leading whitespace
            'VarLike',
            'ArrayLhsOpen',
            'ArrayLhsClose',
            'Splice',  # @func(a, b)
            'AtLBracket',  # @[split(x)]
            'AtLBraceDot',  # @{.myproc arg1} should be builtin_sub
            'Other',
            'EscapedChar',  # \* is escaped
            'LBracket',
            'RBracket',  # for assoc array literals, static globs
            'Star',
            'QMark',
            # Either brace expansion or keyword for { and }
            'LBrace',
            'RBrace',
            'Comma',
            'Equals',  # For = f()
            'Dollar',  # detecting 'echo $'
            'DRightBracket',  # the ]] that matches [[, NOT a keyword
            'Tilde',  # tilde expansion
            'Pound',  # for comment or VarOp state
            'TPound',  # for doc comments like ###
            'TDot',  # for multiline commands ...
            'Slash',
            'Percent',  #  / # % for patsub, NOT unary op
            'Colon',  # x=foo:~:~root needs tilde expansion
            'Digits',  # for lex_mode_e.Arith
            'At',  # for ${a[@]} in lex_mode_e.Arith, and detecting @[]
            'ArithVarLike',  # for $((var+1)).  Distinct from Lit_VarLike 'var='
            'BadBackslash',  # for "\z", not Id.Unknown_Backslash because it's a
            # syntax error in YSH, but NOT OSH
            'CompDummy',  # A fake Lit_* token to get partial words during
            # completion
        ])

    # For recognizing \` and \" and \\ within backticks.  There's an extra layer
    # of backslash quoting.
    spec.AddKind('Backtick', ['Right', 'Quoted', 'DoubleQuote', 'Other'])

    spec.AddKind('History', ['Op', 'Num', 'Search', 'Other'])

    spec.AddKind(
        'Op',
        [
            'Newline',  # mostly equivalent to SEMI
            'Amp',  # &
            'Pipe',  # |
            'PipeAmp',  # |& -- bash extension for stderr
            'DAmp',  # &&
            'DPipe',  # ||
            'Semi',  # ;
            'DSemi',  # ;; for case
            'SemiAmp',  # ;& for case
            'DSemiAmp',  # ;;& for case
            'LParen',  # For subshell.  Not Kind.Left because it's NOT a WordPart.
            'RParen',  # Default, will be translated to Id.Right_*
            'DLeftParen',
            'DRightParen',

            # for [[ ]] language
            'Less',  # <
            'Great',  # >
            'Bang',  # !

            # YSH [] {}
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
        ])

    # YSH expressions use Kind.Expr and Kind.Arith (defined above)
    spec.AddKind(
        'Expr',
        [
            'Reserved',  # <- means nothing but it's reserved now
            'Symbol',  # %foo
            'Name',
            'DecInt',
            'BinInt',
            'OctInt',
            'HexInt',
            'Float',
            'Bang',  # eggex !digit, ![a-z]
            'Dot',
            'DDot',
            'Colon',  # mylist:pop()
            'RArrow',
            'RDArrow',
            'DSlash',  # integer division
            'TEqual',
            'NotDEqual',
            'TildeDEqual',  # === !== ~==
            'At',
            'DoubleAt',  # splice operators
            'Ellipsis',  # for varargs
            'Dollar',  # legacy regex
            'NotTilde',  # !~
            'DTilde',
            'NotDTilde',  # ~~ !~~
            'DStarEqual',  # **=, which bash doesn't have
            'DSlashEqual',  # //=, which bash doesn't have
            'CastedDummy',  # Used for @()  $() (words in lex_mode_e.ShCommand)
            # and ${}  ''  "" (and all other strings)

            # Constants
            'Null',
            'True',
            'False',

            # Keywords are resolved after lexing, but otherwise behave like tokens.
            'And',
            'Or',
            'Not',

            # List comprehensions
            'For',
            'Is',
            'In',
            'If',
            'Else',
            'Func',  # For function literals
            'Capture',
            'As',
        ])

    # For C-escaped strings.
    spec.AddKind(
        'Char',
        [
            'OneChar',
            'Stop',
            'Hex',  # \xff
            'YHex',  # \yff for J8 notation

            # Two variants of Octal: \377, and \0377.
            'Octal3',
            'Octal4',
            'Unicode4',
            'SurrogatePair',  # JSON
            'Unicode8',  # bash
            'UBraced',
            'Pound',  # YSH
            'AsciiControl',  # \x01-\x1f, what's disallowed in JSON
        ])

    # For lex_mode_e.BashRegex
    # Bash treats ( | ) as special, and space is allowed within ()
    # Note Id.Op_RParen -> Id.Right_BashRegex with lexer hint
    spec.AddKind('BashRegex', ['LParen', 'AllowedInParens'])

    spec.AddKind(
        'Eggex',
        [
            'Start',  # ^ or %start
            'End',  # $ or %end
            'Dot',  # . or dot
            # Future: %boundary generates \b in Python/Perl, etc.
        ])

    spec.AddKind(
        'Redir',
        [
            'Less',  # < stdin
            'Great',  # > stdout
            'DLess',  # << here doc redirect
            'TLess',  # <<< bash only here string
            'DGreat',  # >> append stdout
            'GreatAnd',  # >& descriptor redirect
            'LessAnd',  # <& descriptor redirect
            'DLessDash',  # <<- here doc redirect for tabs?
            'LessGreat',  # <>
            'Clobber',  # >| POSIX?
            'AndGreat',  # bash &> stdout/stderr to file
            'AndDGreat',  # bash &>> stdout/stderr append to file

            #'GreatPlus',  # >+ is append in YSH
            #'DGreatPlus',  # >>+ is append to string in YSH
        ])

    # NOTE: This is for left/right WORDS only.  (( is not a word so it doesn't
    # get that.
    spec.AddKind(
        'Left',
        [
            'DoubleQuote',
            'JDoubleQuote',  # j" for J8 notation
            'SingleQuote',  # ''
            'DollarSingleQuote',  # $'' for \n escapes
            'RSingleQuote',  # r''
            'USingleQuote',  # u''
            'BSingleQuote',  # b''

            # Multiline versions
            'TDoubleQuote',  # """ """
            'DollarTDoubleQuote',  # $""" """
            'TSingleQuote',  # ''' '''
            'RTSingleQuote',  # r''' '''
            'UTSingleQuote',  # u''' '''
            'BTSingleQuote',  # b''' '''
            'Backtick',  # `
            'DollarParen',  # $(
            'DollarBrace',  # ${
            'DollarBraceZsh',  # ${(foo)
            'DollarDParen',  # $((
            'DollarBracket',  # $[ - synonym for $(( in bash and zsh
            'DollarDoubleQuote',  # $" for bash localized strings
            'ProcSubIn',  # <( )
            'ProcSubOut',  # >( )
            'AtParen',  # @( for split command sub
            'CaretParen',  # ^( for Block literal in expression mode
            'CaretBracket',  # ^[ for Expr literal
            'CaretBrace',  # ^{ for Arglist
            'CaretDoubleQuote',  # ^" for Template
            'ColonPipe',  # :| for word arrays
            'PercentParen',  # legacy %( for word arrays
        ])

    spec.AddKind(
        'Right',
        [
            'DoubleQuote',
            'SingleQuote',
            'Backtick',  # `
            'DollarBrace',  # }
            'DollarDParen',  # )) -- really the second one is a PushHint()
            # ArithSub2 is just Id.Arith_RBracket
            'DollarDoubleQuote',  # "
            'DollarSingleQuote',  # '

            # Disambiguated right parens
            'Subshell',  # )
            'ShFunction',  # )
            'CasePat',  # )
            'ShArrayLiteral',  # )
            'ExtGlob',  # )
            'BashRegexGroup',  # )
            'BlockLiteral',  # } that matches &{ echo hi }
        ])

    spec.AddKind('ExtGlob', ['Comma', 'At', 'Star', 'Plus', 'QMark', 'Bang'])

    # First position of var sub ${
    # Id.VOp2_Pound -- however you can't tell the difference at first!  It could
    # be an op or a name.  So it makes sense to base it on the state.
    # Id.VOp2_At
    # But then you have AS_STAR, or Id.Arith_Star maybe

    spec.AddKind(
        'VSub',
        [
            'DollarName',  # $foo
            'Name',  # 'foo' in ${foo}
            'Number',  # $0 .. $9
            'Bang',  # $!
            'At',  # $@  or  [@] for array subscripting
            'Pound',  # $#  or  ${#var} for length
            'Dollar',  # $$
            'Star',  # $*
            'Hyphen',  # $-
            'QMark',  # $?
            'Dot',  # ${.myproc builtin sub}
        ])

    spec.AddKindPairs('VTest', [
        ('ColonHyphen', ':-'),
        ('Hyphen', '-'),
        ('ColonEquals', ':='),
        ('Equals', '='),
        ('ColonQMark', ':?'),
        ('QMark', '?'),
        ('ColonPlus', ':+'),
        ('Plus', '+'),
    ])

    # Statically parse @P, so @x etc. is an error.
    spec.AddKindPairs(
        'VOp0',
        [
            ('Q', '@Q'),  # ${x@Q} for quoting
            ('E', '@E'),
            ('P', '@P'),  # ${PS1@P} for prompt eval
            ('A', '@A'),
            ('a', '@a'),
        ])

    # String removal ops
    spec.AddKindPairs(
        'VOp1',
        [
            ('Percent', '%'),
            ('DPercent', '%%'),
            ('Pound', '#'),
            ('DPound', '##'),
            # Case ops, in bash.  At least parse them.  Execution might require
            # unicode stuff.
            ('Caret', '^'),
            ('DCaret', '^^'),
            ('Comma', ','),
            ('DComma', ',,'),
        ])

    spec.AddKindPairs(
        'VOpYsh',
        [
            ('Pipe', '|'),  # ${x|html}
            ('Space', ' '),  # ${x %.3f}
        ])

    # Not in POSIX, but in Bash
    spec.AddKindPairs(
        'VOp2',
        [
            ('Slash', '/'),  #  / for replacement
            ('Colon', ':'),  #  : for slicing
            ('LBracket', '['),  #  [ for indexing
            ('RBracket', ']'),  #  ] for indexing
        ])

    # Can only occur after ${!prefix@}
    spec.AddKindPairs('VOp3', [
        ('At', '@'),
        ('Star', '*'),
    ])

    # This kind is for Node types that are NOT tokens.
    spec.AddKind(
        'Node',
        [
            # Arithmetic nodes
            'PostDPlus',
            'PostDMinus',  # Postfix inc/dec.
            # Prefix inc/dec use Arith_DPlus/Arith_DMinus.
            'UnaryPlus',
            'UnaryMinus',  # +1 and -1, to distinguish from infix.
            # Actually we don't need this because they
            # will be under Expr1/Plus vs Expr2/Plus.
            'NotIn',
            'IsNot',  # For YSH comparisons
        ])

    # NOTE: Not doing AddKindPairs() here because oil will have a different set
    # of keywords.  It will probably have for/in/while/until/case/if/else/elif,
    # and then func/proc.
    spec.AddKind(
        'KW',
        [
            'DLeftBracket',
            'Bang',
            'For',
            'While',
            'Until',
            'Do',
            'Done',
            'In',
            'Case',
            'Esac',
            'If',
            'Fi',
            'Then',
            'Else',
            'Elif',
            'Function',
            'Time',

            # YSH keywords.
            'Const',
            'Var',
            'SetVar',
            'SetGlobal',
            # later: Auto?
            'Call',
            'Proc',
            'Typed',
            'Func',

            # builtins, NOT keywords: use, fork, wait, etc.
            # Things that don't affect parsing shouldn't be keywords.
        ])

    # Unlike bash, we parse control flow statically.  They're not
    # dynamically-resolved builtins.
    spec.AddKind('ControlFlow', ['Break', 'Continue', 'Return', 'Exit'])

    # Special Kind for lookahead in the lexer.  It's never seen by anything else.
    spec.AddKind('LookAhead', ['FuncParens'])

    # For parsing globs and converting them to regexes.
    spec.AddKind('Glob', [
        'LBracket',
        'RBracket',
        'Star',
        'QMark',
        'Bang',
        'Caret',
        'EscapedChar',
        'BadBackslash',
        'CleanLiterals',
        'OtherLiteral',
    ])

    # For C-escaped strings.
    # NOTE(review): the Id names below (EscapedPercent, Flag, Num, Type, ...)
    # look like printf-style format string tokens rather than C escapes --
    # confirm and reword the line above if so.
    spec.AddKind(
        'Format',
        [
            'EscapedPercent',
            'Percent',  # starts another lexer mode
            'Flag',
            'Num',
            'Dot',
            'Type',
            'Star',
            'Time',
            'Zero',
        ])

    # For parsing prompt strings like PS1.
    spec.AddKind('PS', [
        'Subst',
        'Octal3',
        'LBrace',
        'RBrace',
        'Literals',
        'BadBackslash',
    ])

    spec.AddKind('Range', ['Int', 'Char', 'Dots', 'Other'])

    spec.AddKind(
        'J8',
        [
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
            'Comma',
            'Colon',
            'Null',
            'Bool',
            'Int',  # Number
            'Float',  # Number

            # High level tokens for "" b'' u''
            # We don't distinguish them in the parser, because we recognize
            # strings in the lexer.
            'String',

            # JSON8 and NIL8
            'Identifier',
            'Newline',  # J8 Lines only, similar to Op_Newline
            'Tab',  # Reserved for TSV8

            # NIL8 only
            'LParen',
            'RParen',
            #'Symbol',
            'Operator',
        ])
|
| 709 |
|
| 710 |
|
# Operator tables shared between [[ and test/[.
#
# Unary operators are single letters, kept as strings so they can be
# concatenated and iterated one character at a time.  The order of the entries
# is the order in which Ids are registered.
_UNARY_STR_CHARS = 'zn'  # -z -n
_UNARY_OTHER_CHARS = 'otvR'  # -o is overloaded
_UNARY_PATH_CHARS = 'abcdefghkLprsSuwxOGN'  # -a is overloaded

# Binary operators are spelled without their leading dash; _Dash() adds it.
_BINARY_PATH = [
    'ef',
    'nt',
    'ot',
]
_BINARY_INT = [
    'eq',
    'ne',
    'gt',
    'ge',
    'lt',
    'le',
]
|
| 718 |
|
| 719 |
|
def _Dash(strs):
    # type: (List[str]) -> List[Tuple[str, str]]
    """Pair each name with its dash-prefixed spelling.

    Returns a list of (token name, string to match), e.g. 'eq' gives
    ('eq', '-eq').
    """
    dashed = []
    for name in strs:
        dashed.append((name, '-' + name))
    return dashed
|
| 724 |
|
| 725 |
|
def AddBoolKinds(spec):
    # type: (IdSpec) -> None
    """Register the BoolUnary and BoolBinary Kinds, plus a few existing Ids.

    The Op_* and KW_Bang Ids looked up at the end must already be registered
    on spec; otherwise the lookup raises KeyError.
    """
    unary_groups = [
        (bool_arg_type_e.Str, _Dash(list(_UNARY_STR_CHARS))),
        (bool_arg_type_e.Other, _Dash(list(_UNARY_OTHER_CHARS))),
        (bool_arg_type_e.Path, _Dash(list(_UNARY_PATH_CHARS))),
    ]  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    spec.AddBoolKind('BoolUnary', unary_groups)

    # String comparisons are spelled with punctuation, not a dash
    string_ops = [
        ('GlobEqual', '='),
        ('GlobDEqual', '=='),
        ('GlobNEqual', '!='),
        ('EqualTilde', '=~'),
    ]
    binary_groups = [
        (bool_arg_type_e.Str, string_ops),
        (bool_arg_type_e.Path, _Dash(_BINARY_PATH)),
        (bool_arg_type_e.Int, _Dash(_BINARY_INT)),
    ]  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    spec.AddBoolKind('BoolBinary', binary_groups)

    ids = spec.id_str2int

    # Logical operators are registered with the Undefined argument type
    for logical_name in ('Op_DAmp', 'Op_DPipe', 'KW_Bang'):
        spec.AddBoolOp(ids[logical_name], bool_arg_type_e.Undefined)

    # < and > are registered as string operators
    for compare_name in ('Op_Less', 'Op_Great'):
        spec.AddBoolOp(ids[compare_name], bool_arg_type_e.Str)
|
| 753 |
|
| 754 |
|
def SetupTestBuiltin(
        id_spec,  # type: IdSpec
        unary_lookup,  # type: Dict[str, int]
        binary_lookup,  # type: Dict[str, int]
        other_lookup,  # type: Dict[str, int]
):
    # type: (...) -> None
    """Fill the operator lookup tables used by test/[.

    The three dicts are mutated in place, mapping operator strings to Id
    integers.  Compared with AddBoolKinds:
    - there is no =~
    - = == != get their own Ids (BoolBinary_Equal etc.), registered here via
      AddBoolBinaryForBuiltin()
    - ! ( ) ] are looked up in other_lookup

    The BoolUnary_*, BoolBinary_*, Op_*, KW_Bang and Arith_RBracket Ids and
    the BoolBinary Kind must already exist on id_spec.
    """
    ids = id_spec.id_str2int

    # -z, -n, -o, ... reuse the BoolUnary Ids
    for chars in (_UNARY_STR_CHARS, _UNARY_OTHER_CHARS, _UNARY_PATH_CHARS):
        for letter in chars:
            unary_lookup['-' + letter] = ids['BoolUnary_%s' % letter]

    # -ef, -eq, ... reuse the BoolBinary Ids
    for groups in (_BINARY_PATH, _BINARY_INT):
        for op_name in groups:
            binary_lookup['-' + op_name] = ids['BoolBinary_%s' % op_name]

    # Like the [[ definition, but without globbing and without =~ .
    bool_binary_kind = id_spec.kind_str2int['BoolBinary']
    builtin_only = [('Equal', '='), ('DEqual', '=='), ('NEqual', '!=')]
    for suffix, token_str in builtin_only:
        binary_lookup[token_str] = id_spec.AddBoolBinaryForBuiltin(
            suffix, bool_binary_kind)

    # Some of these names don't quite match, but it keeps the BoolParser simple.
    for token_str, id_name in (('<', 'Op_Less'), ('>', 'Op_Great')):
        binary_lookup[token_str] = ids[id_name]

    # NOTE: -a and -o overloaded as unary prefix operators BoolUnary_a and
    # BoolUnary_o.  The parser rather than the tokenizer handles this.
    punctuation = (
        ('!', 'KW_Bang'),  # like [[ !
        ('(', 'Op_LParen'),
        (')', 'Op_RParen'),
        (']', 'Arith_RBracket'),  # For closing ]
    )
    for token_str, id_name in punctuation:
        other_lookup[token_str] = ids[id_name]
|