#!/usr/bin/env python2
"""Consts.py."""
from __future__ import print_function

from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
                                      bool_arg_type_t, opt_group_i)
from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
from frontend import builtin_def
from frontend import lexer_def
from frontend import option_def

from typing import Tuple, Optional, TYPE_CHECKING
if TYPE_CHECKING:
    from _devbuild.gen.option_asdl import option_t, builtin_t

NO_INDEX = 0  # for Resolve

# Used as consts::STRICT_ALL, etc.  Do it explicitly to satisfy MyPy.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE

PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS

SET_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'set'
]
SET_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'set'
]

SHOPT_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'shopt'
]
SHOPT_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'shopt'
]

VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS  # used to print

BUILTIN_NAMES = builtin_def.BUILTIN_NAMES  # Used by builtin_comp.py

# Keywords for introspection with bash 'compgen' and 'type'
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS]
OSH_KEYWORD_NAMES.append('{')  # not handled by our lexer
OSH_KEYWORD_NAMES.append('=')  # YSH keyword not handled by our lexer

# bash considers these closing delimiters keywords
OSH_KEYWORD_NAMES.append('}')
OSH_KEYWORD_NAMES.append(']]')


def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """To make coarse-grained parsing decisions."""

    from _devbuild.gen.id_kind import ID_TO_KIND  # break circular dep
    return ID_TO_KIND[id_]
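
# For example (illustration only; the exact pairs live in the generated
# _devbuild/gen/id_kind module), GetKind(Id.Redir_Less) is Kind.Redir, so the
# parser can check the Kind once instead of matching every redirect Id
# separately.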


def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t

    from _devbuild.gen.id_kind import BOOL_ARG_TYPES  # break circular dep
    return BOOL_ARG_TYPES[id_]


#
# Redirect Tables associated with IDs
#

REDIR_DEFAULT_FD = {
    # filename
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write
    # bash &> and &>>
    Id.Redir_AndGreat: 1,
    Id.Redir_AndDGreat: 1,

    # descriptor
    Id.Redir_GreatAnd: 1,  # echo >&2 means echo 1>&2
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think
    Id.Redir_TLess: 0,  # here word

    # here docs included
    Id.Redir_DLess: 0,
    Id.Redir_DLessDash: 0,
}

REDIR_ARG_TYPES = {
    # filename
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,
    # bash &> and &>>
    Id.Redir_AndGreat: redir_arg_type_e.Path,
    Id.Redir_AndDGreat: redir_arg_type_e.Path,

    # descriptor
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,
    Id.Redir_TLess: redir_arg_type_e.Here,  # here word
    # note: here docs aren't included
}


def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    return REDIR_ARG_TYPES[id_]


def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    return REDIR_DEFAULT_FD[id_]


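# For example, in 'echo hi >&2' the operator is Id.Redir_GreatAnd:
# RedirDefaultFd(Id.Redir_GreatAnd) is 1 and RedirArgType(Id.Redir_GreatAnd) is
# redir_arg_type_e.Desc, i.e. the redirect defaults to stdout and its argument
# names a descriptor, so '>&2' means '1>&2'.
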
#
# Builtins
#

_BUILTIN_DICT = builtin_def.BuiltinDict()


def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it a special builtin?"""
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'special':
        return b.index
    else:
        return NO_INDEX


def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it an assignment builtin?"""
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'assign':
        return b.index
    else:
        return NO_INDEX


def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it any other builtin?"""
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'normal':
        return b.index
    else:
        return NO_INDEX


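# For example, assuming builtin_def classifies these the way bash does,
# LookupSpecialBuiltin('eval') and LookupAssignBuiltin('local') return nonzero
# indices, while all three lookups above return NO_INDEX (0) for an external
# command such as 'ls'.
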
def OptionName(opt_num):
    # type: (option_t) -> str
    """Get the name from an option index."""
    return option_def.OPTION_NAMES[opt_num]


OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,

    # Aliases to deprecate
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
}


def OptionGroupNum(s):
    # type: (str) -> int
    return OPTION_GROUPS.get(s, NO_INDEX)  # 0 for not found


_OPTION_DICT = option_def.OptionDict()


def OptionNum(s):
    # type: (str) -> int
    return _OPTION_DICT.get(s, 0)  # 0 means not found


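# For example, OptionNum('errexit') returns that option's nonzero index and
# OptionNum('not_an_option') returns 0, while OptionGroupNum('ysh:upgrade')
# returns opt_group_i.YshUpgrade.  (Illustrative; the actual numbers come from
# the generated option tables.)
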
_CONTROL_FLOW_NAMES = [name for _, name, _ in lexer_def.CONTROL_FLOW]
_CONTROL_FLOW_LOOKUP = {}
for _, name, id_ in lexer_def.CONTROL_FLOW:
    _CONTROL_FLOW_LOOKUP[id_] = name


def ControlFlowName(id_):
    # type: (int) -> str
    """For tracing."""
    return _CONTROL_FLOW_LOOKUP[id_]


def IsControlFlow(name):
    # type: (str) -> bool
    return name in _CONTROL_FLOW_NAMES


def IsKeyword(name):
    # type: (str) -> bool
    return name in OSH_KEYWORD_NAMES


#
# osh/prompt.py and osh/word_compile.py
#

_ONE_CHAR_C = {
    '0': '\0',
    'a': '\a',
    'b': '\b',
    'e': '\x1b',
    'E': '\x1b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    "'": "'",  # for $'' only, not echo -e
    '"': '"',  # not sure why this is escaped within $''
    '/': '/',  # for JSON \/ only
}


def LookupCharC(c):
    # type: (str) -> str
    """Fatal if not present."""
    return _ONE_CHAR_C[c]

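# For example, for a $'' escape like \n, LookupCharC('n') returns '\n'; an
# escape character that isn't in the table raises KeyError, which callers
# treat as a hard error.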

# NOTE: Prompt chars and printf are inconsistent, e.g. \E is \e in printf, but
# not in PS1.
_ONE_CHAR_PROMPT = {
    'a': '\a',
    'e': '\x1b',
    'r': '\r',
    'n': '\n',
    '\\': '\\',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Returns None if not present."""
    return _ONE_CHAR_PROMPT.get(c)


#
# Constants used by osh/split.py
#

# IFS splitting is complicated in general.  We handle it with three concepts:
#
# - CH.*   - Kinds of characters (edge labels)
# - ST.*   - States (node labels)
# - EMIT.* - Actions
#
# The Split() loop in osh/split.py classifies characters, follows state
# transitions, and emits spans.  A span is an (ignored Bool, end_index Int)
# pair.

# As an example, consider this string:
# 'a _ b'
#
# The character classes are:
#
#   a      ' '        _        ' '        b
#   Black  DE_White   DE_Gray  DE_White   Black
#
# The states are:
#
#   a      ' '        _        ' '        b
#   Black  DE_White1  DE_Gray  DE_White2  Black
#
# DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
#
# The spans emitted are:
#
#   (part 'a', ignored ' _ ', part 'b')
#
# (A step-by-step walk through the _IFS_EDGES table for this example appears
# after IfsEdge() below.)

# SplitForRead() will check if the last two spans are a \ and \\n.  Easy.

# Shorter names for state machine enums
from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
from _devbuild.gen.runtime_asdl import emit_i as EMIT
from _devbuild.gen.runtime_asdl import char_kind_i as CH
from _devbuild.gen.runtime_asdl import state_i as ST

_IFS_EDGES = {
    # Whitespace should have been stripped
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''
    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # ' '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too.  This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '
    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '
    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '
    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character.  That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}


def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Follow edges of the IFS state machine."""
    return _IFS_EDGES[state, ch]

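# Walking the table for the example above, 'a _ b' (e.g. IFS=' _'), where the
# characters classify as Black, DE_White, DE_Gray, DE_White, Black, then the
# Sentinel at the end of the string:
#
#   IfsEdge(ST.Start, CH.Black)        -> (ST.Black, EMIT.Nothing)
#   IfsEdge(ST.Black, CH.DE_White)     -> (ST.DE_White1, EMIT.Part)   part 'a'
#   IfsEdge(ST.DE_White1, CH.DE_Gray)  -> (ST.DE_Gray, EMIT.Nothing)
#   IfsEdge(ST.DE_Gray, CH.DE_White)   -> (ST.DE_White2, EMIT.Nothing)
#   IfsEdge(ST.DE_White2, CH.Black)    -> (ST.Black, EMIT.Delim)      delim ' _ '
#   IfsEdge(ST.Black, CH.Sentinel)     -> (ST.Done, EMIT.Part)        part 'b'
#
# which yields the spans (part 'a', ignored ' _ ', part 'b') described above.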

# Pattern to use with libc regexec() to parse NAME, NAME=value, and
# NAME+=value.
#
# We want submatch extraction, which would need a new type of binding, and
# doing it with libc seems easy enough.

ASSIGN_ARG_RE = '^(' + lexer_def.VAR_NAME_RE + r')((=|\+=)(.*))?$'

# Eggex equivalent:
#
# VarName = /
#   [a-z A-Z _ ]
#   [a-z A-Z 0-9 _ ]*
# /
#
# SplitArg = /
#   %begin
#   <capture VarName>
#   (
#     <capture '=' | '+='> <capture dot*>
#   )?
#   %end
# /

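# Capture groups in ASSIGN_ARG_RE (a sketch, assuming VAR_NAME_RE is the usual
# shell identifier pattern; the real matching is done with libc regexec(), but
# Python's re module accepts the same syntax here):
#
#   >>> import re
#   >>> m = re.match(ASSIGN_ARG_RE, 'PATH+=/usr/local/bin')
#   >>> m.group(1), m.group(3), m.group(4)
#   ('PATH', '+=', '/usr/local/bin')
#   >>> re.match(ASSIGN_ARG_RE, 'PATH').group(1)  # bare NAME also matches
#   'PATH'
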
# Weird rules for brackets: put ] first
NOT_BRACKETS = '[^][]*'
TEST_V_RE = '^(' + lexer_def.VAR_NAME_RE + r')(\[(' + NOT_BRACKETS + r')\])?$'

# NotBrackets = / ![ ']' '[' ] /
#
# TestV = /
#   %begin
#   <capture VarName>
#   (
#     '[' <capture NotBrackets> ']'
#   )?
#   %end
# /
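
# Capture groups in TEST_V_RE (again a sketch, under the same assumption about
# VAR_NAME_RE):
#
#   >>> import re
#   >>> m = re.match(TEST_V_RE, 'a[1]')
#   >>> m.group(1), m.group(3)
#   ('a', '1')
#   >>> re.match(TEST_V_RE, 'a').group(1)  # plain NAME, no index
#   'a'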
|