"""
word.py - Utility functions for words, e.g. treating them as "tokens".
"""

from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
from _devbuild.gen.syntax_asdl import (
    Token,
    CompoundWord,
    DoubleQuoted,
    SingleQuoted,
    word,
    word_e,
    word_t,
    word_str,
    word_part,
    word_part_t,
    word_part_e,
    AssocPair,
)
from frontend import consts
from frontend import lexer
from mycpp import mylib
from mycpp.mylib import tagswitch, log
from osh import word_compile

from typing import Tuple, Optional, List, Any, cast, TYPE_CHECKING
if TYPE_CHECKING:
    from osh.word_parse import WordParser

_ = log


def LiteralId(p):
    # type: (word_part_t) -> Id_t
    """If the WordPart consists of a single literal token, return its Id.

    Used for Id.KW_For, Id.RBrace, etc.
    """
    UP_part = p
    if p.tag() == word_part_e.Literal:
        return cast(Token, UP_part).id
    else:
        return Id.Undefined_Tok  # unequal to any other Id
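
# Usage sketch (informal; a Token IS a word_part.Literal, which is why the
# cast above is valid).  DummyToken is used the same way in ErrorWord() below.
#
#     tok = lexer.DummyToken(Id.KW_For, 'for')
#     assert LiteralId(tok) == Id.KW_For
#     assert LiteralId(word_part.TildeSub(tok, None, None)) == Id.Undefined_Tok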


def _EvalWordPart(part):
    # type: (word_part_t) -> Tuple[bool, str, bool]
    """Evaluate a WordPart at PARSE TIME.

    Used for:

    1. here doc delimiters
    2. function names
    3. for loop variable names
    4. compiling constant regex words at parse time
    5. a special case for ${a////c}, to see if we got a leading slash in the
       pattern

    Returns:
      3-tuple of
        ok: bool, success.  If there are parts that can't be statically
            evaluated, then we return False.
        value: a string (not a Value)
        quoted: whether any part of the word was quoted
    """
    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.ShArrayLiteral):
            # Array literals aren't good for any of our use cases.  TODO:
            # Rename EvalWordToString?
            return False, '', False

        elif case(word_part_e.BashAssocLiteral):
            return False, '', False

        elif case(word_part_e.Literal):
            tok = cast(Token, UP_part)
            return True, lexer.TokenVal(tok), False

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            if mylib.PYTHON:
                val = lexer.TokenVal(part.token)
                assert len(val) == 2, val  # e.g. \*
                assert val[0] == '\\'
            s = lexer.TokenSliceLeft(part.token, 1)
            return True, s, True

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            tmp = [t.tval for t in part.tokens]  # on its own line for mycpp
            s = ''.join(tmp)
            return True, s, True

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            strs = []  # type: List[str]
            for p in part.parts:
                ok, s, _ = _EvalWordPart(p)
                if not ok:
                    return False, '', True
                strs.append(s)

            return True, ''.join(strs), True  # At least one part was quoted!

        elif case(word_part_e.CommandSub, word_part_e.SimpleVarSub,
                  word_part_e.BracedVarSub, word_part_e.TildeSub,
                  word_part_e.ArithSub, word_part_e.ExtGlob,
                  word_part_e.Splice, word_part_e.ExprSub):
            return False, '', False

        else:
            raise AssertionError(part.tag())
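
# Informal examples of parse-time evaluation, following the cases above
# (shell source -> (ok, value, quoted)):
#
#     foo       ->  (True, 'foo', False)   # Literal
#     'foo'     ->  (True, 'foo', True)    # SingleQuoted
#     "foo$x"   ->  (False, '', True)      # DoubleQuoted containing a var sub
#     $x        ->  (False, '', False)     # SimpleVarSub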


def FastStrEval(w):
    # type: (CompoundWord) -> Optional[str]
    """Detects common cases:

    (1) CompoundWord([LiteralPart(Id.Lit_Chars)])
        For echo -e, test x -lt 0, etc.
    (2) a single quoted word like 'foo'

    TODO:
    - remove tval - word_part.Literal(Token tok, str? sval) -> becomes sval

    Other patterns we could detect are:
    (1) "foo"
    (2) "$var" and "${var}" - I think these are very common in OSH code (but
        not YSH)
        - I think val_ops.Stringify() can handle all the errors
    """
    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    UP_part0 = part0
    with tagswitch(part0) as case:
        if case(word_part_e.Literal):
            part0 = cast(Token, UP_part0)

            if part0.id in (Id.Lit_Chars, Id.Lit_LBracket, Id.Lit_RBracket):
                # Could add more tokens in this case,
                # e.g. + is Lit_Other, and it's a Token in 'expr'.
                # Right now it's Lit_Chars (e.g. ls -l) and [ and ], because I
                # know those are common.
                # { } are not as common.

                #if part0.line is None:
                #    log("part0 %s", part0)

                # TODO: word_part.Literal should have a lazy (str? sval) field

                # TODO: instances created by lexer.DummyToken() don't have a
                # tok.line field, so they can't use lexer.TokenVal()
                return part0.tval
                #return lexer.TokenVal(part0)

            else:
                # e.g. Id.Lit_Star needs to be glob expanded
                # TODO: Consider moving Id.Lit_Star etc. to Kind.MaybeGlob?
                return None

        elif case(word_part_e.SingleQuoted):
            part0 = cast(SingleQuoted, UP_part0)
            # TODO: SingleQuoted should have a lazy (str? sval) field
            # This would only affect multi-line strings though?
            return word_compile.EvalSingleQuoted(part0)

        else:
            # e.g. DoubleQuoted can't be optimized to a string, because it
            # might have "$@" and such
            return None
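
# Usage sketch (hypothetical caller): an evaluator can try the fast path
# first, and fall back to the general WordPart walk when it returns None.
# value.Str is an assumption here, not something this module defines.
#
#     s = FastStrEval(w)
#     if s is not None:
#         return value.Str(s)  # assumes a value.Str constructor
#     ...                      # otherwise, evaluate each part the slow way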


def StaticEval(UP_w):
    # type: (word_t) -> Tuple[bool, str, bool]
    """Evaluate a Compound at PARSE TIME."""
    quoted = False

    # e.g. for ( instead of for (( is a token word
    if UP_w.tag() != word_e.Compound:
        return False, '', quoted

    w = cast(CompoundWord, UP_w)

    strs = []  # type: List[str]
    for part in w.parts:
        ok, s, q = _EvalWordPart(part)
        if not ok:
            return False, '', quoted
        if q:
            quoted = True  # at least one part was quoted
        strs.append(s)
    #log('StaticEval parts %s', w.parts)
    return True, ''.join(strs), quoted
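
# Example (informal): ShFunctionName() below uses this to reject quoted
# function names:
#
#     f() { ... }     # evaluates statically -> (True, 'f', False)
#     'f'() { ... }   # quoted -> (True, 'f', True), so it's rejected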


# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if there
# is no slash...  If none of the characters in the tilde-prefix are quoted,
# the characters in the tilde-prefix following the tilde shall be treated as
# a possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode?  You respect quotes
# and substitutions.
#
# We only detect ~Lit_Chars and split.  So we might as well just write a
# regex.


def TildeDetect(UP_w):
    # type: (word_t) -> Optional[CompoundWord]
    """Detect tilde expansion in a word.

    It might begin with a Literal that needs to be turned into a TildeSub.
    (It depends on whether the second token begins with a slash.)

    If so, it returns a new word.  Otherwise it returns None.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      token is changed.  But note that we CANNOT know this during lexing.
    """
    # BracedTree can't be tilde expanded
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    return TildeDetect2(w)
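
# Informal sketch of the transformation (word parts, before -> after):
#
#     ~/src     Lit_Tilde Lit_Slash Lit_Chars(src)
#                   ->  TildeSub Lit_Slash Lit_Chars(src)
#     ~bob/src  Lit_Tilde Lit_Chars(bob) Lit_Slash Lit_Chars(src)
#                   ->  TildeSub(bob) Lit_Slash Lit_Chars(src)
#     ~$x       Lit_Tilde SimpleVarSub
#                   ->  None (not a tilde sub)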


def TildeDetect2(w):
    # type: (CompoundWord) -> Optional[CompoundWord]
    """If tilde sub is detected, returns a new CompoundWord.

    Accepts CompoundWord, not word_t.  After brace expansion, we know we have
    a List[CompoundWord].

    Tilde detection:

    YES:
      ~  ~/
      ~bob  ~bob/

    NO:
      ~bob#  ~bob#/
      ~bob$x
      ~$x

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | %end)
    """
    if len(w.parts) == 0:  # ${a-} has no parts
        return None

    part0 = w.parts[0]
    id0 = LiteralId(part0)
    if id0 != Id.Lit_Tilde:
        return None  # $x is not TildeSub

    tok0 = cast(Token, part0)

    new_parts = []  # type: List[word_part_t]

    if len(w.parts) == 1:  # ~
        new_parts.append(word_part.TildeSub(tok0, None, None))
        return CompoundWord(new_parts)

    id1 = LiteralId(w.parts[1])
    if id1 == Id.Lit_Slash:  # ~/
        new_parts.append(word_part.TildeSub(tok0, None, None))
        new_parts.extend(w.parts[1:])
        return CompoundWord(new_parts)

    if id1 != Id.Lit_Chars:
        return None  # ~$x is not TildeSub

    tok1 = cast(Token, w.parts[1])

    if len(w.parts) == 2:  # ~foo
        new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
        return CompoundWord(new_parts)

    id2 = LiteralId(w.parts[2])
    if id2 != Id.Lit_Slash:  # ~foo$x is not TildeSub
        return None

    new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
    new_parts.extend(w.parts[2:])
    return CompoundWord(new_parts)
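
# Minimal constructed example (a sketch, building a Token with DummyToken as
# ErrorWord() below does; the bare ~ case never reads tok.line):
#
#     w = CompoundWord([lexer.DummyToken(Id.Lit_Tilde, '~')])
#     w2 = TildeDetect2(w)
#     # -> CompoundWord([word_part.TildeSub(tok, None, None)])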


def TildeDetectAssign(w):
    # type: (CompoundWord) -> None
    """Detects multiple tilde subs, like a=~:~/src:~bob

    MUTATES its argument.

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | Lit_Colon | %end)
    """
    parts = w.parts

    # Bail out EARLY if there are no ~ at all
    has_tilde = False
    for part in parts:
        if LiteralId(part) == Id.Lit_Tilde:
            has_tilde = True
            break
    if not has_tilde:
        return  # Avoid further work and allocations

    # Avoid IndexError, since we have to look ahead up to 2 tokens
    parts.append(None)
    parts.append(None)

    new_parts = []  # type: List[word_part_t]

    tilde_could_be_next = True  # true at first, and true after :

    i = 0
    n = len(parts)

    while i < n:
        part0 = parts[i]
        if part0 is None:
            break

        #log('i = %d', i)
        #log('part0 %s', part0)

        # Skip tilde in middle of word, like a=foo~bar
        if tilde_could_be_next and LiteralId(part0) == Id.Lit_Tilde:
            # If ~ ends the string, part1 is None (we padded with None above)
            part1 = parts[i + 1]
            part2 = parts[i + 2]

            tok0 = cast(Token, part0)

            if part1 is None:  # x=foo:~
                new_parts.append(word_part.TildeSub(tok0, None, None))
                break  # at end

            id1 = LiteralId(part1)

            if id1 in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~/ or x=foo:~:
                new_parts.append(word_part.TildeSub(tok0, None, None))
                new_parts.append(part1)
                i += 2
                continue

            if id1 != Id.Lit_Chars:
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                i += 2
                continue  # x=foo:~$x is not tilde sub

            tok1 = cast(Token, part1)

            if part2 is None:  # x=foo:~foo
                # consume both
                new_parts.append(
                    word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
                break  # at end

            id2 = LiteralId(part2)
            if id2 not in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~foo$x
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                new_parts.append(part2)  # ...
                i += 3
                continue

            new_parts.append(
                word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
            new_parts.append(part2)
            i += 3

            tilde_could_be_next = (id2 == Id.Lit_Colon)

        else:
            new_parts.append(part0)
            i += 1

            tilde_could_be_next = (LiteralId(part0) == Id.Lit_Colon)

    parts.pop()
    parts.pop()

    # Mutate argument
    w.parts = new_parts
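
# Informal example: in a=~:~/src:~bob, the value parts are roughly
#
#     Lit_Tilde Lit_Colon Lit_Tilde Lit_Slash Lit_Chars(src) Lit_Colon
#     Lit_Tilde Lit_Chars(bob)
#
# and each Lit_Tilde that starts the value, or follows a Lit_Colon, is
# rewritten to a word_part.TildeSub in place.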


def TildeDetectAll(words):
    # type: (List[word_t]) -> List[word_t]
    out = []  # type: List[word_t]
    for w in words:
        t = TildeDetect(w)
        if t:
            out.append(t)
        else:
            out.append(w)
    return out


def HasArrayPart(w):
    # type: (CompoundWord) -> bool
    """Used in cmd_parse."""
    for part in w.parts:
        if part.tag() == word_part_e.ShArrayLiteral:
            return True
    return False


def ShFunctionName(w):
    # type: (CompoundWord) -> str
    """Returns a valid shell function name, or the empty string.

    TODO: Maybe use this regex to validate:

      FUNCTION_NAME_RE = r'[^{}\[\]=]*'

    Bash is very lenient, but that would disallow confusing characters, for
    better error messages on a[x]=(), etc.
    """
    ok, s, quoted = StaticEval(w)
    # Function names should not have quotes
    if not ok or quoted:
        return ''
    return s


def LooksLikeArithVar(UP_w):
    # type: (word_t) -> Optional[Token]
    """Return a token if this word looks like an arith var.

    NOTE: This can't be combined with DetectShAssignment because VarLike and
    ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will
    be confused between array assignments foo=(1 2) and function calls
    foo(1, 2).
    """
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    if len(w.parts) != 1:
        return None

    UP_part0 = w.parts[0]
    if LiteralId(UP_part0) != Id.Lit_ArithVarLike:
        return None

    return cast(Token, UP_part0)


def IsVarLike(w):
    # type: (CompoundWord) -> bool
    """Tests whether a word looks like FOO=bar.

    This is a quick test for the command parser to distinguish:

      func() { echo hi; }
      func=(1 2 3)
    """
    if len(w.parts) == 0:
        return False

    return LiteralId(w.parts[0]) == Id.Lit_VarLike


def DetectShAssignment(w):
    # type: (CompoundWord) -> Tuple[Optional[Token], Optional[Token], int]
    """Detects whether a word looks like FOO=bar or FOO[x]=bar.

    Returns:
      left_token   # Lit_VarLike, Lit_ArrayLhsOpen, or None if it's not an
                   # assignment
      close_token  # Lit_ArrayLhsClose if it was detected, or None
      part_offset  # where to start the value word; 0 if not an assignment

    Cases:

      s=1
      s+=1
      s[x]=1
      s[x]+=1

      a=()
      a+=()
      a[x]=()
      a[x]+=()  # We parse this (as bash does), but it's never valid because
                # arrays can't be nested.
    """
    no_token = None  # type: Optional[Token]

    n = len(w.parts)
    if n == 0:
        return no_token, no_token, 0

    UP_part0 = w.parts[0]
    id0 = LiteralId(UP_part0)
    if id0 == Id.Lit_VarLike:
        tok = cast(Token, UP_part0)
        return tok, no_token, 1  # everything after the first token is the value

    if id0 == Id.Lit_ArrayLhsOpen:
        tok0 = cast(Token, UP_part0)
        # NOTE that a[]=x should be an error.  We don't want to silently decay.
        if n < 2:
            return no_token, no_token, 0
        for i in xrange(1, n):
            UP_part = w.parts[i]
            if LiteralId(UP_part) == Id.Lit_ArrayLhsClose:
                tok_close = cast(Token, UP_part)
                return tok0, tok_close, i + 1

    # Nothing detected.  Could be 'foobar', or a[x+1+2/ without the closing ].
    return no_token, no_token, 0
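
# Informal examples of the return values:
#
#     s=1      ->  (Token(Lit_VarLike 's='), None, 1)
#     a[x]=1   ->  (Token(Lit_ArrayLhsOpen 'a['), Token(Lit_ArrayLhsClose ']='), i + 1)
#     foobar   ->  (None, None, 0)   # not an assignment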


def DetectAssocPair(w):
    # type: (CompoundWord) -> Optional[AssocPair]
    """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

    The key and the value are both strings.  So we just pick out word_part.
    Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k] syntax is only
    used for associative array literals, as opposed to indexed array
    literals.
    """
    parts = w.parts
    if LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = CompoundWord(parts[1:i])  # $x$y
            value = CompoundWord(parts[i + 1:])  # $a$b

            # Type-annotated intermediate value for mycpp translation
            return AssocPair(key, value)

    return None
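
# Informal example: for the word ['k1']=v1, the parts look roughly like
#
#     Lit_LBracket([)  SingleQuoted(k1)  Lit_ArrayLhsClose(]=)  Lit_Chars(v1)
#
# so the key is CompoundWord([SingleQuoted(k1)]) and the value is
# CompoundWord([Lit_Chars(v1)]).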


def IsControlFlow(w):
    # type: (CompoundWord) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is a control flow word."""
    no_token = None  # type: Optional[Token]

    if len(w.parts) != 1:
        return Kind.Undefined, no_token

    UP_part0 = w.parts[0]
    token_type = LiteralId(UP_part0)
    if token_type == Id.Undefined_Tok:
        return Kind.Undefined, no_token

    token_kind = consts.GetKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, cast(Token, UP_part0)

    return Kind.Undefined, no_token


def LiteralToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word consists of a literal token, return it.

    Otherwise return None.
    """
    # We're casting here because this function is called by the CommandParser
    # for var, setvar, '...', etc.  It's easier to cast in one place.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    if part0.tag() == word_part_e.Literal:
        return cast(Token, part0)

    return None


def BraceToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word has Id.Lit_LBrace or Id.Lit_RBrace, return a Token.

    This is a special case for osh/cmd_parse.py.

    The WordParser changes Id.Op_LBrace from ExprParser into Id.Lit_LBrace,
    so we may get a token, not a word.
    """
    with tagswitch(UP_w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            assert tok.id in (Id.Lit_LBrace, Id.Lit_RBrace), tok
            return tok

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)
            return LiteralToken(w)

        else:
            raise AssertionError()


def AsKeywordToken(UP_w):
    # type: (word_t) -> Token
    """Given a word that IS a CompoundWord containing just a keyword, return
    the single token at the start."""
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part = w.parts[0]
    assert part.tag() == word_part_e.Literal, part
    tok = cast(Token, part)
    assert consts.GetKind(tok.id) == Kind.KW, tok
    return tok


def AsOperatorToken(word):
    # type: (word_t) -> Token
    """For a word that IS an operator (word.Token), return that token.

    This must only be called on a word which is known to be an operator
    (word.Token).
    """
    assert word.tag() == word_e.Operator, word
    return cast(Token, word)


#
# Polymorphic between Token and Compound
#


def ArithId(w):
    # type: (word_t) -> Id_t
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return tok.id

    assert isinstance(w, CompoundWord)
    return Id.Word_Compound


def BoolId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word.String, UP_w)
            return w.id

        elif case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works
            # the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag())
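
# Informal examples for test/[ expressions, following the cases above:
#
#     -z, -f, ==  ->  their own Id (Kind.BoolUnary / Kind.BoolBinary)
#     !  ]]       ->  Id.KW_Bang, Id.Lit_DRightBracket (special "tokens")
#     foo$x       ->  Id.Word_Compound (a regular word operand)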


def CommandId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Has to be a single literal part
            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound

            elif token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                                Id.ControlFlow_Return):
                # OSH and YSH recognize:  { }
                # YSH recognizes:         = return
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type

            return Id.Word_Compound

        else:
            raise AssertionError(w.tag())


def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser."""
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return consts.GetKind(tok.id)

    # NOTE: This is a bit inconsistent with CommandId, because we never
    # return Kind.KW (or Kind.Lit).  But the CommandParser is easier to write
    # this way.
    return Kind.Word


# Stubs for converting RHS of assignment to expression mode.
# For osh2oil.py
def IsVarSub(w):
    # type: (word_t) -> bool
    """Return whether it's any var sub, or a double quoted one."""
    return False


# Doesn't translate with mycpp because of dynamic %
def ErrorWord(error_str):
    # type: (str) -> CompoundWord
    t = lexer.DummyToken(Id.Lit_Chars, error_str)
    return CompoundWord([t])


def Pretty(w):
    # type: (word_t) -> str
    """Return a string to display to the user."""
    UP_w = w
    if w.tag() == word_e.String:
        w = cast(word.String, UP_w)
        if w.id == Id.Eof_Real:
            return 'EOF'
        else:
            return repr(w.s)
    else:
        return word_str(w.tag())  # tag name


class ctx_EmitDocToken(object):
    """For doc comments."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.EmitDocToken(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.EmitDocToken(False)


class ctx_Multiline(object):
    """For multiline commands."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.Multiline(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.Multiline(False)
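
# Usage sketch (hypothetical call site in the parser): both classes follow
# the same pattern, toggling a WordParser flag on construction and restoring
# it on exit:
#
#     with ctx_Multiline(w_parser):
#         ...  # parse commands with multiline continuation enabled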
|