1 | """
|
2 | word.py - Utility functions for words, e.g. treating them as "tokens".
|
3 | """
|
4 |
|
5 | from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
|
6 | from _devbuild.gen.syntax_asdl import (
|
7 | Token,
|
8 | CompoundWord,
|
9 | DoubleQuoted,
|
10 | SingleQuoted,
|
11 | word,
|
12 | word_e,
|
13 | word_t,
|
14 | word_str,
|
15 | word_part,
|
16 | word_part_t,
|
17 | word_part_e,
|
18 | AssocPair,
|
19 | )
|
20 | from frontend import consts
|
21 | from frontend import lexer
|
22 | from mycpp import mylib
|
23 | from mycpp.mylib import tagswitch, log
|
24 |
|
25 | from typing import Tuple, Optional, List, Any, cast, TYPE_CHECKING
|
26 | if TYPE_CHECKING:
|
27 | from osh.word_parse import WordParser
|
28 |
|
29 | _ = log
|
30 |
|
31 |
|
def LiteralId(p):
    # type: (word_part_t) -> Id_t
    """If the WordPart consists of a single literal token, return its Id.

    Used for Id.KW_For, Id.RBrace, etc.
    """
    UP_part = p
    if p.tag() == word_part_e.Literal:
        return cast(Token, UP_part).id
    else:
        return Id.Undefined_Tok  # unequal to any other Id

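# Illustrative sketch, not part of the original module: a caller (e.g. the
# CommandParser) can use LiteralId() to test for a specific token, where `w`
# is a hypothetical CompoundWord from a prior parse:
#
#     if LiteralId(w.parts[0]) == Id.Lit_RBrace:
#         ...  # treat it as the end of a brace group
#
# A non-literal part (e.g. a command sub) yields Id.Undefined_Tok, which
# compares unequal to every real Id, so the test is safe without a separate
# tag check.
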
def _EvalWordPart(part):
    # type: (word_part_t) -> Tuple[bool, str, bool]
    """Evaluate a WordPart at PARSE TIME.

    Used for:

    1. here doc delimiters
    2. function names
    3. for loop variable names
    4. Compiling constant regex words at parse time
    5. a special case for ${a////c} to see if we got a leading slash in the
       pattern.

    Returns:
      3-tuple of
        ok: bool, success.  If there are parts that can't be statically
            evaluated, then we return false.
        value: a string (not Value)
        quoted: whether any part of the word was quoted
    """
    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.ShArrayLiteral):
            # Array literals aren't good for any of our use cases.  TODO:
            # Rename EvalWordToString?
            return False, '', False

        elif case(word_part_e.BashAssocLiteral):
            return False, '', False

        elif case(word_part_e.Literal):
            tok = cast(Token, UP_part)
            # Weird performance issue: if we change this to lexer.LazyStr(),
            # the parser slows down, e.g. on configure-coreutils, from 805 B
            # irefs to ~830 B.  The real issue is that we should avoid calling
            # this from CommandParser - for the Hay node.
            return True, lexer.TokenVal(tok), False
            #return True, lexer.LazyStr(tok), False

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            if mylib.PYTHON:
                val = lexer.TokenVal(part.token)
                assert len(val) == 2, val  # e.g. \*
                assert val[0] == '\\'
            s = lexer.TokenSliceLeft(part.token, 1)
            return True, s, True

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            return True, part.sval, True

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            strs = []  # type: List[str]
            for p in part.parts:
                ok, s, _ = _EvalWordPart(p)
                if not ok:
                    return False, '', True
                strs.append(s)

            return True, ''.join(strs), True  # At least one part was quoted!

        elif case(word_part_e.CommandSub, word_part_e.SimpleVarSub,
                  word_part_e.BracedVarSub, word_part_e.TildeSub,
                  word_part_e.ArithSub, word_part_e.ExtGlob,
                  word_part_e.Splice, word_part_e.ExprSub):
            return False, '', False

        else:
            raise AssertionError(part.tag())

def FastStrEval(w):
    # type: (CompoundWord) -> Optional[str]
    """Detects common cases:

    (1) CompoundWord([LiteralPart(Id.Lit_Chars)])
        For echo -e, test x -lt 0, etc.
    (2) a single-quoted word like 'foo'

    Other patterns we could detect are:
    (1) "foo"
    (2) "$var" and "${var}" - I think these are very common in OSH code (but
        not YSH)
        - I think val_ops.Stringify() can handle all the errors
    """
    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    UP_part0 = part0
    with tagswitch(part0) as case:
        if case(word_part_e.Literal):
            part0 = cast(Token, UP_part0)

            if part0.id in (Id.Lit_Chars, Id.Lit_LBracket, Id.Lit_RBracket):
                # Could add more tokens in this case,
                # e.g. + is Lit_Other, and it's a Token in 'expr'.
                # Right now it's Lit_Chars (e.g. ls -l), plus [ and ], because
                # I know those are common.
                # { } are not as common.
                return lexer.LazyStr(part0)

            else:
                # e.g. Id.Lit_Star needs to be glob expanded
                # TODO: Consider moving Id.Lit_Star etc. to Kind.MaybeGlob?
                return None

        elif case(word_part_e.SingleQuoted):
            part0 = cast(SingleQuoted, UP_part0)
            # TODO: SingleQuoted should have a lazy (str? sval) field.
            # This would only affect multi-line strings though?
            return part0.sval

        else:
            # e.g. DoubleQuoted can't be optimized to a string, because it
            # might have "$@" and such.
            return None

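# Illustrative sketch with hypothetical words (assumed inputs, not from the
# original module):
#
#     FastStrEval(w1)  # w1 parsed from:  -l     => '-l' (one Lit_Chars token)
#     FastStrEval(w2)  # w2 parsed from:  'foo'  => 'foo' (one SingleQuoted)
#     FastStrEval(w3)  # w3 parsed from:  "$@"   => None; the caller falls
#                      # back to the full word evaluator
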
def StaticEval(UP_w):
    # type: (word_t) -> Tuple[bool, str, bool]
    """Evaluate a Compound at PARSE TIME."""
    quoted = False

    # e.g. the ( in `for (` (as opposed to `for ((`) is a token word, not a
    # Compound word
    if UP_w.tag() != word_e.Compound:
        return False, '', quoted

    w = cast(CompoundWord, UP_w)

    strs = []  # type: List[str]
    for part in w.parts:
        ok, s, q = _EvalWordPart(part)
        if not ok:
            return False, '', quoted
        if q:
            quoted = True  # at least one part was quoted
        strs.append(s)
    #log('StaticEval parts %s', w.parts)
    return True, ''.join(strs), quoted

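# Illustrative sketch (hypothetical word `w`): StaticEval() is how the parser
# decides, e.g., whether a here doc delimiter was quoted:
#
#     ok, s, quoted = StaticEval(w)   # w parsed from:  'EOF'
#     # => ok == True, s == 'EOF', quoted == True
#
# The quoted flag matters because <<'EOF' disables expansion in the here doc
# body, while <<EOF leaves it enabled.
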
# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if there
# is no slash...If none of the characters in the tilde-prefix are quoted, the
# characters in the tilde-prefix following the tilde shall be treated as a
# possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode?  You respect quotes
# and substitutions.
#
# We only detect ~Lit_Chars and split.  So we might as well just write a
# regex.


def TildeDetect(UP_w):
    # type: (word_t) -> Optional[CompoundWord]
    """Detect tilde expansion in a word.

    It might begin with a Literal that needs to be turned into a TildeSub.
    (It depends on whether the second token begins with a slash.)

    If so, it returns a new word.  Otherwise it returns None.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      token is changed.  But note that we CANNOT know this during lexing.
    """
    # BracedTree can't be tilde expanded
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    return TildeDetect2(w)

def TildeDetect2(w):
    # type: (CompoundWord) -> Optional[CompoundWord]
    """If tilde sub is detected, returns a new CompoundWord.

    Accepts CompoundWord, not word_t.  After brace expansion, we know we have
    a List[CompoundWord].

    Tilde detection:

    YES:
      ~       ~/
      ~bob    ~bob/

    NO:
      ~bob#   ~bob#/
      ~bob$x
      ~$x

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | %end)
    """
    if len(w.parts) == 0:  # ${a-} has no parts
        return None

    part0 = w.parts[0]
    id0 = LiteralId(part0)
    if id0 != Id.Lit_Tilde:
        return None  # $x is not TildeSub

    tok0 = cast(Token, part0)

    new_parts = []  # type: List[word_part_t]

    if len(w.parts) == 1:  # ~
        new_parts.append(word_part.TildeSub(tok0, None, None))
        return CompoundWord(new_parts)

    id1 = LiteralId(w.parts[1])
    if id1 == Id.Lit_Slash:  # ~/
        new_parts.append(word_part.TildeSub(tok0, None, None))
        new_parts.extend(w.parts[1:])
        return CompoundWord(new_parts)

    if id1 != Id.Lit_Chars:
        return None  # ~$x is not TildeSub

    tok1 = cast(Token, w.parts[1])

    if len(w.parts) == 2:  # ~foo
        new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
        return CompoundWord(new_parts)

    id2 = LiteralId(w.parts[2])
    if id2 != Id.Lit_Slash:  # ~foo$x is not TildeSub
        return None

    new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
    new_parts.extend(w.parts[2:])
    return CompoundWord(new_parts)

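# Illustrative sketch (hypothetical tokens): for a word parsed from ~bob/src,
# with parts [Lit_Tilde, Lit_Chars 'bob', Lit_Slash, Lit_Chars 'src']:
#
#     new_w = TildeDetect2(w)
#     # new_w.parts[0] is word_part.TildeSub(tilde_tok, bob_tok, 'bob')
#     # new_w.parts[1:] are the original parts, from the slash onward
#
# For ~bob$x it returns None, and the caller keeps the original word.
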
def TildeDetectAssign(w):
    # type: (CompoundWord) -> None
    """Detects multiple tilde subs, like a=~:~/src:~bob

    MUTATES its argument.

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | Lit_Colon | %end)
    """
    parts = w.parts

    # Bail out EARLY if there is no ~ at all
    has_tilde = False
    for part in parts:
        if LiteralId(part) == Id.Lit_Tilde:
            has_tilde = True
            break
    if not has_tilde:
        return  # Avoid further work and allocations

    # Append None sentinels to avoid IndexError, since we have to look ahead
    # up to 2 parts
    parts.append(None)
    parts.append(None)

    new_parts = []  # type: List[word_part_t]

    tilde_could_be_next = True  # true at first, and true after :

    i = 0
    n = len(parts)

    while i < n:
        part0 = parts[i]
        if part0 is None:
            break

        #log('i = %d', i)
        #log('part0 %s', part0)

        # Skip a tilde in the middle of the word, like a=foo~bar
        if tilde_could_be_next and LiteralId(part0) == Id.Lit_Tilde:
            # If ~ ends the string, parts[i + 1] is the None sentinel
            part1 = parts[i + 1]
            part2 = parts[i + 2]

            tok0 = cast(Token, part0)

            if part1 is None:  # x=foo:~
                new_parts.append(word_part.TildeSub(tok0, None, None))
                break  # at end

            id1 = LiteralId(part1)

            if id1 in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~/ or x=foo:~:
                new_parts.append(word_part.TildeSub(tok0, None, None))
                new_parts.append(part1)
                i += 2
                continue

            if id1 != Id.Lit_Chars:
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                i += 2
                continue  # x=foo:~$x is not tilde sub

            tok1 = cast(Token, part1)

            if part2 is None:  # x=foo:~foo
                # consume both
                new_parts.append(
                    word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
                break  # at end

            id2 = LiteralId(part2)
            if id2 not in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~foo$x
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                new_parts.append(part2)  # ...
                i += 3
                continue

            new_parts.append(
                word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
            new_parts.append(part2)
            i += 3

            tilde_could_be_next = (id2 == Id.Lit_Colon)

        else:
            new_parts.append(part0)
            i += 1

            tilde_could_be_next = (LiteralId(part0) == Id.Lit_Colon)

    # Remove the None sentinels
    parts.pop()
    parts.pop()

    # Mutate the argument
    w.parts = new_parts

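# Illustrative sketch (hypothetical parts): for the value of a=~:~/src:~bob,
# a ~ at the start or right after a : becomes a TildeSub, in place:
#
#     TildeDetectAssign(w)  # mutates w.parts
#     # before: [~] [:] [~] [/] [src] [:] [~] [bob]
#     # after:  [TildeSub] [:] [TildeSub] [/] [src] [:] [TildeSub 'bob']
#
# A ~ in the middle of a component, like a=foo~bar, is left alone.
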
def TildeDetectAll(words):
    # type: (List[word_t]) -> List[word_t]
    out = []  # type: List[word_t]
    for w in words:
        t = TildeDetect(w)
        if t:
            out.append(t)
        else:
            out.append(w)
    return out


def HasArrayPart(w):
    # type: (CompoundWord) -> bool
    """Used in cmd_parse."""
    for part in w.parts:
        if part.tag() == word_part_e.ShArrayLiteral:
            return True
    return False


def ShFunctionName(w):
    # type: (CompoundWord) -> str
    """Returns a valid shell function name, or the empty string.

    TODO: Maybe use this regex to validate:

    FUNCTION_NAME_RE = r'[^{}\[\]=]*'

    Bash is very lenient, but that would disallow confusing characters, for
    better error messages on a[x]=(), etc.
    """
    ok, s, quoted = StaticEval(w)
    # Function names should not have quotes
    if not ok or quoted:
        return ''
    return s

def LooksLikeArithVar(UP_w):
    # type: (word_t) -> Optional[Token]
    """Return a token if this word looks like an arith var.

    NOTE: This can't be combined with DetectShAssignment because VarLike and
    ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will
    be confused between array assignments foo=(1 2) and function calls
    foo(1, 2).
    """
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    if len(w.parts) != 1:
        return None

    UP_part0 = w.parts[0]
    if LiteralId(UP_part0) != Id.Lit_ArithVarLike:
        return None

    return cast(Token, UP_part0)


def IsVarLike(w):
    # type: (CompoundWord) -> bool
    """Tests whether a word looks like FOO=bar.

    This is a quick test for the command parser to distinguish:

    func() { echo hi; }
    func=(1 2 3)
    """
    if len(w.parts) == 0:
        return False

    return LiteralId(w.parts[0]) == Id.Lit_VarLike

def DetectShAssignment(w):
    # type: (CompoundWord) -> Tuple[Optional[Token], Optional[Token], int]
    """Detects whether a word looks like FOO=bar or FOO[x]=bar.

    Returns:
      left_token,   # Lit_VarLike, Lit_ArrayLhsOpen, or None if it's not an
                    # assignment
      close_token,  # Lit_ArrayLhsClose if it was detected, or None
      part_offset   # where the value word starts; 0 if not an assignment

    Cases:

    s=1
    s+=1
    s[x]=1
    s[x]+=1

    a=()
    a+=()
    a[x]=()
    a[x]+=()  # We parse this (as bash does), but it's never valid because
              # arrays can't be nested.
    """
    no_token = None  # type: Optional[Token]

    n = len(w.parts)
    if n == 0:
        return no_token, no_token, 0

    UP_part0 = w.parts[0]
    id0 = LiteralId(UP_part0)
    if id0 == Id.Lit_VarLike:
        tok = cast(Token, UP_part0)
        return tok, no_token, 1  # everything after the first token is the value

    if id0 == Id.Lit_ArrayLhsOpen:
        tok0 = cast(Token, UP_part0)
        # NOTE that a[]=x should be an error.  We don't want to silently decay.
        if n < 2:
            return no_token, no_token, 0
        for i in xrange(1, n):
            UP_part = w.parts[i]
            if LiteralId(UP_part) == Id.Lit_ArrayLhsClose:
                tok_close = cast(Token, UP_part)
                return tok0, tok_close, i + 1

    # Nothing detected.  Could be 'foobar', or a[x+1+2 without the closing ].
    return no_token, no_token, 0

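# Illustrative sketch (hypothetical words):
#
#     # w parsed from:  s=1
#     left, close, offset = DetectShAssignment(w)
#     # => left.id == Id.Lit_VarLike, close is None, offset == 1
#
#     # w parsed from:  a[x]=1
#     # => left.id == Id.Lit_ArrayLhsOpen, close.id == Id.Lit_ArrayLhsClose,
#     #    and offset points just past the closing ]= part
#
#     # w parsed from:  foobar
#     # => (None, None, 0), i.e. not an assignment
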
def DetectAssocPair(w):
    # type: (CompoundWord) -> Optional[AssocPair]
    """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

    The key and the value are both strings.  So we just pick out
    word_part.  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the
    [k] syntax is only used for associative array literals, as opposed
    to indexed array literals.
    """
    parts = w.parts
    if LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = CompoundWord(parts[1:i])  # $x$y
            value = CompoundWord(parts[i + 1:])  # $a$b

            # Type-annotated intermediate value for mycpp translation
            return AssocPair(key, value)

    return None

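# Illustrative sketch (hypothetical parts): for a word parsed from ['k']=v
# inside A=( ... ), with parts [Lit_LBracket, 'k', Lit_ArrayLhsClose, v]:
#
#     pair = DetectAssocPair(w)
#     # pair.key   == CompoundWord(['k'])  - the parts between [ and ]=
#     # pair.value == CompoundWord([v])    - the parts after ]=
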
def IsControlFlow(w):
    # type: (CompoundWord) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is a control flow word."""
    no_token = None  # type: Optional[Token]

    if len(w.parts) != 1:
        return Kind.Undefined, no_token

    UP_part0 = w.parts[0]
    token_type = LiteralId(UP_part0)
    if token_type == Id.Undefined_Tok:
        return Kind.Undefined, no_token

    token_kind = consts.GetKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, cast(Token, UP_part0)

    return Kind.Undefined, no_token


def LiteralToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word consists of a literal token, return it.

    Otherwise return None.
    """
    # We're casting here because this function is called by the CommandParser
    # for var, setvar, '...', etc.  It's easier to cast in one place.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    if part0.tag() == word_part_e.Literal:
        return cast(Token, part0)

    return None


def BraceToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word has Id.Lit_LBrace or Id.Lit_RBrace, return a Token.

    This is a special case for osh/cmd_parse.py.

    The WordParser changes Id.Op_LBrace from the ExprParser into
    Id.Lit_LBrace, so we may get a token, not a word.
    """
    with tagswitch(UP_w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            assert tok.id in (Id.Lit_LBrace, Id.Lit_RBrace), tok
            return tok

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)
            return LiteralToken(w)

        else:
            raise AssertionError()

def AsKeywordToken(UP_w):
    # type: (word_t) -> Token
    """Given a word that IS a CompoundWord containing just a keyword, return
    the single token at the start."""
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part = w.parts[0]
    assert part.tag() == word_part_e.Literal, part
    tok = cast(Token, part)
    assert consts.GetKind(tok.id) == Kind.KW, tok
    return tok


def AsOperatorToken(word):
    # type: (word_t) -> Token
    """For a word that IS an operator (word.Token), return that token.

    This must only be called on a word which is known to be an operator
    (word.Token).
    """
    assert word.tag() == word_e.Operator, word
    return cast(Token, word)


#
# Polymorphic between Token and Compound
#


def ArithId(w):
    # type: (word_t) -> Id_t
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return tok.id

    assert isinstance(w, CompoundWord)
    return Id.Word_Compound

def BoolId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word.String, UP_w)
            return w.id

        elif case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works
            # the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag())

def CommandId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Has to be a single literal part
            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound

            elif token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                                Id.ControlFlow_Return):
                # OSH and YSH recognize:  { }
                # YSH recognizes:         = return
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type

            return Id.Word_Compound

        else:
            raise AssertionError(w.tag())


def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser."""
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return consts.GetKind(tok.id)

    # NOTE: This is a bit inconsistent with CommandId, because we never
    # return Kind.KW (or Kind.Lit).  But the CommandParser is easier to write
    # this way.
    return Kind.Word

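# Illustrative sketch (hypothetical words): the CommandParser switches on
# these results:
#
#     CommandId(w_if)      # w_if parsed from `if`  => Id.KW_If (Kind.KW)
#     CommandId(w_lbrace)  # parsed from `{`        => Id.Lit_LBrace
#     CommandId(w_echo)    # parsed from `echo`     => Id.Word_Compound
#
#     CommandKind(op_word)  # an Operator word => its Kind, e.g. Kind.Op
#     CommandKind(w_echo)   # any Compound word => Kind.Word
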
# Stubs for converting RHS of assignment to expression mode.
# For osh2oil.py
def IsVarSub(w):
    # type: (word_t) -> bool
    """Return whether it's any var sub, or a double quoted one."""
    return False


# Doesn't translate with mycpp because of dynamic %
def ErrorWord(error_str):
    # type: (str) -> CompoundWord
    t = lexer.DummyToken(Id.Lit_Chars, error_str)
    return CompoundWord([t])


def Pretty(w):
    # type: (word_t) -> str
    """Return a string to display to the user."""
    UP_w = w
    if w.tag() == word_e.String:
        w = cast(word.String, UP_w)
        if w.id == Id.Eof_Real:
            return 'EOF'
        else:
            return repr(w.s)
    else:
        return word_str(w.tag())  # tag name

class ctx_EmitDocToken(object):
    """For doc comments."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.EmitDocToken(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.EmitDocToken(False)


class ctx_Multiline(object):
    """For multiline commands."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.Multiline(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.Multiline(False)

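

# Illustrative usage sketch (hypothetical WordParser instance): these context
# managers bracket a parsing region, and __exit__ turns the mode back off
# even if parsing raises:
#
#     with ctx_Multiline(w_parser):
#         ...  # parse; the word lexer stays in multiline mode until exit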
|