1 | """
|
2 | word.py - Utility functions for words, e.g. treating them as "tokens".
|
3 | """
|
4 |
|
5 | from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
|
6 | from _devbuild.gen.syntax_asdl import (
|
7 | Token,
|
8 | CompoundWord,
|
9 | DoubleQuoted,
|
10 | SingleQuoted,
|
11 | word,
|
12 | word_e,
|
13 | word_t,
|
14 | word_str,
|
15 | word_part,
|
16 | word_part_t,
|
17 | word_part_e,
|
18 | AssocPair,
|
19 | )
|
20 | from frontend import consts
|
21 | from frontend import lexer
|
22 | from mycpp import mylib
|
23 | from mycpp.mylib import tagswitch, log
|
24 | from osh import word_compile
|
25 |
|
26 | from typing import Tuple, Optional, List, Any, cast, TYPE_CHECKING
|
27 | if TYPE_CHECKING:
|
28 | from osh.word_parse import WordParser
|
29 |
|
30 | _ = log
|
31 |
|
32 |
|
def LiteralId(p):
    # type: (word_part_t) -> Id_t
    """Return the Id of a WordPart consisting of a single literal token.

    Used for Id.KW_For, Id.RBrace, etc.  Non-literal parts yield
    Id.Undefined_Tok, which compares unequal to every other Id.
    """
    if p.tag() != word_part_e.Literal:
        return Id.Undefined_Tok  # unequal to any other Id
    return cast(Token, p).id
|
44 |
|
45 |
|
def _EvalWordPart(part):
    # type: (word_part_t) -> Tuple[bool, str, bool]
    """Evaluate a WordPart at PARSE TIME.

    Used for:

    1. here doc delimiters
    2. function names
    3. for loop variable names
    4. Compiling constant regex words at parse time
    5. a special case for ${a////c} to see if we got a leading slash in the
       pattern.

    Returns:
      3-tuple of
        ok: bool, success.  If there are parts that can't be statically
          evaluated, then we return false.
        value: a string (not Value)
        quoted: whether any part of the word was quoted
    """
    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.ShArrayLiteral):
            # Array literals aren't good for any of our use cases.  TODO: Rename
            # EvalWordToString?
            return False, '', False

        elif case(word_part_e.BashAssocLiteral):
            return False, '', False

        elif case(word_part_e.Literal):
            tok = cast(Token, UP_part)
            # Unquoted literal: the static value is just the token's text.
            return True, lexer.TokenVal(tok), False

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            if mylib.PYTHON:
                # Sanity checks that run only under CPython, not in the mycpp
                # translation.
                val = lexer.TokenVal(part.token)
                assert len(val) == 2, val  # e.g. \*
                assert val[0] == '\\'
            s = lexer.TokenSliceLeft(part.token, 1)  # drop the backslash
            return True, s, True

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            tmp = [t.tval for t in part.tokens]  # on its own line for mycpp
            s = ''.join(tmp)
            return True, s, True

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            strs = []  # type: List[str]
            for p in part.parts:
                # Recursively evaluate the sub-parts; any dynamic part makes
                # the whole double-quoted string non-static.
                ok, s, _ = _EvalWordPart(p)
                if not ok:
                    return False, '', True
                strs.append(s)

            return True, ''.join(strs), True  # At least one part was quoted!

        elif case(word_part_e.CommandSub, word_part_e.SimpleVarSub,
                  word_part_e.BracedVarSub, word_part_e.TildeSub,
                  word_part_e.ArithSub, word_part_e.ExtGlob,
                  word_part_e.Splice, word_part_e.ExprSub):
            # Any kind of substitution requires runtime evaluation.
            return False, '', False

        else:
            raise AssertionError(part.tag())
|
114 |
|
115 |
|
def FastStrEval(w):
    # type: (CompoundWord) -> Optional[str]
    """Fast path: evaluate a word that is a constant string, or return None.

    Detects common cases:

    (1) CompoundWord([LiteralPart(Id.Lit_Chars)])
        For echo -e, test x -lt 0, etc.
    (2) single quoted word like 'foo'

    Returning None means the caller must fall back to the full (slow)
    word evaluator.

    TODO:
    - remove tval - word_part.Literal(Token tok, str? sval) -> becomes sval

    Other patterns we could detect are:
    (1) "foo"
    (2) "$var" and "${var}" - I think these are very common in OSH code (but not YSH)
       - I think val_ops.Stringify() can handle all the errors
    """
    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    UP_part0 = part0
    with tagswitch(part0) as case:
        if case(word_part_e.Literal):
            part0 = cast(Token, UP_part0)

            if part0.id in (Id.Lit_Chars, Id.Lit_LBracket, Id.Lit_RBracket):
                # Could add more tokens in this case
                # e.g. + is Lit_Other, and it's a Token in 'expr'
                # Right now it's Lit_Chars (e.g. ls -l) and [ and ] because I
                # know those are common
                # { } are not as common

                #if part0.line is None:
                #    log("part0 %s", part0)

                # TODO: word_part.Literal should have lazy (str? sval) field

                # TODO: instances created by lexer.DummyToken() don't have
                # tok.line field, so they can't use lexer.TokenVal()
                return part0.tval
                #return lexer.TokenVal(part0)

            else:
                # e.g. Id.Lit_Star needs to be glob expanded
                # TODO: Consider moving Id.Lit_Star etc. to Kind.MaybeGlob?
                return None

        elif case(word_part_e.SingleQuoted):
            part0 = cast(SingleQuoted, UP_part0)
            # TODO: SingleQuoted should have lazy (str? sval) field
            # This would only affect multi-line strings though?
            return word_compile.EvalSingleQuoted(part0)

        else:
            # e.g. DoubleQuoted can't be optimized to a string, because it
            # might have "$@" and such
            return None
|
174 |
|
175 |
|
def StaticEval(UP_w):
    # type: (word_t) -> Tuple[bool, str, bool]
    """Evaluate a Compound word at PARSE TIME.

    Returns (ok, value, quoted); ok is False when any part requires runtime
    evaluation, and quoted is True when at least one part was quoted.
    """
    quoted = False

    # e.g. for ( instead of for (( is a token word, not a Compound
    if UP_w.tag() != word_e.Compound:
        return False, '', quoted

    w = cast(CompoundWord, UP_w)

    pieces = []  # type: List[str]
    for p in w.parts:
        ok, s, part_quoted = _EvalWordPart(p)
        if not ok:
            return False, '', quoted
        if part_quoted:
            quoted = True  # at least one part was quoted
        pieces.append(s)

    return True, ''.join(pieces), quoted
|
197 |
|
198 |
|
199 | # From bash, general.c, unquoted_tilde_word():
|
200 | # POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
|
201 | # the beginning of the word, followed by all of the characters preceding the
|
202 | # first unquoted slash in the word, or all the characters in the word if there
|
203 | # is no slash...If none of the characters in the tilde-prefix are quoted, the
|
204 | characters in the tilde-prefix following the tilde shall be treated as a
|
205 | # possible login name.
|
206 | #define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
|
207 | #
|
208 | # So an unquoted tilde can ALWAYS start a new lex mode? You respect quotes and
|
209 | # substitutions.
|
210 | #
|
211 | # We only detect ~Lit_Chars and split. So we might as well just write a regex.
|
212 |
|
213 |
|
def TildeDetect(UP_w):
    # type: (word_t) -> Optional[CompoundWord]
    """Detect tilde expansion in a word.

    It might begin with a Literal that needs to be turned into a TildeSub
    (depending on whether the second token begins with a slash).

    Returns a new word if tilde sub was detected, otherwise None.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      token is changed.  But note that we CANNOT know this during lexing.
    """
    if UP_w.tag() == word_e.Compound:
        return TildeDetect2(cast(CompoundWord, UP_w))

    # BracedTree can't be tilde expanded
    return None
|
235 |
|
236 |
|
def TildeDetect2(w):
    # type: (CompoundWord) -> Optional[CompoundWord]
    """If tilde sub is detected, returns a new CompoundWord.

    Accepts CompoundWord, not word_t.  After brace expansion, we know we have a
    List[CompoundWord].

    Tilde detection:

    YES:
      ~     ~/
      ~bob  ~bob/

    NO:
      ~bob#  ~bob#/
      ~bob$x
      ~$x

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | %end)
    """
    if len(w.parts) == 0:  # ${a-} has no parts
        return None

    part0 = w.parts[0]
    id0 = LiteralId(part0)
    if id0 != Id.Lit_Tilde:
        return None  # $x is not TildeSub

    tok0 = cast(Token, part0)

    new_parts = []  # type: List[word_part_t]

    if len(w.parts) == 1:  # ~ alone: sub with no user name
        new_parts.append(word_part.TildeSub(tok0, None, None))
        return CompoundWord(new_parts)

    id1 = LiteralId(w.parts[1])
    if id1 == Id.Lit_Slash:  # ~/ -> sub for ~, keep the rest of the word
        new_parts.append(word_part.TildeSub(tok0, None, None))
        new_parts.extend(w.parts[1:])
        return CompoundWord(new_parts)

    if id1 != Id.Lit_Chars:
        return None  # ~$x is not TildeSub

    tok1 = cast(Token, w.parts[1])

    if len(w.parts) == 2:  # ~foo -> sub with user name 'foo'
        new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
        return CompoundWord(new_parts)

    id2 = LiteralId(w.parts[2])
    if id2 != Id.Lit_Slash:  # ~foo$x is not TildeSub
        return None

    # ~foo/... -> sub for ~foo, keep everything from the slash onward
    new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
    new_parts.extend(w.parts[2:])
    return CompoundWord(new_parts)
|
297 |
|
298 |
|
def TildeDetectAssign(w):
    # type: (CompoundWord) -> None
    """Detects multiple tilde sub, like a=~:~/src:~bob

    MUTATES its argument.

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | Lit_Colon | %end)
    """
    parts = w.parts

    # Bail out EARLY if there are no ~ at all
    has_tilde = False
    for part in parts:
        if LiteralId(part) == Id.Lit_Tilde:
            has_tilde = True
            break
    if not has_tilde:
        return  # Avoid further work and allocations

    # Avoid IndexError, since we have to look ahead up to 2 tokens.
    # These None sentinels are popped off again below.
    parts.append(None)
    parts.append(None)

    new_parts = []  # type: List[word_part_t]

    tilde_could_be_next = True  # true at first, and true after :

    i = 0
    n = len(parts)

    while i < n:
        part0 = parts[i]
        if part0 is None:  # reached the sentinel padding
            break

        #log('i = %d', i)
        #log('part0 %s', part0)

        # Skip tilde in middle of word, like a=foo~bar
        if tilde_could_be_next and LiteralId(part0) == Id.Lit_Tilde:
            # If ~ ends the string, we have
            part1 = parts[i + 1]
            part2 = parts[i + 2]

            tok0 = cast(Token, part0)

            if part1 is None:  # x=foo:~
                new_parts.append(word_part.TildeSub(tok0, None, None))
                break  # at end

            id1 = LiteralId(part1)

            if id1 in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~/ or x=foo:~:
                new_parts.append(word_part.TildeSub(tok0, None, None))
                new_parts.append(part1)
                i += 2
                continue

            if id1 != Id.Lit_Chars:
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                i += 2
                continue  # x=foo:~$x is not tilde sub

            tok1 = cast(Token, part1)

            if part2 is None:  # x=foo:~foo
                # consume both
                new_parts.append(
                    word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
                break  # at end

            id2 = LiteralId(part2)
            if id2 not in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~foo$x
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                new_parts.append(part2)  # ...
                i += 3
                continue

            # x=foo:~foo/ or x=foo:~foo: -- sub for ~foo, keep the delimiter
            new_parts.append(
                word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
            new_parts.append(part2)
            i += 3

            tilde_could_be_next = (id2 == Id.Lit_Colon)

        else:
            new_parts.append(part0)
            i += 1

            tilde_could_be_next = (LiteralId(part0) == Id.Lit_Colon)

    # Remove the two None sentinels appended above
    parts.pop()
    parts.pop()

    # Mutate argument
    w.parts = new_parts
|
399 |
|
400 |
|
def TildeDetectAll(words):
    # type: (List[word_t]) -> List[word_t]
    """Run TildeDetect on each word, returning a new list.

    Words where no tilde sub was detected are passed through unchanged.
    """
    result = []  # type: List[word_t]
    for w in words:
        detected = TildeDetect(w)
        if detected:
            result.append(detected)
        else:
            result.append(w)
    return result
|
411 |
|
412 |
|
def HasArrayPart(w):
    # type: (CompoundWord) -> bool
    """Return True if any part is a ShArrayLiteral.  Used in cmd_parse."""
    return any(
        part.tag() == word_part_e.ShArrayLiteral for part in w.parts)
|
420 |
|
421 |
|
def ShFunctionName(w):
    # type: (CompoundWord) -> str
    """Returns a valid shell function name, or the empty string.

    TODO: Maybe use this regex to validate:

    FUNCTION_NAME_RE = r'[^{}\[\]=]*'

    Bash is very lenient, but that would disallow confusing characters, for
    better error messages on a[x]=(), etc.
    """
    ok, name, quoted = StaticEval(w)
    # Function names should not have quotes
    if ok and not quoted:
        return name
    return ''
|
438 |
|
439 |
|
def LooksLikeArithVar(UP_w):
    # type: (word_t) -> Optional[Token]
    """Return a token if this word looks like an arith var.

    NOTE: This can't be combined with DetectShAssignment because VarLike and
    ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will be
    confused between array assignments foo=(1 2) and function calls foo(1, 2).
    """
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)

    # Must be a single Lit_ArithVarLike literal part
    if len(w.parts) == 1 and LiteralId(w.parts[0]) == Id.Lit_ArithVarLike:
        return cast(Token, w.parts[0])

    return None
|
460 |
|
461 |
|
def IsVarLike(w):
    # type: (CompoundWord) -> bool
    """Tests whether a word looks like FOO=bar.

    This is a quick test for the command parser to distinguish:

    func() { echo hi; }
    func=(1 2 3)
    """
    # An empty word can't be an assignment
    return len(w.parts) != 0 and LiteralId(w.parts[0]) == Id.Lit_VarLike
|
475 |
|
476 |
|
def DetectShAssignment(w):
    # type: (CompoundWord) -> Tuple[Optional[Token], Optional[Token], int]
    """Detects whether a word looks like FOO=bar or FOO[x]=bar.

    Returns:
      left_token or None   # Lit_VarLike, Lit_ArrayLhsOpen, or None if it's
                           # not an assignment
      close_token          # Lit_ArrayLhsClose if it was detected, or None
      part_offset          # where to start the value word, 0 if not an
                           # assignment

    Cases:

    s=1
    s+=1
    s[x]=1
    s[x]+=1

    a=()
    a+=()
    a[x]=(
    a[x]+=()  # We parse this (as bash does), but it's never valid because
              # arrays can't be nested.
    """
    no_token = None  # type: Optional[Token]

    n = len(w.parts)
    if n == 0:
        return no_token, no_token, 0

    UP_part0 = w.parts[0]
    id0 = LiteralId(UP_part0)
    if id0 == Id.Lit_VarLike:
        tok = cast(Token, UP_part0)
        return tok, no_token, 1  # everything after first token is the value

    if id0 == Id.Lit_ArrayLhsOpen:
        tok0 = cast(Token, UP_part0)
        # NOTE that a[]=x should be an error.  We don't want to silently decay.
        if n < 2:
            return no_token, no_token, 0
        # Scan forward for the Lit_ArrayLhsClose that ends the index
        for i in xrange(1, n):
            UP_part = w.parts[i]
            if LiteralId(UP_part) == Id.Lit_ArrayLhsClose:
                tok_close = cast(Token, UP_part)
                return tok0, tok_close, i + 1

    # Nothing detected.  Could be 'foobar' or a[x+1+2/' without the closing ].
    return no_token, no_token, 0
|
525 |
|
526 |
|
def DetectAssocPair(w):
    # type: (CompoundWord) -> Optional[AssocPair]
    """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

    The key and the value are both strings.  So we just pick out
    word_part.  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the
    [k] syntax is only used for associative array literals, as opposed
    to indexed array literals.

    Returns:
      An AssocPair if the word matches [key]=value, otherwise None.
    """
    parts = w.parts

    # Guard against IndexError: a CompoundWord can have zero parts
    # (e.g. ${a-} evaluates to one), and such a word is never a pair.
    if len(parts) == 0:
        return None

    if LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = CompoundWord(parts[1:i])  # $x$y
            value = CompoundWord(parts[i + 1:])  # $a$b from

            # Type-annotated intermediate value for mycpp translation
            return AssocPair(key, value)

    return None
|
552 |
|
553 |
|
def IsControlFlow(w):
    # type: (CompoundWord) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is a control flow word (break, continue, return, ...).

    Returns (Kind.ControlFlow, token) on a match, else (Kind.Undefined, None).
    """
    no_token = None  # type: Optional[Token]

    if len(w.parts) == 1:
        part0 = w.parts[0]
        token_type = LiteralId(part0)
        if token_type != Id.Undefined_Tok:
            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.ControlFlow:
                return token_kind, cast(Token, part0)

    return Kind.Undefined, no_token
|
572 |
|
573 |
|
def LiteralToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word consists of a single literal token, return it.

    Otherwise return None.
    """
    # We're casting here because this function is called by the CommandParser
    # for var, setvar, '...', etc.  It's easier to cast in one place.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        if part0.tag() == word_part_e.Literal:
            return cast(Token, part0)

    return None
|
593 |
|
594 |
|
def BraceToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word has Id.Lit_LBrace or Lit_RBrace, return a Token.

    This is a special case for osh/cmd_parse.py

    The WordParser changes Id.Op_LBrace from ExprParser into Id.Lit_LBrace, so
    we may get a token, not a word.
    """
    with tagswitch(UP_w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            # Operator words here can only be the braces themselves
            assert tok.id in (Id.Lit_LBrace, Id.Lit_RBrace), tok
            return tok

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)
            # May be None if the word isn't a single literal token
            return LiteralToken(w)

        else:
            raise AssertionError()
|
616 |
|
617 |
|
def AsKeywordToken(UP_w):
    # type: (word_t) -> Token
    """Given a word that IS A CompoundWord containing just a keyword, return
    the single token at the start.

    The asserts document the caller's contract: this must only be called when
    the word is known to be a keyword.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part = w.parts[0]
    assert part.tag() == word_part_e.Literal, part
    tok = cast(Token, part)
    assert consts.GetKind(tok.id) == Kind.KW, tok  # really must be a keyword
    return tok
|
630 |
|
631 |
|
def AsOperatorToken(word):
    # type: (word_t) -> Token
    """For a word that IS an operator (word.Token), return that token.

    This must only be called on a word which is known to be an operator
    (word.Token).

    NOTE(review): the parameter name shadows the imported 'word' module inside
    this function; kept as-is since renaming would change the interface for
    keyword callers.
    """
    assert word.tag() == word_e.Operator, word
    return cast(Token, word)
|
641 |
|
642 |
|
643 | #
|
644 | # Polymorphic between Token and Compound
|
645 | #
|
646 |
|
647 |
|
def ArithId(w):
    # type: (word_t) -> Id_t
    """Return the Id the arithmetic parser dispatches on for this word."""
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return tok.id

    # Every non-operator word is a single Word_Compound operand
    assert isinstance(w, CompoundWord)
    return Id.Word_Compound
|
656 |
|
657 |
|
def BoolId(w):
    # type: (word_t) -> Id_t
    """Return the Id the boolean parser ([[ and test) dispatches on."""
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word.String, UP_w)
            return w.id

        elif case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Only a single-literal word can be a boolean operator
            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works
            # the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag())
|
693 |
|
694 |
|
def CommandId(w):
    # type: (word_t) -> Id_t
    """Return the Id the CommandParser dispatches on for this word."""
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Has to be a single literal part
            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound

            elif token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                                Id.ControlFlow_Return):
                # OSH and YSH recognize:  { }
                # YSH recognizes:         = return
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type  # keywords like if / while / case

            return Id.Word_Compound

        else:
            raise AssertionError(w.tag())
|
728 |
|
729 |
|
def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser."""
    if w.tag() != word_e.Operator:
        # NOTE: This is a bit inconsistent with CommandId, because we never
        # return Kind.KW (or Kind.Lit).  But the CommandParser is easier to
        # write this way.
        return Kind.Word

    return consts.GetKind(cast(Token, w).id)
|
741 |
|
742 |
|
# Stubs for converting RHS of assignment to expression mode.
# For osh2oil.py
def IsVarSub(w):
    # type: (word_t) -> bool
    """Return whether it's any var sub, or a double quoted one.

    Currently a stub: always answers False; the argument is unused.
    """
    return False
|
749 |
|
750 |
|
# Doesn't translate with mycpp because of dynamic %
def ErrorWord(error_str):
    # type: (str) -> CompoundWord
    """Wrap an error message in a CompoundWord, for code that expects a word."""
    t = lexer.DummyToken(Id.Lit_Chars, error_str)
    return CompoundWord([t])
|
756 |
|
757 |
|
def Pretty(w):
    # type: (word_t) -> str
    """Return a string to display to the user."""
    if w.tag() != word_e.String:
        return word_str(w.tag())  # tag name

    UP_w = w
    w = cast(word.String, UP_w)
    if w.id == Id.Eof_Real:
        return 'EOF'
    return repr(w.s)
|
770 |
|
771 |
|
class ctx_EmitDocToken(object):
    """For doc comments.

    Context manager that turns on doc-token emission in the given WordParser
    for the duration of a 'with' block, and turns it off on exit.
    """

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.EmitDocToken(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Restore the default state, even if an exception was raised
        self.w_parser.EmitDocToken(False)
|
787 |
|
788 |
|
class ctx_Multiline(object):
    """For multiline commands.

    Context manager that puts the given WordParser into multiline mode for the
    duration of a 'with' block, and switches it back on exit.
    """

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.Multiline(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Restore single-line mode, even if an exception was raised
        self.w_parser.Multiline(False)
|