osh/word_eval.py

OILS / osh / word_eval.py View on Github | oilshell.org

2356 lines, 1417 significant

1	"""
2	word_eval.py - Evaluator for the word language.
3	"""
4
5	from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6	from _devbuild.gen.syntax_asdl import (
7	Token,
8	SimpleVarSub,
9	loc,
10	loc_t,
11	BracedVarSub,
12	CommandSub,
13	bracket_op,
14	bracket_op_e,
15	suffix_op,
16	suffix_op_e,
17	ShArrayLiteral,
18	SingleQuoted,
19	DoubleQuoted,
20	word_e,
21	word_t,
22	CompoundWord,
23	rhs_word,
24	rhs_word_e,
25	rhs_word_t,
26	word_part,
27	word_part_e,
28	)
29	from _devbuild.gen.runtime_asdl import (
30	part_value,
31	part_value_e,
32	part_value_t,
33	cmd_value,
34	cmd_value_e,
35	cmd_value_t,
36	AssignArg,
37	a_index,
38	a_index_e,
39	VTestPlace,
40	VarSubState,
41	Piece,
42	)
43	from _devbuild.gen.option_asdl import option_i
44	from _devbuild.gen.value_asdl import (
45	value,
46	value_e,
47	value_t,
48	sh_lvalue,
49	sh_lvalue_t,
50	)
51	from core import error
52	from core import pyos
53	from core import pyutil
54	from core import state
55	from core import ui
56	from core import util
57	from data_lang import j8_lite
58	from core.error import e_die
59	from frontend import consts
60	from frontend import lexer
61	from frontend import location
62	from mycpp import mops
63	from mycpp.mylib import log, tagswitch, NewDict
64	from osh import braces
65	from osh import glob_
66	from osh import string_ops
67	from osh import word_
68	from ysh import expr_eval
69	from ysh import val_ops
70
71	from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
72
73	if TYPE_CHECKING:
74	from _devbuild.gen.syntax_asdl import word_part_t
75	from _devbuild.gen.option_asdl import builtin_t
76	from core import optview
77	from core.state import Mem
78	from core.ui import ErrorFormatter
79	from core.vm import _Executor
80	from osh.split import SplitContext
81	from osh import prompt
82	from osh import sh_expr_eval
83
# Bit flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# They are OR-ed together into a single integer 'eval_flags' argument.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
99
100
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Return whether we should allow ${a} to mean ${a[0]}.

    Decay is always allowed when strict_array is off.  When it is on, decay is
    only allowed for a plain variable substitution whose name is listed in
    _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    return is_plain_var_sub and var_name in _STRING_AND_ARRAY
106
107
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    For a BashArray this is the first element; for a BashAssoc it is the entry
    stored under the key '0'.  Returns value.Undef when there is no such entry.
    Any other value type is a programming error (AssertionError).
    """
    first = None  # type: Optional[str]

    tag = val.tag()
    if tag == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        if len(array_val.strs):
            first = array_val.strs[0]
    elif tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        if '0' in assoc_val.d:
            first = assoc_val.d['0']
    else:
        raise AssertionError(tag)

    if first is None:
        return value.Undef
    return value.Str(first)
124
125
def GetArrayItem(strs, index):
    # type: (List[str], int) -> Optional[str]
    """Look up one array element, allowing negative indices.

    A negative index counts back from the end of the list.  Returns None when
    the resulting position is out of range.  The returned item itself may also
    be None, because the array representation is sparse.
    """
    n = len(strs)
    pos = index
    if pos < 0:
        pos += n

    if pos < 0 or pos >= n:
        return None

    # TODO: strs->index() has a redundant check for (i < 0)
    return strs[pos]
140
141
# Use libc to parse NAME, NAME=value, and NAME+=value.  We want submatch
# extraction, but I haven't used that in re2c, and we would need a new kind of
# binding.
#
# Capture groups:
#   1  NAME
#   2  the whole optional suffix (only there for grouping, because ERE has no
#      non-capturing groups)
#   3  the operator, '=' or '+='
#   4  the value, which may be empty
ASSIGN_ARG_RE = '^([a-zA-Z_][a-zA-Z0-9_]*)((=|\\+=)(.*))?$'
147
# Eggex equivalent:
#
# VarName = /
#   [a-z A-Z _ ]
#   [a-z A-Z 0-9 _ ]*
# /
#
# SplitArg = /
#   %begin
#   < VarName >
#   < < '=' | '+=' > < dot* > > ?
#   %end
# /
# Note: must use < > for grouping because there is no non-capturing group.
162
163
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Dynamically parse an argument to declare, export, etc.

    This is a fallback to the static parsing done below.

    Args:
      arg: A string like NAME, NAME=value or NAME+=value.
      blame_word: Word to blame in error messages; also stored in the result.

    Returns:
      An AssignArg.  Its value is None when there is no operator (declare
      NAME), and its append flag is set for the += operator.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API
    # for that, and it probably isn't a bottleneck now
    groups = util.simple_regex_search(ASSIGN_ARG_RE, arg)
    if groups is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # groups[2] is used for grouping; ERE doesn't have non-capturing groups
    op_str = groups[3]

    rhs = None  # type: Optional[value_t]
    is_append = False
    if op_str is not None and len(op_str):  # declare NAME=
        rhs = value.Str(groups[4])
        is_append = op_str[0] == '+'

    return AssignArg(groups[1], rhs, is_append, blame_word)
189
190
191	# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
192	def _BackslashEscape(s):
193	# type: (str) -> str
194	"""Double up backslashes.
195
196	Useful for strings about to be globbed and strings about to be IFS
197	escaped.
198	"""
199	return s.replace('\\', '\\\\')
200
201
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a variable substitution into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: The value to convert.
      quoted: Whether the substitution is quoted.  It is passed as the second
        Piece() argument, and its negation as the third.
      part_loc: Word part to blame when the value can't be substituted.

    Raises:
      error.TypeErr: If the value type can't be substituted into a word.
    """
    UP_val = val
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _EmptyStrOrError, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            return Piece(val.s, quoted, unquoted)

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            return part_value.Array(val.strs)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            return part_value.Array(val.d.values())

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()'s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            stringified = val_ops.Stringify(val, loc.Missing)
            return Piece(stringified, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
241
242
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the flat part_value list of a word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args.  It can never be joined.
    This idea exists because of arrays like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    frames = [frame]

    for part in part_vals:
        UP_part = part

        with tagswitch(part) as case:
            if case(part_value_e.String):
                part = cast(Piece, UP_part)
                frame.append(part)

            elif case(part_value_e.Array):
                part = cast(part_value.Array, UP_part)

                # The first defined element joins the frame in progress; every
                # later one opens a frame of its own.
                joined_current = False
                for s in part.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would
                    # have decayed to a string.
                    piece = Piece(s, True, False)
                    if joined_current:
                        frame = [piece]
                        frames.append(frame)  # singleton frame
                    else:
                        frame.append(piece)
                        joined_current = True

            else:
                raise AssertionError()

    return frames
310
311
# TODO: This could be _MakeWordFrames and then sep.join().  It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into one string, e.g. decay ${a=x"$@"x}.

    String parts are used as-is.  The defined (non-None) entries of an Array
    part are joined with join_char.  The results are concatenated without any
    separator between parts.
    """
    chunks = []  # type: List[str]
    for part in part_vals:
        UP_part = part
        with tagswitch(part) as case:
            if case(part_value_e.String):
                part = cast(Piece, UP_part)
                chunks.append(part.s)

            elif case(part_value_e.Array):
                part = cast(part_value.Array, UP_part)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for s in part.strs:
                    if s is not None:
                        defined.append(s)
                chunks.append(join_char.join(defined))

            else:
                raise AssertionError()

    return ''.join(chunks)
331
332
def _PerformSlice(
        val,  # type: value_t
        begin,  # type: int
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Apply a ${x:begin:length} slice to a string or an indexed array.

    Args:
      val: The value being sliced.
      begin: Start offset.  A negative value counts back from the end.
      length: Only consulted when has_length is True.  For strings, a negative
        length is an end POSITION counted back from the end of the string.
      has_length: Whether a length was given at all.
      part: Blamed in error messages.
      arg0_val: If not None, its string is prepended to the array before
        slicing.  This is the quirk where "begin" for $@ and $* counts $0.

    Returns:
      A value.Str for a Str input, or a value.BashArray for a BashArray input.

    Raises:
      error.TypeErr: If val is not a Str, BashArray or BashAssoc.

    e_die() is called for a negative array length and for BashAssoc.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            if begin < 0:  # Compute offset with unicode
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray):  # Slice array entries.
            val = cast(value.BashArray, UP_val)
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent
            # with strings.
            if has_length and length < 0:
                e_die(
                    "The length index of a array slice can't be negative: %d" %
                    length, loc.WordPart(part))

            # Quirk: "begin" for positional arguments ($@ and $*) counts $0.
            if arg0_val is not None:
                orig = [arg0_val.s]
                orig.extend(val.strs)
            else:
                orig = val.strs

            n = len(orig)
            if begin < 0:
                i = n + begin  # ${@:-3} starts counting from the end
            else:
                i = begin

            strs = []  # type: List[str]
            # A negative offset that reaches before the first element selects
            # nothing.  Without this guard orig[i] would be read with a
            # negative index, which wraps around to the end of the list.
            if i >= 0:
                count = 0
                while i < n:
                    if has_length and count == length:  # length could be 0
                        break
                    s = orig[i]
                    # Unset elements don't count towards the length
                    if s is not None:
                        strs.append(s)
                        count += 1
                    i += 1

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
415
416
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator.

    It only declares EvalWordToString(); subclasses provide the implementation.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate a word to a single string value.  Must be overridden.

        Args:
          w: The word to evaluate.
          eval_flags: Integer flags; defaults to 0 (no flags).
        """
        raise NotImplementedError()
428
429
430	def _GetDollarHyphen(exec_opts):
431	# type: (optview.Exec) -> str
432	chars = [] # type: List[str]
433	if exec_opts.interactive():
434	chars.append('i')
435
436	if exec_opts.errexit():
437	chars.append('e')
438	if exec_opts.noglob():
439	chars.append('f')
440	if exec_opts.noexec():
441	chars.append('n')
442	if exec_opts.nounset():
443	chars.append('u')
444	# NO letter for pipefail?
445	if exec_opts.xtrace():
446	chars.append('x')
447	if exec_opts.noclobber():
448	chars.append('C')
449
450	# bash has:
451	# - c for sh -c, i for sh -i (mksh also has this)
452	# - h for hashing (mksh also has this)
453	# - B for brace expansion
454	return ''.join(chars)
455
456
class TildeEvaluator(object):
    """Expands ~ and ~user into home directory paths."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Consult $HOME first, and then make a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, then ask the OS.  This is what bash
        # does.
        home_val = self.mem.GetValue('HOME')
        if home_val.tag() == value_e.Str:
            return cast(value.Str, home_val).s
        return pyos.GetMyHomeDir()

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the home directory can't be determined, this is a fatal error
        under strict_tilde; otherwise the literal ~ or ~user is returned.
        """
        user = part.user_name
        if user is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(user)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
498
499
500	class AbstractWordEvaluator(StringWordEvaluator):
501	"""Abstract base class for word evaluators.
502
503	Public entry points:
504	EvalWordToString EvalForPlugin EvalRhsWord
505	EvalWordSequence EvalWordSequence2
506	"""
507
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators passed in, and create a Globber.

    arith_ev, expr_ev, prompt_ev and unsafe_arith start out as None here;
    nothing in this constructor assigns them.
    """
    # Passed-in collaborators
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.tilde_ev = tilde_ev
    self.splitter = splitter
    self.errfmt = errfmt

    # Left unset by the constructor
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith

    self.globber = glob_.Globber(exec_opts)
533
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract; must be overridden by subclasses."""
    raise NotImplementedError()
537
def _EvalCommandSub(self, cs_part, quoted):
    # type: (CommandSub, bool) -> part_value_t
    """Abstract since it has a side effect.

    Args:
      cs_part: The command substitution part to evaluate.
      quoted: Whether the substitution is in a quoted context.
    """
    raise NotImplementedError()
542
def _EvalProcessSub(self, cs_part):
    # type: (CommandSub) -> part_value_t
    """Abstract since it has a side effect.

    Args:
      cs_part: The process substitution part to evaluate.
    """
    raise NotImplementedError()
547
def _EvalVarNum(self, var_num):
    # type: (int) -> value_t
    """Look up a numbered argument via self.mem.GetArgNum().

    Args:
      var_num: The argument number; must not be negative.
    """
    assert var_num >= 0
    return self.mem.GetArgNum(var_num)
552
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate $? and so forth.

    $@ is special -- it needs to know whether it is in a double quoted
    context.

    - If it's $@ in a double quoted context, return an ARRAY.
    - If it's $@ in a normal context, return a STRING, which then will be
      subject to splitting.

    Args:
      op_id: Id of the special variable.
      quoted: Whether the substitution is double quoted.
      vsub_state: Out param; join_array is set for $@ and $*.
    """
    if op_id == Id.VSub_At:
        argv = self.mem.GetArgv()
        # "$@" evaluates to an array, $@ should be decayed
        vsub_state.join_array = not quoted
        return value.BashArray(argv)

    if op_id == Id.VSub_Star:
        argv = self.mem.GetArgv()
        # $* "$*" are both decayed
        vsub_state.join_array = True
        return value.BashArray(argv)

    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts))

    return self.mem.GetSpecialVar(op_id)
582
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
):
    # type: (...) -> bool
    """Apply a test operator like ${a:-x} ${a:+x} ${a:=x} ${a:?x}.

    Args:
      val: Value of the variable being tested.
      op: The operator token (op.op) and its argument word (op.arg_word).
      quoted: Whether the substitution is in a double quoted context.
      part_vals: Out param; the evaluated argument word is appended to it.
      vtest_place: The name, and optional index, to assign to for ${a=x}.
      blame_token: Location for error messages.

    Returns:
      Whether part_vals was mutated

    ${a:-} returns part_value[]
    ${a:+} returns part_value[]
    ${a:?error} returns error word?
    ${a:=} returns part_value[] but also needs self.mem for side effects.

    So I guess it should return part_value[], and then a flag for raising an
    error, and then a flag for assigning it?
    The original BracedVarSub will have the name.

    Example of needing multiple part_value[]

      echo X-${a:-'def'"ault"}-X

    We return two part values from the BracedVarSub.  Also consider:

      echo ${a:-x"$@"x}
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True
        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            # Only the colon variants treat an empty string like an unset var
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False
        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # TODO: allow undefined
            is_falsey = len(val.strs) == 0
        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            is_falsey = len(val.d) == 0
        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # Point the error at the substitution instead of reporting it
                # without any location.
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array' could
                # avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())
            e_die("unset variable %r" % error_str, blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
719
def _Length(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}.

    - Str: the count returned by string_ops.CountUtf8Chars().  If that raises
      error.Strict, the error is re-raised under strict_word_eval; otherwise
      a warning is printed and -1 is returned.
    - BashArray: the number of entries that are not None.
    - BashAssoc: the number of entries.

    Raises:
      error.TypeErr: For any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?

            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                count = string_ops.CountUtf8Chars(val.s)
            except error.Strict as e:
                # Add this here so we don't have to add it so far down the
                # stack.
                # TODO: It's better to show BOTH this CODE an the actual DATA
                # somehow.
                e.location = token

                if self.exec_opts.strict_word_eval():
                    raise

                # NOTE: Doesn't make the command exit with 1; it just returns
                # a length of -1.
                self.errfmt.PrettyPrintError(e, prefix='warning: ')
                return -1

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # There can be empty placeholder values in the array.
            count = 0
            for item in val.strs:
                if item is not None:
                    count += 1

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            count = len(val.d)

        else:
            raise error.TypeErr(
                val, "Length op expected Str, BashArray, BashAssoc", token)

    return count
766
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}.

    For a BashArray, the keys are the indices of the entries that are not
    None, as decimal strings.  For a BashAssoc, they are the dict keys.

    Args:
      val: The container.
      token: Location for the type error.

    Raises:
      error.TypeErr: If val is neither a BashArray nor a BashAssoc.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # translation issue: tuple indices not supported in list
            # comprehensions
            #indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
            indices = []  # type: List[str]
            for i, s in enumerate(val.strs):
                if s is not None:
                    indices.append(str(i))
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            return value.BashArray(val.d.keys())

        else:
            # Name the types this op actually accepts
            raise error.TypeErr(val,
                                'Keys op expected BashArray, BashAssoc',
                                token)
792
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    Args:
      val: The value of the variable that holds the reference.
      blame_tok: 'foo' for ${!foo}
      quoted, vsub_state, vtest_place: Passed through to _VarRefValue().

    Returns:
      value.Undef if val is Undef; otherwise the value of the variable that
      the string refers to.

    Raises:
      error.TypeErr: If val is not Undef, Str, BashArray or BashAssoc.

    e_die() is called for BashArray and BashAssoc.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            return value.Undef  # ${!undef} is just weird bash behavior

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            bvs_part = self.unsafe_arith.ParseVarRef(val.s, blame_tok)
            return self._VarRefValue(bvs_part, quoted, vsub_state,
                                     vtest_place)

        elif case(value_e.BashArray):  # caught earlier but OK
            e_die('Indirect expansion of array', blame_tok)

        elif case(value_e.BashAssoc):  # caught earlier but OK
            e_die('Indirect expansion of assoc array', blame_tok)

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
819
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a unary suffix operator of kind VOp1 to a value.

    A Str is transformed directly.  For a BashArray (skipping None entries)
    and for the values of a BashAssoc, the op is applied to each string, and
    the result is a BashArray.

    Raises:
      error.TypeErr: If val is not a Str, BashArray or BashAssoc.
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)
    if op_kind != Kind.VOp1:
        raise AssertionError(Kind_str(op_kind))

    # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
    # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
    # shortcut for constant strings.
    arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
    assert arg_val.tag() == value_e.Str
    pat = arg_val.s

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            changed = string_ops.DoUnarySuffixOp(val.s, op.op, pat,
                                                 has_extglob)
            #log('%r %r -> %r', val.s, pat, changed)
            result = value.Str(changed)  # type: value_t

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have this
            # too.
            out = []  # type: List[str]
            for item in val.strs:
                if item is not None:
                    out.append(
                        string_ops.DoUnarySuffixOp(item, op.op, pat,
                                                   has_extglob))
            result = value.BashArray(out)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            out = []
            for item in val.d.values():
                out.append(
                    string_ops.DoUnarySuffixOp(item, op.op, pat,
                                               has_extglob))
            result = value.BashArray(out)

        else:
            raise error.TypeErr(
                val, 'Unary op expected Str, BashArray, BashAssoc', op.op)

    return result
871
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply a pattern substitution like ${x/pat/replace} to a value.

    A Str is replaced directly.  For a BashArray (skipping None entries) and
    for the values of a BashAssoc, each string is replaced, and the result is
    a BashArray.

    Raises:
      error.TypeErr: If val is not a Str, BashArray or BashAssoc.

    e_die() is called when the pattern contains an extended glob.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from !(*.py)
    # to ERE!  You would need an engine that supports negation?
    # (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    replace_str = ''
    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s

    # note: doesn't support self.exec_opts.extglob()!
    regex, warnings = glob_.GlobToERE(pat_val.s)
    if len(warnings):
        # TODO:
        # - Add 'shopt -s strict_glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    with tagswitch(val) as case2:
        if case2(value_e.Str):
            str_val = cast(value.Str, val)
            replaced = replacer.Replace(str_val.s, op)
            result = value.Str(replaced)  # type: value_t

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, val)
            out = []  # type: List[str]
            for item in array_val.strs:
                if item is not None:
                    out.append(replacer.Replace(item, op))
            result = value.BashArray(out)

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, val)
            out = []
            for item in assoc_val.d.values():
                out.append(replacer.Replace(item, op))
            result = value.BashArray(out)

        else:
            raise error.TypeErr(
                val, 'Pat Sub op expected Str, BashArray, BashAssoc',
                op.slash_tok)

    return result
927
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate the operands of a slice op and apply it with _PerformSlice().

    Args:
      val: The value to slice.
      op: Holds the optional begin and length arithmetic expressions.
      var_name: None for $* or $@; then arg0 takes part in the slice.
      part: Blamed in error messages.

    If slicing raises error.Strict, it is re-raised under strict_word_eval.
    Otherwise a warning is printed, and the result is an empty Str or an empty
    BashArray, matching the type of the input.
    """
    begin = 0
    if op.begin:
        begin = self.arith_ev.EvalToInt(op.begin)

    # Note: the length may evaluate to a negative number; _PerformSlice()
    # decides what that means for each value type.
    has_length = False
    length = -1
    if op.length:
        has_length = True
        length = self.arith_ev.EvalToInt(op.length)

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise

        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        with tagswitch(val) as case2:
            if case2(value_e.Str):
                val = value.Str('')
            elif case2(value_e.BashArray):
                val = value.BashArray([])
            else:
                raise NotImplementedError()

    return val
962
def _Nullary(self, val, op, var_name):
    # type: (value_t, Token, Optional[str]) -> Tuple[value.Str, bool]
    """Apply a nullary var op: ${x@P}, ${x@Q} or ${x@a}.

    Args:
      val: The value the op is applied to.
      op: The operator token; also the location for errors.
      var_name: Name used to look up the cell for @a, or None (e.g. ${?@a}).

    Returns:
      A tuple (result string, quoted2).  quoted2 is True only for @Q applied
      to a Str.

    e_die() is called for an unsupported value type, or an unimplemented op.
    """
    UP_val = val
    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        with tagswitch(val) as case:
            if case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        with tagswitch(val) as case:
            if case(value_e.Str):
                str_val = cast(value.Str, UP_val)

                # TODO: use fastfunc.ShellEncode or
                # fastfunc.PosixShellEncode()
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray):
                array_val = cast(value.BashArray, UP_val)

                # Unset entries of a sparse array are None, and must not be
                # passed to the encoder.
                # TODO: should use fastfunc.ShellEncode
                tmp = [
                    j8_lite.MaybeShellEncode(s) for s in array_val.strs
                    if s is not None
                ]
                result = value.Str(' '.join(tmp))
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # We're ONLY simulating -a and -A, not -r -x -n for now.  See
        # spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(val) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        result = value.Str(''.join(chars))

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1025
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the whole-array subscripts ${a[@]} and ${a[*]}.

    Sets vsub_state.join_array, and then checks the value:

    - Undef: replaced by the result of _EmptyBashArrayOrError(), unless the
      substitution has a test op.
    - Str: fatal under strict_array; otherwise returned unchanged.
    - Anything else is returned unchanged.
    """
    op_id = cast(bracket_op.WholeArray, part.bracket_op).op_id

    if op_id == Id.Lit_At:
        join_array = not quoted  # ${a[@]} decays but "${a[@]}" doesn't
        str_msg = "Can't index string with @"
    elif op_id == Id.Arith_Star:
        join_array = True  # both ${a[*]} and "${a[*]}" decay
        str_msg = "Can't index string with *"
    else:
        raise AssertionError(op_id)  # unknown

    vsub_state.join_array = join_array

    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            if not vsub_state.has_test_op:
                val = self._EmptyBashArrayOrError(part.token)
        elif case2(value_e.Str):
            if self.exec_opts.strict_array():
                e_die(str_msg, loc.WordPart(part))
        elif case2(value_e.BashArray):
            pass  # no-op

    return val
1060
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process a numeric array index like ${a[i+1]}

    Args:
      val: The container being indexed.
      part: Holds the index expression; also blamed in errors.
      vtest_place: Out param; its index is set to the evaluated index or key.

    Returns:
      The element as a value.Str, or value.Undef if there is no such element.
      An Undef input is returned as-is.

    Raises:
      error.TypeErr: If val is not Undef, Str, BashArray or BashAssoc.

    e_die() is called when val is a Str.
    """
    anode = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.token)

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            index = self.arith_ev.EvalToInt(anode)
            vtest_place.index = a_index.Int(index)  # out param

            item = GetArrayItem(array_val.strs, index)
            if item is not None:
                val = value.Str(item)
            else:
                val = value.Undef

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            key = self.arith_ev.EvalWordToString(anode)
            vtest_place.index = a_index.Str(key)  # out param

            item = assoc_val.d.get(key)
            if item is not None:
                val = value.Str(item)
            else:
                val = value.Undef

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1106
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted word part.

    Every part is evaluated with the QUOTED flag.

    Args:
      parts: the word parts found between the double quotes
      part_vals: output param to append to.
    """
    # A double quoted part can still yield an array:
    #   $ a=(1 2); b=(3); c=(4 5)
    #   $ argv "${a[@]}${b[@]}${c[@]}"
    #   ['1', '234', '5']
    #
    # And with multiple parts:
    #   $ argv "${a[@]}${undef[@]:-${c[@]}}"
    #   ['1', '24', '5']

    # Special case for "".  The parser outputs (DoubleQuoted []), instead
    # of (DoubleQuoted [Literal '']).  This is better but it means we
    # have to check for it.
    if len(parts) == 0:
        part_vals.append(Piece('', True, False))
        return

    for word_part_ in parts:
        self._EvalWordPart(word_part_, part_vals, QUOTED)
1133
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    For double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors are attributed to the left quote (dq_part.left).
    """
    vals = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, vals)
    return self._ConcatPartVals(vals, dq_part.left)
1143
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array to a string, as happens for $*.

    Entries that are None are dropped, and the rest are joined with the
    splitter's join char.
    """
    assert val.tag() == value_e.BashArray, val

    present = []  # type: List[str]
    for item in val.strs:
        if item is not None:
            present.append(item)

    join_char = self.splitter.GetJoinChar()
    return value.Str(join_char.join(present))
1151
def _EmptyStrOrError(self, val, token):
    # type: (value_t, Token) -> value_t
    """Turn an Undef value into '' or a fatal error.

    Args:
      val: the value to check; anything other than Undef is returned as is
      token: the token naming the variable, used for the error message and
             its location

    Returns:
      val if it's defined, otherwise value.Str('') when nounset is off.
      When nounset is on, an undefined variable is a fatal error.
    """
    if val.tag() != value_e.Undef:
        return val

    if self.exec_opts.nounset():
        name = lexer.TokenVal(token)
        if name.startswith('$'):
            name = name[1:]  # report 'x' rather than '$x'
        e_die('Undefined variable %r' % name, token)

    return value.Str('')
1163
def _EmptyBashArrayOrError(self, token):
    # type: (Token) -> value_t
    """Produce the value of an undefined array: empty, or a fatal error.

    Args:
      token: the token naming the array, used for the error message and its
             location

    Returns:
      An empty value.BashArray when nounset is off.  When nounset is on,
      this is a fatal error instead.
    """
    assert token is not None

    if not self.exec_opts.nounset():
        return value.BashArray([])

    e_die('Undefined array %r' % lexer.TokenVal(token), token)
1171
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the [] part of a ${} substitution, if there is one.

    With a bracket op, this dispatches to _WholeArray() or _ArrayIndex().

    Without one, an array referred to by name either decays (when
    ShouldArrayDecay() allows it) or is a fatal error.  Type queries are
    exempt from that check.

    Returns:
      The resulting value, which may be val itself.
    """
    if part.bracket_op:
        with tagswitch(part.bracket_op) as case:
            if case(bracket_op_e.WholeArray):
                return self._WholeArray(val, part, quoted, vsub_state)

            elif case(bracket_op_e.ArrayIndex):
                return self._ArrayIndex(val, part, vtest_place)

            else:
                raise AssertionError(part.bracket_op.tag())

    # No bracket op
    name = vtest_place.name
    if name is None:
        return val
    if val.tag() not in (value_e.BashArray, value_e.BashAssoc):
        return val
    if vsub_state.is_type_query:
        return val

    no_other_ops = not (part.prefix_op or part.suffix_op)
    if not ShouldArrayDecay(name, self.exec_opts, no_other_ops):
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)" %
            name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1201
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the variable a BracedVarSub names and apply its bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t
    rather than appending part values.

    Unless eval_unsafe_arith is on, the bracket op is evaluated with
    _allow_command_sub turned off.
    """
    # 1. Evaluate from (var_name, var_num, token Id) -> value
    token_id = part.token.id
    if token_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif token_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(token_id, quoted, vsub_state)

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1231
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the braced substitution to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context

    Several paths return early after appending their own result:
    ${!prefix@}, ${#x}, and a test op for which _ApplyTestOp() returns true.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.token.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; everything else
                # is joined into one splittable piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.token.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.token.id, quoted, vsub_state)

    # Look at the suffix op BEFORE evaluating the bracket op, because the
    # bracket op reads vsub_state.is_type_query and vsub_state.has_test_op.
    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                suffix_op_ = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is different than
                # ${array[@]@Q}
                if suffix_op_.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

            elif case(suffix_op_e.Unary):
                suffix_op_ = cast(suffix_op.Unary, UP_op)

                # Do the _EmptyStrOrError/_EmptyBashArrayOrError up front, EXCEPT in
                # the case of Kind.VTest
                if consts.GetKind(suffix_op_.op.id) == Kind.VTest:
                    vsub_state.has_test_op = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            if not vsub_state.has_test_op:  # undef -> '' BEFORE length
                val = self._EmptyStrOrError(val, part.token)

            n = self._Length(val, part.token)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray):
                if vsub_state.has_test_op:
                    # ${!a[@]-'default'} is a non-fatal runtime error in bash.  Here
                    # it's fatal.
                    # has_test_op is only set for a Unary suffix op, so UP_op
                    # is bound and the cast is valid.
                    op_tok = cast(suffix_op.Unary, UP_op).op
                    e_die('Test operation not allowed with ${!array[@]}',
                          op_tok)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.token)
                # already set vsub_state.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.token, quoted, vsub_state,
                                       vtest_place)

                if not vsub_state.has_test_op:  # undef -> '' AFTER indirection
                    val = self._EmptyStrOrError(val, part.token)

        else:
            raise AssertionError(part.prefix_op)

    else:
        if not vsub_state.has_test_op:  # undef -> '' if no prefix op
            val = self._EmptyStrOrError(val, part.token)

    # 3. Suffix op
    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.token):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1427
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate String and Array part values into one string.

    Args:
      part_vals: the part values to concatenate
      location: where to blame the error when an array isn't allowed

    Returns:
      The concatenation.  An array contributes its non-None entries joined
      by a single space, unless strict_array is on, in which case it's a
      fatal error.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                chunks.append(part_val.s)

            elif case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)

                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = [item for item in part_val.strs if item is not None]
                chunks.append(' '.join(present))

            else:
                raise AssertionError()

    return ''.join(chunks)
1457
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a single ${...} substitution to a str.

    The substitution is evaluated as unquoted, and the resulting part
    values are concatenated.  Errors are attributed to the ${ location
    (part.left).

    NOTE(review): presumably this serves ${foo} appearing directly in a YSH
    expression -- confirm against callers.
    """
    vals = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, vals, False)
    return self._ConcatPartVals(vals, part.left)
1468
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate an unbraced substitution like $x, $1 or $@.

    Args:
      part: the substitution to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context
    """
    tok = part.tok
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok.id == Id.VSub_DollarName:
        var_name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(var_name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(var_name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % var_name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok.id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok.id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._EmptyStrOrError(val, tok)

    # An array is only joined into a string when join_array was requested.
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1509
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a single $name substitution to a str.

    The substitution is evaluated as unquoted, and the resulting part
    values are concatenated.  Errors are attributed to the substitution's
    token (node.tok).

    NOTE(review): presumably this serves $foo appearing directly in a YSH
    expression -- confirm against callers.
    """
    vals = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, vals, False)
    return self._ConcatPartVals(vals, node.tok)
1519
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    The opening operator, the '|' separators between arms and the closing
    ')' are appended as unquoted pieces that are never split.  Each arm is
    evaluated with EXTGLOB_NESTED, so nested extended globs are flattened
    into part_vals rather than globbed on their own.

    Args:
      part: the extended glob to evaluate
      part_vals: output param to append to.
    """
    op = part.op
    if op.id == Id.ExtGlob_Comma:
        op_str = '@('
    else:
        op_str = lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    for i, w in enumerate(part.arms):
        if i != 0:
            # The separator must be a bare '|'.  A backslash-escaped pipe
            # would match a literal '|' instead of separating alternatives.
            part_vals.append(Piece('|', False, False))  # separator
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(w, part_vals, EXTGLOB_NESTED)
    part_vals.append(Piece(')', False, False))  # closing )
1537
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values of the word
      w: the word, used only to blame errors
      glob_parts: output param; each extended glob is replaced by '*'
      fnmatch_parts: output param; extended globs are kept in full
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                # Unquoted pieces are used as is, e.g. the @( and | in
                # @(foo|bar) aren't quoted.
                s = part_val.s
                if part_val.quoted and not self.exec_opts.noglob():
                    s = glob_.GlobEscape(s)
                glob_parts.append(s)
                fnmatch_parts.append(s)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)
                # keep appending fnmatch_parts, but replace glob_parts with '*'
                self._TranslateExtGlob(part_val.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1573
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part.

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: What to evaluate
      part_vals: Output parameter.
      flags: bit flags.  QUOTED means the part was quoted like "$x", and
             IS_SUBST means a literal should be split, because it's inside
             a substitution.

    Returns:
      None
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))

        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            part_vals.append(Piece(lexer.LazyStr(part), quoted, is_subst))

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            part_vals.append(Piece(part.ch, True, False))

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            part_vals.append(Piece(part.sval, True, False))

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            left_id = part.left_token.id
            if left_id in (Id.Left_DollarParen, Id.Left_AtParen,
                           Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif left_id in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(left_id)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            home = self.tilde_ev.Eval(part)
            # NOT split even when unquoted!
            part_vals.append(Piece(home, True, False))

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            part_vals.append(Piece(mops.ToStr(num), quoted, not quoted))

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            # Open question left by the original author: should this die at
            # runtime when the extglob option is off, instead of decaying?

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            flattened = []  # type: List[part_value_t]
            self._EvalExtGlob(part, flattened)  # flattens tree
            part_vals.append(part_value.ExtGlob(flattened))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_vals.append(self.expr_ev.EvalExprSub(part))

        else:
            raise AssertionError(part.tag())
1686
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate a right-hand-side word, which may be empty, to part values.

    Args:
      w: the word; either rhs_word.Empty or a CompoundWord
      part_vals: output param to append to.
      eval_flags: bit flags, passed on to _EvalWordToParts
    """
    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Empty):
            # An empty word is a single empty piece, which is split only
            # when it's unquoted.
            is_quoted = bool(eval_flags & QUOTED)
            part_vals.append(Piece('', is_quoted, not is_quoted))

        elif case(rhs_word_e.Compound):
            w = cast(CompoundWord, UP_w)
            self._EvalWordToParts(w, part_vals, eval_flags=eval_flags)

        else:
            raise AssertionError()
1702
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Args:
      w: the word to evaluate
      part_vals: output param to append to.
      eval_flags: bit flags.  EXTGLOB_FILES and EXTGLOB_NESTED say how a
                  word with an extended glob part is handled.

    Returns:
      Appends to part_vals.  Note that this is a TREE.
    """
    # Does the word have an extended glob?  This is a special case because
    # of the way we use glob() and then fnmatch(..., FNM_EXTMATCH) to
    # implement extended globs.  It's hard to carry that extra information
    # all the way past the word splitting stage.

    # OSH semantic limitations: If a word has an extended glob part, then
    # 1. It can't have an array
    # 2. Word splitting of unquoted words isn't respected

    word_part_vals = []  # type: List[part_value_t]
    has_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            has_extglob = True
        self._EvalWordPart(p, word_part_vals, eval_flags)

    if not has_extglob:
        # The common case: nothing special to do
        part_vals.extend(word_part_vals)
        return

    # From here on, we parsed a word_part.ExtGlob()

    if bool(eval_flags & EXTGLOB_FILES):
        # Caller REQUESTED extglob evaluation.
        #
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts

        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(word_part_vals, w, glob_parts,
                               fnmatch_parts)

        #log('word_part_vals %s', word_part_vals)
        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)
        #log("glob %s fnmatch %s", glob_pat, fnmatch_pat)

        results = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, results)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(results))

    elif bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(word_part_vals)

    else:
        # e.g. simple_word_eval, assignment builtin
        e_die('Extended glob not allowed in this word', w)
1759
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: the part values to turn into strings
      w: the word, used to blame errors.  It could just be a span ID.
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE selects how quoted pieces are
                  escaped; QUOTE_FNMATCH also permits extended globs.
      strs: output param to append to.
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                text = part_val.s
                if part_val.quoted:
                    if eval_flags & QUOTE_FNMATCH:
                        # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                        text = glob_.GlobEscape(text)
                    elif eval_flags & QUOTE_ERE:
                        text = glob_.ExtendedRegexEscape(text)
                strs.append(text)

            elif case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather than the
                    # parts.  Problem: the word is a TREE of parts, but we only have a
                    # flat list of part_vals.  The only case where we really get arrays
                    # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')

                # It appears to not respect IFS
                present = [item for item in part_val.strs if item is not None]
                strs.append(' '.join(present))  # TODO: eliminate double join()?

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call
                self._PartValsToString(part_val.part_vals, w, eval_flags,
                                       strs)

            else:
                raise AssertionError()
1815
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word, which must be a CompoundWord
      eval_flags: can contain a quoting algorithm, like QUOTE_FNMATCH.  It's
                  applied when the part values are joined, not when the
                  parts are evaluated.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Fast path.  QUOTE_FNMATCH etc. breaks this optimization.
    if eval_flags == 0:
        fast = word_.FastStrEval(w)
        if fast is not None:
            return value.Str(fast)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    part_vals = []  # type: List[part_value_t]
    for p in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(p, part_vals, 0)

    pieces = []  # type: List[str]
    self._PartValsToString(part_vals, w, eval_flags, pieces)
    return value.Str(''.join(pieces))
1843
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but returns whether we got ExtGlob.

    Quoted pieces are always escaped with QUOTE_FNMATCH.

    Returns:
      (pattern, has_extglob).  An empty word gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part_vals = []  # type: List[part_value_t]
    has_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            has_extglob = True
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(p, part_vals, 0)

    pieces = []  # type: List[str]
    self._PartValsToString(part_vals, w, QUOTE_FNMATCH, pieces)
    return value.Str(''.join(pieces)), has_extglob
1864
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42)
    are handled here.  A failure doesn't propagate; it's turned into a
    placeholder string that describes it.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers is there to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)
        except error.FatalRuntime as e:
            result = value.Str('<Runtime error: %s>' % e.UserErrorString())

        except (IOError, OSError) as e:
            result = value.Str('<I/O error: %s>' % pyutil.strerror(e))

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
1887
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Args:
      UP_w: the right-hand side; either rhs_word.Empty or a CompoundWord

    Returns:
      value.Str('') for an empty RHS, value.BashArray for a=(1 2),
      value.BashAssoc for an associative array literal, and otherwise the
      value.Str that EvalWordToString gives.
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so check it against rhs_word_e, like
    # EvalWordToPattern does.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
1924
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args.

    A frame that's entirely quoted becomes exactly one arg.  Otherwise the
    pieces are escaped, joined and split, and each resulting arg that
    looks like a glob is expanded.

    Args:
      frame: the pieces that make up the frame
      argv: output param to append to.
    """
    #log('--- frame %s', frame)

    all_empty = True
    num_quoted = 0
    for piece in frame:
        if len(piece.s) != 0:
            all_empty = False
        if piece.quoted:
            num_quoted += 1

    any_quoted = num_quoted > 0
    all_quoted = num_quoted == len(frame)

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if all_empty and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if all_quoted:
        raw = [piece.s for piece in frame]
        argv.append(''.join(raw))
        return

    will_glob = not self.exec_opts.noglob()

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        if will_glob and piece.quoted:
            frag = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            frag = _BackslashEscape(piece.s)

        if piece.do_split:
            frag = _BackslashEscape(frag)
        else:
            frag = self.splitter.Escape(frag)

        frags.append(frag)

    flat = ''.join(frags)
    #log('flat: %r', flat)

    split_args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(split_args) == 0 and any_quoted:
        argv.append('')
        return

    #log('split args: %r', split_args)
    for arg in split_args:
        if not glob_.LooksLikeGlob(arg):
            argv.append(glob_.GlobUnescape(arg))
            continue

        n = self.globber.Expand(arg, argv)
        if n < 0:
            # TODO: location info, with span IDs carried through the frame
            raise error.FailGlob('Pattern %r matched no files' % arg,
                                 loc.Missing)
1995
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Helper for _EvalAssignBuiltin.

    Splitting and globbing are disabled for assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]

    Returns:
      One string per non-empty frame of the word.
    """
    part_vals = []  # type: List[part_value_t]
    self._EvalWordToParts(w, part_vals, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(part_vals):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        raw = [piece.s for piece in frame]
        result.append(''.join(raw))  # no split or glob

    #log('argv: %s', result)
    return result
2015
def _EvalAssignBuiltin(self, builtin_id, arg0, words):
    # type: (builtin_t, str, List[CompoundWord]) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

    x='foo=bar'
    local a=(1 2) $x

    Args:
      builtin_id: which assignment builtin this is
      arg0: the evaluated first word, i.e. the name of the builtin
      words: all the words of the command, including the first one

    Returns:
      A cmd_value.Assign with the flags, a location for each flag, and the
      assignment args.  The flag and location lists have the same length.
    """
    # Grammar:
    #
    # ('builtin' | 'command')* keyword flag* pair*
    # flag = [-+].*
    #
    # There is also command -p, but we haven't implemented it.  Maybe just punt
    # on it.  Punted on 'builtin' and 'command' for now too.

    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    # flags and flag_locs are parallel lists: flag_locs[i] is the word that
    # flags[i] came from.
    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning "function" for
                    # all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # A name after -f or -F.  Record its location too,
                        # so that flag_locs stays parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2101
def SimpleEvalWordSequence2(self, words, allow_assign):
    # type: (List[CompoundWord], bool) -> cmd_value_t
    """Evaluate a list of words the YSH way: no splitting, static globs only.

    Args:
      words: the words to evaluate.
      allow_assign: if True, the first word is evaluated without globbing,
        and if it yields exactly one string naming an assignment builtin,
        evaluation is handed off to _EvalAssignBuiltin().

    Returns:
      The result of _EvalAssignBuiltin() in the case described above;
      otherwise a cmd_value.Argv whose strings and locations are parallel.

    Raises:
      error.FailGlob: when globber.Expand() returns a negative count for a
        word that looks like a static glob.
    """
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if i == 0 and allow_assign:
            first = self._EvalWordToArgv(w)  # respects strict-array
            if len(first) == 1:
                name = first[0]
                assign_id = consts.LookupAssignBuiltin(name)
                if assign_id != consts.NO_INDEX:
                    # Same logic as legacy word eval, with no splitting
                    return self._EvalAssignBuiltin(assign_id, name, words)

            argv.extend(first)
            for _ in xrange(len(first)):
                arg_locs.append(w)
            continue

        if glob_.LooksLikeStaticGlob(w):
            pat = self.EvalWordToString(w)  # respects strict-array
            n_matched = self.globber.Expand(pat.s, argv)
            if n_matched < 0:
                raise error.FailGlob('Pattern %r matched no files' % pat.s,
                                     w)
            for _ in xrange(n_matched):
                arg_locs.append(w)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, 0)  # not quoted

        if 0:
            log('')
            log('Static: part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        # Still need to process
        frames = _MakeWordFrames(part_vals)

        if 0:
            log('')
            log('Static: frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # We will still allow x"${a[@]}"x, though it's deprecated by @a, which
        # disallows such expressions at parse time.
        for frame in frames:
            if len(frame) == 0:  # empty array gives empty frame!
                continue
            pieces = [piece.s for piece in frame]
            argv.append(''.join(pieces))  # no split or glob
            arg_locs.append(w)

    return cmd_value.Argv(argv, arg_locs, None, None, None)
2161
def EvalWordSequence2(self, words, allow_assign=False):
    # type: (List[CompoundWord], bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: list of Word instances
      allow_assign: if True, and the first word evaluates to the name of an
        assignment builtin, the whole command is evaluated by
        _EvalAssignBuiltin() instead.

    Returns:
      A cmd_value_t: the result of _EvalAssignBuiltin() in the case above,
      otherwise a cmd_value.Argv whose strings and locations are parallel
      (one location per string).  When the simple_word_eval option is on,
      the result of SimpleEvalWordSequence2() is returned instead.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    # first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    # "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    # off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    for i, w in enumerate(words):
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            # Exactly one string and one location for this word
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == 0:
                builtin_id = consts.LookupAssignBuiltin(fast_str)
                if builtin_id != consts.NO_INDEX:
                    return self._EvalAssignBuiltin(builtin_id, fast_str,
                                                   words)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if allow_assign and i == 0 and len(part_vals) == 1:
            val0 = part_vals[0]
            UP_val0 = val0
            if val0.tag() == part_value_e.String:
                val0 = cast(Piece, UP_val0)
                if not val0.quoted:
                    builtin_id = consts.LookupAssignBuiltin(val0.s)
                    if builtin_id != consts.NO_INDEX:
                        return self._EvalAssignBuiltin(
                            builtin_id, val0.s, words)

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Snapshot the length right before this word contributes anything.
        # A counter carried across iterations goes stale whenever the fast
        # path above appends to strs, which made locs longer than strs.
        n_before = len(strs)

        # Do splitting and globbing.  Each frame will append zero or more args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.
        for _ in xrange(len(strs) - n_before):
            locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    return cmd_value.Argv(strs, locs, None, None, None)
2258
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a plain list of strings, for arrays and for loops.

    Assignment builtins are not allowed in those contexts, so
    EvalWordSequence2() is called with its default allow_assign, and the
    result is asserted to be a cmd_value.Argv.
    """
    UP_result = self.EvalWordSequence2(words)
    assert UP_result.tag() == cmd_value_e.Argv

    argv_val = cast(cmd_value.Argv, UP_result)
    return argv_val.argv
2270
2271
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command subs and process subs via shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Starts as None; CheckCircularDeps() asserts it has been set.
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs are set."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and wrap its stdout as a part value."""
        output = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            # Single string; the last field is the negation of 'quoted'
            return Piece(output, quoted, not quoted)

        # Left_AtParen: the output is split into an array
        fields = self.splitter.SplitForWordEval(output)
        return part_value.Array(fields)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the string from RunProcessSub()."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2310
2311
# Placeholder returned in place of a command sub's output when there is no
# executor to run it.
_DUMMY = '__NO_COMMAND_SUB__'


class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs are never run; fixed placeholder strings
    are returned instead.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end of
    the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs are set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder shaped like a real command sub result."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        # Left_AtParen: a one-element array
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder instead of running the process sub."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2354
2355
2356	# vim: sw=4