osh/sh_expr_eval.py

OILS / osh / sh_expr_eval.py View on Github | oilshell.org

1210 lines, 788 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	sh_expr_eval.py -- Shell boolean and arithmetic expressions.
10	"""
11	from __future__ import print_function
12
13	from _devbuild.gen.id_kind_asdl import Id
14	from _devbuild.gen.runtime_asdl import scope_t
15	from _devbuild.gen.syntax_asdl import (
16	word_t,
17	CompoundWord,
18	Token,
19	loc,
20	loc_t,
21	source,
22	arith_expr,
23	arith_expr_e,
24	arith_expr_t,
25	bool_expr,
26	bool_expr_e,
27	bool_expr_t,
28	sh_lhs,
29	sh_lhs_e,
30	sh_lhs_t,
31	BracedVarSub,
32	)
33	from _devbuild.gen.option_asdl import option_i
34	from _devbuild.gen.types_asdl import bool_arg_type_e
35	from _devbuild.gen.value_asdl import (
36	value,
37	value_e,
38	value_t,
39	sh_lvalue,
40	sh_lvalue_e,
41	sh_lvalue_t,
42	LeftName,
43	eggex_ops,
44	regex_match,
45	RegexMatch,
46	)
47	from core import alloc
48	from core import error
49	from core.error import e_die, e_die_status, e_strict, e_usage
50	from core import num
51	from core import state
52	from display import ui
53	from core import util
54	from frontend import consts
55	from frontend import lexer
56	from frontend import location
57	from frontend import match
58	from frontend import parse_lib
59	from frontend import reader
60	from mycpp import mops
61	from mycpp import mylib
62	from mycpp.mylib import log, tagswitch, switch, str_cmp
63	from osh import bool_stat
64	from osh import word_eval
65
66	import libc # for fnmatch
67	# Import these names directly because the C++ translation uses macros literally.
68	from libc import FNM_CASEFOLD, REG_ICASE
69
70	from typing import Tuple, Optional, cast, TYPE_CHECKING
71	if TYPE_CHECKING:
72	from core import optview
73
74	_ = log
75
76	#
77	# Arith and Command/Word variants of assignment
78	#
79	# Calls EvalShellLhs()
80	# a[$key]=$val # osh/cmd_eval.py:814 (command_e.ShAssignment)
81	# Calls EvalArithLhs()
82	# (( a[key] = val )) # osh/sh_expr_eval.py:326 (_EvalLhsArith)
83	#
84	# Calls OldValue()
85	# a[$key]+=$val # osh/cmd_eval.py:795 (assign_op_e.PlusEqual)
86	# (( a[key] += val )) # osh/sh_expr_eval.py:308 (_EvalLhsAndLookupArith)
87	#
88	# RHS Indexing
89	# val=${a[$key]} # osh/word_eval.py:639 (bracket_op_e.ArrayIndex)
90	# (( val = a[key] )) # osh/sh_expr_eval.py:509 (Id.Arith_LBracket)
91	#
92
93
def OldValue(lval, mem, exec_opts):
    # type: (sh_lvalue_t, state.Mem, Optional[optview.Exec]) -> value_t
    """Look up the current value of an lvalue, for augmented assignment.

    For s+=val and (( i += 1 ))

    Args:
      lval: the location (variable, indexed array element, or assoc array
            entry) whose current value we need to look up
      mem: variable storage; the variable is read with mem.GetValue()
      exec_opts: can be None if we don't want to check set -u!
                 Because s+=val doesn't check it.

    Returns:
      For sh_lvalue.Var, the variable's value as stored.
      For sh_lvalue.Indexed and sh_lvalue.Keyed, a value.Str holding the
      element, or value.Str('') when the element doesn't exist.

    TODO: A stricter and less ambiguous version for YSH.
    - Problem: why does sh_lvalue have Indexed and Keyed, while sh_lhs only has
      IndexedName?
      - should I have location.LName and sh_lvalue.Indexed only?
      - and Indexed uses the index_t type?
        - well that might be Str or Int
    """
    assert isinstance(lval, sh_lvalue_t), lval

    # TODO: refactor sh_lvalue_t to make this simpler
    # First pass over the lvalue: extract only the variable name.
    UP_lval = lval
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):  # (( i++ ))
            lval = cast(LeftName, UP_lval)
            var_name = lval.name
        elif case(sh_lvalue_e.Indexed):  # (( a[i]++ ))
            lval = cast(sh_lvalue.Indexed, UP_lval)
            var_name = lval.name
        elif case(sh_lvalue_e.Keyed):  # (( A['K']++ )) ?  I think this works
            lval = cast(sh_lvalue.Keyed, UP_lval)
            var_name = lval.name
        else:
            raise AssertionError()

    val = mem.GetValue(var_name)
    # set -u check, skipped entirely when the caller passes exec_opts=None
    if exec_opts and exec_opts.nounset() and val.tag() == value_e.Undef:
        e_die('Undefined variable %r' % var_name)  # TODO: location info

    # Second pass: select the element that the lvalue refers to.
    UP_val = val
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):
            return val

        elif case(sh_lvalue_e.Indexed):
            lval = cast(sh_lvalue.Indexed, UP_lval)

            array_val = None  # type: value.BashArray
            with tagswitch(val) as case2:
                if case2(value_e.Undef):
                    # An undefined variable behaves like an empty array
                    array_val = value.BashArray([])
                elif case2(value_e.BashArray):
                    tmp = cast(value.BashArray, UP_val)
                    # mycpp rewrite: add tmp.  cast() creates a new var in inner scope
                    array_val = tmp
                else:
                    e_die("Can't use [] on value of type %s" % ui.ValType(val))

            s = word_eval.GetArrayItem(array_val.strs, lval.index)

            if s is None:
                val = value.Str('')  # NOTE: Other logic is value.Undef?  0?
            else:
                assert isinstance(s, str), s
                val = value.Str(s)

        elif case(sh_lvalue_e.Keyed):
            lval = cast(sh_lvalue.Keyed, UP_lval)

            assoc_val = None  # type: value.BashAssoc
            with tagswitch(val) as case2:
                if case2(value_e.Undef):
                    # This never happens -- presumably because undef[x]+= is
                    # treated as an indexed array, which is handled by the
                    # Indexed case above.  TODO: confirm against callers.
                    raise AssertionError()
                elif case2(value_e.BashAssoc):
                    tmp2 = cast(value.BashAssoc, UP_val)
                    # mycpp rewrite: add tmp.  cast() creates a new var in inner scope
                    assoc_val = tmp2
                else:
                    e_die("Can't use [] on value of type %s" % ui.ValType(val))

            s = assoc_val.d.get(lval.key)
            if s is None:
                # A missing key reads as the empty string
                val = value.Str('')
            else:
                val = value.Str(s)

        else:
            raise AssertionError()

    return val
185
186
187	# TODO: Should refactor for int/char-based processing
188	if mylib.PYTHON:
189
def IsLower(ch):
    # type: (str) -> bool
    """Return True if ch sorts within the ASCII range 'a' through 'z'."""
    return 'a' <= ch <= 'z'
193
def IsUpper(ch):
    # type: (str) -> bool
    """Return True if ch sorts within the ASCII range 'A' through 'Z'."""
    return 'A' <= ch <= 'Z'
197
198
class UnsafeArith(object):
    """Parses strings such as a[i] at RUNTIME, instead of at parse time."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            parse_ctx,  # type: parse_lib.ParseContext
            arith_ev,  # type: ArithEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        # Parsing
        self.parse_ctx = parse_ctx
        self.arena = self.parse_ctx.arena

        # Evaluation
        self.arith_ev = arith_ev
        self.mem = mem

        # Options
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts

        # Error reporting
        self.errfmt = errfmt

    def ParseLValue(self, s, location):
        # type: (str, loc_t) -> sh_lvalue_t
        """Turn a string into an sh_lvalue, for 'unset' and 'printf -v'.

        The arith parser does the work, so the string is treated like the
        left-hand side of (( a[i] = x )).

        Args:
          s: the text to parse, e.g. 'a[i]'
          location: where to blame errors
        """
        if not self.parse_ctx.parse_opts.parse_sh_arith():
            # YSH gets something simpler: a plain variable name or nothing
            if match.IsValidVarName(s):
                return LeftName(s, location)
            else:
                e_die('Invalid variable name %r (parse_sh_arith is off)' % s,
                      location)

        arith_parser = self.parse_ctx.MakeArithParser(s)
        src = source.ArgvWord('dynamic LHS', location)

        with alloc.ctx_SourceCode(self.arena, src):
            try:
                anode = arith_parser.Parse()
            except error.Parse as e:
                self.errfmt.PrettyPrintError(e)
                # Exception for builtins 'unset' and 'printf'
                e_usage('got invalid LHS expression', location)

        # Note: '1+2' parses fine here and only fails below, as a runtime
        # error, because it isn't a valid LHS.  It could be a parse error.

        if self.exec_opts.eval_unsafe_arith():
            return self.arith_ev.EvalArithLhs(anode)

        # By default, command subs are disabled during evaluation, to prevent
        # attacks like:
        #
        #     unset -v 'A["$(echo K; rm *)"]'
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            result = self.arith_ev.EvalArithLhs(anode)
        return result

    def ParseVarRef(self, ref_str, blame_tok):
        # type: (str, Token) -> BracedVarSub
        """Parse the string that ${!ref} points to.

        This supports:
        - 0 to 9 for $0 to $9
        - @ for "$@" etc.

        The grammar is related to _ReadBracedVarSub in osh/word_parse.py.

        Note: declare -n allows 'varname' and 'varname[i]' and 'varname[@]', but
        it does NOT allow 0 to 9, @, *

        NamerefExpr = NAME Subscript?   # this allows @ and * too

        _ResolveNameOrRef currently gives you a 'cell'.  So it might not support
        sh_lvalue.Indexed?

        Args:
          ref_str: the text to parse
          blame_tok: token to blame if ref_str doesn't parse
        """
        line_reader = reader.StringLineReader(ref_str, self.arena)
        # Named 'lx' so the imported 'lexer' module isn't shadowed
        lx = self.parse_ctx.MakeLexer(line_reader)
        word_parser = self.parse_ctx.MakeWordParser(lx, line_reader)

        with alloc.ctx_SourceCode(self.arena, source.VarRef(blame_tok)):
            try:
                bvs = word_parser.ParseVarRef()
            except error.Parse as e:
                # This prints the inner location
                self.errfmt.PrettyPrintError(e)

                # this affects builtins 'unset' and 'printf'
                e_die("Invalid var ref expression", blame_tok)

        return bvs
295
296
297	class ArithEvaluator(object):
298	"""Shared between arith and bool evaluators.
299
300	They both:
301
302	1. Convert strings to integers, respecting shopt -s strict_arith.
303	2. Look up variables and evaluate words.
304	"""
305
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        parse_ctx,  # type: Optional[parse_lib.ParseContext]
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store collaborators; word_ev starts as None and is assigned later."""
    self.mem = mem
    self.errfmt = errfmt

    # May be None: _StringToBigInt() only re-parses strings as arithmetic
    # when a parse context is available.
    self.parse_ctx = parse_ctx

    self.exec_opts = exec_opts
    self.mutable_opts = mutable_opts

    # Not a constructor argument; CheckCircularDeps() asserts it was set.
    self.word_ev = None  # type: word_eval.StringWordEvaluator
321
def CheckCircularDeps(self):
    # type: () -> None
    """Assert that word_ev, which __init__ leaves as None, has been set."""
    assert self.word_ev is not None
325
def _StringToBigInt(self, s, blame_loc):
    # type: (str, loc_t) -> mops.BigInt
    """Coerce a string to an integer with bash-like rules.

    Parsing happens at runtime, which enables silly stuff like
    $(( $(echo 1)$(echo 2) + 1 )) => 13

    Formats tried, in order:

      0xAB   -- hex constant
      042    -- octal constant
      64#z   -- arbitrary base constant
      42     -- decimal constant

    Anything else is re-parsed and evaluated as an arithmetic expression, if
    a parse context is available.

      bare word: variable
      quoted word: string (not done?)

    Args:
      s: the string to convert
      blame_loc: location to attach to errors
    """
    if s.startswith('0x'):
        try:
            big = mops.FromStr(s, 16)
        except ValueError:
            e_strict('Invalid hex constant %r' % s, blame_loc)
        # TODO: don't truncate
        return big

    if s.startswith('0'):
        try:
            big = mops.FromStr(s, 8)
        except ValueError:
            e_strict('Invalid octal constant %r' % s, blame_loc)
        return big

    base_str, digits = mylib.split_once(s, '#')  # see if it has #
    if digits is not None:
        try:
            base = int(base_str)  # machine integer, not BigInt
        except ValueError:
            e_strict('Invalid base for numeric constant %r' % base_str,
                     blame_loc)

        big = mops.ZERO
        for ch in digits:
            # Digit values: 0-9, then a-z (10-35), A-Z (36-61), @ and _
            if IsLower(ch):
                digit = ord(ch) - ord('a') + 10
            elif IsUpper(ch):
                digit = ord(ch) - ord('A') + 36
            elif ch == '@':  # horrible syntax
                digit = 62
            elif ch == '_':
                digit = 63
            elif ch.isdigit():
                digit = int(ch)
            else:
                e_strict('Invalid digits for numeric constant %r' % digits,
                         blame_loc)

            if digit >= base:
                e_strict(
                    'Digits %r out of range for base %d' % (digits, base),
                    blame_loc)

            # big = big * base + digit
            shifted = mops.Mul(big, mops.BigInt(base))
            big = mops.Add(shifted, mops.BigInt(digit))
        return big

    try:
        # Normal base 10 integer.  This includes negative numbers like '-42'.
        big = mops.FromStr(s)
    except ValueError:
        # doesn't look like an integer

        if not self.parse_ctx:
            # note: 'test' and '[' never evaluate recursively
            if len(s.strip()) == 0 or match.IsValidVarName(s):
                # x42 could evaluate to 0
                e_strict("Invalid integer constant %r" % s, blame_loc)
            else:
                # 42x is always fatal!
                e_die("Invalid integer constant %r" % s, blame_loc)
        else:
            arena = self.parse_ctx.arena

            # Special case so we don't get EOF error
            if len(s.strip()) == 0:
                return mops.ZERO

            # For compatibility: Try to parse it as an expression and
            # evaluate it.
            arith_parser = self.parse_ctx.MakeArithParser(s)

            # TODO: Fill in the variable name
            src = source.Variable(None, blame_loc)
            with alloc.ctx_SourceCode(arena, src):
                try:
                    parsed = arith_parser.Parse()  # may raise error.Parse
                except error.Parse as e:
                    self.errfmt.PrettyPrintError(e)
                    e_die('Parse error in recursive arithmetic',
                          e.location)

            # Prevent infinite recursion of $(( 1x )) -- it's a word that
            # evaluates to itself, and you don't want to reparse it as a word.
            if parsed.tag() == arith_expr_e.Word:
                e_die("Invalid integer constant %r" % s, blame_loc)

            if self.exec_opts.eval_unsafe_arith():
                big = self.EvalToBigInt(parsed)
            else:
                # BoolEvaluator doesn't have parse_ctx or mutable_opts
                assert self.mutable_opts is not None

                # We don't need to flip _allow_process_sub, because they
                # can't be parsed.  See spec/bugs.test.sh.
                with state.ctx_Option(self.mutable_opts,
                                      [option_i._allow_command_sub], False):
                    big = self.EvalToBigInt(parsed)

    return big
442
def _ValToIntOrError(self, val, blame):
    # type: (value_t, arith_expr_t) -> mops.BigInt
    """Convert a value to an integer, or fail.

    Undef and non-numeric Str are "strict" errors: they raise only under
    shopt -s strict_arith, and otherwise count as zero.  Any type other than
    Undef, Int and Str is always a fatal error.

    Args:
      val: the value to convert
      blame: expression node that errors are attributed to
    """
    try:
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Int):
                val = cast(value.Int, UP_val)
                return val.i

            elif case(value_e.Str):
                val = cast(value.Str, UP_val)
                # calls e_strict
                return self._StringToBigInt(val.s, loc.Arith(blame))

            elif case(value_e.Undef):
                # 'nounset' already handled before got here
                # Happens upon a[undefined]=42, which unfortunately turns
                # into a[0]=42.
                e_strict('Undefined value in arithmetic context',
                         loc.Arith(blame))

    except error.Strict as e:
        if not self.exec_opts.strict_arith():
            return mops.ZERO
        raise

    # Arrays and associative arrays always fail -- not controlled by
    # strict_arith.
    # In bash, (( a )) is like (( a[0] )), but I don't want that.
    # And returning '0' gives different results.
    msg = "Expected a value convertible to integer, got %s" % ui.ValType(val)
    e_die(msg, loc.Arith(blame))
476
def _EvalLhsAndLookupArith(self, node):
    # type: (arith_expr_t) -> Tuple[mops.BigInt, sh_lvalue_t]
    """Evaluate an LHS and read its current integer value.

    For x += y and ++x, which need the old value before storing a new one.

    Returns:
      (old integer value, lvalue to store the new value into)
    """
    lval = self.EvalArithLhs(node)
    old_val = OldValue(lval, self.mem, self.exec_opts)

    # BASH_LINENO, arr (array name without strict_array), etc.
    is_container = old_val.tag() in (value_e.BashArray, value_e.BashAssoc)
    if is_container and lval.tag() == sh_lvalue_e.Var:
        left_name = cast(LeftName, lval)
        if word_eval.ShouldArrayDecay(left_name.name, self.exec_opts):
            # A bare array name stands for its element 0 / '0', so the store
            # has to target that element too.
            if old_val.tag() == value_e.BashArray:
                lval = sh_lvalue.Indexed(left_name.name, 0, loc.Missing)
            elif old_val.tag() == value_e.BashAssoc:
                lval = sh_lvalue.Keyed(left_name.name, '0', loc.Missing)
            old_val = word_eval.DecayArray(old_val)

    # This error message could be better, but we already have one
    #if val.tag() == value_e.BashArray:
    #  e_die("Can't use assignment like ++ or += on arrays")

    old_big = self._ValToIntOrError(old_val, node)
    return old_big, lval
501
def _Store(self, lval, new_int):
    # type: (sh_lvalue_t, mops.BigInt) -> None
    """Assign an integer to lval, storing it as its string form."""
    state.OshLanguageSetValue(self.mem, lval,
                              value.Str(mops.ToStr(new_int)))
506
def EvalToBigInt(self, node):
    # type: (arith_expr_t) -> mops.BigInt
    """Evaluate an expression and convert the result to an integer.

    Used externally by ${a[i+1]} and ${a:start:len}.  Also used internally.
    """
    result = self.Eval(node)

    # BASH_LINENO, arr (array name without strict_array), etc.
    is_container = result.tag() in (value_e.BashArray, value_e.BashAssoc)
    if is_container and node.tag() == arith_expr_e.VarSub:
        name_tok = cast(Token, node)
        var_name = lexer.LazyStr(name_tok)
        if word_eval.ShouldArrayDecay(var_name, self.exec_opts):
            result = word_eval.DecayArray(result)

    return self._ValToIntOrError(result, node)
524
def EvalToInt(self, node):
    # type: (arith_expr_t) -> int
    """Like EvalToBigInt(), but truncated with mops.BigTruncate()."""
    big = self.EvalToBigInt(node)
    return mops.BigTruncate(big)
528
def Eval(self, node):
    # type: (arith_expr_t) -> value_t
    """Evaluate an arithmetic expression node to a value.

    Returns:
      value.Int for EmptyZero / EmptyOne, assignments, unary and binary
        operators (including && || and the comma operator)
      whatever self.mem.GetValue() returns for a bare variable like (( x )),
        which may be an array; callers like EvalToBigInt() deal with that
      the result of word_ev.EvalWordToString() for words like $x
      value.Str or value.Undef for indexing like a[i]
      the value of the chosen branch for the ternary operator

    Raises:
      error.Expr for a shift by a negative number, in both the binary form
      (x << n) and the compound assignment form (x <<= n).
      Other failures, like division by zero, go through e_die().

    NOTE: (( A['x'] = 'x' )) and (( x = A['x'] )) are syntactically valid in
    bash, but don't do what you'd think.  'x' sometimes a variable name and
    sometimes a key.
    """
    # OSH semantics: Variable NAMES cannot be formed dynamically; but INTEGERS
    # can.  ${foo:-3}4 is OK.  $? will be a compound word too, so we don't have
    # to handle that as a special case.

    UP_node = node
    with tagswitch(node) as case:
        if case(arith_expr_e.EmptyZero):  # $(( ))
            return value.Int(mops.ZERO)  # Weird axiom

        elif case(arith_expr_e.EmptyOne):  # for (( ; ; ))
            return value.Int(mops.ONE)

        elif case(arith_expr_e.VarSub):  # $(( x ))  (can be array)
            vsub = cast(Token, UP_node)
            var_name = lexer.LazyStr(vsub)
            val = self.mem.GetValue(var_name)
            if val.tag() == value_e.Undef and self.exec_opts.nounset():
                e_die('Undefined variable %r' % var_name, vsub)
            return val

        elif case(arith_expr_e.Word):  # $(( $x )) $(( ${x}${y} )), etc.
            w = cast(CompoundWord, UP_node)
            return self.word_ev.EvalWordToString(w)

        elif case(arith_expr_e.UnaryAssign):  # a++
            node = cast(arith_expr.UnaryAssign, UP_node)

            op_id = node.op_id
            old_big, lval = self._EvalLhsAndLookupArith(node.child)

            # Post-ops evaluate to the old value; pre-ops to the new value
            if op_id == Id.Node_PostDPlus:  # post-increment
                new_big = mops.Add(old_big, mops.ONE)
                result = old_big

            elif op_id == Id.Node_PostDMinus:  # post-decrement
                new_big = mops.Sub(old_big, mops.ONE)
                result = old_big

            elif op_id == Id.Arith_DPlus:  # pre-increment
                new_big = mops.Add(old_big, mops.ONE)
                result = new_big

            elif op_id == Id.Arith_DMinus:  # pre-decrement
                new_big = mops.Sub(old_big, mops.ONE)
                result = new_big

            else:
                raise AssertionError(op_id)

            self._Store(lval, new_big)
            return value.Int(result)

        elif case(arith_expr_e.BinaryAssign):  # a=1, a+=5, a[1]+=5
            node = cast(arith_expr.BinaryAssign, UP_node)
            op_id = node.op_id

            if op_id == Id.Arith_Equal:
                # Don't really need a span ID here, because tdop.CheckLhsExpr should
                # have done all the validation.
                lval = self.EvalArithLhs(node.left)
                rhs_big = self.EvalToBigInt(node.right)

                self._Store(lval, rhs_big)
                return value.Int(rhs_big)

            old_big, lval = self._EvalLhsAndLookupArith(node.left)
            rhs_big = self.EvalToBigInt(node.right)

            if op_id == Id.Arith_PlusEqual:
                new_big = mops.Add(old_big, rhs_big)
            elif op_id == Id.Arith_MinusEqual:
                new_big = mops.Sub(old_big, rhs_big)
            elif op_id == Id.Arith_StarEqual:
                new_big = mops.Mul(old_big, rhs_big)

            elif op_id == Id.Arith_SlashEqual:
                if mops.Equal(rhs_big, mops.ZERO):
                    # This node only has an op_id, not an operator token, so
                    # blame the divisor expression.
                    e_die('Divide by zero', loc.Arith(node.right))
                new_big = num.IntDivide(old_big, rhs_big)

            elif op_id == Id.Arith_PercentEqual:
                if mops.Equal(rhs_big, mops.ZERO):
                    e_die('Divide by zero', loc.Arith(node.right))
                new_big = num.IntRemainder(old_big, rhs_big)

            # Shifts by a negative number are rejected, just like the binary
            # operators >> and << below.
            elif op_id == Id.Arith_DGreatEqual:
                if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                    raise error.Expr(
                        "Can't right shift by negative number",
                        loc.Arith(node.right))
                new_big = mops.RShift(old_big, rhs_big)
            elif op_id == Id.Arith_DLessEqual:
                if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                    raise error.Expr("Can't left shift by negative number",
                                     loc.Arith(node.right))
                new_big = mops.LShift(old_big, rhs_big)
            elif op_id == Id.Arith_AmpEqual:
                new_big = mops.BitAnd(old_big, rhs_big)
            elif op_id == Id.Arith_PipeEqual:
                new_big = mops.BitOr(old_big, rhs_big)
            elif op_id == Id.Arith_CaretEqual:
                new_big = mops.BitXor(old_big, rhs_big)
            else:
                raise AssertionError(op_id)  # shouldn't get here

            self._Store(lval, new_big)
            return value.Int(new_big)

        elif case(arith_expr_e.Unary):
            node = cast(arith_expr.Unary, UP_node)
            op_id = node.op_id

            i = self.EvalToBigInt(node.child)

            if op_id == Id.Node_UnaryPlus:  # +i
                result = i
            elif op_id == Id.Node_UnaryMinus:  # -i
                result = mops.Sub(mops.ZERO, i)

            elif op_id == Id.Arith_Bang:  # logical negation
                if mops.Equal(i, mops.ZERO):
                    result = mops.ONE
                else:
                    result = mops.ZERO
            elif op_id == Id.Arith_Tilde:  # bitwise complement
                result = mops.BitNot(i)
            else:
                raise AssertionError(op_id)  # shouldn't get here

            return value.Int(result)

        elif case(arith_expr_e.Binary):
            node = cast(arith_expr.Binary, UP_node)
            op_id = node.op.id

            # Short-circuit evaluation for || and &&.
            if op_id == Id.Arith_DPipe:
                lhs_big = self.EvalToBigInt(node.left)
                if mops.Equal(lhs_big, mops.ZERO):
                    rhs_big = self.EvalToBigInt(node.right)
                    if mops.Equal(rhs_big, mops.ZERO):
                        result = mops.ZERO  # false
                    else:
                        result = mops.ONE  # true
                else:
                    result = mops.ONE  # true
                return value.Int(result)

            if op_id == Id.Arith_DAmp:
                lhs_big = self.EvalToBigInt(node.left)
                if mops.Equal(lhs_big, mops.ZERO):
                    result = mops.ZERO  # false
                else:
                    rhs_big = self.EvalToBigInt(node.right)
                    if mops.Equal(rhs_big, mops.ZERO):
                        result = mops.ZERO  # false
                    else:
                        result = mops.ONE  # true
                return value.Int(result)

            if op_id == Id.Arith_LBracket:
                # NOTE: Similar to bracket_op_e.ArrayIndex in osh/word_eval.py

                left = self.Eval(node.left)
                UP_left = left
                with tagswitch(left) as case:
                    if case(value_e.BashArray):
                        array_val = cast(value.BashArray, UP_left)
                        small_i = mops.BigTruncate(
                            self.EvalToBigInt(node.right))
                        s = word_eval.GetArrayItem(array_val.strs, small_i)

                    elif case(value_e.BashAssoc):
                        left = cast(value.BashAssoc, UP_left)
                        key = self.EvalWordToString(node.right)
                        s = left.d.get(key)

                    elif case(value_e.Str):
                        left = cast(value.Str, UP_left)
                        if self.exec_opts.strict_arith():
                            e_die(
                                "Value of type Str can't be indexed (strict_arith)",
                                node.op)
                        index = self.EvalToBigInt(node.right)
                        # s[0] evaluates to s
                        # s[1] evaluates to Undef
                        s = left.s if mops.Equal(index,
                                                 mops.ZERO) else None

                    elif case(value_e.Undef):
                        if self.exec_opts.strict_arith():
                            e_die(
                                "Value of type Undef can't be indexed (strict_arith)",
                                node.op)
                        s = None  # value.Undef

                        # There isn't a way to distinguish Undef vs. empty
                        # string, even with set -o nounset?
                        # s = ''

                    else:
                        # TODO: Add error context
                        e_die(
                            "Value of type %s can't be indexed" %
                            ui.ValType(left), node.op)

                if s is None:
                    val = value.Undef
                else:
                    val = value.Str(s)

                return val

            if op_id == Id.Arith_Comma:
                self.EvalToBigInt(node.left)  # throw away result
                result = self.EvalToBigInt(node.right)
                return value.Int(result)

            # Rest are integers
            lhs_big = self.EvalToBigInt(node.left)
            rhs_big = self.EvalToBigInt(node.right)

            if op_id == Id.Arith_Plus:
                result = mops.Add(lhs_big, rhs_big)
            elif op_id == Id.Arith_Minus:
                result = mops.Sub(lhs_big, rhs_big)
            elif op_id == Id.Arith_Star:
                result = mops.Mul(lhs_big, rhs_big)
            elif op_id == Id.Arith_Slash:
                if mops.Equal(rhs_big, mops.ZERO):
                    e_die('Divide by zero', node.op)
                result = num.IntDivide(lhs_big, rhs_big)

            elif op_id == Id.Arith_Percent:
                if mops.Equal(rhs_big, mops.ZERO):
                    e_die('Divide by zero', node.op)
                result = num.IntRemainder(lhs_big, rhs_big)

            elif op_id == Id.Arith_DStar:
                if mops.Greater(mops.ZERO, rhs_big):
                    e_die("Exponent can't be a negative number",
                          loc.Arith(node.right))
                result = num.Exponent(lhs_big, rhs_big)

            elif op_id == Id.Arith_DEqual:
                result = mops.FromBool(mops.Equal(lhs_big, rhs_big))
            elif op_id == Id.Arith_NEqual:
                result = mops.FromBool(not mops.Equal(lhs_big, rhs_big))
            elif op_id == Id.Arith_Great:
                result = mops.FromBool(mops.Greater(lhs_big, rhs_big))
            elif op_id == Id.Arith_GreatEqual:
                result = mops.FromBool(
                    mops.Greater(lhs_big, rhs_big) or
                    mops.Equal(lhs_big, rhs_big))
            elif op_id == Id.Arith_Less:
                # a < b is computed as b > a
                result = mops.FromBool(mops.Greater(rhs_big, lhs_big))
            elif op_id == Id.Arith_LessEqual:
                result = mops.FromBool(
                    mops.Greater(rhs_big, lhs_big) or
                    mops.Equal(lhs_big, rhs_big))

            elif op_id == Id.Arith_Pipe:
                result = mops.BitOr(lhs_big, rhs_big)
            elif op_id == Id.Arith_Amp:
                result = mops.BitAnd(lhs_big, rhs_big)
            elif op_id == Id.Arith_Caret:
                result = mops.BitXor(lhs_big, rhs_big)

            # Note: how to define shift of negative numbers?
            elif op_id == Id.Arith_DLess:
                if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                    raise error.Expr("Can't left shift by negative number",
                                     node.op)
                result = mops.LShift(lhs_big, rhs_big)
            elif op_id == Id.Arith_DGreat:
                if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                    raise error.Expr(
                        "Can't right shift by negative number", node.op)
                result = mops.RShift(lhs_big, rhs_big)
            else:
                raise AssertionError(op_id)

            return value.Int(result)

        elif case(arith_expr_e.TernaryOp):
            node = cast(arith_expr.TernaryOp, UP_node)

            # Only the chosen branch is evaluated
            cond = self.EvalToBigInt(node.cond)
            if mops.Equal(cond, mops.ZERO):
                return self.Eval(node.false_expr)
            else:
                return self.Eval(node.true_expr)

        else:
            raise AssertionError(node.tag())

    raise AssertionError('for -Wreturn-type in C++')
832
def EvalWordToString(self, node, blame_loc=loc.Missing):
    # type: (arith_expr_t, loc_t) -> str
    """Evaluate an assoc array key, which must be written as a word.

    These are allowed because they're unambiguous, unlike a[x]

      a[$x] a["$x"] a["x"] a['x']

    Args:
      node: the expression inside the brackets
      blame_loc: location for the error when node isn't a word

    Raises:
      error.FatalRuntime if the expression isn't a string
        or if it contains a bare variable like a[x]
    """
    UP_node = node
    if node.tag() != arith_expr_e.Word:
        # A[x] is the "Parsing Bash is Undecidable" problem
        # It is a string or var name?
        # (It's parsed as arith_expr.VarSub)
        e_die(
            "Assoc array keys must be strings: $x 'x' \"$x\" etc. (OILS-ERR-101)",
            blame_loc)

    # $(( $x )) $(( ${x}${y} )), etc.
    word = cast(CompoundWord, UP_node)
    return self.word_ev.EvalWordToString(word).s
856
def EvalShellLhs(self, node, which_scopes):
    # type: (sh_lhs_t, scope_t) -> sh_lvalue_t
    """Turn the left side of a shell assignment into an sh_lvalue.

    For a=b and a[x]=b etc.

    A subscripted name becomes sh_lvalue.Keyed if the variable is currently
    an assoc array, and sh_lvalue.Indexed otherwise.
    """
    assert isinstance(node, sh_lhs_t), node

    UP_node = node
    result = None  # type: sh_lvalue_t
    with tagswitch(node) as case:
        if case(sh_lhs_e.Name):  # a=x
            node = cast(sh_lhs.Name, UP_node)
            assert node.name is not None

            plain = LeftName(node.name, node.left)
            result = plain

        elif case(sh_lhs_e.IndexedName):  # a[1+2]=x
            node = cast(sh_lhs.IndexedName, UP_node)
            assert node.name is not None

            if not self.mem.IsBashAssoc(node.name):
                # Subscript is an arithmetic expression
                array_index = mops.BigTruncate(
                    self.EvalToBigInt(node.index))
                indexed = sh_lvalue.Indexed(node.name, array_index,
                                            node.left)
                result = indexed
            else:
                # Subscript is a string key
                assoc_key = self.EvalWordToString(node.index,
                                                  blame_loc=node.left)
                # node.left points to A[ in A[x]=1
                keyed = sh_lvalue.Keyed(node.name, assoc_key, node.left)
                result = keyed

        else:
            raise AssertionError(node.tag())

    return result
894
def _VarNameOrWord(self, anode):
    # type: (arith_expr_t) -> Tuple[Optional[str], loc_t]
    """Interpret an arith node as a variable name, if possible.

    Returns:
      (name, location) for a VarSub token or a word
      (None, loc.Missing) for any other kind of node
    """
    UP_anode = anode
    tag = anode.tag()

    if tag == arith_expr_e.VarSub:
        name_tok = cast(Token, UP_anode)
        return (lexer.LazyStr(name_tok), name_tok)

    if tag == arith_expr_e.Word:
        word = cast(CompoundWord, UP_anode)
        # The name is whatever the word evaluates to
        return (self.EvalWordToString(word), word)

    no_str = None  # type: str
    return (no_str, loc.Missing)
913
def EvalArithLhs(self, anode):
    # type: (arith_expr_t) -> sh_lvalue_t
    """Evaluate an arithmetic node as a location that can be assigned to.

    For (( a[x] = 1 )) etc.

    Returns:
      sh_lvalue.Keyed for name[key] when name is currently an assoc array
      sh_lvalue.Indexed for name[index] otherwise
      LeftName for a plain name

    Anything else fails through e_die() or e_die_status(2, ...).
    """
    UP_anode = anode
    if anode.tag() == arith_expr_e.Binary:
        anode = cast(arith_expr.Binary, UP_anode)
        if anode.op.id == Id.Arith_LBracket:
            var_name, blame_loc = self._VarNameOrWord(anode.left)

            # _VarNameOrWord() gives None when the left side is neither a
            # name nor a word.  Test for that BEFORE validating the name, so
            # that None is never passed to IsValidVarName(); such nodes fall
            # through to the 'Invalid LHS' error at the bottom.
            if var_name is not None:
                # (( 1[2] = 3 )) isn't valid
                if not match.IsValidVarName(var_name):
                    e_die('Invalid variable name %r' % var_name, blame_loc)

                if self.mem.IsBashAssoc(var_name):
                    arith_loc = location.TokenForArith(anode)
                    key = self.EvalWordToString(anode.right,
                                                blame_loc=arith_loc)
                    return sh_lvalue.Keyed(var_name, key, blame_loc)
                else:
                    index = mops.BigTruncate(self.EvalToBigInt(
                        anode.right))
                    return sh_lvalue.Indexed(var_name, index, blame_loc)

    var_name, blame_loc = self._VarNameOrWord(anode)
    if var_name is not None:
        return LeftName(var_name, blame_loc)

    # e.g. unset 'x-y'.  status 2 for runtime parse error
    e_die_status(2, 'Invalid LHS to modify', blame_loc)
946
947
948	class BoolEvaluator(ArithEvaluator):
949	"""This is also an ArithEvaluator because it has to understand.
950
951	[[ x -eq 3 ]]
952
953	where x='1+2'
954	"""
955
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: Optional[state.MutableOpts]
        parse_ctx,  # type: Optional[parse_lib.ParseContext]
        errfmt,  # type: ui.ErrorFormatter
        always_strict=False  # type: bool
):
    # type: (...) -> None
    """Set up the base evaluator, plus the always_strict flag.

    When always_strict is true, _StringToBigIntOrError() re-raises strict
    errors even if strict_arith is off.
    """
    self.always_strict = always_strict
    ArithEvaluator.__init__(self, mem, exec_opts, mutable_opts, parse_ctx,
                            errfmt)
969
def _IsDefined(self, s, blame_loc):
    # type: (str, loc_t) -> bool
    """Decide whether the variable or array element named by s is defined.

    Called for the -v operator.  s should look like name or name[index].
    Malformed input and values of the wrong type give False, unless
    strict_word_eval is on, in which case they are errors.
    """
    groups = util.RegexSearch(consts.TEST_V_RE, s)
    if groups is None:
        if self.exec_opts.strict_word_eval():
            e_die('-v expected name or name[index]', blame_loc)
        return False

    var_name = groups[1]
    subscript = groups[3]

    val = self.mem.GetValue(var_name)
    if len(subscript) == 0:  # it's just a variable name
        return val.tag() != value_e.Undef

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)

            # TODO: use mops.BigStr
            try:
                index = int(subscript)
            except ValueError as e:
                if self.exec_opts.strict_word_eval():
                    e_die(
                        '-v got BashArray and invalid index %r' %
                        subscript, blame_loc)
                return False

            if index < 0:
                if self.exec_opts.strict_word_eval():
                    e_die('-v got invalid negative index %s' % subscript,
                          blame_loc)
                return False

            if index >= len(val.strs):
                return False  # out of range

            # A None entry within range counts as not defined
            return val.strs[index] is not None

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            return subscript in val.d

        else:
            # work around mycpp bug!  parses as 'elif'
            pass

    # Subscript used on something that's neither kind of array
    if self.exec_opts.strict_word_eval():
        raise error.TypeErr(val, 'Expected BashArray or BashAssoc',
                            blame_loc)
    return False
    raise AssertionError()
1026
def _StringToBigIntOrError(self, s, blame_word=None):
    # type: (str, Optional[word_t]) -> mops.BigInt
    """Convert a string to an integer; strict errors may become zero.

    Used by both [[ $x -gt 3 ]] and (( $x )).

    A strict error is re-raised if this evaluator is always_strict or
    strict_arith is on; otherwise the result is zero.
    """
    blame_loc = loc.Missing  # type: loc_t
    if blame_word:
        blame_loc = loc.Word(blame_word)

    try:
        return self._StringToBigInt(s, blame_loc)
    except error.Strict as e:
        if self.always_strict or self.exec_opts.strict_arith():
            raise
        return mops.ZERO
1043
def _EvalCompoundWord(self, word, eval_flags=0):
    # type: (word_t, int) -> str
    """Evaluate a word with the word evaluator and return its string."""
    return self.word_ev.EvalWordToString(word, eval_flags).s
1048
def EvalB(self, node):
    # type: (bool_expr_t) -> bool
    """Evaluate a boolean expression node and return its truth value.

    Dispatches on the node's tag:
      WordTest    -- true when the evaluated word is a non-empty string.
      LogicalNot  -- negation of the child expression.
      LogicalAnd, LogicalOr -- short-circuit: the right side is evaluated
                     only when the left side doesn't decide the result.
      Unary, Binary -- the operand words are evaluated to strings, then
                     the operator is dispatched on consts.BoolArgType().

    Side effects:
      The EqualTilde (regex) operator records the outcome with
      self.mem.SetRegexMatch(), for both a match and a non-match.

    Raises:
      A fatal error via e_die() for a unary operator that isn't
      implemented, and via e_die_status(2, ...) when libc.regex_search()
      raises ValueError.
      AssertionError for operators or node tags that should never occur.
    """
    UP_node = node
    with tagswitch(node) as case:
        if case(bool_expr_e.WordTest):
            node = cast(bool_expr.WordTest, UP_node)
            return bool(self._EvalCompoundWord(node.w))

        elif case(bool_expr_e.LogicalNot):
            node = cast(bool_expr.LogicalNot, UP_node)
            return not self.EvalB(node.child)

        elif case(bool_expr_e.LogicalAnd):
            node = cast(bool_expr.LogicalAnd, UP_node)
            # Short-circuit: a false left side decides the result.
            if not self.EvalB(node.left):
                return False
            return self.EvalB(node.right)

        elif case(bool_expr_e.LogicalOr):
            node = cast(bool_expr.LogicalOr, UP_node)
            # Short-circuit: a true left side decides the result.
            if self.EvalB(node.left):
                return True
            return self.EvalB(node.right)

        elif case(bool_expr_e.Unary):
            node = cast(bool_expr.Unary, UP_node)
            op_id = node.op_id
            arg = self._EvalCompoundWord(node.child)

            # Dispatch on the operand type.  (arg_type could be static in
            # the LST?)
            arg_type = consts.BoolArgType(op_id)

            if arg_type == bool_arg_type_e.Path:
                return bool_stat.DoUnaryOp(op_id, arg)

            if arg_type == bool_arg_type_e.Str:
                if op_id == Id.BoolUnary_z:
                    return not bool(arg)
                if op_id == Id.BoolUnary_n:
                    return bool(arg)

                raise AssertionError(op_id)  # should never happen

            if arg_type == bool_arg_type_e.Other:
                if op_id == Id.BoolUnary_t:
                    return bool_stat.isatty(arg, node.child)

                # See whether 'set -o' options have been set
                if op_id == Id.BoolUnary_o:
                    opt_num = consts.OptionNum(arg)
                    # NOTE(review): 0 presumably means the name isn't a
                    # known option -- confirm against consts.OptionNum.
                    if opt_num == 0:
                        return False
                    return self.exec_opts.opt0_array[opt_num]

                if op_id == Id.BoolUnary_v:
                    return self._IsDefined(arg, loc.Word(node.child))

                e_die("%s isn't implemented" %
                      ui.PrettyId(op_id))  # implicit location

            raise AssertionError(arg_type)

        elif case(bool_expr_e.Binary):
            node = cast(bool_expr.Binary, UP_node)
            op_id = node.op_id

            # The right operand of glob and regex operators is evaluated
            # with the matching quoting flag; everything else uses 0.
            eval_flags = 0
            with switch(op_id) as case2:
                if case2(Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
                         Id.BoolBinary_GlobNEqual):
                    eval_flags |= word_eval.QUOTE_FNMATCH
                elif case2(Id.BoolBinary_EqualTilde):
                    eval_flags |= word_eval.QUOTE_ERE

            lhs = self._EvalCompoundWord(node.left)
            rhs = self._EvalCompoundWord(node.right, eval_flags)

            # Dispatch on the operand type.
            arg_type = consts.BoolArgType(op_id)

            if arg_type == bool_arg_type_e.Path:
                return bool_stat.DoBinaryOp(op_id, lhs, rhs)

            if arg_type == bool_arg_type_e.Int:
                # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
                # Bash also allows [[ 1+2 -eq 3 ]].
                left_int = self._StringToBigIntOrError(lhs,
                                                       blame_word=node.left)
                right_int = self._StringToBigIntOrError(
                    rhs, blame_word=node.right)

                if op_id == Id.BoolBinary_eq:
                    return mops.Equal(left_int, right_int)
                if op_id == Id.BoolBinary_ne:
                    return not mops.Equal(left_int, right_int)
                if op_id == Id.BoolBinary_gt:
                    return mops.Greater(left_int, right_int)
                if op_id == Id.BoolBinary_ge:
                    return (mops.Greater(left_int, right_int) or
                            mops.Equal(left_int, right_int))
                if op_id == Id.BoolBinary_lt:
                    # a < b is expressed as b > a
                    return mops.Greater(right_int, left_int)
                if op_id == Id.BoolBinary_le:
                    return (mops.Greater(right_int, left_int) or
                            mops.Equal(left_int, right_int))

                raise AssertionError(op_id)  # should never happen

            if arg_type == bool_arg_type_e.Str:
                # The nocasematch option is read before any operator check.
                fnmatch_flags = 0
                if self.exec_opts.nocasematch():
                    fnmatch_flags = FNM_CASEFOLD

                if (op_id == Id.BoolBinary_GlobEqual or
                        op_id == Id.BoolBinary_GlobDEqual):
                    # The right operand is the pattern, the left the string.
                    return libc.fnmatch(rhs, lhs, fnmatch_flags)

                if op_id == Id.BoolBinary_GlobNEqual:
                    return not libc.fnmatch(rhs, lhs, fnmatch_flags)

                if (op_id == Id.BoolBinary_Equal or
                        op_id == Id.BoolBinary_DEqual):
                    return lhs == rhs

                if op_id == Id.BoolBinary_NEqual:
                    return lhs != rhs

                if op_id == Id.BoolBinary_EqualTilde:
                    # TODO: Log the string and regex to --debug-file
                    regex_flags = 0
                    if self.exec_opts.nocasematch():
                        regex_flags = REG_ICASE

                    try:
                        match_indices = libc.regex_search(
                            rhs, regex_flags, lhs, 0)
                    except ValueError as e:
                        # Status 2 indicates a regex parse error.  This is
                        # fatal in OSH but not in bash, which treats [[
                        # like a command with an exit code.
                        e_die_status(2, e.message, loc.Word(node.right))

                    if match_indices is None:
                        self.mem.SetRegexMatch(regex_match.No)
                        return False

                    self.mem.SetRegexMatch(
                        RegexMatch(lhs, match_indices, eggex_ops.No))
                    return True

                if op_id == Id.Op_Less:
                    return str_cmp(lhs, rhs) < 0

                if op_id == Id.Op_Great:
                    return str_cmp(lhs, rhs) > 0

                raise AssertionError(op_id)  # should never happen

    raise AssertionError(node.tag())