frontend/consts.py

OILS / frontend / consts.py View on Github | oilshell.org

340 lines, 176 significant

1	#!/usr/bin/env python2
2	"""Consts.py."""
3	from __future__ import print_function
4
5	from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
6	bool_arg_type_t, opt_group_i)
7	from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
8	from frontend import builtin_def
9	from frontend import lexer_def
10	from frontend import option_def
11
12	from typing import Tuple, Optional, TYPE_CHECKING
13	if TYPE_CHECKING:
14	from _devbuild.gen.option_asdl import option_t, builtin_t
15
# Sentinel returned by the lookup helpers in this module when nothing matches
# (also used for Resolve).
NO_INDEX = 0

# Re-exported so callers can write consts::STRICT_ALL, etc.  Each name is
# assigned explicitly to satisfy MyPy.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE

PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS

# Options whose builtin is 'set': parallel lists of indices and names, both in
# option_def._SORTED order.
SET_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'set'
]
SET_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'set'
]

# Options whose builtin is 'shopt': same layout as the 'set' lists above.
SHOPT_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'shopt'
]
SHOPT_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'shopt'
]

# Used to print
VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS

# Used by builtin_comp.py
BUILTIN_NAMES = builtin_def.BUILTIN_NAMES

# The 'compgen' and 'type' builtins introspect on keywords and builtins.
# '{' is added by hand because it is not in our lexer list.
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS] + ['{']
47
48
def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """Return the Kind for a token Id.

    Used to make coarse-grained parsing decisions.  Raises if `id_` has no
    entry in ID_TO_KIND.
    """
    # Imported at call time rather than at module level to break a circular
    # dependency.
    from _devbuild.gen.id_kind import ID_TO_KIND

    kind = ID_TO_KIND[id_]
    return kind
55
56
def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t
    """Return the BOOL_ARG_TYPES entry for the given Id.

    Raises if `id_` has no entry in BOOL_ARG_TYPES.
    """
    # Imported at call time rather than at module level to break a circular
    # dependency.
    from _devbuild.gen.id_kind import BOOL_ARG_TYPES

    arg_type = BOOL_ARG_TYPES[id_]
    return arg_type
62
63
64	#
65	# Redirect Tables associated with IDs
66	#
67
# Redirect operator Id -> file descriptor assumed when the left-hand
# descriptor is omitted.
REDIR_DEFAULT_FD = {
    # --- operators that take a filename ---
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write
    # bash &> and &>>
    Id.Redir_AndGreat: 1,
    Id.Redir_AndDGreat: 1,

    # --- operators that take a descriptor ---
    Id.Redir_GreatAnd: 1,  # echo >&2 means echo 1>&2
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think

    # --- here word ---
    Id.Redir_TLess: 0,

    # --- here docs (included in this table, unlike REDIR_ARG_TYPES) ---
    Id.Redir_DLess: 0,
    Id.Redir_DLessDash: 0,
}
88
# Redirect operator Id -> kind of argument that follows the operator.
REDIR_ARG_TYPES = {
    # --- operators that take a filename ---
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,
    # bash &> and &>>
    Id.Redir_AndGreat: redir_arg_type_e.Path,
    Id.Redir_AndDGreat: redir_arg_type_e.Path,

    # --- operators that take a descriptor ---
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,

    # --- here word ---
    Id.Redir_TLess: redir_arg_type_e.Here,

    # note: here docs aren't included
}
106
107
def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    """Return the argument type for a redirect operator Id.

    Raises KeyError for Ids missing from REDIR_ARG_TYPES (here docs are not
    in that table).
    """
    arg_type = REDIR_ARG_TYPES[id_]
    return arg_type
111
112
def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    """Return the default file descriptor for a redirect operator Id.

    Raises KeyError for Ids missing from REDIR_DEFAULT_FD.
    """
    fd = REDIR_DEFAULT_FD[id_]
    return fd
116
117
118	#
119	# Builtins
120	#
121
# Builtin name -> spec object; the Lookup*Builtin() helpers read its .kind and
# .index attributes.
_BUILTIN_DICT = builtin_def.BuiltinDict()
123
124
def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of `argv0` if it names a special builtin.

    Returns NO_INDEX when `argv0` is unknown or is a builtin of another kind.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'special':
        return NO_INDEX
    return spec.index
133
134
def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of `argv0` if it names an assignment builtin.

    Returns NO_INDEX when `argv0` is unknown or is a builtin of another kind.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'assign':
        return NO_INDEX
    return spec.index
143
144
def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of `argv0` if it names a normal builtin.

    "Normal" means any builtin whose kind is neither special nor assignment.
    Returns NO_INDEX when `argv0` is unknown or is a builtin of another kind.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'normal':
        return NO_INDEX
    return spec.index
153
154
def OptionName(opt_num):
    # type: (option_t) -> str
    """Return the option name stored at index `opt_num`."""
    name = option_def.OPTION_NAMES[opt_num]
    return name
159
160
# Option group name -> opt_group_i constant.  Several spellings may map to the
# same group.
OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,

    # Aliases to deprecate
    # NOTE(review): presumably only the legacy 'oil:' spellings are meant to
    # be deprecated -- confirm.
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,
}
170
171
def OptionGroupNum(s):
    # type: (str) -> int
    """Return the group number for option group name `s`.

    Returns NO_INDEX (0) when `s` is not a known group name.
    """
    if s in OPTION_GROUPS:
        return OPTION_GROUPS[s]
    return NO_INDEX
175
176
# Option name -> option number; queried by OptionNum().
_OPTION_DICT = option_def.OptionDict()
178
179
def OptionNum(s):
    # type: (str) -> int
    """Return the option number for option name `s`.

    Returns NO_INDEX (0) when `s` is not a known option, which is the same
    "not found" sentinel that OptionGroupNum() uses.
    """
    return _OPTION_DICT.get(s, NO_INDEX)
183
184
# Names taken from lexer_def.CONTROL_FLOW, in table order; IsControlFlow()
# tests membership in this list.
_CONTROL_FLOW_NAMES = [name for _, name, _ in lexer_def.CONTROL_FLOW]

# Reverse table, Id -> name, used by ControlFlowName().  Built with a dict
# comprehension so that no loop variable is left behind as a module global.
_CONTROL_FLOW_LOOKUP = {
    id_: name for _, name, id_ in lexer_def.CONTROL_FLOW
}
189
190
def ControlFlowName(id_):
    # type: (int) -> str
    """Return the name registered for a control flow Id (for tracing).

    Raises KeyError if `id_` is not in the control flow table.
    """
    name = _CONTROL_FLOW_LOOKUP[id_]
    return name
195
196
def IsControlFlow(name):
    # type: (str) -> bool
    """Return True if `name` is one of the control flow names."""
    return (name in _CONTROL_FLOW_NAMES)
200
201
def IsKeyword(name):
    # type: (str) -> bool
    """Return True if `name` is listed in OSH_KEYWORD_NAMES."""
    return (name in OSH_KEYWORD_NAMES)
205
206
207	#
208	# osh/prompt.py and osh/word_compile.py
209	#
210
# Character following a backslash -> the character the escape stands for.
_ONE_CHAR_C = {
    '0': '\0',
    'a': '\a',
    'b': '\b',
    'e': '\x1b',
    'E': '\x1b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    # for $'' only, not echo -e
    "'": "'",
    # not sure why this is escaped within $''
    '"': '"',
    # for JSON \/ only
    '/': '/',
}


def LookupCharC(c):
    # type: (str) -> str
    """Translate the character after a backslash into the escaped character.

    Fatal if not present: an unknown `c` raises KeyError.
    """
    escaped = _ONE_CHAR_C[c]
    return escaped
233
234
# NOTE: Prompt chars and printf are inconsistent, e.g. \E is \e in printf, but
# not in PS1.  That is why this table is separate from the C-style one and has
# no 'E' entry.
_ONE_CHAR_PROMPT = {
    'a': '\a',
    'e': '\x1b',
    'r': '\r',
    'n': '\n',
    '\\': '\\',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Translate the character after a backslash in a prompt string.

    Returns None if not present, rather than raising.
    """
    return _ONE_CHAR_PROMPT.get(c, None)
250
251
252	#
253	# Constants used by osh/split.py
254	#
255
256	# IFS splitting is complicated in general. We handle it with three concepts:
257	#
258	# - CH.* - Kinds of characters (edge labels)
259	# - ST.* - States (node labels)
260	# - EMIT.* Actions
261	#
# The Split() loop (see osh/split.py) classifies characters, follows state
# transitions, and emits spans. A span is an (ignored Bool, end_index Int) pair.
264
265	# As an example, consider this string:
266	# 'a _ b'
267	#
# The character classes are:
#
#   a      ' '        _        ' '        b
#   Black  DE_White   DE_Gray  DE_White   Black
#
# The states are:
#
#   a      ' '        _        ' '        b
#   Black  DE_White1  DE_Gray  DE_White2  Black
277	#
278	# DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
279	#
280	# The spans emitted are:
281	#
282	# (part 'a', ignored ' _ ', part 'b')
283
284	# SplitForRead() will check if the last two spans are a \ and \\n. Easy.
285
286	# Shorter names for state machine enums
287	from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
288	from _devbuild.gen.runtime_asdl import emit_i as EMIT
289	from _devbuild.gen.runtime_asdl import char_kind_i as CH
290	from _devbuild.gen.runtime_asdl import state_i as ST
291
# Transition table of the IFS splitting state machine:
#
#   (current state, character kind) -> (next state, action to emit)
#
# The trailing comment on each edge shows example input that takes that edge.
_IFS_EDGES = {
    # --- from ST.Start ---
    # Whitespace should have been stripped, so leading whitespace is invalid.
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''

    # --- from ST.DE_White1 ---
    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # ' '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too.  This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '

    # --- from ST.DE_Gray ---
    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    # NOTE(review): the matching edges from DE_White1 and DE_White2 go to
    # ST.Backslash, but this one goes to ST.Black -- confirm that is intended.
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '

    # --- from ST.DE_White2 ---
    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '

    # --- from ST.Black ---
    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    # --- from ST.Backslash ---
    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character.  That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}
335
336
def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Follow one edge of the IFS state machine.

    Looks up the (state, character kind) pair in _IFS_EDGES and returns the
    (next state, emit action) pair stored there.  Raises KeyError if the
    table has no such edge.
    """
    edge_key = (state, ch)
    return _IFS_EDGES[edge_key]