builtin/read

OILS / builtin / read_osh.py View on Github | oilshell.org

513 lines, 294 significant

1	from __future__ import print_function
2
3	from errno import EINTR
4
5	from _devbuild.gen import arg_types
6	from _devbuild.gen.runtime_asdl import (span_e, cmd_value)
7	from _devbuild.gen.syntax_asdl import source, loc_t
8	from _devbuild.gen.value_asdl import value, LeftName
9	from core import alloc
10	from core import error
11	from core.error import e_die
12	from core import pyos
13	from core import pyutil
14	from core import state
15	from core import ui
16	from core import vm
17	from frontend import flag_util
18	from frontend import reader
19	from frontend import typed_args
20	from mycpp import mops
21	from mycpp import mylib
22	from mycpp.mylib import log, STDIN_FILENO
23
24	import posix_ as posix
25
26	from typing import Tuple, List, Any, TYPE_CHECKING
27	if TYPE_CHECKING:
28	from _devbuild.gen.runtime_asdl import span_t
29	from frontend.parse_lib import ParseContext
30	from frontend import args
31	from osh.cmd_eval import CommandEvaluator
32	from osh.split import SplitContext
33
34	_ = log
35
36	# The Read builtin splits using IFS.
37	#
38	# Summary:
39	# - Split with IFS, except that a backslash (\) can escape an IFS character.
40	# This differs from the algorithm for splitting words (at least the way I've represented it.)
41
42	# Bash manual:
43	# - If there are more words than names, the remaining words and their
44	# intervening delimiters are assigned to the last name.
45	# - If there are fewer words read from the input stream than names, the
46	# remaining names are assigned empty values.
47	# - The characters in the value of the IFS variable are used to split the line
48	# into words using the same rules the shell uses for expansion (described
49	# above in Word Splitting).
50	# - The backslash character '\' may be used to remove any special meaning for
51	# the next character read and for line continuation.
52
53
54	def _AppendParts(
55	s, # type: str
56	spans, # type: List[Tuple[span_t, int]]
57	max_results, # type: int
58	join_next, # type: bool
59	parts, # type: List[mylib.BufWriter]
60	):
61	# type: (...) -> Tuple[bool, bool]
62	"""Append to 'parts', for the 'read' builtin.
63
64	Similar to _SpansToParts in osh/split.py
65
66	Args:
67	s: The original string
68	spans: List of (span, end_index)
69	max_results: the maximum number of parts we want
70	join_next: Whether to join the next span to the previous part. This
71	happens in two cases:
72	- when we have '\ '
73	- and when we have more spans # than max_results.
74	"""
75	start_index = 0
76	# If the last span was black, and we get a backslash, set join_next to merge
77	# two black spans.
78	last_span_was_black = False
79
80	for span_type, end_index in spans:
81	if span_type == span_e.Black:
82	if join_next and len(parts):
83	parts[-1].write(s[start_index:end_index])
84	join_next = False
85	else:
86	buf = mylib.BufWriter()
87	buf.write(s[start_index:end_index])
88	parts.append(buf)
89	last_span_was_black = True
90
91	elif span_type == span_e.Delim:
92	if join_next:
93	parts[-1].write(s[start_index:end_index])
94	join_next = False
95	last_span_was_black = False
96
97	elif span_type == span_e.Backslash:
98	if last_span_was_black:
99	join_next = True
100	last_span_was_black = False
101
102	if max_results and len(parts) >= max_results:
103	join_next = True
104
105	start_index = end_index
106
107	done = True
108	if len(spans):
109	#log('%s %s', s, spans)
110	#log('%s', spans[-1])
111	last_span_type, _ = spans[-1]
112	if last_span_type == span_e.Backslash:
113	done = False
114
115	#log('PARTS %s', parts)
116	return done, join_next
117
118
119	#
120	# Three read() wrappers for 'read' builtin that RunPendingTraps: _ReadN,
121	# _ReadPortion, and ReadLineSlowly
122	#
123
124
125	def _ReadN(num_bytes, cmd_ev):
126	# type: (int, CommandEvaluator) -> str
127	chunks = [] # type: List[str]
128	bytes_left = num_bytes
129	while bytes_left > 0:
130	n, err_num = pyos.Read(STDIN_FILENO, bytes_left,
131	chunks) # read up to n bytes
132
133	if n < 0:
134	if err_num == EINTR:
135	cmd_ev.RunPendingTraps()
136	# retry after running traps
137	else:
138	raise pyos.ReadError(err_num)
139
140	elif n == 0: # EOF
141	break
142
143	else:
144	bytes_left -= n
145
146	return ''.join(chunks)
147
148
def _ReadPortion(delim_byte, max_chars, cmd_ev):
    # type: (int, int, CommandEvaluator) -> Tuple[str, bool]
    """Read a portion of stdin, one byte at a time.

    Reads until the delimiter or until max_chars bytes have been collected,
    whichever comes first.  max_chars is ignored if it's negative (-1 means
    unset).

    The delimiter is not included in the result.

    Args:
      delim_byte: byte value that terminates the portion
      max_chars: maximum number of bytes to collect, or negative for no limit
      cmd_ev: used to RunPendingTraps() when a read is interrupted (EINTR)

    Returns:
      (portion, eof): the collected bytes as a string, and whether we stopped
      because of EOF.

    Raises:
      pyos.ReadError: on any read error other than EINTR
    """
    eof = False
    ch_array = []  # type: List[int]
    bytes_read = 0
    while True:
        if max_chars >= 0 and bytes_read >= max_chars:
            break

        ch, err_num = pyos.ReadByte(0)
        if ch < 0:
            if err_num == EINTR:
                cmd_ev.RunPendingTraps()
                # Retry after running traps.  Nothing was read, so this
                # attempt must NOT count against max_chars.
                continue
            raise pyos.ReadError(err_num)

        if ch == pyos.EOF_SENTINEL:
            eof = True
            break

        if ch == delim_byte:
            break

        ch_array.append(ch)
        # Only bytes that were actually stored count toward the limit
        bytes_read += 1

    return pyutil.ChArrayToString(ch_array), eof
186
187
188	# sys.stdin.readline() in Python has its own buffering which is incompatible
189	# with shell semantics. dash, mksh, and zsh all read a single byte at a
190	# time with read(0, 1).
191
192	# TODO:
193	# - ReadLineSlowly should have keep_newline (mapfile -t)
194	# - this halves memory usage!
195
196
def ReadLineSlowly(cmd_ev):
    # type: (CommandEvaluator) -> str
    """Read a line from stdin, one byte at a time.

    Stops after a newline byte (which is kept in the result) or at EOF.  When
    a read fails with EINTR, pending traps are run and the read is retried;
    any other error raises pyos.ReadError.
    """
    line_bytes = []  # type: List[int]
    while True:
        byte, err_num = pyos.ReadByte(0)

        if byte < 0:
            if err_num != EINTR:
                raise pyos.ReadError(err_num)
            cmd_ev.RunPendingTraps()
            # fall through and retry after running traps

        elif byte == pyos.EOF_SENTINEL:
            break

        else:
            line_bytes.append(byte)

        # TODO: Add option to omit newline
        if byte == pyos.NEWLINE_CH:
            break

    return pyutil.ChArrayToString(line_bytes)
222
223
def ReadAll():
    # type: () -> str
    """Read all of stdin, up to EOF, and return it as one string.

    Similar to command sub in core/executor.py.

    Raises pyos.ReadError on any read error other than EINTR.
    """
    chunks = []  # type: List[str]
    while True:
        n, err_num = pyos.Read(0, 4096, chunks)

        if n == 0:  # EOF
            break

        # On EINTR we retry only.  Like read --line (and command sub), read
        # --all doesn't run traps.  It would be a bit weird to run every 4096
        # bytes.
        if n < 0 and err_num != EINTR:
            raise pyos.ReadError(err_num)

    return ''.join(chunks)
246
247
class ctx_TermAttrs(object):
    """Context manager that pushes terminal attributes and pops them on exit.

    pyos.PushTermAttrs() is called in the constructor, and
    pyos.PopTermAttrs() in __exit__ with the values the push returned.
    """

    def __init__(self, fd, local_modes):
        # type: (int, int) -> None

        # We change term_attrs[3] in Python, which is lflag "local modes"
        saved_modes, saved_attrs = pyos.PushTermAttrs(fd, local_modes)

        # Workaround: destructured assignment into members doesn't work, so
        # the tuple is unpacked into locals first.
        self.fd = fd
        self.orig_local_modes = saved_modes
        self.term_attrs = saved_attrs

    def __enter__(self):
        # type: () -> None
        # Nothing to do: the attributes were already pushed in __init__
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        pyos.PopTermAttrs(self.fd, self.orig_local_modes, self.term_attrs)
268
269
class Read(vm._Builtin):
    """The 'read' builtin.

    Run() is the entry point.  It parses flags, then either handles the YSH
    forms (--all, and the rejected --line) or reads from stdin, splits the
    input, and assigns the pieces to the named variables.
    """

    def __init__(
            self,
            splitter,  # type: SplitContext
            mem,  # type: state.Mem
            parse_ctx,  # type: ParseContext
            cmd_ev,  # type: CommandEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.splitter = splitter
        self.mem = mem
        self.parse_ctx = parse_ctx
        self.cmd_ev = cmd_ev
        self.errfmt = errfmt
        self.stdin_ = mylib.Stdin()

    # Was --qsn, might be restored as --j8-word or --j8-line
    if 0:
        #from data_lang import qsn_native
        def _MaybeDecodeLine(self, line):
            # type: (str) -> str
            """Raises error.Parse if line isn't valid."""

            # Lines that don't start with a single quote aren't QSN.  They may
            # contain a single quote internally, like:
            #
            # Fool's Gold
            if not line.startswith("'"):
                return line

            arena = self.parse_ctx.arena
            line_reader = reader.StringLineReader(line, arena)
            lexer = self.parse_ctx.MakeLexer(line_reader)

            # The parser only yields valid tokens:
            #     Char_Literals, Char_OneChar, Char_Hex, Char_UBraced
            # So we can use word_compile.EvalCStringToken, which is also used
            # for $''.
            # Important: we don't generate Id.Unknown_Backslash because that is
            # valid in echo -e.  We just make it Id.Unknown_Tok?

            # TODO: read location info should know about stdin, and redirects,
            # and pipelines?
            with alloc.ctx_SourceCode(arena, source.Stdin('')):
                #tokens = qsn_native.Parse(lexer)
                pass
            #tmp = [word_compile.EvalCStringToken(t) for t in tokens]
            #return ''.join(tmp)
            return ''

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Run the builtin; read errors become a message plus status 1."""
        try:
            return self._Run(cmd_val)
        except pyos.ReadError as e:  # different paths for read -d, etc.
            # don't quote code since YSH errexit will likely quote
            self.errfmt.PrintMessage("Oils read error: %s" %
                                     posix.strerror(e.err_num))
        except (IOError, OSError) as e:  # different paths for read -d, etc.
            self.errfmt.PrintMessage("Oils read I/O error: %s" %
                                     pyutil.strerror(e))
        # Only reached when one of the handlers above ran
        return 1

    def _ReadYsh(self, arg, arg_r, cmd_val):
        # type: (arg_types.read, args.Reader, cmd_value.Argv) -> int
        """Handle the YSH forms of read.

        Usage:

          read --all       # sets _reply
          read --all (&x)  # sets x

        Invalid for now:

          read (&x)        # YSH doesn't have token splitting
                           # we probably want read --row too

        --line is rejected with a usage error, and so is any extra argument.
        """
        place = None  # type: value.Place

        if cmd_val.typed_args:
            # A place was passed as a typed arg, e.g. (&x)
            rd = typed_args.ReaderForProc(cmd_val)
            place = rd.PosPlace()
            rd.Done()

            blame_loc = cmd_val.typed_args.left  # type: loc_t

        else:
            # No typed arg: the result goes into _reply
            var_name = '_reply'

            blame_loc = cmd_val.arg_locs[0]
            place = value.Place(LeftName(var_name, blame_loc),
                                self.mem.TopNamespace())

        next_arg, next_loc = arg_r.Peek2()
        if next_arg is not None:
            raise error.Usage('got extra argument', next_loc)

        if arg.line:  # read --line is buffered, calls getline()
            raise error.Usage(
                "no longer supports --line; please use read -r instead (unbuffered I/O)",
                next_loc)

        if not arg.all:
            # The caller only dispatches here when arg.line or arg.all is set
            raise AssertionError()

        # read --all
        contents = ReadAll()
        self.mem.SetPlace(place, value.Str(contents), blame_loc)
        return 0

    def _Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Parse flags, dispatch, and adjust terminal modes around _Read()."""
        attrs, arg_r = flag_util.ParseCmdVal('read',
                                             cmd_val,
                                             accept_typed_args=True)
        arg = arg_types.read(attrs.attrs)
        names = arg_r.Rest()

        #if arg.q and not arg.line:
        #  e_usage('--qsn can only be used with --line', loc.Missing)

        if arg.line or arg.all:
            return self._ReadYsh(arg, arg_r, cmd_val)

        if cmd_val.typed_args:
            raise error.Usage(
                "doesn't accept typed args without --line or --all",
                cmd_val.typed_args.left)

        if arg.t >= 0.0:
            if arg.t == 0.0:
                # -t 0 only reports whether input is available
                if pyos.InputAvailable(STDIN_FILENO):
                    return 0
                return 1
            e_die("read -t isn't implemented (except t=0)")

        # Terminal flags to change while reading; stays 0 when stdin isn't a
        # tty.
        bits = 0
        if self.stdin_.isatty():
            # -d and -n should be unbuffered
            if arg.d is not None or mops.BigTruncate(arg.n) >= 0:
                bits |= pyos.TERM_ICANON
            if arg.s:  # silent
                bits |= pyos.TERM_ECHO

            if arg.p is not None:  # only if tty
                mylib.Stderr().write(arg.p)

        if bits != 0:
            # The complement of 'bits' is handed to ctx_TermAttrs, which pops
            # the terminal attributes again when the block exits.
            with ctx_TermAttrs(STDIN_FILENO, ~bits):
                status = self._Read(arg, names)
        else:
            status = self._Read(arg, names)
        return status

    def _Read(self, arg, names):
        # type: (arg_types.read, List[str]) -> int
        """Read from stdin and assign the result to variables.

        With -N, exactly that many bytes are requested and stored unsplit in
        the first name (default REPLY).  Otherwise, portions are read up to the
        delimiter, split with self.splitter, and assigned to 'names' (or to
        the array named by -a).

        Note: 'names' gets 'REPLY' appended in place when it is empty.

        Returns 0 on success, or 1 when EOF was hit (or, with -N, when fewer
        bytes than requested were read).
        """
        # read a certain number of bytes (-1 means unset)
        arg_N = mops.BigTruncate(arg.N)
        if arg_N >= 0:
            # default variable name is REPLY
            name = names[0] if len(names) > 0 else 'REPLY'

            data = _ReadN(arg_N, self.cmd_ev)
            state.BuiltinSetString(self.mem, name, data)

            # Clear extra names, as bash does
            for i in xrange(1, len(names)):
                state.BuiltinSetString(self.mem, names[i], '')

            # Did we read all the bytes we wanted?
            if len(data) == arg_N:
                return 0
            return 1

        if len(names) == 0:
            names.append('REPLY')

        # leftover words assigned to the last name
        if arg.a is not None:
            max_results = 0  # no max
        else:
            max_results = len(names)

        # Defaults: honor -r, and read a line
        raw = arg.r
        delim_byte = pyos.NEWLINE_CH
        if arg.Z:  # -0 is synonym for -r -d ''
            raw = True
            delim_byte = 0
        elif arg.d is not None:
            # -d '' delimits by NUL
            delim_byte = ord(arg.d[0]) if len(arg.d) > 0 else 0

        allow_escape = not raw

        # We have to read more than one line if there is a line continuation
        # (and it's not -r).
        parts = []  # type: List[mylib.BufWriter]
        join_next = False
        status = 0
        while True:
            line, eof = _ReadPortion(delim_byte, mops.BigTruncate(arg.n),
                                     self.cmd_ev)

            if eof:
                # status 1 to terminate loop.  (This is true even though we
                # set variables).
                status = 1

            if len(line) == 0:
                break

            spans = self.splitter.SplitForRead(line, allow_escape)
            done, join_next = _AppendParts(line, spans, max_results,
                                           join_next, parts)
            if done:
                break

        entries = [buf.getvalue() for buf in parts]

        if arg.a is not None:
            state.BuiltinSetArray(self.mem, arg.a, entries)
            return status

        num_parts = len(entries)
        for i in xrange(max_results):
            # if there are too many variables, the extras get ''
            word = entries[i] if i < num_parts else ''

            var_name = names[i]
            if var_name.startswith(':'):
                var_name = var_name[1:]
            state.BuiltinSetString(self.mem, var_name, word)

        return status