OILS / builtin / read_osh.py View on Github | oilshell.org

513 lines, 294 significant
1from __future__ import print_function
2
3from errno import EINTR
4
5from _devbuild.gen import arg_types
6from _devbuild.gen.runtime_asdl import (span_e, cmd_value)
7from _devbuild.gen.syntax_asdl import source, loc_t
8from _devbuild.gen.value_asdl import value, LeftName
9from core import alloc
10from core import error
11from core.error import e_die
12from core import pyos
13from core import pyutil
14from core import state
15from core import ui
16from core import vm
17from frontend import flag_util
18from frontend import reader
19from frontend import typed_args
20from mycpp import mops
21from mycpp import mylib
22from mycpp.mylib import log, STDIN_FILENO
23
24import posix_ as posix
25
26from typing import Tuple, List, Any, TYPE_CHECKING
27if TYPE_CHECKING:
28 from _devbuild.gen.runtime_asdl import span_t
29 from frontend.parse_lib import ParseContext
30 from frontend import args
31 from osh.cmd_eval import CommandEvaluator
32 from osh.split import SplitContext
33
34_ = log
35
# The Read builtin splits using IFS.
#
# Summary:
# - Split with IFS, except that \ can escape IFS characters!  This is
#   different from the algorithm for splitting words (at least the way I've
#   represented it.)

# Bash manual:
# - If there are more words than names, the remaining words and their
#   intervening delimiters are assigned to the last name.
# - If there are fewer words read from the input stream than names, the
#   remaining names are assigned empty values.
# - The characters in the value of the IFS variable are used to split the line
#   into words using the same rules the shell uses for expansion (described
#   above in Word Splitting).
# - The backslash character '\' may be used to remove any special meaning for
#   the next character read and for line continuation.
52
53
54def _AppendParts(
55 s, # type: str
56 spans, # type: List[Tuple[span_t, int]]
57 max_results, # type: int
58 join_next, # type: bool
59 parts, # type: List[mylib.BufWriter]
60):
61 # type: (...) -> Tuple[bool, bool]
62 """Append to 'parts', for the 'read' builtin.
63
64 Similar to _SpansToParts in osh/split.py
65
66 Args:
67 s: The original string
68 spans: List of (span, end_index)
69 max_results: the maximum number of parts we want
70 join_next: Whether to join the next span to the previous part. This
71 happens in two cases:
72 - when we have '\ '
73 - and when we have more spans # than max_results.
74 """
75 start_index = 0
76 # If the last span was black, and we get a backslash, set join_next to merge
77 # two black spans.
78 last_span_was_black = False
79
80 for span_type, end_index in spans:
81 if span_type == span_e.Black:
82 if join_next and len(parts):
83 parts[-1].write(s[start_index:end_index])
84 join_next = False
85 else:
86 buf = mylib.BufWriter()
87 buf.write(s[start_index:end_index])
88 parts.append(buf)
89 last_span_was_black = True
90
91 elif span_type == span_e.Delim:
92 if join_next:
93 parts[-1].write(s[start_index:end_index])
94 join_next = False
95 last_span_was_black = False
96
97 elif span_type == span_e.Backslash:
98 if last_span_was_black:
99 join_next = True
100 last_span_was_black = False
101
102 if max_results and len(parts) >= max_results:
103 join_next = True
104
105 start_index = end_index
106
107 done = True
108 if len(spans):
109 #log('%s %s', s, spans)
110 #log('%s', spans[-1])
111 last_span_type, _ = spans[-1]
112 if last_span_type == span_e.Backslash:
113 done = False
114
115 #log('PARTS %s', parts)
116 return done, join_next
117
118
119#
120# Three read() wrappers for 'read' builtin that RunPendingTraps: _ReadN,
121# _ReadPortion, and ReadLineSlowly
122#
123
124
125def _ReadN(num_bytes, cmd_ev):
126 # type: (int, CommandEvaluator) -> str
127 chunks = [] # type: List[str]
128 bytes_left = num_bytes
129 while bytes_left > 0:
130 n, err_num = pyos.Read(STDIN_FILENO, bytes_left,
131 chunks) # read up to n bytes
132
133 if n < 0:
134 if err_num == EINTR:
135 cmd_ev.RunPendingTraps()
136 # retry after running traps
137 else:
138 raise pyos.ReadError(err_num)
139
140 elif n == 0: # EOF
141 break
142
143 else:
144 bytes_left -= n
145
146 return ''.join(chunks)
147
148
def _ReadPortion(delim_byte, max_chars, cmd_ev):
    # type: (int, int, CommandEvaluator) -> Tuple[str, bool]
    """Read a portion of stdin, one byte at a time.

    Reads until the delimiter or max_chars bytes, whichever comes first.  A
    negative max_chars (e.g. -1) means there is no limit.

    The delimiter is consumed but not included in the result.

    Args:
      delim_byte: Byte value that ends the portion
      max_chars: Maximum number of bytes to return, or negative for no limit
      cmd_ev: Used to run pending traps when a read is interrupted

    Returns:
      (portion, eof), where eof is True if the end of input was hit before
      the delimiter or the limit.

    Raises:
      pyos.ReadError: if the read fails with anything other than EINTR
    """
    eof = False
    ch_array = []  # type: List[int]
    while True:
        # Only bytes we actually stored count toward the limit.  A read that
        # was interrupted by a signal is retried without using up the budget,
        # so 'read -n N' still returns N bytes when a trap fires mid-read.
        if max_chars >= 0 and len(ch_array) >= max_chars:
            break

        ch, err_num = pyos.ReadByte(0)
        if ch < 0:
            if err_num == EINTR:
                cmd_ev.RunPendingTraps()
                # retry after running traps
            else:
                raise pyos.ReadError(err_num)

        elif ch == pyos.EOF_SENTINEL:
            eof = True
            break

        elif ch == delim_byte:
            break

        else:
            ch_array.append(ch)

    return pyutil.ChArrayToString(ch_array), eof
186
187
188# sys.stdin.readline() in Python has its own buffering which is incompatible
189# with shell semantics. dash, mksh, and zsh all read a single byte at a
190# time with read(0, 1).
191
192# TODO:
193# - ReadLineSlowly should have keep_newline (mapfile -t)
194# - this halves memory usage!
195
196
def ReadLineSlowly(cmd_ev):
    # type: (CommandEvaluator) -> str
    """Read one line from stdin, a byte at a time.

    The trailing newline, if one was read, is kept in the result.  At EOF the
    bytes read so far are returned, which may be the empty string.

    Raises:
      pyos.ReadError: if the read fails with anything other than EINTR
    """
    line_bytes = []  # type: List[int]
    while True:
        ch, err_num = pyos.ReadByte(0)

        if ch < 0:
            if err_num != EINTR:
                raise pyos.ReadError(err_num)
            # Interrupted by a signal: run the trap handlers, then retry
            cmd_ev.RunPendingTraps()
            continue

        if ch == pyos.EOF_SENTINEL:
            break

        line_bytes.append(ch)

        # TODO: Add option to omit newline
        if ch == pyos.NEWLINE_CH:
            break

    return pyutil.ChArrayToString(line_bytes)
222
223
def ReadAll():
    # type: () -> str
    """Read stdin until EOF and return everything as one string.

    Similar to command sub in core/executor.py.

    Raises:
      pyos.ReadError: if the read fails with anything other than EINTR
    """
    pieces = []  # type: List[str]
    while True:
        n, err_num = pyos.Read(0, 4096, pieces)

        if n == 0:  # EOF
            break

        # On EINTR we retry only.  Like read --line (and command sub), read
        # --all doesn't run traps.  It would be a bit weird to run every 4096
        # bytes.
        if n < 0 and err_num != EINTR:
            raise pyos.ReadError(err_num)

    return ''.join(pieces)
246
247
class ctx_TermAttrs(object):
    """Context manager that pushes terminal attributes for a file descriptor.

    pyos.PushTermAttrs is called on construction, and pyos.PopTermAttrs is
    called with the saved values when the 'with' block exits.
    """

    def __init__(self, fd, local_modes):
        # type: (int, int) -> None

        # We change term_attrs[3] in Python, which is lflag "local modes"
        saved_modes, saved_attrs = pyos.PushTermAttrs(fd, local_modes)

        # Workaround: destructured assignment into members doesn't work, so
        # the results go through locals first.
        self.fd = fd
        self.orig_local_modes = saved_modes
        self.term_attrs = saved_attrs

    def __enter__(self):
        # type: () -> None
        # All the work happens in __init__
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Hand back what PushTermAttrs returned, regardless of how the block
        # was left.
        pyos.PopTermAttrs(self.fd, self.orig_local_modes, self.term_attrs)
268
269
class Read(vm._Builtin):
    """The 'read' builtin, for both OSH (-r -d -n -N ...) and YSH (--all)."""

    def __init__(
            self,
            splitter,  # type: SplitContext
            mem,  # type: state.Mem
            parse_ctx,  # type: ParseContext
            cmd_ev,  # type: CommandEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.splitter = splitter
        self.mem = mem
        self.parse_ctx = parse_ctx
        self.cmd_ev = cmd_ev
        self.errfmt = errfmt
        self.stdin_ = mylib.Stdin()

    # Was --qsn, might be restored as --j8-word or --j8-line.  Disabled: the
    # method below is never defined at runtime.
    if 0:
        #from data_lang import qsn_native
        def _MaybeDecodeLine(self, line):
            # type: (str) -> str
            """Raises error.Parse if line isn't valid."""

            # Lines that don't start with a single quote aren't QSN.  They
            # may contain a single quote internally, like:
            #
            # Fool's Gold
            if not line.startswith("'"):
                return line

            arena = self.parse_ctx.arena
            line_reader = reader.StringLineReader(line, arena)
            lexer = self.parse_ctx.MakeLexer(line_reader)

            # The parser only yields valid tokens:
            #     Char_Literals, Char_OneChar, Char_Hex, Char_UBraced
            # So we can use word_compile.EvalCStringToken, which is also used
            # for $''.
            # Important: we don't generate Id.Unknown_Backslash because that
            # is valid in echo -e.  We just make it Id.Unknown_Tok?

            # TODO: read location info should know about stdin, and
            # redirects, and pipelines?
            with alloc.ctx_SourceCode(arena, source.Stdin('')):
                #tokens = qsn_native.Parse(lexer)
                pass
            #tmp = [word_compile.EvalCStringToken(t) for t in tokens]
            #return ''.join(tmp)
            return ''

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Run 'read'; a failed read prints a message and gives status 1."""
        status = 1  # what we return if one of the handlers below fires
        try:
            status = self._Run(cmd_val)
        except pyos.ReadError as e:  # different paths for read -d, etc.
            # don't quote code since YSH errexit will likely quote
            msg = "Oils read error: %s" % posix.strerror(e.err_num)
            self.errfmt.PrintMessage(msg)
        except (IOError, OSError) as e:  # different paths for read -d, etc.
            msg = "Oils read I/O error: %s" % pyutil.strerror(e)
            self.errfmt.PrintMessage(msg)
        return status

    def _ReadYsh(self, arg, arg_r, cmd_val):
        # type: (arg_types.read, args.Reader, cmd_value.Argv) -> int
        """Handle the YSH forms of 'read'.

        Usage:

          read --all               # sets _reply
          read --all (&x)          # sets x

        Invalid for now:

          read (&x)                # YSH doesn't have token splitting
                                   # we probably want read --row too

        Raises:
          error.Usage: for an extra word argument, or for --line, which is no
                       longer supported
        """
        dest = None  # type: value.Place

        if cmd_val.typed_args:  # read --line (&x)
            rd = typed_args.ReaderForProc(cmd_val)
            dest = rd.PosPlace()
            rd.Done()

            blame_loc = cmd_val.typed_args.left  # type: loc_t

        else:  # read --line
            # No place given, so the result goes into _reply
            blame_loc = cmd_val.arg_locs[0]
            dest = value.Place(LeftName('_reply', blame_loc),
                               self.mem.TopNamespace())

        next_arg, next_loc = arg_r.Peek2()
        if next_arg is not None:
            raise error.Usage('got extra argument', next_loc)

        if arg.line:  # read --line is buffered, calls getline()
            raise error.Usage(
                "no longer supports --line; please use read -r instead (unbuffered I/O)",
                next_loc)

        if not arg.all:
            # arg.line or arg.all should be true
            raise AssertionError()

        # read --all
        everything = ReadAll()
        self.mem.SetPlace(dest, value.Str(everything), blame_loc)
        return 0

    def _Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Parse flags, pick the YSH or OSH path, and set up the terminal."""
        attrs, arg_r = flag_util.ParseCmdVal('read',
                                             cmd_val,
                                             accept_typed_args=True)
        arg = arg_types.read(attrs.attrs)
        names = arg_r.Rest()

        #if arg.q and not arg.line:
        #  e_usage('--qsn can only be used with --line', loc.Missing)

        if arg.line or arg.all:
            return self._ReadYsh(arg, arg_r, cmd_val)

        if cmd_val.typed_args:
            raise error.Usage(
                "doesn't accept typed args without --line or --all",
                cmd_val.typed_args.left)

        if arg.t >= 0.0:
            if arg.t == 0.0:
                # -t 0 only asks whether input is available
                return 0 if pyos.InputAvailable(STDIN_FILENO) else 1
            e_die("read -t isn't implemented (except t=0)")

        # Terminal flags to change for the duration of the read.  The
        # complement is what gets passed to ctx_TermAttrs.
        term_bits = 0
        if self.stdin_.isatty():
            # -d and -n should be unbuffered
            if arg.d is not None or mops.BigTruncate(arg.n) >= 0:
                term_bits |= pyos.TERM_ICANON
            if arg.s:  # silent
                term_bits |= pyos.TERM_ECHO

            if arg.p is not None:  # only if tty
                mylib.Stderr().write(arg.p)

        if term_bits == 0:
            # Nothing to change, so leave the terminal alone
            return self._Read(arg, names)

        with ctx_TermAttrs(STDIN_FILENO, ~term_bits):
            status = self._Read(arg, names)
        return status

    def _Read(self, arg, names):
        # type: (arg_types.read, List[str]) -> int
        """Read from stdin and assign the result to 'names' (or the -a array).

        Returns 0 on success, or 1 if -N came up short or EOF was hit.
        """
        # -N: read a certain number of bytes (-1 means unset)
        arg_N = mops.BigTruncate(arg.N)
        if arg_N >= 0:
            # REPLY is the default variable name
            name = names[0] if len(names) > 0 else 'REPLY'

            s = _ReadN(arg_N, self.cmd_ev)
            state.BuiltinSetString(self.mem, name, s)

            # Clear extra names, as bash does
            for i in xrange(1, len(names)):
                state.BuiltinSetString(self.mem, names[i], '')

            # Did we read all the bytes we wanted?
            return 0 if len(s) == arg_N else 1

        if len(names) == 0:
            names.append('REPLY')

        # leftover words assigned to the last name
        if arg.a is not None:
            max_results = 0  # no max
        else:
            max_results = len(names)

        # Work out the delimiter, and whether backslashes are literal
        delim_byte = pyos.NEWLINE_CH  # default: read a line
        raw = arg.r
        if arg.Z:  # -0 is synonym for -r -d ''
            raw = True
            delim_byte = 0
        elif arg.d is not None:
            # -d '' delimits by NUL
            delim_byte = ord(arg.d[0]) if len(arg.d) > 0 else 0

        max_chars = mops.BigTruncate(arg.n)

        # We have to read more than one line if there is a line continuation
        # (and it's not -r).
        parts = []  # type: List[mylib.BufWriter]
        join_next = False
        status = 0
        while True:
            line, eof = _ReadPortion(delim_byte, max_chars, self.cmd_ev)

            if eof:
                # status 1 to terminate loop.  (This is true even though we
                # set variables).
                status = 1

            #log('LINE %r', line)
            if len(line) == 0:
                break

            spans = self.splitter.SplitForRead(line, not raw)
            done, join_next = _AppendParts(line, spans, max_results,
                                           join_next, parts)

            #log('PARTS %s continued %s', parts, continued)
            if done:
                break

        entries = [buf.getvalue() for buf in parts]
        if arg.a is not None:
            state.BuiltinSetArray(self.mem, arg.a, entries)
            return status

        num_parts = len(entries)
        for i in xrange(max_results):
            var_name = names[i]
            if var_name.startswith(':'):
                var_name = var_name[1:]

            # Empty string if there are too many variables
            val = entries[i] if i < num_parts else ''

            #log('read: %s = %s', var_name, val)
            state.BuiltinSetString(self.mem, var_name, val)

        return status