core/dev.py

OILS / core / dev.py View on Github | oilshell.org

639 lines, 360 significant

1	"""
2	dev.py - Devtools / introspection.
3	"""
4	from __future__ import print_function
5
6	from _devbuild.gen.option_asdl import option_i, builtin_i, builtin_t
7	from _devbuild.gen.runtime_asdl import (cmd_value, scope_e, trace, trace_e,
8	trace_t)
9	from _devbuild.gen.syntax_asdl import assign_op_e, Token
10	from _devbuild.gen.value_asdl import (value, value_e, value_t, sh_lvalue,
11	sh_lvalue_e, LeftName)
12
13	from core import error
14	from core import optview
15	from core import num
16	from core import state
17	from core import ui
18	from data_lang import j8
19	from mycpp.mylib import log
20	from frontend import location
21	from osh import word_
22	from data_lang import j8_lite
23	from pylib import os_path
24	from mycpp import mylib
25	from mycpp.mylib import tagswitch, iteritems
26
27	import posix_ as posix
28
29	from typing import List, Dict, Optional, Any, cast, TYPE_CHECKING
30	if TYPE_CHECKING:
31	from _devbuild.gen.syntax_asdl import assign_op_t, CompoundWord
32	from _devbuild.gen.runtime_asdl import scope_t
33	from _devbuild.gen.value_asdl import sh_lvalue_t
34	from core import alloc
35	from core.error import _ErrorWithLocation
36	from core import process
37	from core import util
38	from frontend.parse_lib import ParseContext
39	from osh.word_eval import NormalWordEvaluator
40	from osh.cmd_eval import CommandEvaluator
41
42
class CrashDumper(object):
    """Decides whether a crash dump is collected, and where it is written.

    The dump is an object that can be serialized to JSON.  Sketch of how a
    user might consume it:

        trap CRASHDUMP upload-to-server

        # it gets written to a file first
        upload-to-server() {
          local path=$1
          curl -X POST https://osh-trace.oilshell.org < $path
        }

    Things to dump:
      CommandEvaluator
        functions, aliases, traps, completion hooks, fd_state, dir_stack

    Open design question: debug info for the source -- does that come from
    elsewhere?  Probably there should be two separate files:

    - debug info for a given piece of code (needs hash)
      - this could just be the raw source files?  Does it need anything else?
      - it needs a hash so the VM dump can refer to it.
    - vm dump.
    - Combine those and you get a UI.

    One is constant at build time; the other is constant at runtime.
    """

    def __init__(self, crash_dump_dir, fd_state):
        # type: (str, process.FdState) -> None
        """
        Args:
          crash_dump_dir: Directory the dump file is written into.  A falsy
            value (e.g. the empty string) turns collection off.
          fd_state: Used to open the dump file for writing.
        """
        self.fd_state = fd_state
        self.crash_dump_dir = crash_dump_dir

        # Whether we have anything to dump
        self.collected = False
        # Whether we should collect a dump, at the highest level of the stack.
        # MaybeRecord() turns this off after the first capture.
        self.do_collect = bool(crash_dump_dir)

        # All of these are filled in by MaybeRecord()
        self.error = None  # type: Dict[str, value_t]
        self.debug_stack = None  # type: List[value_t]
        self.argv_stack = None  # type: List[value_t]
        self.var_stack = None  # type: List[value_t]

    def MaybeRecord(self, cmd_ev, err):
        # type: (CommandEvaluator, _ErrorWithLocation) -> None
        """Capture state for a crash dump, at most once.

        Does nothing when collection is off, which is the case when no dump
        directory was given or when a capture already happened.

        Args:
          cmd_ev: CommandEvaluator instance; its `mem` provides the stacks
          err: _ErrorWithLocation (ParseError or error.FatalRuntime)
        """
        if not self.do_collect:
            return

        self.var_stack, self.argv_stack, self.debug_stack = cmd_ev.mem.Dump()
        blame_tok = location.TokenFor(err.location)

        err_info = {
            'msg': value.Str(err.UserErrorString()),
        }  # type: Dict[str, value_t]
        self.error = err_info

        if blame_tok:
            # Could also do msg % args separately, but JavaScript won't be able
            # to render that.
            blame_line = blame_tok.line
            err_info['source'] = value.Str(
                ui.GetLineSourceString(blame_line))
            err_info['line_num'] = num.ToBig(blame_line.line_num)
            err_info['line'] = value.Str(blame_line.content)

        # TODO: Collect functions, aliases, etc.
        self.collected = True
        self.do_collect = False

    def MaybeDump(self, status):
        # type: (int) -> None
        """Write the recorded dump as JSON, if anything was recorded.

        The file is named '<pid>-osh-crash-dump.json' inside crash_dump_dir.
        If the file can't be opened, the dump is silently skipped.

        Args:
          status: exit status, stored under the 'status' key

        Design notes.  User can configure it two ways:
        - dump unconditionally -- a daily cron job.  This would be fine.
        - dump on non-zero exit code

        OILS_FAIL
        Maybe counters are different than failure

        OILS_CRASH_DUMP='function alias trap completion stack' ?
        OILS_COUNTER_DUMP='function alias trap completion'
        and then
        I think both of these should dump the (path, mtime, checksum) of the
        source they ran?  And then you can match those up with source control
        or whatever?
        """
        if not self.collected:
            return

        my_pid = posix.getpid()  # Get fresh PID here

        # Other things we need: the reason for the crash!  _ErrorWithLocation
        # is required I think.
        dump = {}  # type: Dict[str, value_t]
        dump['var_stack'] = value.List(self.var_stack)
        dump['argv_stack'] = value.List(self.argv_stack)
        dump['debug_stack'] = value.List(self.debug_stack)
        dump['error'] = value.Dict(self.error)
        dump['status'] = num.ToBig(status)
        dump['pid'] = num.ToBig(my_pid)

        file_name = '%d-osh-crash-dump.json' % my_pid
        path = os_path.join(self.crash_dump_dir, file_name)

        # TODO: This should be JSON with unicode replacement char?
        writer = mylib.BufWriter()
        j8.PrintMessage(value.Dict(dump), writer, 2)
        json_str = writer.getvalue()

        try:
            out_f = self.fd_state.OpenForWrite(path)
        except (IOError, OSError) as e:
            # Ignore error
            return

        out_f.write(json_str)

        # TODO: mylib.Writer() needs close()?  Also for DebugFile()
        #out_f.close()

        log('[%d] Wrote crash dump to %s', my_pid, path)
169
170
class ctx_Tracer(object):
    """A stack for tracing synchronous constructs.

    Constructing the object pushes a message onto the tracer; leaving the
    `with` block pops it again.
    """

    def __init__(self, tracer, label, argv):
        # type: (Tracer, str, Optional[List[str]]) -> None
        """
        Args:
          tracer: receives PushMessage() now and PopMessage() on exit
          label: kind of construct; 'proc' and 'source' also remember an
            argument to show on exit
          argv: words of the construct; indexed only for 'proc' and 'source'
        """
        self.tracer = tracer
        self.label = label

        # What PopMessage() shows next to the label, if anything
        shown = None  # type: Optional[str]
        if label == 'proc':
            shown = argv[0]
        elif label == 'source':
            shown = argv[1]
        self.arg = shown

        tracer.PushMessage(label, argv)

    def __enter__(self):
        # type: () -> None
        # The push already happened in the constructor
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.tracer.PopMessage(self.label, self.arg)
193
194
def _PrintShValue(val, buf):
    # type: (value_t, mylib.BufWriter) -> None
    """Write the shell rendering of a ShAssignment value to buf.

    A Str goes through j8_lite.MaybeShellEncode().  BashArray and BashAssoc
    are rendered as space-separated items between '(' and ')'.  Any other
    value type is rendered as '?'.

    NOTE: This is a bit like _PrintVariables for declare -p
    """
    # Fallback.  I think this should never be used because it's for
    # ShAssignment.
    text = '?'

    # Using maybe_shell_encode() because it's shell
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            text = j8_lite.MaybeShellEncode(val.s)

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            items = ['(']
            for elem in val.strs:
                items.append(j8_lite.MaybeShellEncode(elem))
            items.append(')')
            text = ' '.join(items)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            items = ['(']
            for key, assoc_val in iteritems(val.d):
                # key must be quoted
                quoted_key = j8_lite.ShellEncode(key)
                shown_val = j8_lite.MaybeShellEncode(assoc_val)
                items.append('[%s]=%s' % (quoted_key, shown_val))
            items.append(')')
            text = ' '.join(items)

    buf.write(text)
231
232
def PrintShellArgv(argv, buf):
    # type: (List[str], mylib.BufWriter) -> None
    """Write argv to buf, shell-encoded and separated by single spaces.

    No leading space and no trailing newline are written.
    """
    is_first = True
    for word in argv:
        if is_first:
            is_first = False
        else:
            buf.write(' ')
        buf.write(j8_lite.MaybeShellEncode(word))
239
240
def _PrintYshArgv(argv, buf):
    # type: (List[str], mylib.BufWriter) -> None
    """Write each arg preceded by a space, then a newline.

    Unlike PrintShellArgv(), this emits a LEADING space before the first arg
    and terminates the line itself.
    """
    # We're printing $'hi\n' for OSH, but we might want to print u'hi\n' or
    # b'\n' for YSH.  We could have a shopt --set xtrace_j8 or something.
    #
    # This used to be xtrace_rich, but I think that was too subtle.

    for word in argv:
        buf.write(' ')
        # TODO: use unquoted -> POSIX '' -> b''
        # This would use JSON "", which CONFLICTS with shell.  So we need
        # another function.
        #j8.EncodeString(word, buf, unquoted_ok=True)

        encoded = j8_lite.MaybeShellEncode(word)
        buf.write(encoded)
    buf.write('\n')
258
259
class Tracer(object):
    """For shell's set -x, and Oil's hierarchical, parsable tracing.

    See doc/xtrace.md for details.

    Every trace line is a prefix computed from $PS4, followed by an event.
    The legacy events (_ShTraceBegin and OnControlFlow) use '+' as the
    punctuation.  The xtrace_rich events pass one of these instead:

      |  process start        ;  process end
      >  push (proc, source)  <  pop
      .  exec, builtin        !  other message

    - TODO: Connect it somehow to tracers for other processes.  So you can make
      an HTML report offline.
      - Could inherit SHX_*

    https://www.gnu.org/software/bash/manual/html_node/Bash-Variables.html#Bash-Variables

    Other hooks:

    - Command completion starts other processes
    - YSH command constructs: BareDecl, VarDecl, Mutation, Expr
    """

    def __init__(
            self,
            parse_ctx,  # type: ParseContext
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            mem,  # type: state.Mem
            f,  # type: util._DebugFile
    ):
        # type: (...) -> None
        """
        Args:
          parse_ctx: For parsing PS4.
          exec_opts: For xtrace setting
          mutable_opts: For turning xtrace off while PS4 is evaluated
          mem: for retrieving PS4
          f: where trace lines are written

        word_ev (for evaluating PS4) is not a constructor argument.  It starts
        out as None and must be assigned afterward; see CheckCircularDeps().
        """
        self.parse_ctx = parse_ctx
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts
        self.mem = mem
        self.f = f  # can be stderr, the --debug-file, etc.

        self.word_ev = None  # type: NormalWordEvaluator

        self.ind = 0  # changed by process, proc, source, eval
        self.indents = ['']  # "pooled" to avoid allocations

        # PS4 value -> CompoundWord.  PS4 is scoped.
        self.parse_cache = {}  # type: Dict[str, CompoundWord]

        # Mutate objects to save allocations
        self.val_indent = value.Str('')
        self.val_punct = value.Str('')
        # TODO: show something for root process by default?  INTERLEAVED output
        # can be confusing, e.g. debugging traps in forked subinterpreter
        # created by a pipeline.
        self.val_pid_str = value.Str('')  # mutated by SetProcess

        # Can these be global constants?  I don't think we have that in ASDL
        # yet.
        self.lval_indent = location.LName('SHX_indent')
        self.lval_punct = location.LName('SHX_punct')
        self.lval_pid_str = location.LName('SHX_pid_str')

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that word_ev was assigned after construction."""
        assert self.word_ev is not None

    def _EvalPS4(self, punct):
        # type: (str) -> str
        """The prefix of each line.

        Parses $PS4 (caching the parsed word by its string value) and
        evaluates it with SHX_indent, SHX_punct and SHX_pid_str set as
        temporary local variables.

        Args:
          punct: value exposed to PS4 as SHX_punct

        Returns:
          The evaluated PS4 string.
        """
        val = self.mem.GetValue('PS4')
        if val.tag() == value_e.Str:
            ps4 = cast(value.Str, val).s
        else:
            ps4 = ''

        # NOTE: This cache is slightly broken because aliases are mutable!  I
        # think that is more or less harmless though.
        ps4_word = self.parse_cache.get(ps4)
        if ps4_word is None:
            # We have to parse this at runtime.  PS4 should usually remain
            # constant.
            w_parser = self.parse_ctx.MakeWordParserForPlugin(ps4)

            # NOTE: could use source.Variable, like $PS1 prompt does
            try:
                ps4_word = w_parser.ReadForPlugin()
            except error.Parse as e:
                # Show the parse error in the trace prefix instead of failing
                ps4_word = word_.ErrorWord("<ERROR: Can't parse PS4: %s>" %
                                           e.UserErrorString())
            self.parse_cache[ps4] = ps4_word

        # Mutate objects to save allocations
        if self.exec_opts.xtrace_rich():
            self.val_indent.s = self.indents[self.ind]
        else:
            self.val_indent.s = ''
        self.val_punct.s = punct

        # Prevent infinite loop when PS4 has command sub!
        assert self.exec_opts.xtrace()  # We shouldn't call this unless it's on

        # TODO: Remove allocation for [] ?
        with state.ctx_Option(self.mutable_opts, [option_i.xtrace], False):
            with state.ctx_Temp(self.mem):
                self.mem.SetNamed(self.lval_indent, self.val_indent,
                                  scope_e.LocalOnly)
                self.mem.SetNamed(self.lval_punct, self.val_punct,
                                  scope_e.LocalOnly)
                self.mem.SetNamed(self.lval_pid_str, self.val_pid_str,
                                  scope_e.LocalOnly)
                prefix = self.word_ev.EvalForPlugin(ps4_word)
        return prefix.s

    def _Inc(self):
        # type: () -> None
        """Go one indent level deeper, growing the pool of indent strings."""
        self.ind += 1
        if self.ind >= len(self.indents):  # make sure there are enough
            self.indents.append(' ' * self.ind)

    def _Dec(self):
        # type: () -> None
        """Go back up one indent level."""
        self.ind -= 1

    def _ShTraceBegin(self):
        # type: () -> Optional[mylib.BufWriter]
        """Start a legacy trace line.

        Returns:
          A buffer holding the PS4 prefix, or None unless both xtrace and
          xtrace_details are on.
        """
        if not self.exec_opts.xtrace() or not self.exec_opts.xtrace_details():
            return None

        # Note: bash repeats the + for command sub, eval, source.  Other shells
        # don't do it.  Leave this out for now.
        prefix = self._EvalPS4('+')
        buf = mylib.BufWriter()
        buf.write(prefix)
        return buf

    def _RichTraceBegin(self, punct):
        # type: (str) -> Optional[mylib.BufWriter]
        """For the stack printed by xtrace_rich.

        Returns:
          A buffer holding the PS4 prefix, or None unless both xtrace and
          xtrace_rich are on.
        """
        if not self.exec_opts.xtrace() or not self.exec_opts.xtrace_rich():
            return None

        prefix = self._EvalPS4(punct)
        buf = mylib.BufWriter()
        buf.write(prefix)
        return buf

    def OnProcessStart(self, pid, why):
        # type: (int, trace_t) -> None
        """Trace the start of a process, described by `why`."""
        # A plain one-character '|', like the punctuation of every other
        # event.  It must not be written with a backslash: that is not a valid
        # escape, so the backslash would end up in the trace output.
        buf = self._RichTraceBegin('|')
        if not buf:
            return

        # TODO: ProcessSub and PipelinePart are commonly command.Simple, and
        # also Fork/ForkWait through the BraceGroup.  We could print those argv
        # arrays.

        UP_why = why
        with tagswitch(why) as case:
            # Synchronous cases
            if case(trace_e.External):
                why = cast(trace.External, UP_why)
                buf.write('command %d:' % pid)
                _PrintYshArgv(why.argv, buf)

            # Everything below is the same.  Could use string literals?
            elif case(trace_e.ForkWait):
                buf.write('forkwait %d\n' % pid)
            elif case(trace_e.CommandSub):
                buf.write('command sub %d\n' % pid)

            # Async cases
            elif case(trace_e.ProcessSub):
                buf.write('proc sub %d\n' % pid)
            elif case(trace_e.HereDoc):
                buf.write('here doc %d\n' % pid)
            elif case(trace_e.Fork):
                buf.write('fork %d\n' % pid)
            elif case(trace_e.PipelinePart):
                buf.write('part %d\n' % pid)

            else:
                raise AssertionError()

        self.f.write(buf.getvalue())

    def OnProcessEnd(self, pid, status):
        # type: (int, int) -> None
        """Trace the end of a process, with its exit status."""
        buf = self._RichTraceBegin(';')
        if not buf:
            return

        buf.write('process %d: status %d\n' % (pid, status))
        self.f.write(buf.getvalue())

    def SetProcess(self, pid):
        # type: (int) -> None
        """All trace lines have a PID prefix, except those from the root
        process.

        Also goes one indent level deeper.
        """
        self.val_pid_str.s = ' %d' % pid
        self._Inc()

    def PushMessage(self, label, argv):
        # type: (str, Optional[List[str]]) -> None
        """For synchronous constructs that aren't processes.

        The indent level is increased whether or not a line is written.

        Args:
          label: kind of construct, e.g. 'proc', 'source' or 'wait'
          argv: printed in full for 'proc', and without the first word for
            'source' and 'wait'.  Not read for other labels.
        """
        buf = self._RichTraceBegin('>')
        if buf:
            buf.write(label)
            if label == 'proc':
                _PrintYshArgv(argv, buf)
            elif label == 'source':
                _PrintYshArgv(argv[1:], buf)
            elif label == 'wait':
                _PrintYshArgv(argv[1:], buf)
            else:
                buf.write('\n')
            self.f.write(buf.getvalue())

        self._Inc()

    def PopMessage(self, label, arg):
        # type: (str, Optional[str]) -> None
        """For synchronous constructs that aren't processes.

        e.g. source or proc

        The indent level is decreased whether or not a line is written.

        Args:
          label: kind of construct, as given to PushMessage()
          arg: shown after the label, unless it's None
        """
        self._Dec()

        buf = self._RichTraceBegin('<')
        if buf:
            buf.write(label)
            if arg is not None:
                buf.write(' ')
                # TODO: use unquoted -> POSIX '' -> b''
                buf.write(j8_lite.MaybeShellEncode(arg))
            buf.write('\n')
            self.f.write(buf.getvalue())

    def OtherMessage(self, message):
        # type: (str) -> None
        """Can be used when receiving signals."""
        buf = self._RichTraceBegin('!')
        if not buf:
            return

        buf.write(message)
        buf.write('\n')
        self.f.write(buf.getvalue())

    def OnExec(self, argv):
        # type: (List[str]) -> None
        """Trace an 'exec' with its argv."""
        buf = self._RichTraceBegin('.')
        if not buf:
            return
        buf.write('exec')
        _PrintYshArgv(argv, buf)
        self.f.write(buf.getvalue())

    def OnBuiltin(self, builtin_id, argv):
        # type: (builtin_t, List[str]) -> None
        """Trace a builtin with its argv, except for eval, source and wait."""
        if builtin_id in (builtin_i.eval, builtin_i.source, builtin_i.wait):
            return  # These 3 builtins handled separately

        buf = self._RichTraceBegin('.')
        if not buf:
            return
        buf.write('builtin')
        _PrintYshArgv(argv, buf)
        self.f.write(buf.getvalue())

    #
    # Shell Tracing That Begins with _ShTraceBegin
    #

    def OnSimpleCommand(self, argv):
        # type: (List[str]) -> None
        """For legacy set -x.

        Called before we know if it's a builtin, external, or proc.
        """
        buf = self._ShTraceBegin()
        if not buf:
            return

        # Redundant with OnProcessStart (external), PushMessage (proc), and
        # OnBuiltin
        if self.exec_opts.xtrace_rich():
            return

        # Legacy: Use SHELL encoding, NOT _PrintYshArgv()
        PrintShellArgv(argv, buf)
        buf.write('\n')
        self.f.write(buf.getvalue())

    def OnAssignBuiltin(self, cmd_val):
        # type: (cmd_value.Assign) -> None
        """Trace an assignment builtin: its argv, then each NAME=value pair."""
        buf = self._ShTraceBegin()
        if not buf:
            return

        for i, arg in enumerate(cmd_val.argv):
            if i != 0:
                buf.write(' ')
            buf.write(arg)

        for pair in cmd_val.pairs:
            buf.write(' ')
            buf.write(pair.var_name)
            buf.write('=')
            # Without an rval, only 'NAME=' is shown
            if pair.rval:
                _PrintShValue(pair.rval, buf)

        buf.write('\n')
        self.f.write(buf.getvalue())

    def OnShAssignment(self, lval, op, val, flags, which_scopes):
        # type: (sh_lvalue_t, assign_op_t, value_t, int, scope_t) -> None
        """Trace a shell assignment as LEFT=value or LEFT+=value.

        flags and which_scopes are accepted but not used for the output.
        """
        buf = self._ShTraceBegin()
        if not buf:
            return

        left = '?'
        UP_lval = lval
        with tagswitch(lval) as case:
            if case(sh_lvalue_e.Var):
                lval = cast(LeftName, UP_lval)
                left = lval.name
            elif case(sh_lvalue_e.Indexed):
                lval = cast(sh_lvalue.Indexed, UP_lval)
                left = '%s[%d]' % (lval.name, lval.index)
            elif case(sh_lvalue_e.Keyed):
                lval = cast(sh_lvalue.Keyed, UP_lval)
                left = '%s[%s]' % (lval.name, j8_lite.MaybeShellEncode(
                    lval.key))
        buf.write(left)

        # Only two possibilities here
        buf.write('+=' if op == assign_op_e.PlusEqual else '=')

        _PrintShValue(val, buf)

        buf.write('\n')
        self.f.write(buf.getvalue())

    def OnControlFlow(self, keyword, arg):
        # type: (str, int) -> None
        """Trace a control flow keyword and its integer argument."""

        # This is NOT affected by xtrace_rich or xtrace_details.  Works in
        # both.
        if not self.exec_opts.xtrace():
            return

        prefix = self._EvalPS4('+')
        buf = mylib.BufWriter()
        buf.write(prefix)

        buf.write(keyword)
        buf.write(' ')
        buf.write(str(arg))  # Note: 'return' is equivalent to 'return 0'
        buf.write('\n')

        self.f.write(buf.getvalue())

    def PrintSourceCode(self, left_tok, right_tok, arena):
        # type: (Token, Token, alloc.Arena) -> None
        """For (( )) and [[ ]].

        Bash traces these.

        Prints the source text from left_tok through right_tok when both are
        on the same line.  Otherwise prints the rest of left_tok's line,
        followed by ' ...'.  arena is accepted but not used.
        """
        buf = self._ShTraceBegin()
        if not buf:
            return

        line = left_tok.line.content
        start = left_tok.col

        if left_tok.line == right_tok.line:
            end = right_tok.col + right_tok.length
            buf.write(line[start:end])
        else:
            # Print first line only, without its trailing newline
            end = -1 if line.endswith('\n') else len(line)
            buf.write(line[start:end])
            buf.write(' ...')

        buf.write('\n')
        self.f.write(buf.getvalue())