# OILS / core / dev.py -- View on Github | oilshell.org
#
# (639 lines, 360 significant, as listed on the source page)
"""
dev.py - Devtools / introspection.
"""
4from __future__ import print_function
5
6from _devbuild.gen.option_asdl import option_i, builtin_i, builtin_t
7from _devbuild.gen.runtime_asdl import (cmd_value, scope_e, trace, trace_e,
8 trace_t)
9from _devbuild.gen.syntax_asdl import assign_op_e, Token
10from _devbuild.gen.value_asdl import (value, value_e, value_t, sh_lvalue,
11 sh_lvalue_e, LeftName)
12
13from core import error
14from core import optview
15from core import num
16from core import state
17from core import ui
18from data_lang import j8
19from mycpp.mylib import log
20from frontend import location
21from osh import word_
22from data_lang import j8_lite
23from pylib import os_path
24from mycpp import mylib
25from mycpp.mylib import tagswitch, iteritems
26
27import posix_ as posix
28
29from typing import List, Dict, Optional, Any, cast, TYPE_CHECKING
30if TYPE_CHECKING:
31 from _devbuild.gen.syntax_asdl import assign_op_t, CompoundWord
32 from _devbuild.gen.runtime_asdl import scope_t
33 from _devbuild.gen.value_asdl import sh_lvalue_t
34 from core import alloc
35 from core.error import _ErrorWithLocation
36 from core import process
37 from core import util
38 from frontend.parse_lib import ParseContext
39 from osh.word_eval import NormalWordEvaluator
40 from osh.cmd_eval import CommandEvaluator
41
42
class CrashDumper(object):
    """Decides whether a crash dump is collected, and where it is written.

    The recorded state is kept as value_t objects so that the whole dump can
    be serialized to JSON.

    Design sketch carried over from the original notes:

        trap CRASHDUMP upload-to-server

        # it gets written to a file first
        upload-to-server() {
          local path=$1
          curl -X POST https://osh-trace.oilshell.org < $path
        }

    Things to dump:
      CommandEvaluator
        functions, aliases, traps, completion hooks, fd_state, dir_stack

      debug info for the source?  Or does that come elsewhere?

      Yeah I think you should have two separate files.
      - debug info for a given piece of code (needs hash)
        - this could just be the raw source files?  Does it need anything
          else?
        - I think it needs a hash so the VM dump can refer to it.
      - vm dump.
      - Combine those and you get a UI.

    One is constant at build time; the other is constant at runtime.
    """

    def __init__(self, crash_dump_dir, fd_state):
        # type: (str, process.FdState) -> None
        """
        Args:
          crash_dump_dir: Directory for the dump file; an empty string turns
            collection off.
          fd_state: Used to open the dump file for writing.
        """
        self.crash_dump_dir = crash_dump_dir
        self.fd_state = fd_state

        # Whether we should collect a dump, at the highest level of the
        # stack.  MaybeRecord() clears this after the first record.
        self.do_collect = bool(crash_dump_dir)
        # Whether MaybeRecord() has stored anything for MaybeDump() to write.
        self.collected = False

        self.var_stack = None  # type: List[value_t]
        self.argv_stack = None  # type: List[value_t]
        self.debug_stack = None  # type: List[value_t]
        self.error = None  # type: Dict[str, value_t]

    def MaybeRecord(self, cmd_ev, err):
        # type: (CommandEvaluator, _ErrorWithLocation) -> None
        """Collect data for a crash dump.

        Does nothing when collection is off, which is the case either because
        no dump directory was given or because a record was already taken.

        Args:
          cmd_ev: CommandEvaluator instance; cmd_ev.mem.Dump() supplies the
            variable, argv and debug stacks.
          err: _ErrorWithLocation (ParseError or error.FatalRuntime)
        """
        if not self.do_collect:
            return

        var_stack, argv_stack, debug_stack = cmd_ev.mem.Dump()
        self.var_stack = var_stack
        self.argv_stack = argv_stack
        self.debug_stack = debug_stack

        blame_tok = location.TokenFor(err.location)

        info = {
            'msg': value.Str(err.UserErrorString()),
        }  # type: Dict[str, value_t]
        self.error = info

        if blame_tok:
            # Could also do msg % args separately, but JavaScript won't be
            # able to render that.
            blame_line = blame_tok.line
            info['source'] = value.Str(ui.GetLineSourceString(blame_line))
            info['line_num'] = num.ToBig(blame_line.line_num)
            info['line'] = value.Str(blame_line.content)

        # TODO: Collect functions, aliases, etc.

        # Only the first error is recorded.
        self.do_collect = False
        self.collected = True

    def MaybeDump(self, status):
        # type: (int) -> None
        """Write the dump as JSON, if MaybeRecord() collected anything.

        The file is named '<pid>-osh-crash-dump.json' inside crash_dump_dir.
        Failure to open it is ignored silently.

        User can configure it two ways:
        - dump unconditionally -- a daily cron job.  This would be fine.
        - dump on non-zero exit code

        OILS_FAIL
        Maybe counters are different than failure

        OILS_CRASH_DUMP='function alias trap completion stack' ?
        OILS_COUNTER_DUMP='function alias trap completion'
        and then
        I think both of these should dump the (path, mtime, checksum) of the
        source they ran?  And then you can match those up with source control
        or whatever?

        Args:
          status: Exit status to store under the 'status' key.
        """
        if not self.collected:
            return

        my_pid = posix.getpid()  # Get fresh PID here

        dump_path = os_path.join(self.crash_dump_dir,
                                 '%d-osh-crash-dump.json' % my_pid)

        # Other things we need: the reason for the crash!  _ErrorWithLocation
        # is required I think.
        d = {
            'var_stack': value.List(self.var_stack),
            'argv_stack': value.List(self.argv_stack),
            'debug_stack': value.List(self.debug_stack),
            'error': value.Dict(self.error),
            'status': num.ToBig(status),
            'pid': num.ToBig(my_pid),
        }  # type: Dict[str, value_t]

        # TODO: This should be JSON with unicode replacement char?
        json_buf = mylib.BufWriter()
        j8.PrintMessage(value.Dict(d), json_buf, 2)
        json_str = json_buf.getvalue()

        try:
            out_f = self.fd_state.OpenForWrite(dump_path)
        except (IOError, OSError) as e:
            # Ignore error
            return

        out_f.write(json_str)

        # TODO: mylib.Writer() needs close()?  Also for DebugFile()
        #out_f.close()

        log('[%d] Wrote crash dump to %s', my_pid, dump_path)
169
170
class ctx_Tracer(object):
    """Context manager that brackets a synchronous construct in the trace.

    Constructing it pushes a message onto the tracer; leaving the 'with'
    block pops that message again.
    """

    def __init__(self, tracer, label, argv):
        # type: (Tracer, str, Optional[List[str]]) -> None
        """
        Args:
          tracer: Tracer that receives PushMessage() / PopMessage().
          label: Kind of construct.  'proc' and 'source' additionally
            remember one element of argv for the pop message.
          argv: Passed through to PushMessage().
        """
        # Echoed by PopMessage(): argv[0] for 'proc', argv[1] for 'source',
        # otherwise nothing.
        self.arg = None  # type: Optional[str]
        if label == 'source':
            self.arg = argv[1]
        elif label == 'proc':
            self.arg = argv[0]

        tracer.PushMessage(label, argv)
        self.tracer = tracer
        self.label = label

    def __enter__(self):
        # type: () -> None
        # The push already happened in __init__.
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.tracer.PopMessage(self.label, self.arg)
193
194
def _PrintShValue(val, buf):
    # type: (value_t, mylib.BufWriter) -> None
    """Write a shell-encoded rendering of an assigned value to buf.

    Strings are written as one word; BashArray and BashAssoc are written as
    parenthesized, space-separated lists.  Any other value type is written
    as '?'.

    NOTE: This is a bit like _PrintVariables for declare -p
    """
    # Using maybe_shell_encode() because it's shell
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            buf.write(j8_lite.MaybeShellEncode(val.s))

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            words = ['(']
            for item in val.strs:
                words.append(j8_lite.MaybeShellEncode(item))
            words.append(')')
            buf.write(' '.join(words))

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            entries = ['(']
            for key, item in iteritems(val.d):
                # key must be quoted
                quoted_key = j8_lite.ShellEncode(key)
                entries.append(
                    '[%s]=%s' % (quoted_key, j8_lite.MaybeShellEncode(item)))
            entries.append(')')
            buf.write(' '.join(entries))

        else:
            # I think this should never happen because it's for ShAssignment
            buf.write('?')
231
232
def PrintShellArgv(argv, buf):
    # type: (List[str], mylib.BufWriter) -> None
    """Write argv to buf as shell-encoded words with a space between them.

    No leading space and no trailing newline are written.
    """
    need_sep = False
    for word in argv:
        if need_sep:
            buf.write(' ')
        need_sep = True
        buf.write(j8_lite.MaybeShellEncode(word))
239
240
def _PrintYshArgv(argv, buf):
    # type: (List[str], mylib.BufWriter) -> None
    """Write each argument preceded by a space, then a newline.

    Unlike PrintShellArgv(), every word gets a leading space (the caller has
    already written a label) and the line is terminated here.
    """
    # We're printing $'hi\n' for OSH, but we might want to print u'hi\n' or
    # b'\n' for YSH.  We could have a shopt --set xtrace_j8 or something.
    #
    # This used to be xtrace_rich, but I think that was too subtle.

    # TODO: use unquoted -> POSIX '' -> b''
    # This would use JSON "", which CONFLICTS with shell.  So we need
    # another function.
    #j8.EncodeString(arg, buf, unquoted_ok=True)
    for word in argv:
        buf.write(' ' + j8_lite.MaybeShellEncode(word))

    buf.write('\n')
258
259
class Tracer(object):
    """Implements shell's set -x, and Oil's hierarchical, parsable tracing.

    See doc/xtrace.md for details.

    - TODO: Connect it somehow to tracers for other processes.  So you can
      make an HTML report offline.
      - Could inherit SHX_*

    https://www.gnu.org/software/bash/manual/html_node/Bash-Variables.html#Bash-Variables

    Other hooks:

    - Command completion starts other processes
    - YSH command constructs: BareDecl, VarDecl, Mutation, Expr
    """

    def __init__(
            self,
            parse_ctx,  # type: ParseContext
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            mem,  # type: state.Mem
            f,  # type: util._DebugFile
    ):
        # type: (...) -> None
        """
        Args:
          parse_ctx: For parsing PS4.
          exec_opts: For the xtrace, xtrace_rich and xtrace_details settings.
          mutable_opts: For switching xtrace off while PS4 is evaluated.
          mem: For retrieving PS4 and binding the SHX_* variables.
          f: Where trace lines are written.

        word_ev (used to evaluate PS4) is not a constructor argument.  It
        starts out as None and must be assigned before tracing; see
        CheckCircularDeps().
        """
        self.parse_ctx = parse_ctx
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts
        self.mem = mem
        self.f = f  # can be stderr, the --debug-file, etc.

        self.word_ev = None  # type: NormalWordEvaluator

        # Current nesting depth: changed by process, proc, source, eval
        self.ind = 0
        # Indent string per depth, "pooled" to avoid allocations
        self.indents = ['']

        # PS4 value -> CompoundWord.  PS4 is scoped.
        self.parse_cache = {}  # type: Dict[str, CompoundWord]

        # These value objects are mutated in place to save allocations.
        self.val_indent = value.Str('')
        self.val_punct = value.Str('')
        # TODO: show something for root process by default?  INTERLEAVED
        # output can be confusing, e.g. debugging traps in forked
        # subinterpreter created by a pipeline.
        self.val_pid_str = value.Str('')  # mutated by SetProcess

        # Can these be global constants?  I don't think we have that in ASDL
        # yet.
        self.lval_indent = location.LName('SHX_indent')
        self.lval_punct = location.LName('SHX_punct')
        self.lval_pid_str = location.LName('SHX_pid_str')

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that word_ev was assigned after construction."""
        assert self.word_ev is not None

    def _EvalPS4(self, punct):
        # type: (str) -> str
        """Evaluate PS4 and return the prefix of each trace line.

        SHX_indent, SHX_punct and SHX_pid_str are bound in a temporary frame
        while PS4 is evaluated, and xtrace is switched off for the duration.

        Args:
          punct: Value given to SHX_punct for this line.
        """
        ps4 = ''  # used when PS4 is not a string
        ps4_val = self.mem.GetValue('PS4')
        if ps4_val.tag() == value_e.Str:
            ps4 = cast(value.Str, ps4_val).s

        # NOTE: This cache is slightly broken because aliases are mutable!  I
        # think that is more or less harmless though.
        ps4_word = self.parse_cache.get(ps4)
        if ps4_word is None:
            # We have to parse this at runtime.  PS4 should usually remain
            # constant.
            w_parser = self.parse_ctx.MakeWordParserForPlugin(ps4)

            # NOTE: could use source.Variable, like $PS1 prompt does
            try:
                ps4_word = w_parser.ReadForPlugin()
            except error.Parse as e:
                # The parse error itself becomes the prefix.
                ps4_word = word_.ErrorWord("<ERROR: Can't parse PS4: %s>" %
                                           e.UserErrorString())
            self.parse_cache[ps4] = ps4_word

        # Mutate objects to save allocations.  Only xtrace_rich indents.
        indent = ''
        if self.exec_opts.xtrace_rich():
            indent = self.indents[self.ind]
        self.val_indent.s = indent
        self.val_punct.s = punct

        # Prevent infinite loop when PS4 has command sub!
        assert self.exec_opts.xtrace()  # We shouldn't call this unless it's on

        # TODO: Remove allocation for [] ?
        with state.ctx_Option(self.mutable_opts, [option_i.xtrace], False):
            with state.ctx_Temp(self.mem):
                self.mem.SetNamed(self.lval_indent, self.val_indent,
                                  scope_e.LocalOnly)
                self.mem.SetNamed(self.lval_punct, self.val_punct,
                                  scope_e.LocalOnly)
                self.mem.SetNamed(self.lval_pid_str, self.val_pid_str,
                                  scope_e.LocalOnly)
                prefix = self.word_ev.EvalForPlugin(ps4_word)
        return prefix.s

    def _Inc(self):
        # type: () -> None
        """Go one level deeper, growing the indent pool when needed."""
        depth = self.ind + 1
        self.ind = depth
        if depth >= len(self.indents):  # make sure there are enough
            # NOTE(review): the indent unit is reproduced exactly as it
            # appears in the source under review; whitespace inside the
            # literal may have been collapsed during extraction -- confirm.
            self.indents.append(' ' * depth)

    def _Dec(self):
        # type: () -> None
        """Go one level back up.  The indent pool is left as is."""
        self.ind = self.ind - 1

    def _ShTraceBegin(self):
        # type: () -> Optional[mylib.BufWriter]
        """Start a legacy trace line.

        Returns:
          A buffer holding the PS4 prefix, or None unless both xtrace and
          xtrace_details are on.
        """
        if not (self.exec_opts.xtrace() and self.exec_opts.xtrace_details()):
            return None

        # Note: bash repeats the + for command sub, eval, source.  Other
        # shells don't do it.  Leave this out for now.
        line_buf = mylib.BufWriter()
        line_buf.write(self._EvalPS4('+'))
        return line_buf

    def _RichTraceBegin(self, punct):
        # type: (str) -> Optional[mylib.BufWriter]
        """Start a line for the stack printed by xtrace_rich.

        Returns:
          A buffer holding the PS4 prefix, or None unless both xtrace and
          xtrace_rich are on.
        """
        if not (self.exec_opts.xtrace() and self.exec_opts.xtrace_rich()):
            return None

        line_buf = mylib.BufWriter()
        line_buf.write(self._EvalPS4(punct))
        return line_buf

    def OnProcessStart(self, pid, why):
        # type: (int, trace_t) -> None
        """Trace the start of a process, labelled by the reason it started."""
        out = self._RichTraceBegin('|')
        if not out:
            return

        # TODO: ProcessSub and PipelinePart are commonly command.Simple, and
        # also Fork/ForkWait through the BraceGroup.  We could print those
        # argv arrays.

        UP_why = why
        with tagswitch(why) as case:
            # Synchronous cases
            if case(trace_e.External):
                why = cast(trace.External, UP_why)
                out.write('command %d:' % pid)
                _PrintYshArgv(why.argv, out)

            # Everything below is the same.  Could use string literals?
            elif case(trace_e.ForkWait):
                out.write('forkwait %d\n' % pid)
            elif case(trace_e.CommandSub):
                out.write('command sub %d\n' % pid)

            # Async cases
            elif case(trace_e.ProcessSub):
                out.write('proc sub %d\n' % pid)
            elif case(trace_e.HereDoc):
                out.write('here doc %d\n' % pid)
            elif case(trace_e.Fork):
                out.write('fork %d\n' % pid)
            elif case(trace_e.PipelinePart):
                out.write('part %d\n' % pid)

            else:
                raise AssertionError()

        self.f.write(out.getvalue())

    def OnProcessEnd(self, pid, status):
        # type: (int, int) -> None
        """Trace the end of a process along with its exit status."""
        out = self._RichTraceBegin(';')
        if out:
            out.write('process %d: status %d\n' % (pid, status))
            self.f.write(out.getvalue())

    def SetProcess(self, pid):
        # type: (int) -> None
        """All trace lines have a PID prefix, except those from the root
        process.

        Also increases the nesting depth by one.
        """
        self.val_pid_str.s = ' %d' % pid
        self._Inc()

    def PushMessage(self, label, argv):
        # type: (str, Optional[List[str]]) -> None
        """For synchronous constructs that aren't processes.

        The nesting depth is increased whether or not a line is printed.
        """
        out = self._RichTraceBegin('>')
        if out:
            out.write(label)
            if label == 'proc':
                _PrintYshArgv(argv, out)
            elif label == 'source' or label == 'wait':
                # argv[0] is not printed for these two labels.
                _PrintYshArgv(argv[1:], out)
            else:
                out.write('\n')
            self.f.write(out.getvalue())

        self._Inc()

    def PopMessage(self, label, arg):
        # type: (str, Optional[str]) -> None
        """For synchronous constructs that aren't processes.

        e.g. source or proc

        The nesting depth is decreased before the line is printed.
        """
        self._Dec()

        out = self._RichTraceBegin('<')
        if not out:
            return

        out.write(label)
        if arg is not None:
            # TODO: use unquoted -> POSIX '' -> b''
            out.write(' ' + j8_lite.MaybeShellEncode(arg))
        out.write('\n')
        self.f.write(out.getvalue())

    def OtherMessage(self, message):
        # type: (str) -> None
        """Can be used when receiving signals."""
        out = self._RichTraceBegin('!')
        if out:
            out.write(message)
            out.write('\n')
            self.f.write(out.getvalue())

    def OnExec(self, argv):
        # type: (List[str]) -> None
        """Trace an 'exec' with its arguments."""
        out = self._RichTraceBegin('.')
        if out:
            out.write('exec')
            _PrintYshArgv(argv, out)
            self.f.write(out.getvalue())

    def OnBuiltin(self, builtin_id, argv):
        # type: (builtin_t, List[str]) -> None
        """Trace a builtin invocation, except for eval, source and wait."""
        if builtin_id in (builtin_i.eval, builtin_i.source, builtin_i.wait):
            return  # These 3 builtins handled separately

        out = self._RichTraceBegin('.')
        if out:
            out.write('builtin')
            _PrintYshArgv(argv, out)
            self.f.write(out.getvalue())

    #
    # Shell Tracing That Begins with _ShTraceBegin
    #

    def OnSimpleCommand(self, argv):
        # type: (List[str]) -> None
        """For legacy set -x.

        Called before we know if it's a builtin, external, or proc.
        """
        out = self._ShTraceBegin()
        # With xtrace_rich the line would be redundant with OnProcessStart
        # (external), PushMessage (proc), and OnBuiltin.
        if not out or self.exec_opts.xtrace_rich():
            return

        # Legacy: Use SHELL encoding, NOT _PrintYshArgv()
        PrintShellArgv(argv, out)
        out.write('\n')
        self.f.write(out.getvalue())

    def OnAssignBuiltin(self, cmd_val):
        # type: (cmd_value.Assign) -> None
        """Trace an assignment builtin: its argv, then each name=value pair.

        argv words are written as is; the pair values are shell-encoded.
        """
        out = self._ShTraceBegin()
        if not out:
            return

        out.write(' '.join(cmd_val.argv))

        for pair in cmd_val.pairs:
            out.write(' ' + pair.var_name + '=')
            if pair.rval:
                _PrintShValue(pair.rval, out)

        out.write('\n')
        self.f.write(out.getvalue())

    def OnShAssignment(self, lval, op, val, flags, which_scopes):
        # type: (sh_lvalue_t, assign_op_t, value_t, int, scope_t) -> None
        """Trace a shell assignment as <lvalue><op><value>.

        flags and which_scopes are accepted but not used for the output.
        """
        out = self._ShTraceBegin()
        if not out:
            return

        left = '?'
        UP_lval = lval
        with tagswitch(lval) as case:
            if case(sh_lvalue_e.Var):
                lval = cast(LeftName, UP_lval)
                left = lval.name
            elif case(sh_lvalue_e.Indexed):
                lval = cast(sh_lvalue.Indexed, UP_lval)
                left = '%s[%d]' % (lval.name, lval.index)
            elif case(sh_lvalue_e.Keyed):
                lval = cast(sh_lvalue.Keyed, UP_lval)
                quoted_key = j8_lite.MaybeShellEncode(lval.key)
                left = '%s[%s]' % (lval.name, quoted_key)
        out.write(left)

        # Only two possibilities here
        if op == assign_op_e.PlusEqual:
            out.write('+=')
        else:
            out.write('=')

        _PrintShValue(val, out)

        out.write('\n')
        self.f.write(out.getvalue())

    def OnControlFlow(self, keyword, arg):
        # type: (str, int) -> None
        """Trace a control flow keyword together with its integer argument."""

        # This is NOT affected by xtrace_rich or xtrace_details.  Works in
        # both.
        if not self.exec_opts.xtrace():
            return

        out = mylib.BufWriter()
        out.write(self._EvalPS4('+'))

        # Note: 'return' is equivalent to 'return 0'
        out.write('%s %d\n' % (keyword, arg))

        self.f.write(out.getvalue())

    def PrintSourceCode(self, left_tok, right_tok, arena):
        # type: (Token, Token, alloc.Arena) -> None
        """For (( )) and [[ ]].

        Bash traces these.

        Prints the source text from left_tok through right_tok when both are
        on the same line.  Otherwise prints the rest of the first line,
        followed by ' ...'.  The arena argument is not used.
        """
        out = self._ShTraceBegin()
        if not out:
            return

        first_line = left_tok.line.content
        begin = left_tok.col

        if left_tok.line == right_tok.line:
            end = right_tok.col + right_tok.length
            tail = '\n'
        else:
            # Print first line only, without its trailing newline
            if first_line.endswith('\n'):
                end = -1
            else:
                end = len(first_line)
            tail = ' ...\n'

        out.write(first_line[begin:end])
        out.write(tail)
        self.f.write(out.getvalue())