data_lang/pretty.py

OILS / data_lang / pretty.py View on Github | oilshell.org

580 lines, 283 significant

1	#!/usr/bin/env python2
2	"""
3	Pretty print Oils values (and later other data/languages as well).
4
5	Pretty printing means intelligently choosing whitespace including indentation
6	and newline placement, to attempt to display data nicely while staying within a
7	maximum line width.
8	"""
9
10	# ~~~ Architecture ~~~
11	#
12	# Based on a version of the algorithm from Wadler's "A Prettier Printer".
13	#
14	# Pretty printing proceeds in two phases:
15	#
16	# 1. Convert the thing you want to print into a `doc`.
17	# 2. Print the `doc` using a standard algorithm.
18	#
19	# This separation keeps the details of the data you want to print separate from
20	# the printing algorithm.
21
22	# ~~~ Pretty Printing Overview ~~~
23	#
24	# If you're just using this file, you don't need to know how pretty printing
25	# works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
26	# or extend how values are printed, you'll need to know, so here's an overview.
27	#
28	# You may want to first read Walder's "A Prettier Printer", which this is based
29	# off of:
30	# https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
31	#
32	# Some additional reading, though only tangentially related:
33	#
34	# - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
35	# - https://lindig.github.io/papers/strictly-pretty-2000.pdf
36	# - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
37	# - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
38	# - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
39	#
40	# ~ Constructors ~
41	#
42	# There are just a few constructors for `doc`, from which everything else is
43	# built from.
44	#
45	# Text(string) prints like:
46	# \|string
47	#
48	# Break(string) prints like:
49	# \|string
50	# or like a newline:
51	# \|
52	# \|
53	# (It does the latter if printed in "flat" mode, and the former otherwise. See
54	# Group for details.)
55	#
56	# Concat(a, b) prints like:
57	# \|AAAAA
58	# \|AAABBB
59	# \|BBBBB
60	#
61	# Indent(3, a) prints like:
62	# \|AAAAA
63	# \| AAAAA
64	# \| AAAAA
65	#
66	# Group(a) makes a decision. It either:
67	# - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
68	# string instead of as a newline, and (ii) every Group nested inside of it is
69	# printed flat.
70	# - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
71	# newlines, and (ii) the Groups inside of it make their own decision about
72	# whether to be flat.
73	# It makes this decision greedily. If the current line would not overflow if the
74	# group printed flat, then it will print flat. This takes into account not only
75	# the group itself, but the content before and after it on the same line.
76	#
77	# ~ Measures ~
78	#
79	# The algorithm used here is close to the one originally described by Wadler,
80	# but it precomputes a "measure" for each node in the `doc`. This "measure"
81	# allows each Groups to decide whether to print flat or not without needing to
82	# look ahead per Wadler's algorithm. A measure has two pieces of information:
83	#
84	# - Measure.flat is the width of the doc if it's printed flat.
85	# - Measure.nonflat is the width of the doc until the _earliest possible_
86	# newline, or -1 if it doesn't contain a Break.
87	#
88	# Measures are used in two steps. First, they're computed bottom-up on the
89	# `doc`, measuring the size of each node. Later, _PrintDoc() stores a measure in
90	# each DocFragment. These Measures measure something different: the width from
91	# the doc _to the end of the entire doc tree_. This second set of Measures (the
92	# ones in the DocFragments) are computed top-down, and they're used to decide
93	# for each Group whether to use flat mode or not, without needing to scan ahead.
94
95	from __future__ import print_function
96
97	from _devbuild.gen.pretty_asdl import doc, doc_e, DocFragment, Measure, MeasuredDoc
98	from _devbuild.gen.value_asdl import value, value_e, value_t, value_str
99	from data_lang.j8 import ValueIdString, HeapValueId
100	from core import ansi
101	from frontend import match
102	from mycpp import mops
103	from mycpp.mylib import log, tagswitch, BufWriter, iteritems
104	from typing import cast, List, Dict
105	import fastfunc
106	import libc
107
108	_ = log
109
110	################
111	# Measurements #
112	################
113
114
115	def _StrWidth(string):
116	# type: (str) -> int
117	return libc.wcswidth(string)
118
119
120	def _EmptyMeasure():
121	# type: () -> Measure
122	"""The measure of an empty doc."""
123	return Measure(0, -1)
124
125
126	def _FlattenMeasure(measure):
127	# type: (Measure) -> Measure
128	"""The measure if its document is rendered flat."""
129	return Measure(measure.flat, -1)
130
131
132	def _ConcatMeasure(m1, m2):
133	# type: (Measure, Measure) -> Measure
134	"""Compute the measure of concatenated docs.
135
136	If m1 and m2 are the measures of doc1 and doc2,
137	then _ConcatMeasure(m1, m2) is the measure of doc.Concat([doc1, doc2]).
138	This concatenation is associative but not commutative."""
139	if m1.nonflat != -1:
140	return Measure(m1.flat + m2.flat, m1.nonflat)
141	elif m2.nonflat != -1:
142	return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
143	else:
144	return Measure(m1.flat + m2.flat, -1)
145
146
147	def _SuffixLen(measure):
148	# type: (Measure) -> int
149	"""The width until the earliest possible newline, or end of document."""
150	if measure.nonflat != -1:
151	return measure.nonflat
152	else:
153	return measure.flat
154
155
156	####################
157	# Doc Construction #
158	####################
159
160
161	def _Text(string):
162	# type: (str) -> MeasuredDoc
163	"""Print `string` (which must not contain a newline)."""
164	return MeasuredDoc(doc.Text(string), Measure(_StrWidth(string), -1))
165
166
167	def _Break(string):
168	# type: (str) -> MeasuredDoc
169	"""If in `flat` mode, print `string`, otherwise print `\n`."""
170	return MeasuredDoc(doc.Break(string), Measure(_StrWidth(string), 0))
171
172
173	def _Indent(indent, mdoc):
174	# type: (int, MeasuredDoc) -> MeasuredDoc
175	"""Add `indent` spaces after every newline in `mdoc`."""
176	return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
177
178
179	def _Concat(mdocs):
180	# type: (List[MeasuredDoc]) -> MeasuredDoc
181	"""Print the mdocs in order (with no space in between)."""
182	measure = _EmptyMeasure()
183	for mdoc in mdocs:
184	measure = _ConcatMeasure(measure, mdoc.measure)
185	return MeasuredDoc(doc.Concat(mdocs), measure)
186
187
188	def _Group(mdoc):
189	# type: (MeasuredDoc) -> MeasuredDoc
190	"""Print `mdoc`. Use flat mode if `mdoc` will fit on the current line."""
191	return MeasuredDoc(doc.Group(mdoc), mdoc.measure)
192
193
194	###################
195	# Pretty Printing #
196	###################
197
198	_DEFAULT_MAX_WIDTH = 80
199	_DEFAULT_INDENTATION = 4
200	_DEFAULT_USE_STYLES = True
201	_DEFAULT_SHOW_TYPE_PREFIX = True
202
203
204	class PrettyPrinter(object):
205	"""Pretty print an Oils value."""
206
207	def __init__(self):
208	# type: () -> None
209	"""Construct a PrettyPrinter with default configuration options.
210
211	Use the Set*() methods for configuration before printing."""
212	self.max_width = _DEFAULT_MAX_WIDTH
213	self.indent = _DEFAULT_INDENTATION
214	self.use_styles = _DEFAULT_USE_STYLES
215	self.show_type_prefix = _DEFAULT_SHOW_TYPE_PREFIX
216
217	def SetMaxWidth(self, max_width):
218	# type: (int) -> None
219	"""Set the maximum line width.
220
221	Pretty printing will attempt to (but does not guarantee to) fit the doc
222	within this width.
223	"""
224	self.max_width = max_width
225
226	def SetIndent(self, indent):
227	# type: (int) -> None
228	"""Set the number of spaces per indentation level."""
229	self.indent = indent
230
231	def SetUseStyles(self, use_styles):
232	# type: (bool) -> None
233	"""If true, print with ansi colors and styles. Otherwise print with plain text."""
234	self.use_styles = use_styles
235
236	def SetShowTypePrefix(self, show_type_prefix):
237	# type: (bool) -> None
238	"""Set whether or not to print a type before the top-level value.
239
240	E.g. `(Bool) true`"""
241	self.show_type_prefix = show_type_prefix
242
243	def PrintValue(self, val, buf):
244	# type: (value_t, BufWriter) -> None
245	"""Pretty print an Oils value to a BufWriter."""
246	constructor = _DocConstructor(self.indent, self.use_styles,
247	self.show_type_prefix)
248	document = constructor.Value(val)
249	self._PrintDoc(document, buf)
250
251	def _Fits(self, prefix_len, group, suffix_measure):
252	# type: (int, doc.Group, Measure) -> bool
253	"""Will `group` fit flat on the current line?"""
254	measure = _ConcatMeasure(_FlattenMeasure(group.mdoc.measure),
255	suffix_measure)
256	return prefix_len + _SuffixLen(measure) <= self.max_width
257
258	def _PrintDoc(self, document, buf):
259	# type: (MeasuredDoc, BufWriter) -> None
260	"""Pretty print a `pretty.doc` to a BufWriter."""
261
262	# The width of the text we've printed so far on the current line
263	prefix_len = 0
264	# A _stack_ of document fragments to print. Each fragment contains:
265	# - A MeasuredDoc (doc node and its measure, saying how "big" it is)
266	# - The indentation level to print this doc node at.
267	# - Is this doc node being printed in flat mode?
268	# - The measure _from just after the doc node, to the end of the entire document_.
269	# (Call this the suffix_measure)
270	fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
271
272	while len(fragments) > 0:
273	frag = fragments.pop()
274	with tagswitch(frag.mdoc.doc) as case:
275
276	if case(doc_e.Text):
277	text = cast(doc.Text, frag.mdoc.doc)
278	buf.write(text.string)
279	prefix_len += frag.mdoc.measure.flat
280
281	elif case(doc_e.Break):
282	if frag.is_flat:
283	break_str = cast(doc.Break, frag.mdoc.doc).string
284	buf.write(break_str)
285	prefix_len += frag.mdoc.measure.flat
286	else:
287	buf.write('\n')
288	buf.write_spaces(frag.indent)
289	prefix_len = frag.indent
290
291	elif case(doc_e.Indent):
292	indented = cast(doc.Indent, frag.mdoc.doc)
293	fragments.append(
294	DocFragment(indented.mdoc,
295	frag.indent + indented.indent,
296	frag.is_flat, frag.measure))
297
298	elif case(doc_e.Concat):
299	# If we encounter Concat([A, B, C]) with a suffix measure M,
300	# we need to push A,B,C onto the stack in reverse order:
301	# - C, with suffix_measure = B.measure + A.measure + M
302	# - B, with suffix_measure = A.measure + M
303	# - A, with suffix_measure = M
304	concat = cast(doc.Concat, frag.mdoc.doc)
305	measure = frag.measure
306	for mdoc in reversed(concat.mdocs):
307	fragments.append(
308	DocFragment(mdoc, frag.indent, frag.is_flat,
309	measure))
310	measure = _ConcatMeasure(mdoc.measure, measure)
311
312	elif case(doc_e.Group):
313	# If the group would fit on the current line when printed
314	# flat, do so. Otherwise, print it non-flat.
315	group = cast(doc.Group, frag.mdoc.doc)
316	flat = self._Fits(prefix_len, group, frag.measure)
317	fragments.append(
318	DocFragment(group.mdoc, frag.indent, flat,
319	frag.measure))
320
321
322	################
323	# Value -> Doc #
324	################
325
326
327	class _DocConstructor:
328	"""Converts Oil values into `doc`s, which can then be pretty printed."""
329
330	def __init__(self, indent, use_styles, show_type_prefix):
331	# type: (int, bool, bool) -> None
332	self.indent = indent
333	self.use_styles = use_styles
334	self.show_type_prefix = show_type_prefix
335	self.visiting = {} # type: Dict[int, bool]
336
337	# These can be configurable later
338	self.number_style = ansi.YELLOW
339	self.null_style = ansi.BOLD + ansi.RED
340	self.bool_style = ansi.BOLD + ansi.BLUE
341	self.string_style = ansi.GREEN
342	self.cycle_style = ansi.BOLD + ansi.MAGENTA
343	self.type_style = ansi.CYAN
344
345	def Value(self, val):
346	# type: (value_t) -> MeasuredDoc
347	"""Convert an Oils value into a `doc`, which can then be pretty printed."""
348	self.visiting.clear()
349	if self.show_type_prefix:
350	ysh_type = value_str(val.tag(), dot=False)
351	return _Group(
352	_Concat([
353	_Text("(" + ysh_type + ")"),
354	_Break(" "),
355	self._Value(val)
356	]))
357	else:
358	return self._Value(val)
359
360	def _Styled(self, style, mdoc):
361	# type: (str, MeasuredDoc) -> MeasuredDoc
362	"""Apply the ANSI style string to the given node, if use_styles is set."""
363	if self.use_styles:
364	return _Concat([
365	MeasuredDoc(doc.Text(style), _EmptyMeasure()), mdoc,
366	MeasuredDoc(doc.Text(ansi.RESET), _EmptyMeasure())
367	])
368	else:
369	return mdoc
370
371	def _Surrounded(self, open, mdoc, close):
372	# type: (str, MeasuredDoc, str) -> MeasuredDoc
373	"""Print one of two options (using '[', ']' for open, close):
374
375	```
376	[mdoc]
377	------
378	[
379	mdoc
380	]
381	```
382	"""
383	return _Group(
384	_Concat([
385	_Text(open),
386	_Indent(self.indent, _Concat([_Break(""), mdoc])),
387	_Break(""),
388	_Text(close)
389	]))
390
391	def _SurroundedAndPrefixed(self, open, prefix, sep, mdoc, close):
392	# type: (str, MeasuredDoc, str, MeasuredDoc, str) -> MeasuredDoc
393	"""Print one of two options
394	(using '[', 'prefix', ':', 'mdoc', ']' for open, prefix, sep, mdoc, close):
395
396	```
397	[prefix:mdoc]
398	------
399	[prefix
400	mdoc
401	]
402	```
403	"""
404	return _Group(
405	_Concat([
406	_Text(open), prefix,
407	_Indent(self.indent, _Concat([_Break(sep), mdoc])),
408	_Break(""),
409	_Text(close)
410	]))
411
412	def _Join(self, items, sep, space):
413	# type: (List[MeasuredDoc], str, str) -> MeasuredDoc
414	"""Join `items`, using either 'sep+space' or 'sep+newline' between them.
415
416	E.g., if sep and space are ',' and '_', print one of these two cases:
417	```
418	first,_second,_third
419	------
420	first,
421	second,
422	third
423	```
424	"""
425	seq = [items[0]]
426	for item in items[1:]:
427	seq.append(_Text(sep))
428	seq.append(_Break(space))
429	seq.append(item)
430	return _Concat(seq)
431
432	def _DictKey(self, s):
433	# type: (str) -> MeasuredDoc
434	if match.IsValidVarName(s):
435	return _Text(s)
436	else:
437	return _Text(fastfunc.J8EncodeString(s, True)) # lossy_json=True
438
439	def _StringLiteral(self, s):
440	# type: (str) -> MeasuredDoc
441	return self._Styled(self.string_style,
442	_Text(fastfunc.J8EncodeString(
443	s, True))) # lossy_json=True
444
445	def _BashStringLiteral(self, s):
446	# type: (str) -> MeasuredDoc
447	return self._Styled(self.string_style,
448	_Text(fastfunc.ShellEncodeString(s, 0)))
449
450	def _YshList(self, vlist):
451	# type: (value.List) -> MeasuredDoc
452	"""Print a string literal."""
453	if len(vlist.items) == 0:
454	return _Text("[]")
455	mdocs = [self._Value(item) for item in vlist.items]
456	return self._Surrounded("[", self._Join(mdocs, ",", " "), "]")
457
458	def _YshDict(self, vdict):
459	# type: (value.Dict) -> MeasuredDoc
460	if len(vdict.d) == 0:
461	return _Text("{}")
462	mdocs = [] # type: List[MeasuredDoc]
463	for k, v in iteritems(vdict.d):
464	mdocs.append(
465	_Concat([self._DictKey(k),
466	_Text(": "),
467	self._Value(v)]))
468	return self._Surrounded("{", self._Join(mdocs, ",", " "), "}")
469
470	def _BashArray(self, varray):
471	# type: (value.BashArray) -> MeasuredDoc
472	type_name = self._Styled(self.type_style, _Text("BashArray"))
473	if len(varray.strs) == 0:
474	return _Concat([_Text("("), type_name, _Text(")")])
475	mdocs = [] # type: List[MeasuredDoc]
476	for s in varray.strs:
477	if s is None:
478	mdocs.append(_Text("null"))
479	else:
480	mdocs.append(self._BashStringLiteral(s))
481	return self._SurroundedAndPrefixed("(", type_name, " ",
482	self._Join(mdocs, "", " "), ")")
483
484	def _BashAssoc(self, vassoc):
485	# type: (value.BashAssoc) -> MeasuredDoc
486	type_name = self._Styled(self.type_style, _Text("BashAssoc"))
487	if len(vassoc.d) == 0:
488	return _Concat([_Text("("), type_name, _Text(")")])
489	mdocs = [] # type: List[MeasuredDoc]
490	for k2, v2 in iteritems(vassoc.d):
491	mdocs.append(
492	_Concat([
493	_Text("["),
494	self._BashStringLiteral(k2),
495	_Text("]="),
496	self._BashStringLiteral(v2)
497	]))
498	return self._SurroundedAndPrefixed("(", type_name, " ",
499	self._Join(mdocs, "", " "), ")")
500
501	def _Value(self, val):
502	# type: (value_t) -> MeasuredDoc
503
504	with tagswitch(val) as case:
505	if case(value_e.Null):
506	return self._Styled(self.null_style, _Text("null"))
507
508	elif case(value_e.Bool):
509	b = cast(value.Bool, val).b
510	return self._Styled(self.bool_style,
511	_Text("true" if b else "false"))
512
513	elif case(value_e.Int):
514	i = cast(value.Int, val).i
515	return self._Styled(self.number_style, _Text(mops.ToStr(i)))
516
517	elif case(value_e.Float):
518	f = cast(value.Float, val).f
519	return self._Styled(self.number_style, _Text(str(f)))
520
521	elif case(value_e.Str):
522	s = cast(value.Str, val).s
523	return self._StringLiteral(s)
524
525	elif case(value_e.Range):
526	r = cast(value.Range, val)
527	return self._Styled(
528	self.number_style,
529	_Concat([
530	_Text(str(r.lower)),
531	_Text(" .. "),
532	_Text(str(r.upper))
533	]))
534
535	elif case(value_e.List):
536	vlist = cast(value.List, val)
537	heap_id = HeapValueId(vlist)
538	if self.visiting.get(heap_id, False):
539	return _Concat([
540	_Text("["),
541	self._Styled(self.cycle_style, _Text("...")),
542	_Text("]")
543	])
544	else:
545	self.visiting[heap_id] = True
546	result = self._YshList(vlist)
547	self.visiting[heap_id] = False
548	return result
549
550	elif case(value_e.Dict):
551	vdict = cast(value.Dict, val)
552	heap_id = HeapValueId(vdict)
553	if self.visiting.get(heap_id, False):
554	return _Concat([
555	_Text("{"),
556	self._Styled(self.cycle_style, _Text("...")),
557	_Text("}")
558	])
559	else:
560	self.visiting[heap_id] = True
561	result = self._YshDict(vdict)
562	self.visiting[heap_id] = False
563	return result
564
565	elif case(value_e.BashArray):
566	varray = cast(value.BashArray, val)
567	return self._BashArray(varray)
568
569	elif case(value_e.BashAssoc):
570	vassoc = cast(value.BashAssoc, val)
571	return self._BashAssoc(vassoc)
572
573	else:
574	ysh_type = value_str(val.tag(), dot=False)
575	id_str = ValueIdString(val)
576	return self._Styled(self.type_style,
577	_Text("<" + ysh_type + id_str + ">"))
578
579
580	# vim: sw=4