data_lang/pretty.py

OILS / data_lang / pretty.py View on Github | oilshell.org

589 lines, 289 significant

1	#!/usr/bin/env python2
2	"""
3	Pretty print Oils values (and later other data/languages as well).
4
5	Pretty printing means intelligently choosing whitespace including indentation
6	and newline placement, to attempt to display data nicely while staying within a
7	maximum line width.
8	"""
9
10	# ~~~ Architecture ~~~
11	#
12	# Based on a version of the algorithm from Wadler's "A Prettier Printer".
13	#
14	# Pretty printing proceeds in two phases:
15	#
16	# 1. Convert the thing you want to print into a `doc`.
17	# 2. Print the `doc` using a standard algorithm.
18	#
19	# This separation keeps the details of the data you want to print separate from
20	# the printing algorithm.
21
22	# ~~~ Pretty Printing Overview ~~~
23	#
24	# If you're just using this file, you don't need to know how pretty printing
25	# works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
26	# or extend how values are printed, you'll need to know, so here's an overview.
27	#
28	# You may want to first read Walder's "A Prettier Printer", which this is based
29	# off of:
30	# https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
31	#
32	# Some additional reading, though only tangentially related:
33	#
34	# - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
35	# - https://lindig.github.io/papers/strictly-pretty-2000.pdf
36	# - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
37	# - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
38	# - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
39	#
40	# ~ Constructors ~
41	#
42	# There are just a few constructors for `doc`, from which everything else is
43	# built from.
44	#
45	# Text(string) prints like:
46	# \|string
47	#
48	# Break(string) prints like:
49	# \|string
50	# or like a newline:
51	# \|
52	# \|
53	# (It does the latter if printed in "flat" mode, and the former otherwise. See
54	# Group for details.)
55	#
56	# Concat(a, b) prints like:
57	# \|AAAAA
58	# \|AAABBB
59	# \|BBBBB
60	#
61	# Indent(3, a) prints like:
62	# \|AAAAA
63	# \| AAAAA
64	# \| AAAAA
65	#
66	# Group(a) makes a decision. It either:
67	# - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
68	# string instead of as a newline, and (ii) every Group nested inside of it is
69	# printed flat.
70	# - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
71	# newlines, and (ii) the Groups inside of it make their own decision about
72	# whether to be flat.
73	# It makes this decision greedily. If the current line would not overflow if the
74	# group printed flat, then it will print flat. This takes into account not only
75	# the group itself, but the content before and after it on the same line.
76	#
77	# ~ Measures ~
78	#
79	# The algorithm used here is close to the one originally described by Wadler,
80	# but it precomputes a "measure" for each node in the `doc`. This "measure"
81	# allows each Groups to decide whether to print flat or not without needing to
82	# look ahead per Wadler's algorithm. A measure has two pieces of information:
83	#
84	# - Measure.flat is the width of the doc if it's printed flat.
85	# - Measure.nonflat is the width of the doc until the _earliest possible_
86	# newline, or -1 if it doesn't contain a Break.
87	#
88	# Measures are used in two steps. First, they're computed bottom-up on the
89	# `doc`, measuring the size of each node. Later, _PrintDoc() stores a measure in
90	# each DocFragment. These Measures measure something different: the width from
91	# the doc _to the end of the entire doc tree_. This second set of Measures (the
92	# ones in the DocFragments) are computed top-down, and they're used to decide
93	# for each Group whether to use flat mode or not, without needing to scan ahead.
94
95	from __future__ import print_function
96
97	from _devbuild.gen.pretty_asdl import doc, doc_e, DocFragment, Measure, MeasuredDoc
98	from _devbuild.gen.value_asdl import value, value_e, value_t, value_str
99	from data_lang.j8 import ValueIdString, HeapValueId
100	from core import ansi
101	from frontend import match
102	from mycpp import mops
103	from mycpp.mylib import log, tagswitch, BufWriter, iteritems
104	from typing import cast, List, Dict
105	import fastfunc
106	import libc
107
108	_ = log
109
110	################
111	# Measurements #
112	################
113
114
115	def TryUnicodeWidth(s):
116	# type: (str) -> int
117	try:
118	width = libc.wcswidth(s)
119	except UnicodeError:
120	# e.g. en_US.UTF-8 locale missing, just return the number of bytes
121	width = len(s)
122
123	if width == -1: # non-printable wide char
124	return len(s)
125
126	return width
127
128
129	def _EmptyMeasure():
130	# type: () -> Measure
131	"""The measure of an empty doc."""
132	return Measure(0, -1)
133
134
135	def _FlattenMeasure(measure):
136	# type: (Measure) -> Measure
137	"""The measure if its document is rendered flat."""
138	return Measure(measure.flat, -1)
139
140
141	def _ConcatMeasure(m1, m2):
142	# type: (Measure, Measure) -> Measure
143	"""Compute the measure of concatenated docs.
144
145	If m1 and m2 are the measures of doc1 and doc2,
146	then _ConcatMeasure(m1, m2) is the measure of doc.Concat([doc1, doc2]).
147	This concatenation is associative but not commutative."""
148	if m1.nonflat != -1:
149	return Measure(m1.flat + m2.flat, m1.nonflat)
150	elif m2.nonflat != -1:
151	return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
152	else:
153	return Measure(m1.flat + m2.flat, -1)
154
155
156	def _SuffixLen(measure):
157	# type: (Measure) -> int
158	"""The width until the earliest possible newline, or end of document."""
159	if measure.nonflat != -1:
160	return measure.nonflat
161	else:
162	return measure.flat
163
164
165	####################
166	# Doc Construction #
167	####################
168
169
170	def _Text(string):
171	# type: (str) -> MeasuredDoc
172	"""Print `string` (which must not contain a newline)."""
173	return MeasuredDoc(doc.Text(string), Measure(TryUnicodeWidth(string), -1))
174
175
176	def _Break(string):
177	# type: (str) -> MeasuredDoc
178	"""If in `flat` mode, print `string`, otherwise print `\n`."""
179	return MeasuredDoc(doc.Break(string), Measure(TryUnicodeWidth(string), 0))
180
181
182	def _Indent(indent, mdoc):
183	# type: (int, MeasuredDoc) -> MeasuredDoc
184	"""Add `indent` spaces after every newline in `mdoc`."""
185	return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
186
187
188	def _Concat(mdocs):
189	# type: (List[MeasuredDoc]) -> MeasuredDoc
190	"""Print the mdocs in order (with no space in between)."""
191	measure = _EmptyMeasure()
192	for mdoc in mdocs:
193	measure = _ConcatMeasure(measure, mdoc.measure)
194	return MeasuredDoc(doc.Concat(mdocs), measure)
195
196
197	def _Group(mdoc):
198	# type: (MeasuredDoc) -> MeasuredDoc
199	"""Print `mdoc`. Use flat mode if `mdoc` will fit on the current line."""
200	return MeasuredDoc(doc.Group(mdoc), mdoc.measure)
201
202
203	###################
204	# Pretty Printing #
205	###################
206
207	_DEFAULT_MAX_WIDTH = 80
208	_DEFAULT_INDENTATION = 4
209	_DEFAULT_USE_STYLES = True
210	_DEFAULT_SHOW_TYPE_PREFIX = True
211
212
213	class PrettyPrinter(object):
214	"""Pretty print an Oils value."""
215
216	def __init__(self):
217	# type: () -> None
218	"""Construct a PrettyPrinter with default configuration options.
219
220	Use the Set*() methods for configuration before printing."""
221	self.max_width = _DEFAULT_MAX_WIDTH
222	self.indent = _DEFAULT_INDENTATION
223	self.use_styles = _DEFAULT_USE_STYLES
224	self.show_type_prefix = _DEFAULT_SHOW_TYPE_PREFIX
225
226	def SetMaxWidth(self, max_width):
227	# type: (int) -> None
228	"""Set the maximum line width.
229
230	Pretty printing will attempt to (but does not guarantee to) fit the doc
231	within this width.
232	"""
233	self.max_width = max_width
234
235	def SetIndent(self, indent):
236	# type: (int) -> None
237	"""Set the number of spaces per indentation level."""
238	self.indent = indent
239
240	def SetUseStyles(self, use_styles):
241	# type: (bool) -> None
242	"""If true, print with ansi colors and styles. Otherwise print with plain text."""
243	self.use_styles = use_styles
244
245	def SetShowTypePrefix(self, show_type_prefix):
246	# type: (bool) -> None
247	"""Set whether or not to print a type before the top-level value.
248
249	E.g. `(Bool) true`"""
250	self.show_type_prefix = show_type_prefix
251
252	def PrintValue(self, val, buf):
253	# type: (value_t, BufWriter) -> None
254	"""Pretty print an Oils value to a BufWriter."""
255	constructor = _DocConstructor(self.indent, self.use_styles,
256	self.show_type_prefix)
257	document = constructor.Value(val)
258	self._PrintDoc(document, buf)
259
260	def _Fits(self, prefix_len, group, suffix_measure):
261	# type: (int, doc.Group, Measure) -> bool
262	"""Will `group` fit flat on the current line?"""
263	measure = _ConcatMeasure(_FlattenMeasure(group.mdoc.measure),
264	suffix_measure)
265	return prefix_len + _SuffixLen(measure) <= self.max_width
266
267	def _PrintDoc(self, document, buf):
268	# type: (MeasuredDoc, BufWriter) -> None
269	"""Pretty print a `pretty.doc` to a BufWriter."""
270
271	# The width of the text we've printed so far on the current line
272	prefix_len = 0
273	# A _stack_ of document fragments to print. Each fragment contains:
274	# - A MeasuredDoc (doc node and its measure, saying how "big" it is)
275	# - The indentation level to print this doc node at.
276	# - Is this doc node being printed in flat mode?
277	# - The measure _from just after the doc node, to the end of the entire document_.
278	# (Call this the suffix_measure)
279	fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
280
281	while len(fragments) > 0:
282	frag = fragments.pop()
283	with tagswitch(frag.mdoc.doc) as case:
284
285	if case(doc_e.Text):
286	text = cast(doc.Text, frag.mdoc.doc)
287	buf.write(text.string)
288	prefix_len += frag.mdoc.measure.flat
289
290	elif case(doc_e.Break):
291	if frag.is_flat:
292	break_str = cast(doc.Break, frag.mdoc.doc).string
293	buf.write(break_str)
294	prefix_len += frag.mdoc.measure.flat
295	else:
296	buf.write('\n')
297	buf.write_spaces(frag.indent)
298	prefix_len = frag.indent
299
300	elif case(doc_e.Indent):
301	indented = cast(doc.Indent, frag.mdoc.doc)
302	fragments.append(
303	DocFragment(indented.mdoc,
304	frag.indent + indented.indent,
305	frag.is_flat, frag.measure))
306
307	elif case(doc_e.Concat):
308	# If we encounter Concat([A, B, C]) with a suffix measure M,
309	# we need to push A,B,C onto the stack in reverse order:
310	# - C, with suffix_measure = B.measure + A.measure + M
311	# - B, with suffix_measure = A.measure + M
312	# - A, with suffix_measure = M
313	concat = cast(doc.Concat, frag.mdoc.doc)
314	measure = frag.measure
315	for mdoc in reversed(concat.mdocs):
316	fragments.append(
317	DocFragment(mdoc, frag.indent, frag.is_flat,
318	measure))
319	measure = _ConcatMeasure(mdoc.measure, measure)
320
321	elif case(doc_e.Group):
322	# If the group would fit on the current line when printed
323	# flat, do so. Otherwise, print it non-flat.
324	group = cast(doc.Group, frag.mdoc.doc)
325	flat = self._Fits(prefix_len, group, frag.measure)
326	fragments.append(
327	DocFragment(group.mdoc, frag.indent, flat,
328	frag.measure))
329
330
331	################
332	# Value -> Doc #
333	################
334
335
336	class _DocConstructor:
337	"""Converts Oil values into `doc`s, which can then be pretty printed."""
338
339	def __init__(self, indent, use_styles, show_type_prefix):
340	# type: (int, bool, bool) -> None
341	self.indent = indent
342	self.use_styles = use_styles
343	self.show_type_prefix = show_type_prefix
344	self.visiting = {} # type: Dict[int, bool]
345
346	# These can be configurable later
347	self.number_style = ansi.YELLOW
348	self.null_style = ansi.BOLD + ansi.RED
349	self.bool_style = ansi.BOLD + ansi.BLUE
350	self.string_style = ansi.GREEN
351	self.cycle_style = ansi.BOLD + ansi.MAGENTA
352	self.type_style = ansi.CYAN
353
354	def Value(self, val):
355	# type: (value_t) -> MeasuredDoc
356	"""Convert an Oils value into a `doc`, which can then be pretty printed."""
357	self.visiting.clear()
358	if self.show_type_prefix:
359	ysh_type = value_str(val.tag(), dot=False)
360	return _Group(
361	_Concat([
362	_Text("(" + ysh_type + ")"),
363	_Break(" "),
364	self._Value(val)
365	]))
366	else:
367	return self._Value(val)
368
369	def _Styled(self, style, mdoc):
370	# type: (str, MeasuredDoc) -> MeasuredDoc
371	"""Apply the ANSI style string to the given node, if use_styles is set."""
372	if self.use_styles:
373	return _Concat([
374	MeasuredDoc(doc.Text(style), _EmptyMeasure()), mdoc,
375	MeasuredDoc(doc.Text(ansi.RESET), _EmptyMeasure())
376	])
377	else:
378	return mdoc
379
380	def _Surrounded(self, open, mdoc, close):
381	# type: (str, MeasuredDoc, str) -> MeasuredDoc
382	"""Print one of two options (using '[', ']' for open, close):
383
384	```
385	[mdoc]
386	------
387	[
388	mdoc
389	]
390	```
391	"""
392	return _Group(
393	_Concat([
394	_Text(open),
395	_Indent(self.indent, _Concat([_Break(""), mdoc])),
396	_Break(""),
397	_Text(close)
398	]))
399
400	def _SurroundedAndPrefixed(self, open, prefix, sep, mdoc, close):
401	# type: (str, MeasuredDoc, str, MeasuredDoc, str) -> MeasuredDoc
402	"""Print one of two options
403	(using '[', 'prefix', ':', 'mdoc', ']' for open, prefix, sep, mdoc, close):
404
405	```
406	[prefix:mdoc]
407	------
408	[prefix
409	mdoc
410	]
411	```
412	"""
413	return _Group(
414	_Concat([
415	_Text(open), prefix,
416	_Indent(self.indent, _Concat([_Break(sep), mdoc])),
417	_Break(""),
418	_Text(close)
419	]))
420
421	def _Join(self, items, sep, space):
422	# type: (List[MeasuredDoc], str, str) -> MeasuredDoc
423	"""Join `items`, using either 'sep+space' or 'sep+newline' between them.
424
425	E.g., if sep and space are ',' and '_', print one of these two cases:
426	```
427	first,_second,_third
428	------
429	first,
430	second,
431	third
432	```
433	"""
434	seq = [items[0]]
435	for item in items[1:]:
436	seq.append(_Text(sep))
437	seq.append(_Break(space))
438	seq.append(item)
439	return _Concat(seq)
440
441	def _DictKey(self, s):
442	# type: (str) -> MeasuredDoc
443	if match.IsValidVarName(s):
444	return _Text(s)
445	else:
446	return _Text(fastfunc.J8EncodeString(s, True)) # lossy_json=True
447
448	def _StringLiteral(self, s):
449	# type: (str) -> MeasuredDoc
450	return self._Styled(self.string_style,
451	_Text(fastfunc.J8EncodeString(
452	s, True))) # lossy_json=True
453
454	def _BashStringLiteral(self, s):
455	# type: (str) -> MeasuredDoc
456	return self._Styled(self.string_style,
457	_Text(fastfunc.ShellEncodeString(s, 0)))
458
459	def _YshList(self, vlist):
460	# type: (value.List) -> MeasuredDoc
461	"""Print a string literal."""
462	if len(vlist.items) == 0:
463	return _Text("[]")
464	mdocs = [self._Value(item) for item in vlist.items]
465	return self._Surrounded("[", self._Join(mdocs, ",", " "), "]")
466
467	def _YshDict(self, vdict):
468	# type: (value.Dict) -> MeasuredDoc
469	if len(vdict.d) == 0:
470	return _Text("{}")
471	mdocs = [] # type: List[MeasuredDoc]
472	for k, v in iteritems(vdict.d):
473	mdocs.append(
474	_Concat([self._DictKey(k),
475	_Text(": "),
476	self._Value(v)]))
477	return self._Surrounded("{", self._Join(mdocs, ",", " "), "}")
478
479	def _BashArray(self, varray):
480	# type: (value.BashArray) -> MeasuredDoc
481	type_name = self._Styled(self.type_style, _Text("BashArray"))
482	if len(varray.strs) == 0:
483	return _Concat([_Text("("), type_name, _Text(")")])
484	mdocs = [] # type: List[MeasuredDoc]
485	for s in varray.strs:
486	if s is None:
487	mdocs.append(_Text("null"))
488	else:
489	mdocs.append(self._BashStringLiteral(s))
490	return self._SurroundedAndPrefixed("(", type_name, " ",
491	self._Join(mdocs, "", " "), ")")
492
493	def _BashAssoc(self, vassoc):
494	# type: (value.BashAssoc) -> MeasuredDoc
495	type_name = self._Styled(self.type_style, _Text("BashAssoc"))
496	if len(vassoc.d) == 0:
497	return _Concat([_Text("("), type_name, _Text(")")])
498	mdocs = [] # type: List[MeasuredDoc]
499	for k2, v2 in iteritems(vassoc.d):
500	mdocs.append(
501	_Concat([
502	_Text("["),
503	self._BashStringLiteral(k2),
504	_Text("]="),
505	self._BashStringLiteral(v2)
506	]))
507	return self._SurroundedAndPrefixed("(", type_name, " ",
508	self._Join(mdocs, "", " "), ")")
509
510	def _Value(self, val):
511	# type: (value_t) -> MeasuredDoc
512
513	with tagswitch(val) as case:
514	if case(value_e.Null):
515	return self._Styled(self.null_style, _Text("null"))
516
517	elif case(value_e.Bool):
518	b = cast(value.Bool, val).b
519	return self._Styled(self.bool_style,
520	_Text("true" if b else "false"))
521
522	elif case(value_e.Int):
523	i = cast(value.Int, val).i
524	return self._Styled(self.number_style, _Text(mops.ToStr(i)))
525
526	elif case(value_e.Float):
527	f = cast(value.Float, val).f
528	return self._Styled(self.number_style, _Text(str(f)))
529
530	elif case(value_e.Str):
531	s = cast(value.Str, val).s
532	return self._StringLiteral(s)
533
534	elif case(value_e.Range):
535	r = cast(value.Range, val)
536	return self._Styled(
537	self.number_style,
538	_Concat([
539	_Text(str(r.lower)),
540	_Text(" .. "),
541	_Text(str(r.upper))
542	]))
543
544	elif case(value_e.List):
545	vlist = cast(value.List, val)
546	heap_id = HeapValueId(vlist)
547	if self.visiting.get(heap_id, False):
548	return _Concat([
549	_Text("["),
550	self._Styled(self.cycle_style, _Text("...")),
551	_Text("]")
552	])
553	else:
554	self.visiting[heap_id] = True
555	result = self._YshList(vlist)
556	self.visiting[heap_id] = False
557	return result
558
559	elif case(value_e.Dict):
560	vdict = cast(value.Dict, val)
561	heap_id = HeapValueId(vdict)
562	if self.visiting.get(heap_id, False):
563	return _Concat([
564	_Text("{"),
565	self._Styled(self.cycle_style, _Text("...")),
566	_Text("}")
567	])
568	else:
569	self.visiting[heap_id] = True
570	result = self._YshDict(vdict)
571	self.visiting[heap_id] = False
572	return result
573
574	elif case(value_e.BashArray):
575	varray = cast(value.BashArray, val)
576	return self._BashArray(varray)
577
578	elif case(value_e.BashAssoc):
579	vassoc = cast(value.BashAssoc, val)
580	return self._BashAssoc(vassoc)
581
582	else:
583	ysh_type = value_str(val.tag(), dot=False)
584	id_str = ValueIdString(val)
585	return self._Styled(self.type_style,
586	_Text("<" + ysh_type + id_str + ">"))
587
588
589	# vim: sw=4