OILS / data_lang / pretty.py View on Github | oilshell.org

580 lines, 283 significant
1#!/usr/bin/env python2
2"""
3Pretty print Oils values (and later other data/languages as well).
4
5Pretty printing means intelligently choosing whitespace including indentation
6and newline placement, to attempt to display data nicely while staying within a
7maximum line width.
8"""
9
10# ~~~ Architecture ~~~
11#
12# Based on a version of the algorithm from Wadler's "A Prettier Printer".
13#
14# Pretty printing proceeds in two phases:
15#
16# 1. Convert the thing you want to print into a `doc`.
17# 2. Print the `doc` using a standard algorithm.
18#
19# This separation keeps the details of the data you want to print separate from
20# the printing algorithm.
21
22# ~~~ Pretty Printing Overview ~~~
23#
24# If you're just using this file, you don't need to know how pretty printing
25# works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
26# or extend how values are printed, you'll need to know, so here's an overview.
27#
28# You may want to first read Walder's "A Prettier Printer", which this is based
29# off of:
30# https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
31#
32# Some additional reading, though only tangentially related:
33#
34# - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
35# - https://lindig.github.io/papers/strictly-pretty-2000.pdf
36# - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
37# - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
38# - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
39#
40# ~ Constructors ~
41#
42# There are just a few constructors for `doc`, from which everything else is
43# built from.
44#
45# Text(string) prints like:
46# |string
47#
48# Break(string) prints like:
49# |string
50# or like a newline:
51# |
52# |
53# (It does the latter if printed in "flat" mode, and the former otherwise. See
54# Group for details.)
55#
56# Concat(a, b) prints like:
57# |AAAAA
58# |AAABBB
59# |BBBBB
60#
61# Indent(3, a) prints like:
62# |AAAAA
63# | AAAAA
64# | AAAAA
65#
66# Group(a) makes a decision. It either:
67# - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
68# string instead of as a newline, and (ii) every Group nested inside of it is
69# printed flat.
70# - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
71# newlines, and (ii) the Groups inside of it make their own decision about
72# whether to be flat.
73# It makes this decision greedily. If the current line would not overflow if the
74# group printed flat, then it will print flat. This takes into account not only
75# the group itself, but the content before and after it on the same line.
76#
77# ~ Measures ~
78#
79# The algorithm used here is close to the one originally described by Wadler,
80# but it precomputes a "measure" for each node in the `doc`. This "measure"
81# allows each Groups to decide whether to print flat or not without needing to
82# look ahead per Wadler's algorithm. A measure has two pieces of information:
83#
84# - Measure.flat is the width of the doc if it's printed flat.
85# - Measure.nonflat is the width of the doc until the _earliest possible_
86# newline, or -1 if it doesn't contain a Break.
87#
88# Measures are used in two steps. First, they're computed bottom-up on the
89# `doc`, measuring the size of each node. Later, _PrintDoc() stores a measure in
90# each DocFragment. These Measures measure something different: the width from
91# the doc _to the end of the entire doc tree_. This second set of Measures (the
92# ones in the DocFragments) are computed top-down, and they're used to decide
93# for each Group whether to use flat mode or not, without needing to scan ahead.
94
95from __future__ import print_function
96
97from _devbuild.gen.pretty_asdl import doc, doc_e, DocFragment, Measure, MeasuredDoc
98from _devbuild.gen.value_asdl import value, value_e, value_t, value_str
99from data_lang.j8 import ValueIdString, HeapValueId
100from core import ansi
101from frontend import match
102from mycpp import mops
103from mycpp.mylib import log, tagswitch, BufWriter, iteritems
104from typing import cast, List, Dict
105import fastfunc
106import libc
107
108_ = log
109
110################
111# Measurements #
112################
113
114
115def _StrWidth(string):
116 # type: (str) -> int
117 return libc.wcswidth(string)
118
119
120def _EmptyMeasure():
121 # type: () -> Measure
122 """The measure of an empty doc."""
123 return Measure(0, -1)
124
125
126def _FlattenMeasure(measure):
127 # type: (Measure) -> Measure
128 """The measure if its document is rendered flat."""
129 return Measure(measure.flat, -1)
130
131
132def _ConcatMeasure(m1, m2):
133 # type: (Measure, Measure) -> Measure
134 """Compute the measure of concatenated docs.
135
136 If m1 and m2 are the measures of doc1 and doc2,
137 then _ConcatMeasure(m1, m2) is the measure of doc.Concat([doc1, doc2]).
138 This concatenation is associative but not commutative."""
139 if m1.nonflat != -1:
140 return Measure(m1.flat + m2.flat, m1.nonflat)
141 elif m2.nonflat != -1:
142 return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
143 else:
144 return Measure(m1.flat + m2.flat, -1)
145
146
147def _SuffixLen(measure):
148 # type: (Measure) -> int
149 """The width until the earliest possible newline, or end of document."""
150 if measure.nonflat != -1:
151 return measure.nonflat
152 else:
153 return measure.flat
154
155
156####################
157# Doc Construction #
158####################
159
160
161def _Text(string):
162 # type: (str) -> MeasuredDoc
163 """Print `string` (which must not contain a newline)."""
164 return MeasuredDoc(doc.Text(string), Measure(_StrWidth(string), -1))
165
166
167def _Break(string):
168 # type: (str) -> MeasuredDoc
169 """If in `flat` mode, print `string`, otherwise print `\n`."""
170 return MeasuredDoc(doc.Break(string), Measure(_StrWidth(string), 0))
171
172
173def _Indent(indent, mdoc):
174 # type: (int, MeasuredDoc) -> MeasuredDoc
175 """Add `indent` spaces after every newline in `mdoc`."""
176 return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
177
178
179def _Concat(mdocs):
180 # type: (List[MeasuredDoc]) -> MeasuredDoc
181 """Print the mdocs in order (with no space in between)."""
182 measure = _EmptyMeasure()
183 for mdoc in mdocs:
184 measure = _ConcatMeasure(measure, mdoc.measure)
185 return MeasuredDoc(doc.Concat(mdocs), measure)
186
187
188def _Group(mdoc):
189 # type: (MeasuredDoc) -> MeasuredDoc
190 """Print `mdoc`. Use flat mode if `mdoc` will fit on the current line."""
191 return MeasuredDoc(doc.Group(mdoc), mdoc.measure)
192
193
194###################
195# Pretty Printing #
196###################
197
198_DEFAULT_MAX_WIDTH = 80
199_DEFAULT_INDENTATION = 4
200_DEFAULT_USE_STYLES = True
201_DEFAULT_SHOW_TYPE_PREFIX = True
202
203
204class PrettyPrinter(object):
205 """Pretty print an Oils value."""
206
207 def __init__(self):
208 # type: () -> None
209 """Construct a PrettyPrinter with default configuration options.
210
211 Use the Set*() methods for configuration before printing."""
212 self.max_width = _DEFAULT_MAX_WIDTH
213 self.indent = _DEFAULT_INDENTATION
214 self.use_styles = _DEFAULT_USE_STYLES
215 self.show_type_prefix = _DEFAULT_SHOW_TYPE_PREFIX
216
217 def SetMaxWidth(self, max_width):
218 # type: (int) -> None
219 """Set the maximum line width.
220
221 Pretty printing will attempt to (but does not guarantee to) fit the doc
222 within this width.
223 """
224 self.max_width = max_width
225
226 def SetIndent(self, indent):
227 # type: (int) -> None
228 """Set the number of spaces per indentation level."""
229 self.indent = indent
230
231 def SetUseStyles(self, use_styles):
232 # type: (bool) -> None
233 """If true, print with ansi colors and styles. Otherwise print with plain text."""
234 self.use_styles = use_styles
235
236 def SetShowTypePrefix(self, show_type_prefix):
237 # type: (bool) -> None
238 """Set whether or not to print a type before the top-level value.
239
240 E.g. `(Bool) true`"""
241 self.show_type_prefix = show_type_prefix
242
243 def PrintValue(self, val, buf):
244 # type: (value_t, BufWriter) -> None
245 """Pretty print an Oils value to a BufWriter."""
246 constructor = _DocConstructor(self.indent, self.use_styles,
247 self.show_type_prefix)
248 document = constructor.Value(val)
249 self._PrintDoc(document, buf)
250
251 def _Fits(self, prefix_len, group, suffix_measure):
252 # type: (int, doc.Group, Measure) -> bool
253 """Will `group` fit flat on the current line?"""
254 measure = _ConcatMeasure(_FlattenMeasure(group.mdoc.measure),
255 suffix_measure)
256 return prefix_len + _SuffixLen(measure) <= self.max_width
257
258 def _PrintDoc(self, document, buf):
259 # type: (MeasuredDoc, BufWriter) -> None
260 """Pretty print a `pretty.doc` to a BufWriter."""
261
262 # The width of the text we've printed so far on the current line
263 prefix_len = 0
264 # A _stack_ of document fragments to print. Each fragment contains:
265 # - A MeasuredDoc (doc node and its measure, saying how "big" it is)
266 # - The indentation level to print this doc node at.
267 # - Is this doc node being printed in flat mode?
268 # - The measure _from just after the doc node, to the end of the entire document_.
269 # (Call this the suffix_measure)
270 fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
271
272 while len(fragments) > 0:
273 frag = fragments.pop()
274 with tagswitch(frag.mdoc.doc) as case:
275
276 if case(doc_e.Text):
277 text = cast(doc.Text, frag.mdoc.doc)
278 buf.write(text.string)
279 prefix_len += frag.mdoc.measure.flat
280
281 elif case(doc_e.Break):
282 if frag.is_flat:
283 break_str = cast(doc.Break, frag.mdoc.doc).string
284 buf.write(break_str)
285 prefix_len += frag.mdoc.measure.flat
286 else:
287 buf.write('\n')
288 buf.write_spaces(frag.indent)
289 prefix_len = frag.indent
290
291 elif case(doc_e.Indent):
292 indented = cast(doc.Indent, frag.mdoc.doc)
293 fragments.append(
294 DocFragment(indented.mdoc,
295 frag.indent + indented.indent,
296 frag.is_flat, frag.measure))
297
298 elif case(doc_e.Concat):
299 # If we encounter Concat([A, B, C]) with a suffix measure M,
300 # we need to push A,B,C onto the stack in reverse order:
301 # - C, with suffix_measure = B.measure + A.measure + M
302 # - B, with suffix_measure = A.measure + M
303 # - A, with suffix_measure = M
304 concat = cast(doc.Concat, frag.mdoc.doc)
305 measure = frag.measure
306 for mdoc in reversed(concat.mdocs):
307 fragments.append(
308 DocFragment(mdoc, frag.indent, frag.is_flat,
309 measure))
310 measure = _ConcatMeasure(mdoc.measure, measure)
311
312 elif case(doc_e.Group):
313 # If the group would fit on the current line when printed
314 # flat, do so. Otherwise, print it non-flat.
315 group = cast(doc.Group, frag.mdoc.doc)
316 flat = self._Fits(prefix_len, group, frag.measure)
317 fragments.append(
318 DocFragment(group.mdoc, frag.indent, flat,
319 frag.measure))
320
321
322################
323# Value -> Doc #
324################
325
326
327class _DocConstructor:
328 """Converts Oil values into `doc`s, which can then be pretty printed."""
329
330 def __init__(self, indent, use_styles, show_type_prefix):
331 # type: (int, bool, bool) -> None
332 self.indent = indent
333 self.use_styles = use_styles
334 self.show_type_prefix = show_type_prefix
335 self.visiting = {} # type: Dict[int, bool]
336
337 # These can be configurable later
338 self.number_style = ansi.YELLOW
339 self.null_style = ansi.BOLD + ansi.RED
340 self.bool_style = ansi.BOLD + ansi.BLUE
341 self.string_style = ansi.GREEN
342 self.cycle_style = ansi.BOLD + ansi.MAGENTA
343 self.type_style = ansi.CYAN
344
345 def Value(self, val):
346 # type: (value_t) -> MeasuredDoc
347 """Convert an Oils value into a `doc`, which can then be pretty printed."""
348 self.visiting.clear()
349 if self.show_type_prefix:
350 ysh_type = value_str(val.tag(), dot=False)
351 return _Group(
352 _Concat([
353 _Text("(" + ysh_type + ")"),
354 _Break(" "),
355 self._Value(val)
356 ]))
357 else:
358 return self._Value(val)
359
360 def _Styled(self, style, mdoc):
361 # type: (str, MeasuredDoc) -> MeasuredDoc
362 """Apply the ANSI style string to the given node, if use_styles is set."""
363 if self.use_styles:
364 return _Concat([
365 MeasuredDoc(doc.Text(style), _EmptyMeasure()), mdoc,
366 MeasuredDoc(doc.Text(ansi.RESET), _EmptyMeasure())
367 ])
368 else:
369 return mdoc
370
371 def _Surrounded(self, open, mdoc, close):
372 # type: (str, MeasuredDoc, str) -> MeasuredDoc
373 """Print one of two options (using '[', ']' for open, close):
374
375 ```
376 [mdoc]
377 ------
378 [
379 mdoc
380 ]
381 ```
382 """
383 return _Group(
384 _Concat([
385 _Text(open),
386 _Indent(self.indent, _Concat([_Break(""), mdoc])),
387 _Break(""),
388 _Text(close)
389 ]))
390
391 def _SurroundedAndPrefixed(self, open, prefix, sep, mdoc, close):
392 # type: (str, MeasuredDoc, str, MeasuredDoc, str) -> MeasuredDoc
393 """Print one of two options
394 (using '[', 'prefix', ':', 'mdoc', ']' for open, prefix, sep, mdoc, close):
395
396 ```
397 [prefix:mdoc]
398 ------
399 [prefix
400 mdoc
401 ]
402 ```
403 """
404 return _Group(
405 _Concat([
406 _Text(open), prefix,
407 _Indent(self.indent, _Concat([_Break(sep), mdoc])),
408 _Break(""),
409 _Text(close)
410 ]))
411
412 def _Join(self, items, sep, space):
413 # type: (List[MeasuredDoc], str, str) -> MeasuredDoc
414 """Join `items`, using either 'sep+space' or 'sep+newline' between them.
415
416 E.g., if sep and space are ',' and '_', print one of these two cases:
417 ```
418 first,_second,_third
419 ------
420 first,
421 second,
422 third
423 ```
424 """
425 seq = [items[0]]
426 for item in items[1:]:
427 seq.append(_Text(sep))
428 seq.append(_Break(space))
429 seq.append(item)
430 return _Concat(seq)
431
432 def _DictKey(self, s):
433 # type: (str) -> MeasuredDoc
434 if match.IsValidVarName(s):
435 return _Text(s)
436 else:
437 return _Text(fastfunc.J8EncodeString(s, True)) # lossy_json=True
438
439 def _StringLiteral(self, s):
440 # type: (str) -> MeasuredDoc
441 return self._Styled(self.string_style,
442 _Text(fastfunc.J8EncodeString(
443 s, True))) # lossy_json=True
444
445 def _BashStringLiteral(self, s):
446 # type: (str) -> MeasuredDoc
447 return self._Styled(self.string_style,
448 _Text(fastfunc.ShellEncodeString(s, 0)))
449
450 def _YshList(self, vlist):
451 # type: (value.List) -> MeasuredDoc
452 """Print a string literal."""
453 if len(vlist.items) == 0:
454 return _Text("[]")
455 mdocs = [self._Value(item) for item in vlist.items]
456 return self._Surrounded("[", self._Join(mdocs, ",", " "), "]")
457
458 def _YshDict(self, vdict):
459 # type: (value.Dict) -> MeasuredDoc
460 if len(vdict.d) == 0:
461 return _Text("{}")
462 mdocs = [] # type: List[MeasuredDoc]
463 for k, v in iteritems(vdict.d):
464 mdocs.append(
465 _Concat([self._DictKey(k),
466 _Text(": "),
467 self._Value(v)]))
468 return self._Surrounded("{", self._Join(mdocs, ",", " "), "}")
469
470 def _BashArray(self, varray):
471 # type: (value.BashArray) -> MeasuredDoc
472 type_name = self._Styled(self.type_style, _Text("BashArray"))
473 if len(varray.strs) == 0:
474 return _Concat([_Text("("), type_name, _Text(")")])
475 mdocs = [] # type: List[MeasuredDoc]
476 for s in varray.strs:
477 if s is None:
478 mdocs.append(_Text("null"))
479 else:
480 mdocs.append(self._BashStringLiteral(s))
481 return self._SurroundedAndPrefixed("(", type_name, " ",
482 self._Join(mdocs, "", " "), ")")
483
484 def _BashAssoc(self, vassoc):
485 # type: (value.BashAssoc) -> MeasuredDoc
486 type_name = self._Styled(self.type_style, _Text("BashAssoc"))
487 if len(vassoc.d) == 0:
488 return _Concat([_Text("("), type_name, _Text(")")])
489 mdocs = [] # type: List[MeasuredDoc]
490 for k2, v2 in iteritems(vassoc.d):
491 mdocs.append(
492 _Concat([
493 _Text("["),
494 self._BashStringLiteral(k2),
495 _Text("]="),
496 self._BashStringLiteral(v2)
497 ]))
498 return self._SurroundedAndPrefixed("(", type_name, " ",
499 self._Join(mdocs, "", " "), ")")
500
501 def _Value(self, val):
502 # type: (value_t) -> MeasuredDoc
503
504 with tagswitch(val) as case:
505 if case(value_e.Null):
506 return self._Styled(self.null_style, _Text("null"))
507
508 elif case(value_e.Bool):
509 b = cast(value.Bool, val).b
510 return self._Styled(self.bool_style,
511 _Text("true" if b else "false"))
512
513 elif case(value_e.Int):
514 i = cast(value.Int, val).i
515 return self._Styled(self.number_style, _Text(mops.ToStr(i)))
516
517 elif case(value_e.Float):
518 f = cast(value.Float, val).f
519 return self._Styled(self.number_style, _Text(str(f)))
520
521 elif case(value_e.Str):
522 s = cast(value.Str, val).s
523 return self._StringLiteral(s)
524
525 elif case(value_e.Range):
526 r = cast(value.Range, val)
527 return self._Styled(
528 self.number_style,
529 _Concat([
530 _Text(str(r.lower)),
531 _Text(" .. "),
532 _Text(str(r.upper))
533 ]))
534
535 elif case(value_e.List):
536 vlist = cast(value.List, val)
537 heap_id = HeapValueId(vlist)
538 if self.visiting.get(heap_id, False):
539 return _Concat([
540 _Text("["),
541 self._Styled(self.cycle_style, _Text("...")),
542 _Text("]")
543 ])
544 else:
545 self.visiting[heap_id] = True
546 result = self._YshList(vlist)
547 self.visiting[heap_id] = False
548 return result
549
550 elif case(value_e.Dict):
551 vdict = cast(value.Dict, val)
552 heap_id = HeapValueId(vdict)
553 if self.visiting.get(heap_id, False):
554 return _Concat([
555 _Text("{"),
556 self._Styled(self.cycle_style, _Text("...")),
557 _Text("}")
558 ])
559 else:
560 self.visiting[heap_id] = True
561 result = self._YshDict(vdict)
562 self.visiting[heap_id] = False
563 return result
564
565 elif case(value_e.BashArray):
566 varray = cast(value.BashArray, val)
567 return self._BashArray(varray)
568
569 elif case(value_e.BashAssoc):
570 vassoc = cast(value.BashAssoc, val)
571 return self._BashAssoc(vassoc)
572
573 else:
574 ysh_type = value_str(val.tag(), dot=False)
575 id_str = ValueIdString(val)
576 return self._Styled(self.type_style,
577 _Text("<" + ysh_type + id_str + ">"))
578
579
580# vim: sw=4