OILS / data_lang / pretty.py View on Github | oilshell.org

589 lines, 289 significant
1#!/usr/bin/env python2
2"""
3Pretty print Oils values (and later other data/languages as well).
4
5Pretty printing means intelligently choosing whitespace including indentation
6and newline placement, to attempt to display data nicely while staying within a
7maximum line width.
8"""
9
10# ~~~ Architecture ~~~
11#
12# Based on a version of the algorithm from Wadler's "A Prettier Printer".
13#
14# Pretty printing proceeds in two phases:
15#
16# 1. Convert the thing you want to print into a `doc`.
17# 2. Print the `doc` using a standard algorithm.
18#
19# This separation keeps the details of the data you want to print separate from
20# the printing algorithm.
21
22# ~~~ Pretty Printing Overview ~~~
23#
24# If you're just using this file, you don't need to know how pretty printing
25# works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
26# or extend how values are printed, you'll need to know, so here's an overview.
27#
28# You may want to first read Walder's "A Prettier Printer", which this is based
29# off of:
30# https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
31#
32# Some additional reading, though only tangentially related:
33#
34# - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
35# - https://lindig.github.io/papers/strictly-pretty-2000.pdf
36# - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
37# - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
38# - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
39#
40# ~ Constructors ~
41#
42# There are just a few constructors for `doc`, from which everything else is
43# built from.
44#
45# Text(string) prints like:
46# |string
47#
48# Break(string) prints like:
49# |string
50# or like a newline:
51# |
52# |
53# (It does the latter if printed in "flat" mode, and the former otherwise. See
54# Group for details.)
55#
56# Concat(a, b) prints like:
57# |AAAAA
58# |AAABBB
59# |BBBBB
60#
61# Indent(3, a) prints like:
62# |AAAAA
63# | AAAAA
64# | AAAAA
65#
66# Group(a) makes a decision. It either:
67# - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
68# string instead of as a newline, and (ii) every Group nested inside of it is
69# printed flat.
70# - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
71# newlines, and (ii) the Groups inside of it make their own decision about
72# whether to be flat.
73# It makes this decision greedily. If the current line would not overflow if the
74# group printed flat, then it will print flat. This takes into account not only
75# the group itself, but the content before and after it on the same line.
76#
77# ~ Measures ~
78#
79# The algorithm used here is close to the one originally described by Wadler,
80# but it precomputes a "measure" for each node in the `doc`. This "measure"
81# allows each Groups to decide whether to print flat or not without needing to
82# look ahead per Wadler's algorithm. A measure has two pieces of information:
83#
84# - Measure.flat is the width of the doc if it's printed flat.
85# - Measure.nonflat is the width of the doc until the _earliest possible_
86# newline, or -1 if it doesn't contain a Break.
87#
88# Measures are used in two steps. First, they're computed bottom-up on the
89# `doc`, measuring the size of each node. Later, _PrintDoc() stores a measure in
90# each DocFragment. These Measures measure something different: the width from
91# the doc _to the end of the entire doc tree_. This second set of Measures (the
92# ones in the DocFragments) are computed top-down, and they're used to decide
93# for each Group whether to use flat mode or not, without needing to scan ahead.
94
95from __future__ import print_function
96
97from _devbuild.gen.pretty_asdl import doc, doc_e, DocFragment, Measure, MeasuredDoc
98from _devbuild.gen.value_asdl import value, value_e, value_t, value_str
99from data_lang.j8 import ValueIdString, HeapValueId
100from core import ansi
101from frontend import match
102from mycpp import mops
103from mycpp.mylib import log, tagswitch, BufWriter, iteritems
104from typing import cast, List, Dict
105import fastfunc
106import libc
107
108_ = log
109
110################
111# Measurements #
112################
113
114
115def TryUnicodeWidth(s):
116 # type: (str) -> int
117 try:
118 width = libc.wcswidth(s)
119 except UnicodeError:
120 # e.g. en_US.UTF-8 locale missing, just return the number of bytes
121 width = len(s)
122
123 if width == -1: # non-printable wide char
124 return len(s)
125
126 return width
127
128
129def _EmptyMeasure():
130 # type: () -> Measure
131 """The measure of an empty doc."""
132 return Measure(0, -1)
133
134
135def _FlattenMeasure(measure):
136 # type: (Measure) -> Measure
137 """The measure if its document is rendered flat."""
138 return Measure(measure.flat, -1)
139
140
141def _ConcatMeasure(m1, m2):
142 # type: (Measure, Measure) -> Measure
143 """Compute the measure of concatenated docs.
144
145 If m1 and m2 are the measures of doc1 and doc2,
146 then _ConcatMeasure(m1, m2) is the measure of doc.Concat([doc1, doc2]).
147 This concatenation is associative but not commutative."""
148 if m1.nonflat != -1:
149 return Measure(m1.flat + m2.flat, m1.nonflat)
150 elif m2.nonflat != -1:
151 return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
152 else:
153 return Measure(m1.flat + m2.flat, -1)
154
155
156def _SuffixLen(measure):
157 # type: (Measure) -> int
158 """The width until the earliest possible newline, or end of document."""
159 if measure.nonflat != -1:
160 return measure.nonflat
161 else:
162 return measure.flat
163
164
165####################
166# Doc Construction #
167####################
168
169
170def _Text(string):
171 # type: (str) -> MeasuredDoc
172 """Print `string` (which must not contain a newline)."""
173 return MeasuredDoc(doc.Text(string), Measure(TryUnicodeWidth(string), -1))
174
175
176def _Break(string):
177 # type: (str) -> MeasuredDoc
178 """If in `flat` mode, print `string`, otherwise print `\n`."""
179 return MeasuredDoc(doc.Break(string), Measure(TryUnicodeWidth(string), 0))
180
181
182def _Indent(indent, mdoc):
183 # type: (int, MeasuredDoc) -> MeasuredDoc
184 """Add `indent` spaces after every newline in `mdoc`."""
185 return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
186
187
188def _Concat(mdocs):
189 # type: (List[MeasuredDoc]) -> MeasuredDoc
190 """Print the mdocs in order (with no space in between)."""
191 measure = _EmptyMeasure()
192 for mdoc in mdocs:
193 measure = _ConcatMeasure(measure, mdoc.measure)
194 return MeasuredDoc(doc.Concat(mdocs), measure)
195
196
197def _Group(mdoc):
198 # type: (MeasuredDoc) -> MeasuredDoc
199 """Print `mdoc`. Use flat mode if `mdoc` will fit on the current line."""
200 return MeasuredDoc(doc.Group(mdoc), mdoc.measure)
201
202
203###################
204# Pretty Printing #
205###################
206
207_DEFAULT_MAX_WIDTH = 80
208_DEFAULT_INDENTATION = 4
209_DEFAULT_USE_STYLES = True
210_DEFAULT_SHOW_TYPE_PREFIX = True
211
212
213class PrettyPrinter(object):
214 """Pretty print an Oils value."""
215
216 def __init__(self):
217 # type: () -> None
218 """Construct a PrettyPrinter with default configuration options.
219
220 Use the Set*() methods for configuration before printing."""
221 self.max_width = _DEFAULT_MAX_WIDTH
222 self.indent = _DEFAULT_INDENTATION
223 self.use_styles = _DEFAULT_USE_STYLES
224 self.show_type_prefix = _DEFAULT_SHOW_TYPE_PREFIX
225
226 def SetMaxWidth(self, max_width):
227 # type: (int) -> None
228 """Set the maximum line width.
229
230 Pretty printing will attempt to (but does not guarantee to) fit the doc
231 within this width.
232 """
233 self.max_width = max_width
234
235 def SetIndent(self, indent):
236 # type: (int) -> None
237 """Set the number of spaces per indentation level."""
238 self.indent = indent
239
240 def SetUseStyles(self, use_styles):
241 # type: (bool) -> None
242 """If true, print with ansi colors and styles. Otherwise print with plain text."""
243 self.use_styles = use_styles
244
245 def SetShowTypePrefix(self, show_type_prefix):
246 # type: (bool) -> None
247 """Set whether or not to print a type before the top-level value.
248
249 E.g. `(Bool) true`"""
250 self.show_type_prefix = show_type_prefix
251
252 def PrintValue(self, val, buf):
253 # type: (value_t, BufWriter) -> None
254 """Pretty print an Oils value to a BufWriter."""
255 constructor = _DocConstructor(self.indent, self.use_styles,
256 self.show_type_prefix)
257 document = constructor.Value(val)
258 self._PrintDoc(document, buf)
259
260 def _Fits(self, prefix_len, group, suffix_measure):
261 # type: (int, doc.Group, Measure) -> bool
262 """Will `group` fit flat on the current line?"""
263 measure = _ConcatMeasure(_FlattenMeasure(group.mdoc.measure),
264 suffix_measure)
265 return prefix_len + _SuffixLen(measure) <= self.max_width
266
267 def _PrintDoc(self, document, buf):
268 # type: (MeasuredDoc, BufWriter) -> None
269 """Pretty print a `pretty.doc` to a BufWriter."""
270
271 # The width of the text we've printed so far on the current line
272 prefix_len = 0
273 # A _stack_ of document fragments to print. Each fragment contains:
274 # - A MeasuredDoc (doc node and its measure, saying how "big" it is)
275 # - The indentation level to print this doc node at.
276 # - Is this doc node being printed in flat mode?
277 # - The measure _from just after the doc node, to the end of the entire document_.
278 # (Call this the suffix_measure)
279 fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
280
281 while len(fragments) > 0:
282 frag = fragments.pop()
283 with tagswitch(frag.mdoc.doc) as case:
284
285 if case(doc_e.Text):
286 text = cast(doc.Text, frag.mdoc.doc)
287 buf.write(text.string)
288 prefix_len += frag.mdoc.measure.flat
289
290 elif case(doc_e.Break):
291 if frag.is_flat:
292 break_str = cast(doc.Break, frag.mdoc.doc).string
293 buf.write(break_str)
294 prefix_len += frag.mdoc.measure.flat
295 else:
296 buf.write('\n')
297 buf.write_spaces(frag.indent)
298 prefix_len = frag.indent
299
300 elif case(doc_e.Indent):
301 indented = cast(doc.Indent, frag.mdoc.doc)
302 fragments.append(
303 DocFragment(indented.mdoc,
304 frag.indent + indented.indent,
305 frag.is_flat, frag.measure))
306
307 elif case(doc_e.Concat):
308 # If we encounter Concat([A, B, C]) with a suffix measure M,
309 # we need to push A,B,C onto the stack in reverse order:
310 # - C, with suffix_measure = B.measure + A.measure + M
311 # - B, with suffix_measure = A.measure + M
312 # - A, with suffix_measure = M
313 concat = cast(doc.Concat, frag.mdoc.doc)
314 measure = frag.measure
315 for mdoc in reversed(concat.mdocs):
316 fragments.append(
317 DocFragment(mdoc, frag.indent, frag.is_flat,
318 measure))
319 measure = _ConcatMeasure(mdoc.measure, measure)
320
321 elif case(doc_e.Group):
322 # If the group would fit on the current line when printed
323 # flat, do so. Otherwise, print it non-flat.
324 group = cast(doc.Group, frag.mdoc.doc)
325 flat = self._Fits(prefix_len, group, frag.measure)
326 fragments.append(
327 DocFragment(group.mdoc, frag.indent, flat,
328 frag.measure))
329
330
331################
332# Value -> Doc #
333################
334
335
336class _DocConstructor:
337 """Converts Oil values into `doc`s, which can then be pretty printed."""
338
339 def __init__(self, indent, use_styles, show_type_prefix):
340 # type: (int, bool, bool) -> None
341 self.indent = indent
342 self.use_styles = use_styles
343 self.show_type_prefix = show_type_prefix
344 self.visiting = {} # type: Dict[int, bool]
345
346 # These can be configurable later
347 self.number_style = ansi.YELLOW
348 self.null_style = ansi.BOLD + ansi.RED
349 self.bool_style = ansi.BOLD + ansi.BLUE
350 self.string_style = ansi.GREEN
351 self.cycle_style = ansi.BOLD + ansi.MAGENTA
352 self.type_style = ansi.CYAN
353
354 def Value(self, val):
355 # type: (value_t) -> MeasuredDoc
356 """Convert an Oils value into a `doc`, which can then be pretty printed."""
357 self.visiting.clear()
358 if self.show_type_prefix:
359 ysh_type = value_str(val.tag(), dot=False)
360 return _Group(
361 _Concat([
362 _Text("(" + ysh_type + ")"),
363 _Break(" "),
364 self._Value(val)
365 ]))
366 else:
367 return self._Value(val)
368
369 def _Styled(self, style, mdoc):
370 # type: (str, MeasuredDoc) -> MeasuredDoc
371 """Apply the ANSI style string to the given node, if use_styles is set."""
372 if self.use_styles:
373 return _Concat([
374 MeasuredDoc(doc.Text(style), _EmptyMeasure()), mdoc,
375 MeasuredDoc(doc.Text(ansi.RESET), _EmptyMeasure())
376 ])
377 else:
378 return mdoc
379
380 def _Surrounded(self, open, mdoc, close):
381 # type: (str, MeasuredDoc, str) -> MeasuredDoc
382 """Print one of two options (using '[', ']' for open, close):
383
384 ```
385 [mdoc]
386 ------
387 [
388 mdoc
389 ]
390 ```
391 """
392 return _Group(
393 _Concat([
394 _Text(open),
395 _Indent(self.indent, _Concat([_Break(""), mdoc])),
396 _Break(""),
397 _Text(close)
398 ]))
399
400 def _SurroundedAndPrefixed(self, open, prefix, sep, mdoc, close):
401 # type: (str, MeasuredDoc, str, MeasuredDoc, str) -> MeasuredDoc
402 """Print one of two options
403 (using '[', 'prefix', ':', 'mdoc', ']' for open, prefix, sep, mdoc, close):
404
405 ```
406 [prefix:mdoc]
407 ------
408 [prefix
409 mdoc
410 ]
411 ```
412 """
413 return _Group(
414 _Concat([
415 _Text(open), prefix,
416 _Indent(self.indent, _Concat([_Break(sep), mdoc])),
417 _Break(""),
418 _Text(close)
419 ]))
420
421 def _Join(self, items, sep, space):
422 # type: (List[MeasuredDoc], str, str) -> MeasuredDoc
423 """Join `items`, using either 'sep+space' or 'sep+newline' between them.
424
425 E.g., if sep and space are ',' and '_', print one of these two cases:
426 ```
427 first,_second,_third
428 ------
429 first,
430 second,
431 third
432 ```
433 """
434 seq = [items[0]]
435 for item in items[1:]:
436 seq.append(_Text(sep))
437 seq.append(_Break(space))
438 seq.append(item)
439 return _Concat(seq)
440
441 def _DictKey(self, s):
442 # type: (str) -> MeasuredDoc
443 if match.IsValidVarName(s):
444 return _Text(s)
445 else:
446 return _Text(fastfunc.J8EncodeString(s, True)) # lossy_json=True
447
448 def _StringLiteral(self, s):
449 # type: (str) -> MeasuredDoc
450 return self._Styled(self.string_style,
451 _Text(fastfunc.J8EncodeString(
452 s, True))) # lossy_json=True
453
454 def _BashStringLiteral(self, s):
455 # type: (str) -> MeasuredDoc
456 return self._Styled(self.string_style,
457 _Text(fastfunc.ShellEncodeString(s, 0)))
458
459 def _YshList(self, vlist):
460 # type: (value.List) -> MeasuredDoc
461 """Print a string literal."""
462 if len(vlist.items) == 0:
463 return _Text("[]")
464 mdocs = [self._Value(item) for item in vlist.items]
465 return self._Surrounded("[", self._Join(mdocs, ",", " "), "]")
466
467 def _YshDict(self, vdict):
468 # type: (value.Dict) -> MeasuredDoc
469 if len(vdict.d) == 0:
470 return _Text("{}")
471 mdocs = [] # type: List[MeasuredDoc]
472 for k, v in iteritems(vdict.d):
473 mdocs.append(
474 _Concat([self._DictKey(k),
475 _Text(": "),
476 self._Value(v)]))
477 return self._Surrounded("{", self._Join(mdocs, ",", " "), "}")
478
479 def _BashArray(self, varray):
480 # type: (value.BashArray) -> MeasuredDoc
481 type_name = self._Styled(self.type_style, _Text("BashArray"))
482 if len(varray.strs) == 0:
483 return _Concat([_Text("("), type_name, _Text(")")])
484 mdocs = [] # type: List[MeasuredDoc]
485 for s in varray.strs:
486 if s is None:
487 mdocs.append(_Text("null"))
488 else:
489 mdocs.append(self._BashStringLiteral(s))
490 return self._SurroundedAndPrefixed("(", type_name, " ",
491 self._Join(mdocs, "", " "), ")")
492
493 def _BashAssoc(self, vassoc):
494 # type: (value.BashAssoc) -> MeasuredDoc
495 type_name = self._Styled(self.type_style, _Text("BashAssoc"))
496 if len(vassoc.d) == 0:
497 return _Concat([_Text("("), type_name, _Text(")")])
498 mdocs = [] # type: List[MeasuredDoc]
499 for k2, v2 in iteritems(vassoc.d):
500 mdocs.append(
501 _Concat([
502 _Text("["),
503 self._BashStringLiteral(k2),
504 _Text("]="),
505 self._BashStringLiteral(v2)
506 ]))
507 return self._SurroundedAndPrefixed("(", type_name, " ",
508 self._Join(mdocs, "", " "), ")")
509
510 def _Value(self, val):
511 # type: (value_t) -> MeasuredDoc
512
513 with tagswitch(val) as case:
514 if case(value_e.Null):
515 return self._Styled(self.null_style, _Text("null"))
516
517 elif case(value_e.Bool):
518 b = cast(value.Bool, val).b
519 return self._Styled(self.bool_style,
520 _Text("true" if b else "false"))
521
522 elif case(value_e.Int):
523 i = cast(value.Int, val).i
524 return self._Styled(self.number_style, _Text(mops.ToStr(i)))
525
526 elif case(value_e.Float):
527 f = cast(value.Float, val).f
528 return self._Styled(self.number_style, _Text(str(f)))
529
530 elif case(value_e.Str):
531 s = cast(value.Str, val).s
532 return self._StringLiteral(s)
533
534 elif case(value_e.Range):
535 r = cast(value.Range, val)
536 return self._Styled(
537 self.number_style,
538 _Concat([
539 _Text(str(r.lower)),
540 _Text(" .. "),
541 _Text(str(r.upper))
542 ]))
543
544 elif case(value_e.List):
545 vlist = cast(value.List, val)
546 heap_id = HeapValueId(vlist)
547 if self.visiting.get(heap_id, False):
548 return _Concat([
549 _Text("["),
550 self._Styled(self.cycle_style, _Text("...")),
551 _Text("]")
552 ])
553 else:
554 self.visiting[heap_id] = True
555 result = self._YshList(vlist)
556 self.visiting[heap_id] = False
557 return result
558
559 elif case(value_e.Dict):
560 vdict = cast(value.Dict, val)
561 heap_id = HeapValueId(vdict)
562 if self.visiting.get(heap_id, False):
563 return _Concat([
564 _Text("{"),
565 self._Styled(self.cycle_style, _Text("...")),
566 _Text("}")
567 ])
568 else:
569 self.visiting[heap_id] = True
570 result = self._YshDict(vdict)
571 self.visiting[heap_id] = False
572 return result
573
574 elif case(value_e.BashArray):
575 varray = cast(value.BashArray, val)
576 return self._BashArray(varray)
577
578 elif case(value_e.BashAssoc):
579 vassoc = cast(value.BashAssoc, val)
580 return self._BashAssoc(vassoc)
581
582 else:
583 ysh_type = value_str(val.tag(), dot=False)
584 id_str = ValueIdString(val)
585 return self._Styled(self.type_style,
586 _Text("<" + ysh_type + id_str + ">"))
587
588
589# vim: sw=4