OILS / doctools / oils_doc.py View on Github | oilshell.org

640 lines, 350 significant
1#!/usr/bin/env python2
2"""
oils_doc.py: HTML processing for Oil documentation.

Plugins:
  ExpandLinks expands $xref, etc.
  PygmentsPlugin -- for ```python, ```sh, ```c, etc.
  HelpTopicsPlugin -- for help-index.html

  ShPromptPlugin -- understands $ echo hi, but doesn't run anything
  ShellSession -- (stub, not implemented yet) will run shell snippets and
                  cache the output
12"""
13from __future__ import print_function
14
15import cgi
16import cStringIO
17import re
18import sys
19
20from doctools.util import log
21from lazylex import html
22
23
def RemoveComments(s):
  """Remove <!-- comments --> from an HTML string.

  Comments whose text contains 'REPLACE' are kept: doc/release-index.md has
  <!-- REPLACE_WITH_DATE --> etc.

  Args:
    s: HTML string.

  Returns:
    The HTML string without the removed comments.
  """
  f = cStringIO.StringIO()
  out = html.Output(s, f)

  # The token stream only yields end positions, so track where the current
  # token started.
  pos = 0

  for tok_id, end_pos in html.ValidTokens(s):
    if tok_id == html.Comment:
      value = s[pos:end_pos]
      if 'REPLACE' not in value:
        # Copy the input up to the comment, then jump past the comment.
        out.PrintUntil(pos)
        out.SkipTo(end_pos)
    pos = end_pos

  out.PrintTheRest()
  return f.getvalue()
44
45
class _Abbrev(object):
  """Callable that fills a %-style template containing %(value)s."""

  def __init__(self, fmt):
    # fmt: format string, e.g. '/blog/tags.html?tag=%(value)s#%(value)s'
    self.fmt = fmt

  def __call__(self, value):
    """Return the template with every %(value)s replaced by `value`."""
    substitutions = dict(value=value)
    return self.fmt % substitutions
52
53
# Maps a shortcut name (the word after '$' in an href such as $xref:bash) to
# the _Abbrev that builds the real URL from the shortcut's argument.
_ABBREVIATIONS = {
    'xref': _Abbrev('/cross-ref.html?tag=%(value)s#%(value)s'),

    # 'help' is an alias for osh-help, for backward compatibility.  These are
    # relative, so they link to the same version of the docs.
    #
    # TODO: Remove all of these broken links!
    'help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'osh-help': _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help': _Abbrev('oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    'chap-type-method':
        _Abbrev('chap-type-method.html?topic=%(value)s#%(value)s'),
    'chap-plugin': _Abbrev('chap-plugin.html?topic=%(value)s#%(value)s'),

    # For the blog: always point at the latest release.
    'osh-help-latest':
        _Abbrev('//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help-latest':
        _Abbrev('//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'),

    # Also for the blog
    'oils-doc': _Abbrev('//www.oilshell.org/release/latest/doc/%(value)s'),

    'blog-tag': _Abbrev('/blog/tags.html?tag=%(value)s#%(value)s'),
    'oils-commit': _Abbrev('https://github.com/oilshell/oil/commit/%(value)s'),
    'oils-src':
        _Abbrev('https://github.com/oilshell/oil/blob/master/%(value)s'),
    'blog-code-src':
        _Abbrev('https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    'issue': _Abbrev('https://github.com/oilshell/oil/issues/%(value)s'),
    'wiki': _Abbrev('https://github.com/oilshell/oil/wiki/%(value)s'),
}

# Backward compatibility: the old oil-* names share the oils-* expanders.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})

# Matches a shortcut like $xref or $xref:foo.  Group 1 is the name (lowercase
# letters and hyphens); group 2 is the optional argument after the colon.
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', flags=re.VERBOSE)
107
108
def ExpandLinks(s):
  """Expand link shortcuts such as $xref:bash inside <a href="...">.

  For each <a> start tag whose href value matches _SHORTCUT_RE, the href value
  is replaced by the URL built by the matching _ABBREVIATIONS entry.  When the
  shortcut has no ':arg' part, the text between <a ...> and </a> is used as
  the argument instead.

  Args:
    s: HTML string.

  Returns:
    The HTML string with shortcut hrefs replaced.

  Raises:
    RuntimeError: if a shortcut names an abbreviation that isn't in
      _ABBREVIATIONS.
  """
  f = cStringIO.StringIO()
  out = html.Output(s, f)

  tag_lexer = html.TagLexer(s)

  # Start of the current token.  The token stream only yields end positions,
  # so this must be advanced on EVERY path through the loop body.
  pos = 0

  it = html.ValidTokens(s)
  while True:
    try:
      tok_id, end_pos = next(it)
    except StopIteration:
      break

    if tok_id == html.StartTag:
      tag_lexer.Reset(pos, end_pos)
      if tag_lexer.TagName() == 'a':
        open_tag_right = end_pos

        href_start, href_end = tag_lexer.GetSpanForAttrValue('href')
        # An <a> without href (href_start == -1) is left alone.  This is a
        # nested 'if' rather than 'continue' so that 'pos = end_pos' below
        # still runs; otherwise the next tag would be lexed from a stale
        # start position.
        if href_start != -1:
          # TODO: Need to unescape like GetAttr()
          href = s[href_start:href_end]

          m = _SHORTCUT_RE.match(href)
          if m:
            abbrev_name, arg = m.groups()
            if not arg:
              # No ':arg' in the href, so use the link text as the argument.
              close_tag_left, close_tag_right = html.ReadUntilEndTag(
                  it, tag_lexer, 'a')
              arg = s[open_tag_right:close_tag_left]
              # The iterator has now consumed everything through </a>, so the
              # next token starts at the right edge of that end tag.
              end_pos = close_tag_right

            # Hack so we can write [Wiki Page]($wiki) and have the link look
            # like /Wiki-Page/
            if abbrev_name == 'wiki':
              arg = arg.replace(' ', '-')

            func = _ABBREVIATIONS.get(abbrev_name)
            if not func:
              raise RuntimeError('Invalid abbreviation %r' % abbrev_name)

            # Replace only the href value; the rest of the tag is untouched.
            out.PrintUntil(href_start)
            f.write(cgi.escape(func(arg)))
            out.SkipTo(href_end)

    pos = end_pos

  out.PrintTheRest()

  return f.getvalue()
168
169
class _Plugin(object):
  """Base class for highlighters that work on the span s[start_pos:end_pos]."""

  def __init__(self, s, start_pos, end_pos):
    self.s, self.start_pos, self.end_pos = s, start_pos, end_pos

  def PrintHighlighted(self, out):
    """Subclasses override this to write their highlighted span to `out`."""
    raise NotImplementedError()
179
180
# One line of text: everything up to a newline, plus the newline itself if
# there is one.
_LINE_RE = re.compile(r'(.*) \n?', flags=re.VERBOSE)

# A line that starts with a shell prompt.  Groups:
#   1: the prompt, 2: the command, 3: a tab-completion marker (optional),
#   4: an end-of-line comment (optional)
_PROMPT_LINE_RE = re.compile(r'''
(\S* \$)[ ] # flush-left non-whitespace, then dollar and space is a prompt
(.*?) # arbitrary text
(?: # don't highlight tab completion
  (&lt;TAB&gt;) # it's HTML escaped!!!
  .*?
)?
(?:
  [ ][ ]([#] .*) # optionally: two spaces then a comment
)?
$
''', flags=re.VERBOSE)


# A line without a prompt that ends in a comment.  Group 1 is the comment.
_EOL_COMMENT_RE = re.compile(r'''
.*? # arbitrary text
[ ][ ]([#] .*) # two spaces then a comment
$
''', flags=re.VERBOSE)

# A line that is a comment from its first character on.
_COMMENT_LINE_RE = re.compile(r'#.*')
205
206
def Lines(s, start_pos, end_pos):
  """Yield the end position of each line in s[start_pos:end_pos].

  A line's end position is just past its newline, or end_pos for a final line
  that has no newline.

  Raises:
    RuntimeError: if _LINE_RE fails to match (not expected to happen).
  """
  cursor = start_pos
  while cursor < end_pos:
    line_match = _LINE_RE.match(s, cursor, end_pos)
    if not line_match:
      raise RuntimeError("Should have matched a line")
    cursor = line_match.end(0)
    yield cursor
218
219
class ShPromptPlugin(_Plugin):
  """
  Highlight shell prompts, commands, tab completion markers and comments by
  wrapping them in <span> tags.
  """

  def _Wrap(self, out, m, group, open_tag):
    """Surround the text matched by `group` of `m` with open_tag ... </span>."""
    out.PrintUntil(m.start(group))
    out.Print(open_tag)
    out.PrintUntil(m.end(group))
    out.Print('</span>')

  def _HighlightLine(self, out, line_start, line_end):
    """Emit spans for one line.  The first pattern that matches wins."""
    s = self.s

    # 1. The whole line is a comment
    whole_comment = _COMMENT_LINE_RE.match(s, line_start, line_end)
    if whole_comment:
      self._Wrap(out, whole_comment, 0, '<span class="sh-comment">')
      return

    # 2. Prompt and command, then optional tab completion and comment
    prompt = _PROMPT_LINE_RE.match(s, line_start, line_end)
    if prompt:
      #log('MATCH %r', prompt.groups())
      self._Wrap(out, prompt, 1, '<span class="sh-prompt">')
      self._Wrap(out, prompt, 2, '<span class="sh-command">')
      if prompt.group(3):
        self._Wrap(out, prompt, 3, '<span class="sh-tab-complete">')
      if prompt.group(4):
        self._Wrap(out, prompt, 4, '<span class="sh-comment">')
      return

    # 3. No prompt, but the line ends with a comment
    trailing = _EOL_COMMENT_RE.match(s, line_start, line_end)
    if trailing:
      self._Wrap(out, trailing, 1, '<span class="sh-comment">')

  def PrintHighlighted(self, out):
    """Write the span to `out` line by line, adding <span> tags."""
    line_start = self.start_pos
    for line_end in Lines(self.s, self.start_pos, self.end_pos):
      self._HighlightLine(out, line_start, line_end)
      # Whatever is left of the line (at least the newline) is copied as is.
      out.PrintUntil(line_end)
      line_start = line_end
272
273
class HelpTopicsPlugin(_Plugin):
  """
  Highlight blocks of help-index.md.
  """

  def __init__(self, s, start_pos, end_pos, chapter):
    super(HelpTopicsPlugin, self).__init__(s, start_pos, end_pos)
    # Passed through to help_gen.IndexLineToHtml() for every line.
    self.chapter = chapter

  def PrintHighlighted(self, out):
    """Replace each line of the span with its HTML from help_gen.

    Returns:
      The debug list that help_gen.IndexLineToHtml() was given for every line.
    """
    from doctools import help_gen

    debug_out = []

    line_start = self.start_pos
    for line_end in Lines(self.s, self.start_pos, self.end_pos):
      # NOTE: IndexLineToHtml accepts an HTML ESCAPED line.  It's valid to
      # just add tags and leave everything alone.
      raw_line = self.s[line_start:line_end]
      replacement = help_gen.IndexLineToHtml(self.chapter, raw_line, debug_out)

      # None means the line is kept as it is.
      if replacement is not None:
        out.PrintUntil(line_start)
        out.Print(replacement)
        out.SkipTo(line_end)

      line_start = line_end

    return debug_out
303
304
class PygmentsPlugin(_Plugin):
  """Highlight the span with Pygments, for ```python, ```sh, ```c, etc.

  The span s[start_pos:end_pos] is the HTML-escaped text inside
  <pre><code class="language-X">; `lang` is the Pygments lexer name.
  """

  def __init__(self, s, start_pos, end_pos, lang):
    _Plugin.__init__(self, s, start_pos, end_pos)
    self.lang = lang

  def PrintHighlighted(self, out):
    """Print a complete highlighted block for the span to `out`.

    The caller in this file drops the original <pre><code> wrapper, so this
    method has to print a whole replacement block, not just the inner text.
    """
    try:
      from pygments import lexers
      from pygments import formatters
      from pygments import highlight
    except ImportError:
      log("Warning: Couldn't import pygments, so skipping syntax highlighting")
      # Skipping highlighting must not drop the code itself.  The span is
      # still HTML-escaped, so it can be printed verbatim inside a plain
      # <pre><code> wrapper.
      out.Print('<pre><code>')
      out.Print(self.s[self.start_pos:self.end_pos])
      out.Print('</code></pre>')
      return

    # unescape before passing to pygments, which will escape
    code = html.ToText(self.s, self.start_pos, self.end_pos)

    lexer = lexers.get_lexer_by_name(self.lang)
    formatter = formatters.HtmlFormatter()

    highlighted = highlight(code, lexer, formatter)
    out.Print(highlighted)
328
329
def SimpleHighlightCode(s):
  """Simple highlighting for test/shell-vs-shell.sh

  The text inside every <pre>...</pre> is highlighted with ShPromptPlugin.
  The <pre> and </pre> tags themselves are copied through unchanged.

  Args:
    s: HTML string.

  Returns:
    The HTML string with <span> tags added inside <pre> blocks.
  """
  f = cStringIO.StringIO()
  out = html.Output(s, f)

  tag_lexer = html.TagLexer(s)

  # Start of the current token; the token stream only yields end positions.
  pos = 0

  it = html.ValidTokens(s)

  while True:
    try:
      tok_id, end_pos = next(it)
    except StopIteration:
      break

    if tok_id == html.StartTag:
      tag_lexer.Reset(pos, end_pos)
      if tag_lexer.TagName() == 'pre':
        pre_end_pos = end_pos  # right edge of <pre>, where the text begins

        # Left and right edges of the closing </pre> tag.
        slash_pre_left, slash_pre_right = html.ReadUntilEndTag(
            it, tag_lexer, 'pre')

        # Copy everything up to and including <pre>.
        out.PrintUntil(pre_end_pos)

        # Using ShPromptPlugin because it does the comment highlighting we
        # want!  Only the text BETWEEN the tags is highlighted, so that
        # neither tag can end up inside a <span>.
        plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
        plugin.PrintHighlighted(out)

        out.SkipTo(slash_pre_left)

        # The iterator has consumed everything through </pre>, so the next
        # token starts at the right edge of that end tag.
        end_pos = slash_pre_right

    pos = end_pos

  out.PrintTheRest()

  return f.getvalue()
373
374
375
def _PrintShPromptBlock(s, out, code_start_pos, slash_code_left):
  """Highlight s[code_start_pos:slash_code_left], KEEPING the <pre><code>."""
  # Print everything up to and including <pre><code ...>
  out.PrintUntil(code_start_pos)

  # Using ShPromptPlugin because it does the comment highlighting we want!
  plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
  plugin.PrintHighlighted(out)

  out.SkipTo(slash_code_left)


def HighlightCode(s, default_highlighter, debug_out=None):
  """Highlight the code blocks in an HTML string.

  Algorithm:
  1. Find each <pre> that is immediately followed by <code>, and collect
     what's inside.
  2. Choose a highlighter from the class attribute of <code>:
       no class                   default_highlighter, if it isn't None
       language-none              left alone
       language-sh-prompt,
       language-oils-sh,
       language-oil-sh            ShPromptPlugin
       language-ysh               left alone for now
       language-chapter-links-X   HelpTopicsPlugin with chapter X
       any other language-X       PygmentsPlugin with lang X
     A class that doesn't start with 'language' is left alone.
  3. ShPromptPlugin and HelpTopicsPlugin keep the original <pre><code> and
     only add tags inside.  For Pygments, the whole <pre>...</pre> is replaced.

  Args:
    s: HTML string.
    default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'.
    debug_out: optional list.  For every language-chapter-links-X block, a
      dict {'to_chap': X, 'lines': ...} is appended to it.

  Returns:
    The highlighted HTML string.

  Raises:
    RuntimeError: if a <code> without a class is found and
      default_highlighter is not None and not one of the names above.
  """
  if debug_out is None:
    debug_out = []

  f = cStringIO.StringIO()
  out = html.Output(s, f)

  tag_lexer = html.TagLexer(s)

  # Start of the current token; the token stream only yields end positions.
  pos = 0

  it = html.ValidTokens(s)

  while True:
    try:
      tok_id, end_pos = next(it)
    except StopIteration:
      break

    if tok_id == html.StartTag:
      tag_lexer.Reset(pos, end_pos)
      if tag_lexer.TagName() == 'pre':
        pre_start_pos = pos
        pos = end_pos

        # Only <pre> IMMEDIATELY followed by <code> is a code block.
        try:
          tok_id, end_pos = next(it)
        except StopIteration:
          break

        tag_lexer.Reset(pos, end_pos)
        if tok_id == html.StartTag and tag_lexer.TagName() == 'code':
          css_class = tag_lexer.GetAttr('class')
          code_start_pos = end_pos

          if css_class is None:
            slash_code_left, slash_code_right = html.ReadUntilEndTag(
                it, tag_lexer, 'code')
            # The iterator has consumed everything through </code>, so the
            # next token starts at the right edge of that end tag.
            end_pos = slash_code_right

            if default_highlighter is not None:
              # oil-sh for compatibility
              if default_highlighter in ('sh-prompt', 'oils-sh', 'oil-sh'):
                _PrintShPromptBlock(s, out, code_start_pos, slash_code_left)
              else:
                raise RuntimeError(
                    'Unknown default highlighter %r' % default_highlighter)

          elif css_class.startswith('language'):
            slash_code_left, slash_code_right = html.ReadUntilEndTag(
                it, tag_lexer, 'code')
            # Same as above: keep the token start position in sync.
            end_pos = slash_code_right

            if css_class == 'language-none':
              # Allow ```none
              pass

            # language-oils-sh is accepted so that the class names agree with
            # the names accepted for default_highlighter.  oil-sh is for
            # compatibility.
            elif css_class in ('language-sh-prompt', 'language-oils-sh',
                               'language-oil-sh'):
              _PrintShPromptBlock(s, out, code_start_pos, slash_code_left)

            elif css_class == 'language-ysh':
              # TODO: Write an Oil syntax highlighter.
              pass

            elif css_class.startswith('language-chapter-links-'):
              n = len('language-chapter-links-')
              chapter = css_class[n:]
              #log('chap %s', chapter)

              # Here we're KEEPING the original <pre><code>
              out.PrintUntil(code_start_pos)

              plugin = HelpTopicsPlugin(s, code_start_pos, slash_code_left,
                                        chapter)
              block_debug_info = plugin.PrintHighlighted(out)

              # e.g. these are links to cmd-lang within a block in toc-ysh
              chap_block = {'to_chap': chapter, 'lines': block_debug_info}
              debug_out.append(chap_block)

              out.SkipTo(slash_code_left)

            else:  # language-*: Use Pygments
              # We REMOVE the original <pre><code> because Pygments gives you
              # a <pre> already

              # We just read closing </code>, and the next one should be
              # </pre>.
              try:
                tok_id, end_pos = next(it)
              except StopIteration:
                break
              tag_lexer.Reset(slash_code_right, end_pos)
              assert tok_id == html.EndTag, tok_id
              assert tag_lexer.TagName() == 'pre', tag_lexer.TagName()
              slash_pre_right = end_pos

              out.PrintUntil(pre_start_pos)

              lang = css_class[len('language-'):]
              plugin = PygmentsPlugin(s, code_start_pos, slash_code_left, lang)
              plugin.PrintHighlighted(out)

              out.SkipTo(slash_pre_right)
              f.write('<!-- done pygments -->\n')

    pos = end_pos

  out.PrintTheRest()

  return f.getvalue()
506
507
def ExtractCode(s, f):
  """Print code blocks to a plain text file.

  So we can at least validate the syntax.

  Similar to the algorithm in HighlightCode:

  1. Collect what's inside <pre><code> ...
  2. Decode &amp; -> &, etc. and print it

  Only <code> tags WITHOUT a class attribute are extracted.  Each block is
  preceded by a '# block N' line, with N counting from 0.

  Args:
    s: HTML string.
    f: file-like object that the extracted code is written to.
  """
  out = html.Output(s, f)
  tag_lexer = html.TagLexer(s)

  block_num = 0
  # Start of the current token; the token stream only yields end positions.
  pos = 0
  it = html.ValidTokens(s)

  while True:
    try:
      tok_id, end_pos = next(it)
    except StopIteration:
      break

    if tok_id == html.StartTag:
      tag_lexer.Reset(pos, end_pos)
      if tag_lexer.TagName() == 'pre':
        pos = end_pos

        # Only <pre> IMMEDIATELY followed by <code> is a code block.
        try:
          tok_id, end_pos = next(it)
        except StopIteration:
          break

        tag_lexer.Reset(pos, end_pos)
        if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

          css_class = tag_lexer.GetAttr('class')
          # Skip code blocks that look like ```foo
          # Usually we use 'oil-sh' as the default_highlighter, and all those
          # code blocks should be extracted.  TODO: maybe this should be
          # oil-language?
          if css_class is None:
            code_start_pos = end_pos

            # Drop all the HTML before the code, and print a header instead.
            out.SkipTo(code_start_pos)
            out.Print('# block %d' % block_num)
            out.Print('\n')

            slash_code_left, slash_code_right = html.ReadUntilEndTag(
                it, tag_lexer, 'code')

            text = html.ToText(s, code_start_pos, slash_code_left)
            out.SkipTo(slash_code_left)

            out.Print(text)
            out.Print('\n')

            block_num += 1

            # The iterator has consumed everything through </code>, so the
            # next token starts at the right edge of that end tag.
            end_pos = slash_code_right

    pos = end_pos

  # NOTE: out.PrintTheRest() is NOT called, so the HTML after the last code
  # block isn't written to f.
571
572
class ShellSession(object):
  """
  Stub for a plugin that runs shell snippets and shows their output.

  TODO: Pass this to HighlightCode as a plugin

  The idea is that a block like

    $ x=one
    $ echo $x
    $ echo two

  becomes

    $ x=one
    $ echo $x
    one
    $ echo two
    two

  and that the output is cached in files like

    blog/2019/12/_shell_session/
      $hash1-stdout.txt
      $hash2-stdout.txt

  The command is hashed with md5 to find its file.  If the file already
  exists, the command isn't run again.  You can delete the file to redo it.

  TODO: write a loop that reads one line at a time, writes it, then reads
  output from bash.  Use the Lines iterator to get lines.

  For extra credit, you can solve the PS2 problem?  That's easily done with
  Oil's parser.
  """

  def __init__(self, shell_exe, cache_dir):
    """
    Args:
      shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by default.
      cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
    """
    self.shell_exe, self.cache_dir = shell_exe, cache_dir

  def PrintHighlighted(self, s, start_pos, end_pos, out):
    """Not implemented yet: does nothing and returns None.

    Args:
      s: an HTML string.
    """
    return None
619
620
621
def main(argv):
  """Command line entry point.  The only action is 'highlight'.

  'highlight' reads HTML from stdin and prints it to stdout, highlighted by
  SimpleHighlightCode().

  Raises:
    RuntimeError: for any other action.
  """
  action = argv[1]

  if action != 'highlight':
    raise RuntimeError('Invalid action %r' % action)

  # for test/shell-vs-shell.sh
  page = sys.stdin.read()
  print(SimpleHighlightCode(page))


if __name__ == '__main__':
  main(sys.argv)
638
639
640# vim: sw=2