1 | #!/usr/bin/env python2
|
2 | """
|
3 | oils_doc.py: HTML processing for Oil documentation.
|
4 |
|
5 | Plugins:
|
6 | ExpandLinks expands $xref, etc.
|
7 | PygmentsPlugin -- for ```python, ```sh, ```c, etc.
|
8 | HelpTopicsPlugin -- for help-index.html
|
9 |
|
10 | ShPromptPlugin -- understands $ echo hi, but doesn't run anything
|
11 | ShellSession -- placeholder (not implemented yet) for running shell snippets and caching the output
|
12 | """
|
13 | from __future__ import print_function
|
14 |
|
15 | import cgi
|
16 | import cStringIO
|
17 | import re
|
18 | import sys
|
19 |
|
20 | from doctools.util import log
|
21 | from lazylex import html
|
22 |
|
23 |
|
def RemoveComments(s):
  """Remove <!-- comments --> from an HTML string.

  Comments whose text contains 'REPLACE' are kept, because
  doc/release-index.md has placeholders like <!-- REPLACE_WITH_DATE -->.

  Args:
    s: HTML string.

  Returns:
    The HTML string with all other comments removed.
  """
  f = cStringIO.StringIO()
  out = html.Output(s, f)

  # Start of the current token.  The token loop only gives us end positions,
  # so the start is the end of the previous token.
  pos = 0

  for tok_id, end_pos in html.ValidTokens(s):
    if tok_id == html.Comment:
      value = s[pos : end_pos]
      # doc/release-index.md has <!-- REPLACE_WITH_DATE --> etc.
      if 'REPLACE' not in value:
        # Emit everything before the comment, then jump over it.
        out.PrintUntil(pos)
        out.SkipTo(end_pos)
    pos = end_pos

  out.PrintTheRest()
  return f.getvalue()
44 |
|
45 |
|
class _Abbrev(object):
  """Callable that turns a link argument into a URL.

  fmt is a %-style template; every %(value)s in it is replaced by the argument
  passed when the instance is called.
  """

  def __init__(self, fmt):
    self.fmt = fmt

  def __call__(self, value):
    substitutions = dict(value=value)
    return self.fmt % substitutions
52 |
|
53 |
|
# Maps an abbreviation name (the word after '$' in an href) to the _Abbrev that
# builds the expanded URL.  Each entry gets its own _Abbrev instance.
_ABBREVIATIONS = dict(
    (name, _Abbrev(fmt)) for name, fmt in [
        ('xref', '/cross-ref.html?tag=%(value)s#%(value)s'),

        # 'help' is an alias for osh-help, for backward compatibility to link
        # to the same version.
        # TODO: Remove all of these broken links!
        ('help', 'osh-help.html?topic=%(value)s#%(value)s'),
        ('osh-help', 'osh-help.html?topic=%(value)s#%(value)s'),
        ('oil-help', 'oil-help.html?topic=%(value)s#%(value)s'),

        # New style: one for every chapter?
        ('chap-type-method', 'chap-type-method.html?topic=%(value)s#%(value)s'),
        ('chap-plugin', 'chap-plugin.html?topic=%(value)s#%(value)s'),

        # For the blog
        ('osh-help-latest',
         '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'),
        ('oil-help-latest',
         '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'),
        ('oils-doc', '//www.oilshell.org/release/latest/doc/%(value)s'),

        ('blog-tag', '/blog/tags.html?tag=%(value)s#%(value)s'),
        ('oils-commit', 'https://github.com/oilshell/oil/commit/%(value)s'),
        ('oils-src', 'https://github.com/oilshell/oil/blob/master/%(value)s'),
        ('blog-code-src',
         'https://github.com/oilshell/blog-code/blob/master/%(value)s'),
        ('issue', 'https://github.com/oilshell/oil/issues/%(value)s'),
        ('wiki', 'https://github.com/oilshell/oil/wiki/%(value)s'),
    ])

# Backward compatibility: the old 'oil-*' names share the 'oils-*' expanders.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})
104 |
|
# Matches a link shortcut at the start of an href value: $name or $name:arg,
# e.g. $xref:foo.  Group 1 is the abbreviation name (lowercase letters and
# '-'); group 2 is the optional non-whitespace argument after the colon.
# Compiled with re.VERBOSE, so the spaces in the pattern are not significant.
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
107 |
|
108 |
|
def ExpandLinks(s):
  """Expand link shortcuts such as $xref:bash in <a href="..."> values.

  For every <a> start tag whose href matches _SHORTCUT_RE, the href value is
  replaced by the URL produced by the matching entry in _ABBREVIATIONS.  When
  the shortcut has no ':arg' part, the text between the opening and closing
  <a> tags is used as the argument.

  Args:
    s: HTML string.

  Returns:
    The HTML string with shortcut hrefs replaced by HTML-escaped URLs.

  Raises:
    RuntimeError: if a shortcut names an abbreviation that is not in
      _ABBREVIATIONS.
  """
  f = cStringIO.StringIO()
  out = html.Output(s, f)

  tag_lexer = html.TagLexer(s)

  # Start of the current token (the end of the previous one).
  pos = 0

  it = html.ValidTokens(s)
  while True:
    try:
      tok_id, end_pos = next(it)
    except StopIteration:
      break

    if tok_id == html.StartTag:

      tag_lexer.Reset(pos, end_pos)
      if tag_lexer.TagName() == 'a':
        open_tag_right = end_pos

        href_start, href_end = tag_lexer.GetSpanForAttrValue('href')
        # An <a> without href is left alone.  Don't 'continue' here: that
        # would skip the 'pos = end_pos' update below and leave a stale token
        # start for the next start tag.
        if href_start != -1:
          # TODO: Need to unescape like GetAttr()
          href = s[href_start : href_end]

          new = None
          m = _SHORTCUT_RE.match(href)
          if m:
            abbrev_name, arg = m.groups()
            if not arg:
              # No explicit argument: use the link text instead.  This
              # consumes tokens from the shared iterator up to </a>.
              close_tag_left, _ = html.ReadUntilEndTag(it, tag_lexer, 'a')
              arg = s[open_tag_right : close_tag_left]

            # Hack to so we can write [Wiki Page]($wiki) and have the link
            # look like /Wiki-Page/
            if abbrev_name == 'wiki':
              arg = arg.replace(' ', '-')

            func = _ABBREVIATIONS.get(abbrev_name)
            if not func:
              raise RuntimeError('Invalid abbreviation %r' % abbrev_name)
            new = func(arg)

          if new is not None:
            # Replace only the attribute value; the rest of the tag is kept.
            out.PrintUntil(href_start)
            f.write(cgi.escape(new))
            out.SkipTo(href_end)

    pos = end_pos

  out.PrintTheRest()

  return f.getvalue()
168 |
|
169 |
|
class _Plugin(object):
  """Base class for highlighters that work on the span s[start_pos:end_pos].

  Subclasses override PrintHighlighted().
  """

  def __init__(self, s, start_pos, end_pos):
    self.s, self.start_pos, self.end_pos = s, start_pos, end_pos

  def PrintHighlighted(self, out):
    """Write the highlighted span to out; must be overridden."""
    raise NotImplementedError()
179 |
|
180 |
|
# All of these patterns are applied to text taken from an HTML document, so
# they must match the HTML-ESCAPED form of the characters (e.g. &lt; for <).

# One line: everything up to the end of the line, plus an optional newline at
# the end.
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# A shell prompt line.  Groups: 1 = prompt, 2 = command, 3 = tab-completion
# marker (optional), 4 = trailing comment (optional).
_PROMPT_LINE_RE = re.compile(r'''
(\S* \$)[ ]       # flush-left non-whitespace, then dollar and space is a prompt
(.*?)             # arbitrary text
(?:               # don't highlight tab completion
  (&lt;TAB&gt;)   # it's HTML escaped!!!  A raw <TAB> never appears in the input.
  .*?
)?
(?:
  [ ][ ]([#] .*)  # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)


# A non-prompt line that ends with a comment.  Group 1 is the comment.
_EOL_COMMENT_RE = re.compile(r'''
.*?               # arbitrary text
[ ][ ]([#] .*)    # two spaces then a comment
$
''', re.VERBOSE)

# A line that is a comment from its first character.
_COMMENT_LINE_RE = re.compile(r'#.*')
205 |
|
206 |
|
def Lines(s, start_pos, end_pos):
  """Yield the end position of each line within s[start_pos:end_pos].

  A line's end position is just past its newline, or end_pos for a final line
  that has no newline.

  Raises:
    RuntimeError: if _LINE_RE fails to match (not expected to happen).
  """
  cursor = start_pos
  while cursor < end_pos:
    line_match = _LINE_RE.match(s, cursor, end_pos)
    if line_match is None:
      raise RuntimeError("Should have matched a line")

    cursor = line_match.end(0)
    yield cursor
218 |
|
219 |
|
class ShPromptPlugin(_Plugin):
  """
  Highlight shell prompts.

  Each line of the span is classified as a full-line comment, a prompt line
  (prompt, command, optional tab-completion marker, optional comment), or a
  line with a trailing comment, and the matching pieces are wrapped in
  <span class="sh-..."> tags.  Everything else is printed unchanged.
  """

  @staticmethod
  def _PrintSpan(out, m, group, open_tag):
    """Print up to group's start, then the group wrapped in open_tag...</span>."""
    out.PrintUntil(m.start(group))
    out.Print(open_tag)
    out.PrintUntil(m.end(group))
    out.Print('</span>')

  def PrintHighlighted(self, out):
    line_start = self.start_pos
    for line_end in Lines(self.s, self.start_pos, self.end_pos):

      m = _COMMENT_LINE_RE.match(self.s, line_start, line_end)
      if m:
        self._PrintSpan(out, m, 0, '<span class="sh-comment">')
      else:
        m = _PROMPT_LINE_RE.match(self.s, line_start, line_end)
        if m:
          # The prompt and the command are always wrapped, even when empty.
          self._PrintSpan(out, m, 1, '<span class="sh-prompt">')
          self._PrintSpan(out, m, 2, '<span class="sh-command">')

          if m.group(3):
            self._PrintSpan(out, m, 3, '<span class="sh-tab-complete">')

          if m.group(4):
            self._PrintSpan(out, m, 4, '<span class="sh-comment">')
        else:
          m = _EOL_COMMENT_RE.match(self.s, line_start, line_end)
          if m:
            self._PrintSpan(out, m, 1, '<span class="sh-comment">')

      # Whatever is left of the line is copied through as-is.
      out.PrintUntil(line_end)

      line_start = line_end
272 |
|
273 |
|
class HelpTopicsPlugin(_Plugin):
  """
  Highlight blocks of help-index.md.

  Every line of the span is passed to help_gen.IndexLineToHtml() together with
  the chapter; lines for which it returns HTML are replaced by that HTML.
  """

  def __init__(self, s, start_pos, end_pos, chapter):
    super(HelpTopicsPlugin, self).__init__(s, start_pos, end_pos)
    self.chapter = chapter

  def PrintHighlighted(self, out):
    """Print the span to out and return the debug list filled by help_gen."""
    from doctools import help_gen

    debug_lines = []

    line_start = self.start_pos
    for line_end in Lines(self.s, self.start_pos, self.end_pos):
      # NOTE: IndexLineToHtml accepts an HTML ESCAPED line.  It's valid to
      # just add tags and leave everything alone.
      raw_line = self.s[line_start : line_end]
      replacement = help_gen.IndexLineToHtml(self.chapter, raw_line,
                                             debug_lines)

      if replacement is not None:
        out.PrintUntil(line_start)
        out.Print(replacement)
        out.SkipTo(line_end)

      line_start = line_end

    return debug_lines
303 |
|
304 |
|
class PygmentsPlugin(_Plugin):
  """Highlight the span with Pygments, using the lexer named by lang.

  The output carries its own <pre>, so the caller is expected to drop the
  original <pre><code> ... </code></pre> markup around the span.
  """

  def __init__(self, s, start_pos, end_pos, lang):
    _Plugin.__init__(self, s, start_pos, end_pos)
    self.lang = lang

  def PrintHighlighted(self, out):
    """Print the highlighted code to out.

    If pygments can't be imported, a warning is logged and the code is printed
    unhighlighted inside <pre><code>.  Printing nothing would make the whole
    code block disappear, because HighlightCode() skips over the original
    markup after calling this method.
    """
    try:
      from pygments import lexers
      from pygments import formatters
      from pygments import highlight
    except ImportError:
      log("Warning: Couldn't import pygments, so skipping syntax highlighting")
      # The span comes straight from the HTML document, so it's already
      # escaped and can be copied through verbatim.
      out.Print('<pre><code>')
      out.Print(self.s[self.start_pos : self.end_pos])
      out.Print('</code></pre>\n')
      return

    # unescape before passing to pygments, which will escape
    code = html.ToText(self.s, self.start_pos, self.end_pos)

    lexer = lexers.get_lexer_by_name(self.lang)
    formatter = formatters.HtmlFormatter()

    highlighted = highlight(code, lexer, formatter)
    out.Print(highlighted)
328 |
|
329 |
|
def SimpleHighlightCode(s):
  """
  Simple highlighting for test/shell-vs-shell.sh

  The contents of every <pre> element are run through ShPromptPlugin; the
  <pre> and </pre> tags themselves and everything outside them are copied
  through unchanged.

  Args:
    s: HTML string.

  Returns:
    The highlighted HTML string.
  """

  f = cStringIO.StringIO()
  out = html.Output(s, f)

  tag_lexer = html.TagLexer(s)

  pos = 0

  it = html.ValidTokens(s)

  while True:
    try:
      tok_id, end_pos = next(it)
    except StopIteration:
      break

    if tok_id == html.StartTag:

      tag_lexer.Reset(pos, end_pos)
      if tag_lexer.TagName() == 'pre':
        # Just past the opening <pre> tag, i.e. where the content starts.
        pre_end_pos = end_pos

        # Left edge of the closing </pre>, i.e. where the content ends.
        slash_pre_left, _ = html.ReadUntilEndTag(it, tag_lexer, 'pre')

        out.PrintUntil(pre_end_pos)

        # Using ShPromptPlugin because it does the comment highlighting we
        # want!  It gets only the content between the tags (like the
        # <pre><code> case in HighlightCode), so the tags can never end up
        # inside a highlighted span.
        plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
        plugin.PrintHighlighted(out)

        out.SkipTo(slash_pre_left)

    pos = end_pos

  out.PrintTheRest()

  return f.getvalue()
373 |
|
374 |
|
375 |
|
def HighlightCode(s, default_highlighter, debug_out=None):
  """
  Highlight the code inside <pre><code> blocks of an HTML string.

  Algorithm:
  1. Collect what's inside <pre><code> ...
  2. Then read lines with ShPromptPlugin.
  3. If the line looks like a shell prompt and command, highlight them with
     <span>

  The plugin is chosen from the class attribute of <code>:
    (no class)                   default_highlighter, if it isn't None
    language-none                left alone
    language-sh-prompt,
    language-oil-sh              ShPromptPlugin
    language-ysh                 left alone for now
    language-chapter-links-X     HelpTopicsPlugin for chapter X
    language-*                   PygmentsPlugin

  Args:
    s: HTML string.
    default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'.
    debug_out: optional list.  For each chapter-links block, a dict with keys
      'to_chap' and 'lines' is appended to it.

  Returns:
    The highlighted HTML string.

  Raises:
    RuntimeError: if default_highlighter is not None and not one of the names
      above, and a <code> without a class is found.
  """
  if debug_out is None:
    debug_out = []

  f = cStringIO.StringIO()
  out = html.Output(s, f)

  tag_lexer = html.TagLexer(s)

  # Start of the current token (the end of the previous one).
  pos = 0

  it = html.ValidTokens(s)

  while True:
    try:
      tok_id, end_pos = next(it)
    except StopIteration:
      break

    if tok_id == html.StartTag:

      tag_lexer.Reset(pos, end_pos)
      if tag_lexer.TagName() == 'pre':
        pre_start_pos = pos
        pos = end_pos

        # Only a <code> that is the very next token after <pre> counts.
        try:
          tok_id, end_pos = next(it)
        except StopIteration:
          break

        tag_lexer.Reset(pos, end_pos)
        if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

          css_class = tag_lexer.GetAttr('class')
          code_start_pos = end_pos

          if css_class is None:
            slash_code_left, slash_code_right = \
                html.ReadUntilEndTag(it, tag_lexer, 'code')

            if default_highlighter is not None:
              # TODO: Refactor this to remove duplication with
              # language-{sh-prompt,oil-sh} below

              # oil-sh for compatibility
              if default_highlighter in ('sh-prompt', 'oils-sh', 'oil-sh'):
                out.PrintUntil(code_start_pos)

                # Using ShPromptPlugin because it does the comment highlighting we want!
                plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_code_left)
              else:
                raise RuntimeError('Unknown default highlighter %r' % default_highlighter)

          elif css_class.startswith('language'):
            slash_code_left, slash_code_right = \
                html.ReadUntilEndTag(it, tag_lexer, 'code')

            if css_class == 'language-none':
              # Allow ```none
              pass

            # NOTE(review): 'oils-sh' is accepted as a default_highlighter
            # above, but 'language-oils-sh' isn't listed here and would go to
            # Pygments -- confirm whether that's intended.
            elif css_class in ('language-sh-prompt', 'language-oil-sh'):
              # Here we're KEEPING the original <pre><code>
              # Print everything up to and including <pre><code language="...">
              out.PrintUntil(code_start_pos)

              plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
              plugin.PrintHighlighted(out)

              out.SkipTo(slash_code_left)

            elif css_class == 'language-ysh':
              # TODO: Write an Oil syntax highlighter.
              pass

            elif css_class.startswith('language-chapter-links-'):
              n = len('language-chapter-links-')
              chapter = css_class[n:]
              #log('chap %s', chapter)

              out.PrintUntil(code_start_pos)

              plugin = HelpTopicsPlugin(s, code_start_pos, slash_code_left, chapter)
              block_debug_info = plugin.PrintHighlighted(out)

              # e.g. these are links to cmd-lang within a block in toc-ysh
              chap_block = {'to_chap': chapter, 'lines': block_debug_info}
              debug_out.append(chap_block)

              out.SkipTo(slash_code_left)

            else:  # language-*: Use Pygments

              # We REMOVE the original <pre><code> because Pygments gives you a <pre> already

              # We just read closing </code>, and the next one should be </pre>.
              try:
                tok_id, end_pos = next(it)
              except StopIteration:
                break
              tag_lexer.Reset(slash_code_right, end_pos)
              assert tok_id == html.EndTag, tok_id
              assert tag_lexer.TagName() == 'pre', tag_lexer.TagName()
              slash_pre_right = end_pos

              # Stop before <pre>, so the original markup is not printed.
              out.PrintUntil(pre_start_pos)

              lang = css_class[len('language-'):]
              plugin = PygmentsPlugin(s, code_start_pos, slash_code_left, lang)
              plugin.PrintHighlighted(out)

              # Skip past </pre>; only the plugin's output replaces the block.
              out.SkipTo(slash_pre_right)
              f.write('<!-- done pygments -->\n')

    pos = end_pos

  out.PrintTheRest()

  return f.getvalue()
506 |
|
507 |
|
def ExtractCode(s, f):
  """Print code blocks to a plain text file.

  So we can at least validate the syntax.

  Similar to the algorithm in HighlightCode():

  1. Collect what's inside <pre><code> ...
  2. Decode &amp; -> &, etc. and write it to f, preceded by a
     '# block N' line

  Only <code> elements WITHOUT a class attribute are extracted.

  Args:
    s: HTML string.
    f: file-like object that the code is written to.

  Returns:
    None.  All output goes to f.
  """
  out = html.Output(s, f)
  tag_lexer = html.TagLexer(s)

  block_num = 0
  pos = 0
  it = html.ValidTokens(s)

  while True:
    try:
      tok_id, end_pos = next(it)
    except StopIteration:
      break

    if tok_id == html.StartTag:
      tag_lexer.Reset(pos, end_pos)
      if tag_lexer.TagName() == 'pre':
        pos = end_pos

        # Only a <code> that is the very next token after <pre> counts.
        try:
          tok_id, end_pos = next(it)
        except StopIteration:
          break

        tag_lexer.Reset(pos, end_pos)
        if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

          css_class = tag_lexer.GetAttr('class')
          # Skip code blocks that look like ```foo
          # Usually we use 'oil-sh' as the default_highlighter, and all those
          # code blocks should be extracted.  TODO: maybe this should be
          # oil-language?
          if css_class is None:
            code_start_pos = end_pos

            # Drop the HTML before the code, and label the block instead.
            out.SkipTo(code_start_pos)
            out.Print('# block %d' % block_num)
            out.Print('\n')

            slash_code_left, slash_code_right = \
                html.ReadUntilEndTag(it, tag_lexer, 'code')

            text = html.ToText(s, code_start_pos, slash_code_left)
            out.SkipTo(slash_code_left)

            out.Print(text)
            out.Print('\n')

            block_num += 1

    pos = end_pos

  # out.PrintTheRest() is not called, so the HTML after the last code block is
  # not written to f.
571 |
|
572 |
|
class ShellSession(object):
  """
  Placeholder for a plugin that runs shell snippets.  Not implemented yet.

  TODO: Pass this to HighlightCode as a plugin

  The idea is that

    $ x=one
    $ echo $x
    $ echo two

  becomes

    $ x=one
    $ echo $x
    one
    $ echo two
    two

  and then you will have

    blog/2019/12/_shell_session/
      $hash1-stdout.txt
      $hash2-stdout.txt

  It hashes the command with md5 and then brings it back.  If the file already
  exists then it doesn't run it again.  You can delete the file to redo it.

  TODO: write a loop that reads one line at a time, writes it, then reads
  output from bash.  Use the Lines iterator to get lines.  For extra credit,
  you can solve the PS2 problem?  That's easily done with Oil's parser.
  """

  def __init__(self, shell_exe, cache_dir):
    """
    Args:
      shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by default.
      cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
    """
    self.shell_exe, self.cache_dir = shell_exe, cache_dir

  def PrintHighlighted(self, s, start_pos, end_pos, out):
    """
    Does nothing yet.

    Args:
      s: an HTML string.
    """
    return None
619 |
|
620 |
|
621 |
|
def main(argv):
  """Command line entry point.

  Actions:
    highlight: read HTML from stdin and print it with SimpleHighlightCode()
      applied.

  Args:
    argv: full argument list, with the program name at index 0 and the action
      at index 1.

  Raises:
    RuntimeError: if the action is missing or unknown.
  """
  try:
    action = argv[1]
  except IndexError:
    raise RuntimeError('Action required')

  if action == 'highlight':
    # for test/shell-vs-shell.sh

    # Not named 'html', which would shadow the imported html module.
    contents = sys.stdin.read()
    highlighted = SimpleHighlightCode(contents)
    print(highlighted)

  else:
    raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
  main(sys.argv)
638 |
|
639 |
|
640 | # vim: sw=2
|