| 1 | #!/usr/bin/env python2
|
| 2 | """html_lib.py.
|
| 3 |
|
| 4 | Shared between HTML processors.
|
| 5 |
|
| 6 | TODO: Write a "pull parser" API!
|
| 7 | """
|
| 8 | from __future__ import print_function
|
| 9 |
|
| 10 | import cgi
|
| 11 | import re
|
| 12 |
|
| 13 |
|
def AttrsToString(attrs):
    """Serialize (name, value) attribute pairs into HTML attribute text.

    Args:
      attrs: sequence of (name, value) string pairs.  May be empty or None.

    Returns:
      '' when attrs is falsy.  Otherwise every pair rendered as
      ' name="value"' (note the leading space) and concatenated in order, so
      the result can be appended directly after a tag name.
    """
    if not attrs:
        return ''

    def _EscapeValue(value):
        # The value is emitted inside double quotes, so '"' has to be escaped
        # in addition to & < >.  Otherwise a quote inside the value would
        # terminate the attribute early.  '&' is replaced first so the
        # entities produced by the later replacements are not escaped again.
        return (value.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;'))

    # Important: there's a leading space here.
    # TODO: Change href="$help:command" to href="help.html#command"
    return ''.join(' %s="%s"' % (k, _EscapeValue(v)) for (k, v) in attrs)
|
| 21 |
|
| 22 |
|
def PrettyHref(s, preserve_anchor_case=False):
    """Derive an anchor name free of special characters from heading text.

    Loosely modeled on GitHub's heading anchors; exact compatibility is not a
    goal.  The text is split on runs of whitespace and hyphens, each piece is
    stripped down to the characters matched by the "keep" pattern, pieces
    that end up empty are dropped (unlike GitHub), and the rest are joined
    with '-'.

    Args:
      s: the heading text.
      preserve_anchor_case: when true, '/' is kept in addition to word
        characters (for doc/ref anchors such as List/append, cmd/append) and
        the result is not lowercased.  When false, only word characters are
        kept and the result is lowercased.

    Returns:
      The anchor string; may be empty if nothing in s survives filtering.
    """
    if preserve_anchor_case:
        # doc/ref: keep word characters and '/'.
        keep_re = r'[\w/]+'
    else:
        # Keep word characters only.
        keep_re = r'\w+'

    pieces = []
    for word in re.split(r'[\s\-]+', s):
        cleaned = ''.join(re.findall(keep_re, word))
        if cleaned:  # skip pieces that contained nothing worth keeping
            pieces.append(cleaned)

    anchor = '-'.join(pieces)
    if preserve_anchor_case:
        return anchor
    return anchor.lower()
|