1 | #!/usr/bin/env python2
|
2 | """
|
3 | html_test.py: Tests for html.py
|
4 | """
|
5 | from __future__ import print_function
|
6 |
|
7 | import sys
|
8 | import unittest
|
9 |
|
from lazylex import html  # module under test
|
11 |
|
# Alias html.log so the helpers below can call it unqualified.
log = html.log

# Read the HTML fixture once, at import time.  The path is relative, so it is
# resolved against the current working directory of the test run.
with open('lazylex/testdata.html') as f:
    TEST_HTML = f.read()
|
16 |
|
17 |
|
def _MakeTagLexer(s):
    """Build an html.TagLexer for `s` and Reset() it to span the whole string.

    Args:
      s: the text of a single tag, e.g. '<a href="x">'.

    Returns:
      The lexer, after Reset(0, len(s)) has been called on it.
    """
    tag_lexer = html.TagLexer(s)
    end_pos = len(s)
    tag_lexer.Reset(0, end_pos)
    return tag_lexer
|
22 |
|
23 |
|
def _PrintTokens(lex):
    """Log the tag name of `lex`, then every token with the text it covers.

    Args:
      lex: a lexer exposing TagName(), Tokens() and the source string as `.s`.
        Tokens() must yield (token, start, end) triples.
    """
    # Empty message first; presumably acts as a visual separator in the log.
    log('')
    log('tag = %r', lex.TagName())
    for tok_id, begin, finish in lex.Tokens():
        token_text = lex.s[begin:finish]
        log('%s %r', tok_id, token_text)
|
29 |
|
30 |
|
31 | class HtmlTest(unittest.TestCase):
|
32 |
|
33 | def testTagLexer(self):
|
34 | # Invalid!
|
35 | #lex = _MakeTagLexer('< >')
|
36 | #print(lex.Tag())
|
37 |
|
38 | lex = _MakeTagLexer('<a>')
|
39 | _PrintTokens(lex)
|
40 |
|
41 | lex = _MakeTagLexer('<a novalue>')
|
42 | _PrintTokens(lex)
|
43 |
|
44 | # Note: we could have a different HasAttr() method
|
45 | # <a novalue> means lex.Get('novalue') == None
|
46 | # https://developer.mozilla.org/en-US/docs/Web/API/Element/hasAttribute
|
47 | self.assertEqual(None, lex.GetAttr('novalue'))
|
48 |
|
49 | lex = _MakeTagLexer('<a href="double quoted">')
|
50 | _PrintTokens(lex)
|
51 |
|
52 | self.assertEqual('double quoted', lex.GetAttr('href'))
|
53 | self.assertEqual(None, lex.GetAttr('oops'))
|
54 |
|
55 | lex = _MakeTagLexer('<a href=foo class="bar">')
|
56 | _PrintTokens(lex)
|
57 |
|
58 | lex = _MakeTagLexer('<a href=foo class="bar" />')
|
59 | _PrintTokens(lex)
|
60 |
|
61 | # IndexLinker in devtools/make_help.py
|
62 | # <pre> sections in doc/html_help.py
|
63 | # TocExtractor in devtools/cmark.py
|
64 |
|
65 | def testPstrip(self):
|
66 | """
|
67 | Remove anything like this
|
68 |
|
69 | <p><pstrip> </pstrip></p>
|
70 | """
|
71 | pass
|
72 |
|
73 | def testSplit(self):
|
74 | """
|
75 | doc/help.md and help-index.md have to be split up
|
76 | """
|
77 | pass
|
78 |
|
79 | def testCommentParse(self):
|
80 | """
|
81 | """
|
82 | n = len(TEST_HTML)
|
83 | for tok_id, end_pos in html._Tokens(TEST_HTML, 0, n):
|
84 | if tok_id == html.Invalid:
|
85 | raise RuntimeError(event)
|
86 | print(tok_id)
|
87 |
|
88 |
|
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|