OILS / frontend / lexer_test.py View on Github | oilshell.org

172 lines, 110 significant
1#!/usr/bin/env python2
2"""
3lexer_test.py: Tests for lexer.py
4"""
5
6import unittest
7
8from _devbuild.gen.id_kind_asdl import Id, Id_str
9from _devbuild.gen.types_asdl import lex_mode_e
10from core import test_lib
11from mycpp.mylib import log
12from frontend.lexer import DummyToken as Tok
13from frontend.lexer_def import LEXER_DEF
14from frontend import reader
15
16
def _PrintfOuterTokens(fmt):
    """Lex `fmt` in PrintfOuter mode and print every token produced.

    A fresh parse context, arena, and string line reader are built for each
    call.  Tokens are read and printed one at a time; reading stops right
    after a token whose id is Id.Eof_Real or Id.Eol_Tok has been printed.
    A header line is logged before lexing and an empty line afterwards.

    Args:
      fmt: the string handed to the line reader as the text to lex.
    """
    log('PrintfOuter lexing %r', fmt)

    ctx = test_lib.InitParseContext()
    src_arena = test_lib.MakeArena('<lexer_test.py>')
    str_reader = reader.StringLineReader(fmt, src_arena)
    printf_lexer = ctx.MakeLexer(str_reader)

    finished = False
    while not finished:
        tok = printf_lexer.Read(lex_mode_e.PrintfOuter)
        print(tok)
        # The terminating token is printed too; it only ends the loop.
        finished = tok.id in (Id.Eof_Real, Id.Eol_Tok)

    log('')
32
33
def _PrintToken(t):
    """Print one token as its Id name (right-aligned to 20 columns) and the
    repr of its `tval` attribute.

    Args:
      t: object exposing `id` and `tval` attributes.
    """
    id_name = Id_str(t.id)
    line = '%20s %r' % (id_name, t.tval)
    print(line)
37
38
def _PrintAllTokens(lx, lex_mode):
    """Read tokens from `lx` in `lex_mode`, printing each via _PrintToken.

    Reading stops right after a token whose id is Id.Eof_Real or Id.Eol_Tok
    has been printed.

    Args:
      lx: lexer object providing Read(lex_mode).
      lex_mode: the mode passed unchanged to every Read() call.
    """
    finished = False
    while not finished:
        tok = lx.Read(lex_mode)
        _PrintToken(tok)
        # The terminating token is printed too; it only ends the loop.
        finished = tok.id in (Id.Eof_Real, Id.Eol_Tok)
45
46
class TokenTest(unittest.TestCase):
    """Tests for token construction and for the lexer's Read(),
    LookAheadOne(), MoveToNextLine(), MaybeUnreadOne() and PushHint()."""

    def testToken(self):
        # Smoke test: construct a few tokens and print them.  Nothing is
        # asserted; the test only fails if construction or printing raises.
        samples = (
            (Id.Lit_Chars, 'abc'),
            # The Id and the text say the same thing twice here.  This
            # redundancy is OK I guess.
            (Id.Lit_LBrace, '{'),
            (Id.Op_Semi, ';'),
        )
        for tok_id, text in samples:
            print(Tok(tok_id, text))

    def testPrintStats(self):
        # Print every entry of LEXER_DEF with the length of its list,
        # longest list first, followed by two summary lines.
        sized = [(len(re_list), state) for state, re_list in LEXER_DEF.items()]
        # list.sort() is stable, so ties keep LEXER_DEF's iteration order.
        sized.sort(key=lambda entry: entry[0], reverse=True)

        for n, state in sized:
            print(n, state)

        total = sum(n for n, _ in sized)
        print("Number of lex states: %d" % len(LEXER_DEF))
        print("Number of token dispatches: %d" % total)

    def testMoveToNextLine(self):
        """Test that it doesn't mess up invariants."""
        mode = lex_mode_e.ShCommand
        arena = test_lib.MakeArena('<lexer_test.py>')
        # Two lines of input: 'cd {' and '}'.
        code_str = 'cd {\n}'

        print('=== Printing all tokens')
        _, lx = test_lib.InitLexer(code_str, arena)
        _PrintAllTokens(lx, mode)

        print()
        print('=== MoveToNextLine() and LookAheadOne()')
        # Start over with a fresh lexer on the same input.
        _, lx = test_lib.InitLexer(code_str, arena)

        # First line up to (not including) the newline: 'cd', ' ', '{'.
        for expected_id in (Id.Lit_Chars, Id.WS_Space, Id.Lit_LBrace):
            tok = lx.Read(mode)
            _PrintToken(tok)
            self.assertEqual(expected_id, tok.id)

        # Before the newline token has been read, MoveToNextLine() is
        # expected to raise AssertionError.
        asserted = False
        try:
            lx.MoveToNextLine()
        except AssertionError:
            asserted = True
        if not asserted:
            self.fail('Should have asserted')

        tok = lx.Read(mode)
        _PrintToken(tok)
        self.assertEqual(Id.Op_Newline, tok.id)

        # With the current line used up, lookahead yields Unknown_Tok.
        next_id = lx.LookAheadOne(mode)
        self.assertEqual(Id.Unknown_Tok, next_id)

        # Method being tested
        lx.MoveToNextLine()

        # Looking ahead twice must give the same answer both times.
        for banner in ('Lookahead', 'Lookahead 2'):
            print(banner)
            next_id = lx.LookAheadOne(mode)
            self.assertEqual(Id.Lit_RBrace, next_id)

        # The second line still reads normally: '}' and then end of input.
        for expected_id in (Id.Lit_RBrace, Id.Eof_Real):
            tok = lx.Read(mode)
            _PrintToken(tok)
            self.assertEqual(expected_id, tok.id)

    def testMaybeUnreadOne(self):
        # After MaybeUnreadOne() and PushHint(), the ')' that was already
        # read is read again, this time with the hinted Id.
        mode = lex_mode_e.ShCommand
        arena = test_lib.MakeArena('<lexer_test.py>')
        _, lx = test_lib.InitLexer('()', arena)

        for expected_id in (Id.Op_LParen, Id.Op_RParen):
            tok = lx.Read(mode)
            print(tok)
            self.assertEqual(expected_id, tok.id)

        # Go back
        lx.MaybeUnreadOne()

        # Push Hint
        lx.PushHint(Id.Op_RParen, Id.Right_CasePat)

        # Now we see the same token again, but with another Id
        tok = lx.Read(mode)
        print(tok)
        self.assertEqual(Id.Right_CasePat, tok.id)

    def testPrintf(self):
        # Demonstrate input handling quirk
        inputs = (
            # Get Id.Eof_Real because len('') == 0
            '',
            # Get Id.Eol_Tok because len('\0') == 1
            '\0',
            # Get x, then Id.Eof_Real because there are no more lines
            'x\0',
        )
        for fmt in inputs:
            _PrintfOuterTokens(fmt)
169
170
# Script entry point: hand control to unittest's command-line runner when
# this file is executed directly rather than imported.
if __name__ == '__main__':
    unittest.main()