| 1 | #!/usr/bin/env python2
|
| 2 | """
|
| 3 | lexer_test.py: Tests for lexer.py
|
| 4 | """
|
| 5 |
|
| 6 | import unittest
|
| 7 |
|
| 8 | from _devbuild.gen.id_kind_asdl import Id, Id_str
|
| 9 | from _devbuild.gen.types_asdl import lex_mode_e
|
| 10 | from core import test_lib
|
| 11 | from mycpp.mylib import log
|
| 12 | from frontend.lexer import DummyToken as Tok
|
| 13 | from frontend.lexer_def import LEXER_DEF
|
| 14 | from frontend import lexer
|
| 15 | from frontend import reader
|
| 16 |
|
| 17 |
|
def _PrintfOuterTokens(fmt):
    """Lex `fmt` in PrintfOuter mode and print every token that is read.

    Reading stops after the first token whose id is Id.Eof_Real or
    Id.Eol_Tok; that final token is printed as well.  A log line is written
    before lexing starts and an empty one after it finishes.
    """
    log('PrintfOuter lexing %r', fmt)

    ctx = test_lib.InitParseContext()
    src_arena = test_lib.MakeArena('<lexer_test.py>')
    lines = reader.StringLineReader(fmt, src_arena)
    lx = ctx.MakeLexer(lines)

    # Ids that mark the end of the input for this helper.
    stop_ids = (Id.Eof_Real, Id.Eol_Tok)

    finished = False
    while not finished:
        tok = lx.Read(lex_mode_e.PrintfOuter)
        print(tok)
        finished = tok.id in stop_ids

    log('')
|
| 33 |
|
| 34 |
|
def _PrintToken(t):
    """Print a token as its Id name (padded to 20 columns) and repr of its tval."""
    id_name = Id_str(t.id)
    print('%20s %r' % (id_name, t.tval))
|
| 38 |
|
| 39 |
|
def _PrintAllTokens(lx, lex_mode):
    """Read tokens from `lx` in `lex_mode`, printing each with _PrintToken.

    Stops after printing the first token whose id is Id.Eof_Real or
    Id.Eol_Tok.
    """
    stop_ids = (Id.Eof_Real, Id.Eol_Tok)

    finished = False
    while not finished:
        tok = lx.Read(lex_mode)
        _PrintToken(tok)
        finished = tok.id in stop_ids
|
| 46 |
|
| 47 |
|
class TokenTest(unittest.TestCase):
    """Tests for token construction and the lexer's read / lookahead / unread
    operations.

    Most of the output is printed for manual inspection; the assertions pin
    the token ids the lexer is expected to produce.
    """

    def testToken(self):
        # Smoke test: building and printing tokens must not raise.
        t = Tok(Id.Lit_Chars, 'abc')
        print(t)

        # This redundancy is OK I guess.
        t = Tok(Id.Lit_LBrace, '{')
        print(t)

        t = Tok(Id.Op_Semi, ';')
        print(t)

    def testPrintStats(self):
        # Report the size of each entry in LEXER_DEF, largest first.
        states = sorted(LEXER_DEF.items(),
                        key=lambda pair: len(pair[1]),
                        reverse=True)
        total = 0
        for state, re_list in states:
            n = len(re_list)
            # This file runs under Python 2 without print_function, where
            # print(n, state) would print a tuple repr.  Format explicitly.
            print('%d %s' % (n, state))
            total += n

        print("Number of lex states: %d" % len(LEXER_DEF))
        print("Number of token dispatches: %d" % total)

    def testMoveToNextLine(self):
        """Test that it doesn't mess up invariants."""
        arena = test_lib.MakeArena('<lexer_test.py>')
        # Two lines: 'cd {' and '}'.
        code_str = 'cd {\n}'

        print('=== Printing all tokens')
        _, lx = test_lib.InitLexer(code_str, arena)
        _PrintAllTokens(lx, lex_mode_e.ShCommand)

        # print() is a statement in Python 2 and would output "()"; print an
        # empty string to get a blank separator line.
        print('')
        print('=== MoveToNextLine() and LookAheadOne()')
        _, lx = test_lib.InitLexer(code_str, arena)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Lit_Chars, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.WS_Space, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Lit_LBrace, t.id)

        # The first line has not been fully read yet, so moving on is
        # expected to trip an assertion inside the lexer.
        self.assertRaises(AssertionError, lx.MoveToNextLine)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Op_Newline, t.id)

        # With the first line used up, lookahead is expected to report
        # Id.Unknown_Tok.
        look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
        self.assertEqual(Id.Unknown_Tok, look_ahead_id)

        # Method being tested
        lx.MoveToNextLine()

        # Lookahead
        print('Lookahead')
        look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
        self.assertEqual(Id.Lit_RBrace, look_ahead_id)

        # Lookahead again: the same id is expected a second time.
        print('Lookahead 2')
        look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
        self.assertEqual(Id.Lit_RBrace, look_ahead_id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Lit_RBrace, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        _PrintToken(t)
        self.assertEqual(Id.Eof_Real, t.id)

    def testMaybeUnreadOne(self):
        arena = test_lib.MakeArena('<lexer_test.py>')
        _, lx = test_lib.InitLexer('()', arena)

        t = lx.Read(lex_mode_e.ShCommand)
        print(t)
        self.assertEqual(Id.Op_LParen, t.id)

        t = lx.Read(lex_mode_e.ShCommand)
        print(t)
        self.assertEqual(Id.Op_RParen, t.id)

        # Go back
        lx.MaybeUnreadOne()

        # Push Hint
        lx.PushHint(Id.Op_RParen, Id.Right_CasePat)

        # Now we see it again, as another Id
        t = lx.Read(lex_mode_e.ShCommand)
        print(t)
        self.assertEqual(Id.Right_CasePat, t.id)

    def testPrintf(self):
        # Demonstrate input handling quirk

        # Get Id.Eof_Real because len('') == 0
        _PrintfOuterTokens('')

        # Get Id.Eol_Tok because len('\0') == 1
        _PrintfOuterTokens('\0')

        # Get x, then Id.Eof_Real because there are no more lines
        _PrintfOuterTokens('x\0')
|
| 170 |
|
| 171 |
|
class TokenFunctionsTest(unittest.TestCase):
    """Tests for the token predicate helpers in the lexer module
    (TokenContains, TokenEquals, TokenStartsWith, TokenEndsWith,
    IsPlusEquals).
    """

    def testContainsEquals(self):
        arena = test_lib.MakeArena('<lexer_test.py>')
        _unused, lx = test_lib.InitLexer('echo "hi $name"', arena)

        # Only the first token is examined; the expectations below treat it
        # as the word 'echo'.
        first = lx.Read(lex_mode_e.ShCommand)
        print(first)

        for needle in ('echo', 'ech', 'cho', 'c', ''):
            self.assertEqual(True, lexer.TokenContains(first, needle))

        for expected, s in ((True, 'echo'), (False, 'ech')):
            self.assertEqual(expected, lexer.TokenEquals(first, s))

        prefix_cases = (
            (True, ''),
            (True, 'e'),
            (True, 'ech'),
            (False, 'cho'),
        )
        for expected, prefix in prefix_cases:
            self.assertEqual(expected, lexer.TokenStartsWith(first, prefix))

        suffix_cases = (
            (True, ''),
            (False, 'ech'),
            (True, 'cho'),
            (True, 'o'),
        )
        for expected, suffix in suffix_cases:
            self.assertEqual(expected, lexer.TokenEndsWith(first, suffix))

    def testIsPlusEquals(self):
        arena = test_lib.MakeArena('<lexer_test.py>')

        # (source text, expected IsPlusEquals result for its first token)
        cases = (
            ('foo+=b"', True),
            ('foo=b"', False),
        )
        for code_str, expected in cases:
            _unused, lx = test_lib.InitLexer(code_str, arena)

            first = lx.Read(lex_mode_e.ShCommand)
            print(first)
            self.assertEqual(expected, lexer.IsPlusEquals(first))
|
| 213 |
|
| 214 |
|
# Run every TestCase in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|