| 1 | #!/usr/bin/env python2
|
| 2 | """
|
| 3 | lexer_test.py: Tests for lexer.py
|
| 4 | """
|
| 5 |
|
| 6 | import unittest
|
| 7 |
|
| 8 | from _devbuild.gen.id_kind_asdl import Id, Id_str
|
| 9 | from _devbuild.gen.types_asdl import lex_mode_e
|
| 10 | from core import test_lib
|
| 11 | from mycpp.mylib import log
|
| 12 | from frontend.lexer import DummyToken as Tok
|
| 13 | from frontend.lexer_def import LEXER_DEF
|
| 14 | from frontend import reader
|
| 15 |
|
| 16 |
|
def _PrintfOuterTokens(fmt):
    """Lex the string `fmt` in PrintfOuter mode and print every token read.

    Reading stops right after the first token whose id is Id.Eof_Real or
    Id.Eol_Tok has been printed.  A log line is emitted before the token
    dump and an empty one after it.
    """
    log('PrintfOuter lexing %r', fmt)

    parse_ctx = test_lib.InitParseContext()
    src_reader = reader.StringLineReader(
        fmt, test_lib.MakeArena('<lexer_test.py>'))
    lx = parse_ctx.MakeLexer(src_reader)

    # Token ids that terminate the dump.
    stop_ids = (Id.Eof_Real, Id.Eol_Tok)

    finished = False
    while not finished:
        tok = lx.Read(lex_mode_e.PrintfOuter)
        print(tok)
        finished = tok.id in stop_ids

    log('')
|
| 32 |
|
| 33 |
|
def _PrintToken(t):
    """Print one token as '<Id name> <repr of t.tval>'.

    The Id name comes from Id_str(t.id) and is right-aligned in a
    20-character column.
    """
    id_name = Id_str(t.id)
    print('%20s %r' % (id_name, t.tval))
|
| 37 |
|
| 38 |
|
def _PrintAllTokens(lx, lex_mode):
    """Read tokens from `lx` in `lex_mode`, printing each with _PrintToken.

    The token that ends the loop (id Id.Eof_Real or Id.Eol_Tok) is printed
    too; nothing is read after it.
    """
    tok = lx.Read(lex_mode)
    _PrintToken(tok)
    while tok.id not in (Id.Eof_Real, Id.Eol_Tok):
        tok = lx.Read(lex_mode)
        _PrintToken(tok)
|
| 45 |
|
| 46 |
|
| 47 | class TokenTest(unittest.TestCase):
|
| 48 |
|
| 49 | def testToken(self):
|
| 50 | t = Tok(Id.Lit_Chars, 'abc')
|
| 51 | print(t)
|
| 52 |
|
| 53 | # This redundancy is OK I guess.
|
| 54 | t = Tok(Id.Lit_LBrace, '{')
|
| 55 | print(t)
|
| 56 |
|
| 57 | t = Tok(Id.Op_Semi, ';')
|
| 58 | print(t)
|
| 59 |
|
| 60 | def testPrintStats(self):
|
| 61 | states = sorted(LEXER_DEF.items(),
|
| 62 | key=lambda pair: len(pair[1]),
|
| 63 | reverse=True)
|
| 64 | total = 0
|
| 65 | for state, re_list in states:
|
| 66 | n = len(re_list)
|
| 67 | print(n, state)
|
| 68 | total += n
|
| 69 |
|
| 70 | print("Number of lex states: %d" % len(LEXER_DEF))
|
| 71 | print("Number of token dispatches: %d" % total)
|
| 72 |
|
| 73 | def testMoveToNextLine(self):
|
| 74 | """Test that it doesn't mess up invariants."""
|
| 75 | arena = test_lib.MakeArena('<lexer_test.py>')
|
| 76 | code_str = '''cd {
|
| 77 | }'''
|
| 78 |
|
| 79 | print('=== Printing all tokens')
|
| 80 | if 1:
|
| 81 | _, lx = test_lib.InitLexer(code_str, arena)
|
| 82 | _PrintAllTokens(lx, lex_mode_e.ShCommand)
|
| 83 |
|
| 84 | print()
|
| 85 | print('=== MoveToNextLine() and LookAheadOne()')
|
| 86 | _, lx = test_lib.InitLexer(code_str, arena)
|
| 87 |
|
| 88 | t = lx.Read(lex_mode_e.ShCommand)
|
| 89 | _PrintToken(t)
|
| 90 | self.assertEqual(Id.Lit_Chars, t.id)
|
| 91 |
|
| 92 | t = lx.Read(lex_mode_e.ShCommand)
|
| 93 | _PrintToken(t)
|
| 94 | self.assertEqual(Id.WS_Space, t.id)
|
| 95 |
|
| 96 | t = lx.Read(lex_mode_e.ShCommand)
|
| 97 | _PrintToken(t)
|
| 98 | self.assertEqual(Id.Lit_LBrace, t.id)
|
| 99 |
|
| 100 | try:
|
| 101 | lx.MoveToNextLine()
|
| 102 | except AssertionError:
|
| 103 | pass
|
| 104 | else:
|
| 105 | self.fail('Should have asserted')
|
| 106 |
|
| 107 | t = lx.Read(lex_mode_e.ShCommand)
|
| 108 | _PrintToken(t)
|
| 109 | self.assertEqual(Id.Op_Newline, t.id)
|
| 110 |
|
| 111 | look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
|
| 112 | self.assertEqual(Id.Unknown_Tok, look_ahead_id)
|
| 113 |
|
| 114 | # Method being tested
|
| 115 | lx.MoveToNextLine()
|
| 116 |
|
| 117 | # Lookahead
|
| 118 | print('Lookahead')
|
| 119 | look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
|
| 120 | self.assertEqual(Id.Lit_RBrace, look_ahead_id)
|
| 121 |
|
| 122 | # Lookahead again
|
| 123 | print('Lookahead 2')
|
| 124 | look_ahead_id = lx.LookAheadOne(lex_mode_e.ShCommand)
|
| 125 | self.assertEqual(Id.Lit_RBrace, look_ahead_id)
|
| 126 |
|
| 127 | t = lx.Read(lex_mode_e.ShCommand)
|
| 128 | _PrintToken(t)
|
| 129 | self.assertEqual(Id.Lit_RBrace, t.id)
|
| 130 |
|
| 131 | t = lx.Read(lex_mode_e.ShCommand)
|
| 132 | _PrintToken(t)
|
| 133 | self.assertEqual(Id.Eof_Real, t.id)
|
| 134 |
|
| 135 | def testMaybeUnreadOne(self):
|
| 136 | arena = test_lib.MakeArena('<lexer_test.py>')
|
| 137 | _, lx = test_lib.InitLexer('()', arena)
|
| 138 |
|
| 139 | t = lx.Read(lex_mode_e.ShCommand)
|
| 140 | print(t)
|
| 141 | self.assertEqual(Id.Op_LParen, t.id)
|
| 142 |
|
| 143 | t = lx.Read(lex_mode_e.ShCommand)
|
| 144 | print(t)
|
| 145 | self.assertEqual(Id.Op_RParen, t.id)
|
| 146 |
|
| 147 | # Go back
|
| 148 | lx.MaybeUnreadOne()
|
| 149 |
|
| 150 | # Push Hint
|
| 151 | lx.PushHint(Id.Op_RParen, Id.Right_CasePat)
|
| 152 |
|
| 153 | # Now we see it again another a Id
|
| 154 | t = lx.Read(lex_mode_e.ShCommand)
|
| 155 | print(t)
|
| 156 | self.assertEqual(Id.Right_CasePat, t.id)
|
| 157 |
|
| 158 | def testPrintf(self):
|
| 159 | # Demonstrate input handling quirk
|
| 160 |
|
| 161 | # Get Id.Eof_Real because len('') == 0
|
| 162 | _PrintfOuterTokens('')
|
| 163 |
|
| 164 | # Get Id.Eol_Tok because len('\0') == 1
|
| 165 | _PrintfOuterTokens('\0')
|
| 166 |
|
| 167 | # Get x, then Id.Eof_Real because there are no more lines
|
| 168 | _PrintfOuterTokens('x\0')
|
| 169 |
|
| 170 |
|
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|