| 1 | #!/usr/bin/env python
|
| 2 | from __future__ import print_function
|
| 3 | """
|
| 4 | lex_test.py: Tests for lex.py
|
| 5 | """
|
| 6 |
|
| 7 | import unittest
|
| 8 |
|
| 9 | from core.lexer import CompileAll, LineLexer
|
| 10 | from core import test_lib
|
| 11 |
|
| 12 | from osh import parse_lib
|
| 13 | from osh.meta import ast, Id, Kind, LookupKind, types
|
| 14 | from osh.lex import LEXER_DEF
|
| 15 |
|
| 16 | lex_mode_e = types.lex_mode_e
|
| 17 |
|
| 18 |
|
def _InitLexer(s):
  """Return a Lexer that reads tokens from the string s.

  Uses a throwaway arena so each test gets independent location tracking.
  """
  test_arena = test_lib.MakeArena('<lex_test.py>')
  _unused, lexer = parse_lib.InitLexer(s, test_arena)
  return lexer
|
| 23 |
|
| 24 |
|
class AsdlTest(unittest.TestCase):
  """Smoke test for the generated ASDL enum module."""

  def testLexMode(self):
    # Just demonstrate that the DQ enum member exists and is printable.
    print(lex_mode_e.DQ)
|
| 29 |
|
| 30 |
|
# Two-line shell script consumed by LexerTest.testRead.  The trailing
# backslash after the opening quotes keeps the literal from beginning with a
# newline, so the first token read is 'ls', not Op_Newline.
CMD = """\
ls /
ls /home/
"""
|
| 35 |
|
class LexerTest(unittest.TestCase):
  """Tests for the full Lexer, driven through _InitLexer."""

  def assertTokensEqual(self, left, right):
    self.assertTrue(
        test_lib.TokensEqual(left, right),
        'Expected %r, got %r' % (left, right))

  def _assertReads(self, lx, mode, expected):
    # Read one token per (id, value) pair in the given lex mode and compare.
    for tok_id, tok_val in expected:
      self.assertTokensEqual(ast.token(tok_id, tok_val), lx.Read(mode))

  def testRead(self):
    lx = _InitLexer(CMD)
    self._assertReads(lx, lex_mode_e.OUTER, [
        # Line one
        (Id.Lit_Chars, 'ls'),
        (Id.WS_Space, ' '),
        (Id.Lit_Chars, '/'),
        (Id.Op_Newline, '\n'),
        # Line two
        (Id.Lit_Chars, 'ls'),
        (Id.WS_Space, ' '),
        (Id.Lit_Chars, '/home/'),
        (Id.Op_Newline, '\n'),
        (Id.Eof_Real, ''),
        # Another EOF gives EOF
        (Id.Eof_Real, ''),
    ])

  def testRead_VS_ARG_UNQ(self):
    # Just exercise this lex mode on a quoted word and show the result.
    lx = _InitLexer("'hi'")
    tok = lx.Read(lex_mode_e.VS_ARG_UNQ)
    print(tok)

  def testExtGlob(self):
    lx = _InitLexer('@(foo|bar)')

    # The '@(' opener is recognized in OUTER mode; the rest in EXTGLOB mode.
    self._assertReads(lx, lex_mode_e.OUTER, [(Id.ExtGlob_At, '@(')])
    self._assertReads(lx, lex_mode_e.EXTGLOB, [
        (Id.Lit_Chars, 'foo'),
        (Id.Op_Pipe, '|'),
        (Id.Lit_Chars, 'bar'),
        (Id.Op_RParen, ')'),
    ])

    # Individual cases
    cases = [
        ('@(', Id.ExtGlob_At, '@('),
        ('*(', Id.ExtGlob_Star, '*('),
        ('?(', Id.ExtGlob_QMark, '?('),
        ('$', Id.Lit_Other, '$'),
    ]
    for code_str, tok_id, tok_val in cases:
      lx = _InitLexer(code_str)
      self._assertReads(lx, lex_mode_e.EXTGLOB, [(tok_id, tok_val)])

  def testBashRegexState(self):
    # In BASH_REGEX mode, '(' and '|' come back as plain literal characters.
    lx = _InitLexer('(foo|bar)')
    self._assertReads(lx, lex_mode_e.BASH_REGEX, [
        (Id.Lit_Chars, '('),
        (Id.Lit_Chars, 'foo'),
        (Id.Lit_Chars, '|'),
    ])

  def testDBracketState(self):
    lx = _InitLexer('-z foo')
    tok = lx.Read(lex_mode_e.DBRACKET)
    self.assertTokensEqual(ast.token(Id.BoolUnary_z, '-z'), tok)
    # The token id should map back to the BoolUnary kind.
    self.assertEqual(Kind.BoolUnary, LookupKind(tok.id))

  def testDollarSqState(self):
    lx = _InitLexer(r'foo bar\n \x00 \000 \u0065')

    # A run of ordinary characters comes back as one Char_Literals token.
    tok = lx.Read(lex_mode_e.DOLLAR_SQ)
    print(tok)
    self.assertTokensEqual(ast.token(Id.Char_Literals, 'foo bar'), tok)

    # Then the backslash escape is its own token.
    tok = lx.Read(lex_mode_e.DOLLAR_SQ)
    print(tok)
    self.assertTokensEqual(ast.token(Id.Char_OneChar, r'\n'), tok)

  def testLookAhead(self):
    # The usage pattern we care about: read past the function name and '(',
    # then LookAhead for the next token without consuming it.
    lx = _InitLexer('func()')

    self._assertReads(lx, lex_mode_e.OUTER, [
        (Id.Lit_Chars, 'func'),
        (Id.Op_LParen, '('),
    ])
    self.assertTokensEqual(
        ast.token(Id.Op_RParen, ')'), lx.LookAhead(lex_mode_e.OUTER))

    # Same shape with a space before the paren.
    lx = _InitLexer('func ()')

    self._assertReads(lx, lex_mode_e.OUTER, [
        (Id.Lit_Chars, 'func'),
        (Id.WS_Space, ' '),
    ])
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('), lx.LookAhead(lex_mode_e.OUTER))
|
| 177 |
|
| 178 |
|
class LineLexerTest(unittest.TestCase):
  """Tests for LineLexer, which tokenizes a single line of input."""

  def setUp(self):
    self.arena = test_lib.MakeArena('<lex_test.py>')

  def assertTokensEqual(self, left, right):
    self.assertTrue(test_lib.TokensEqual(left, right))

  def _MakeLineLexer(self, line):
    # Every test builds its lexer the same way; centralize the boilerplate.
    return LineLexer(parse_lib._MakeMatcher(), line, self.arena)

  def testReadOuter(self):
    lx = self._MakeLineLexer('\n')
    self.assertTokensEqual(
        ast.token(Id.Op_Newline, '\n'), lx.Read(lex_mode_e.OUTER))

  def testRead_VS_ARG_UNQ(self):
    lx = self._MakeLineLexer("'hi'")
    tok = lx.Read(lex_mode_e.VS_ARG_UNQ)
    self.assertEqual(Id.Left_SingleQuote, tok.id)

  def testLookAhead(self):
    # Lines always end with '\n'
    lx = self._MakeLineLexer('')
    self.assertTokensEqual(
        ast.token(Id.Unknown_Tok, ''), lx.LookAhead(lex_mode_e.OUTER))

    lx = self._MakeLineLexer('foo')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'foo'), lx.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Unknown_Tok, ''), lx.LookAhead(lex_mode_e.OUTER))

    lx = self._MakeLineLexer('foo bar')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'foo'), lx.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'bar'), lx.LookAhead(lex_mode_e.OUTER))

    # No lookahead; using the cursor!
    lx = self._MakeLineLexer('func(')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'func'), lx.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('), lx.LookAhead(lex_mode_e.OUTER))

    lx = self._MakeLineLexer('func (')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'func'), lx.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('), lx.LookAhead(lex_mode_e.OUTER))
|
| 227 |
|
| 228 |
|
# Compiled pattern tables for two lex modes, consumed by RegexTest below.
# Each appears to be a sequence of (compiled pattern, token id) pairs --
# see the tuple unpack in RegexTest.  TODO(review): confirm against
# core.lexer.CompileAll.
OUTER_RE = CompileAll(LEXER_DEF[lex_mode_e.OUTER])
DOUBLE_QUOTED_RE = CompileAll(LEXER_DEF[lex_mode_e.DQ])
|
| 231 |
|
| 232 |
|
class RegexTest(unittest.TestCase):
  """Pokes at individual compiled patterns from the lexer definitions."""

  def _PrintNulMatch(self, pairs):
    # Entry 3 of the table is the NUL pattern (per its name in the lexer
    # def); show whether it matches a literal NUL byte.
    nul_pat, _ = pairs[3]
    print(nul_pat.match('\0'))

  def testOuter(self):
    self._PrintNulMatch(OUTER_RE)

  def testDoubleQuoted(self):
    self._PrintNulMatch(DOUBLE_QUOTED_RE)
|
| 244 |
|
| 245 |
|
# Allow running this file directly: discover and run all TestCase classes.
if __name__ == '__main__':
  unittest.main()
|