| 1 | #!/usr/bin/env python2
|
| 2 | """
|
| 3 | lexer_def_test.py: Tests for lexer_def.py
|
| 4 | """
|
| 5 | from __future__ import print_function
|
| 6 |
|
| 7 | import re
|
| 8 | import unittest
|
| 9 |
|
| 10 | from _devbuild.gen.id_kind_asdl import Id, Id_str, Kind
|
| 11 | from _devbuild.gen.types_asdl import lex_mode_e
|
| 12 | from core import test_lib
|
| 13 | from core.test_lib import FakeTok
|
| 14 | from mycpp.mylib import log
|
| 15 | from frontend import lexer
|
| 16 | from frontend import lexer_def
|
| 17 | from frontend import consts
|
| 18 | from frontend import match
|
| 19 |
|
| 20 | _ = log
|
| 21 |
|
| 22 |
|
def _InitLexer(s):
    """Build a Lexer over the string s, backed by a throwaway test arena."""
    test_arena = test_lib.MakeArena('<lex_test.py>')
    _, lx = test_lib.InitLexer(s, test_arena)
    return lx
|
| 27 |
|
| 28 |
|
class AsdlTest(unittest.TestCase):
    """Sanity check that generated ASDL enums are importable and printable."""

    def testLexMode(self):
        # Just verify lex_mode_e.DQ exists and can be rendered.
        mode = lex_mode_e.DQ
        print(mode)
|
| 33 |
|
| 34 |
|
# Two-line shell snippet consumed token-by-token in LexerTest.testRead.
# The backslash after the opening quotes suppresses a leading newline.
CMD = """\
ls /
ls /home/
"""
|
| 39 |
|
| 40 |
|
class LexerTest(unittest.TestCase):
    """Exercises Lexer.Read() across the various lex_mode_e modes."""

    def assertTokensEqual(self, left, right):
        """Assert two tokens compare equal, with a readable failure message."""
        self.assertTrue(test_lib.TokensEqual(left, right),
                        'Expected %r, got %r' % (left, right))

    def testRead(self):
        """Walks the complete token stream for CMD in ShCommand mode."""
        lexer = _InitLexer(CMD)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'ls'), t)
        t = lexer.Read(lex_mode_e.ShCommand)

        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)

        # Line two
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'ls'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'home'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

        # Another EOF gives EOF
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

    def testMode_VSub_ArgUnquoted(self):
        """Single quotes and tildes are significant in VSub_ArgUnquoted mode."""
        # NOTE(review): the original comment here said 'Another EOF gives
        # EOF', which was stale copy-paste from testRead.
        lx = _InitLexer("'hi'")
        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
        print(t)

        self.assertTokensEqual(FakeTok(Id.Left_SingleQuote, "'"), t)

        lx = _InitLexer("~root")
        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
        print(t)

        self.assertTokensEqual(FakeTok(Id.Lit_Tilde, '~'), t)

    def testMode_ExtGlob(self):
        """Extended glob tokens: @( *( ?( and their contents."""
        lexer = _InitLexer('@(foo|bar)')

        # The '@(' opener is recognized in ShCommand mode ...
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        # ... then the interior is read in ExtGlob mode.
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)

        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Op_Pipe, '|'), t)

        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'bar'), t)

        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Op_RParen, ')'), t)

        # Individual cases

        lexer = _InitLexer('@(')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        lexer = _InitLexer('*(')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_Star, '*('), t)

        lexer = _InitLexer('?(')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_QMark, '?('), t)

        lexer = _InitLexer('$')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Other, '$'), t)

    def testMode_BashRegex(self):
        """In BashRegex mode, '|' is a literal, not a pipe operator."""
        lexer = _InitLexer('(foo|bar)')

        t = lexer.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.BashRegex_LParen, '('), t)

        t = lexer.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)

        t = lexer.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.Lit_Other, '|'), t)

    def testMode_DBracket(self):
        """[[ ]] mode: unary operators like -z get their own token kind."""
        lex = _InitLexer('-z foo')
        t = lex.Read(lex_mode_e.DBracket)
        self.assertTokensEqual(FakeTok(Id.BoolUnary_z, '-z'), t)
        self.assertEqual(Kind.BoolUnary, consts.GetKind(t.id))

    def testMode_DollarSq(self):
        """$'...' mode: literal runs and backslash escapes are separate tokens."""
        lexer = _InitLexer(r'foo bar\n \x00 \000 \u0065')

        t = lexer.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo bar'), t)

        t = lexer.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(FakeTok(Id.Char_OneChar, r'\n'), t)

    def testMode_Backtick(self):
        """Smoke test: Backtick mode tokenizes several inputs without error."""
        CASES = [
            r'echo \" \\ hi`',
            r'`',
            r'',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.Backtick)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testMode_Printf(self):
        """Smoke test for the two printf lexer modes."""
        CASES = [
            r'hello %s\n',
            r'%% percent %%\377',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.PrintfOuter)
                print(t)
                if t.id == Id.Eof_Real:
                    break

        # Now test the Printf_Percent mode
        CASES = [r'-3.3f', r'03d']

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.PrintfPercent)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testMode_Expr(self):
        """Smoke test: Expr mode tokenizes without error."""
        CASES = [
            r'@[ ]',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.Expr)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testLookPastSpace(self):
        """LookPastSpace() should see the next non-space token's id."""
        # I think this is the usage pattern we care about.  Peek and Next() past
        # the function; then Peek() the next token.  Then Lookahead in that state.
        lexer = _InitLexer('fun()')

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_LParen, '('), t)

        self.assertEqual(Id.Op_RParen,
                         lexer.LookPastSpace(lex_mode_e.ShCommand))

        lexer = _InitLexer('fun ()')

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        self.assertEqual(Id.Op_LParen,
                         lexer.LookPastSpace(lex_mode_e.ShCommand))

    def testPushHint(self):
        """PushHint() translates the next matching token id on read."""
        # Extglob use case
        lexer = _InitLexer('@()')
        lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        # The ')' comes back as Right_ExtGlob because of the hint.
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Right_ExtGlob, ')'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

    def testEmitCompDummy(self):
        """EmitCompDummy() inserts a Lit_CompDummy token right before EOF."""
        lexer = _InitLexer('echo ')
        lexer.EmitCompDummy()

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'echo'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        # Right before EOF
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_CompDummy, ''), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)
|
| 295 |
|
| 296 |
|
class LineLexerTest(unittest.TestCase):
    """Tests for the LineLexer, which reads tokens from one physical line."""

    def setUp(self):
        self.arena = test_lib.MakeArena('<lex_test.py>')

    def assertTokensEqual(self, left, right):
        """Compare id and tval separately for clearer failure output."""
        self.assertEqual(left.id, right.id,
                         '%s != %s' % (Id_str(left.id), Id_str(right.id)))
        self.assertEqual(left.tval, right.tval)

    def testReadOuter(self):
        line_lex = test_lib.InitLineLexer('\n', self.arena)
        t = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(lexer.DummyToken(Id.Op_Newline, None), t)

    def testRead_VSub_ArgUnquoted(self):
        line_lex = test_lib.InitLineLexer("'hi'", self.arena)
        t = line_lex.Read(lex_mode_e.VSub_ArgUnquoted)
        self.assertEqual(Id.Left_SingleQuote, t.id)

    def testLookPastSpace(self):
        # Lines always end with '\n'
        line_lex = test_lib.InitLineLexer('', self.arena)
        self.assertEqual(Id.Unknown_Tok,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        line_lex = test_lib.InitLineLexer('foo', self.arena)
        t = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)
        self.assertEqual(Id.Unknown_Tok,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        line_lex = test_lib.InitLineLexer('foo bar', self.arena)
        t = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)
        self.assertEqual(Id.Lit_Chars,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        # No lookahead; using the cursor!
        line_lex = test_lib.InitLineLexer('fun(', self.arena)
        t = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)
        self.assertEqual(Id.Op_LParen,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        line_lex = test_lib.InitLineLexer('fun (', self.arena)
        t = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)
        self.assertEqual(Id.Op_LParen,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))
|
| 345 |
|
| 346 |
|
class RegexTest(unittest.TestCase):
    """Direct regex checks against patterns defined in lexer_def."""

    def testNul(self):
        # A [\0] character class matches NUL but not other characters.
        nul_pat = re.compile(r'[\0]')
        self.assertFalse(nul_pat.match('x'))
        self.assertTrue(nul_pat.match('\0'))

        # The final ECHO_E_DEF pattern matches 'x' but must NOT match NUL.
        _, pat, _ = lexer_def.ECHO_E_DEF[-1]
        print('P %r' % pat)
        last_echo_e_pat = re.compile(pat)
        self.assertTrue(last_echo_e_pat.match('x'))
        self.assertFalse(last_echo_e_pat.match('\0'))
|
| 359 |
|
| 360 |
|
class OtherLexerTest(unittest.TestCase):
    """Tests for the special-purpose lexers in frontend/match.py."""

    def testEchoLexer(self):
        cases = [
            r'newline \n NUL \0 octal \0377 hex \x00',
            r'unicode \u0065 \U00000065',
            r'\d \e \f \g',
        ]
        for case in cases:
            print(match.EchoLexer(case).Tokens())

    def testPS1Lexer(self):
        for prompt in (r'foo', r'\h \w \$'):
            print(list(match.Ps1Tokens(prompt)))

    def testHistoryLexer(self):
        def _TokenTypes(s):
            """Tokenize s, print the tokens, and return the list of type ids."""
            toks = list(match.HistoryTokens(s))
            print(toks)
            return [tok_type for tok_type, _ in toks]

        print(list(match.HistoryTokens(r'echo hi')))

        print(list(match.HistoryTokens(r'echo !! !* !^ !$')))

        # No history operator with \ escape
        self.assertTrue(Id.History_Op not in _TokenTypes(r'echo \!!'))

        print(list(match.HistoryTokens(r'echo !3...')))
        print(list(match.HistoryTokens(r'echo !-5...')))
        print(list(match.HistoryTokens(r'echo !x/foo.py bar')))

        print('---')

        # No history operator in single quotes
        self.assertTrue(Id.History_Op not in _TokenTypes(r"echo '!!' $'!!' "))

        # No history operator in incomplete single quotes
        self.assertTrue(Id.History_Op not in _TokenTypes(r"echo '!! "))

        # Quoted single quote, and then a History operator
        # YES operator
        self.assertTrue(Id.History_Op in _TokenTypes(r"echo \' !! "))

    def testHistoryDoesNotConflict(self):
        # https://github.com/oilshell/oil/issues/264
        #
        # Bash has a bunch of hacks to suppress the conflict between ! for history
        # and:
        #
        # 1. [!abc] globbing
        # 2. ${!foo} indirect expansion
        # 3. $!x -- the PID
        # 4. !(foo|bar) -- extended glob
        #
        # I guess [[ a != b ]] doesn't match the pattern in bash.

        three_other = [Id.History_Other] * 3
        two_other = [Id.History_Other] * 2
        cases = [
            (r'[!abc]', three_other),
            (r'${!indirect}', three_other),
            (r'$!x', three_other),  # didn't need a special case
            (r'!(foo|bar)', two_other),  # didn't need a special case
        ]

        for s, expected_types in cases:
            tokens = list(match.HistoryTokens(s))
            print(tokens)
            actual_types = [id_ for id_, val in tokens]

            self.assertTrue(Id.History_Search not in actual_types, tokens)

            self.assertEqual(expected_types, actual_types)

    def testBraceRangeLexer(self):
        cases = [
            'a..z',
            '100..300',
            '-300..-100..1',
            '1.3',  # invalid
            'aa',
        ]
        for case in cases:
            print(match.BraceRangeLexer(case).Tokens())
|
| 451 |
|
| 452 |
|
if __name__ == '__main__':
    # Allow running this test file directly: python lexer_def_test.py
    unittest.main()
|