| 1 | #!/usr/bin/env python2
|
| 2 | """
|
| 3 | lexer_def_test.py: Tests for lexer_def.py
|
| 4 | """
|
| 5 | from __future__ import print_function
|
| 6 |
|
| 7 | import re
|
| 8 | import unittest
|
| 9 |
|
| 10 | from _devbuild.gen.id_kind_asdl import Id, Id_str, Kind
|
| 11 | from _devbuild.gen.types_asdl import lex_mode_e
|
| 12 | from core import test_lib
|
| 13 | from core.test_lib import FakeTok
|
| 14 | from mycpp.mylib import log
|
| 15 | from frontend import lexer
|
| 16 | from frontend import lexer_def
|
| 17 | from frontend import consts
|
| 18 | from frontend import match
|
| 19 |
|
| 20 | _ = log
|
| 21 |
|
| 22 |
|
def _InitLexer(s):
    """Build a Lexer over the string s, backed by a throwaway test arena."""
    test_arena = test_lib.MakeArena('<lex_test.py>')
    _, lx = test_lib.InitLexer(s, test_arena)
    return lx
|
| 27 |
|
| 28 |
|
class AsdlTest(unittest.TestCase):
    """Sanity check that generated ASDL enums are importable and printable."""

    def testLexMode(self):
        # Just verify lex_mode_e.DQ exists and can be rendered.
        mode = lex_mode_e.DQ
        print(mode)
|
| 33 |
|
| 34 |
|
# Two-line shell snippet consumed token-by-token in LexerTest.testRead.
# The backslash after the opening quotes suppresses a leading newline.
CMD = """\
ls /
ls /home/
"""
|
| 39 |
|
| 40 |
|
class LexerTest(unittest.TestCase):
    """Exercises Lexer.Read() across the various lex_mode_e modes."""

    def assertTokensEqual(self, left, right):
        """Assert two tokens compare equal, with a readable failure message."""
        self.assertTrue(test_lib.TokensEqual(left, right),
                        'Expected %r, got %r' % (left, right))

    def testRead(self):
        """Walks the complete token stream for CMD in ShCommand mode."""
        lexer = _InitLexer(CMD)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'ls'), t)
        t = lexer.Read(lex_mode_e.ShCommand)

        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)

        # Line two
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'ls'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'home'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

        # Another EOF gives EOF
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

    def testMode_VSub_ArgUnquoted(self):
        """Single quotes and tildes are significant in VSub_ArgUnquoted mode."""
        # NOTE(review): the original comment here said 'Another EOF gives
        # EOF', which was stale copy-paste from testRead.
        lx = _InitLexer("'hi'")
        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
        print(t)

        self.assertTokensEqual(FakeTok(Id.Left_SingleQuote, "'"), t)

        lx = _InitLexer("~root")
        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
        print(t)

        self.assertTokensEqual(FakeTok(Id.Lit_Tilde, '~'), t)

    def testMode_ExtGlob(self):
        """Extended glob tokens: @( *( ?( and their contents."""
        lexer = _InitLexer('@(foo|bar)')

        # The '@(' opener is recognized in ShCommand mode ...
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        # ... then the interior is read in ExtGlob mode.
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)

        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Op_Pipe, '|'), t)

        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'bar'), t)

        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Op_RParen, ')'), t)

        # Individual cases

        lexer = _InitLexer('@(')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        lexer = _InitLexer('*(')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_Star, '*('), t)

        lexer = _InitLexer('?(')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_QMark, '?('), t)

        lexer = _InitLexer('$')
        t = lexer.Read(lex_mode_e.ExtGlob)
        self.assertTokensEqual(FakeTok(Id.Lit_Other, '$'), t)

    def testMode_BashRegex(self):
        """In BashRegex mode, '|' is a literal, not a pipe operator."""
        lexer = _InitLexer('(foo|bar)')

        t = lexer.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.BashRegex_LParen, '('), t)

        t = lexer.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)

        t = lexer.Read(lex_mode_e.BashRegex)
        self.assertTokensEqual(FakeTok(Id.Lit_Other, '|'), t)

    def testMode_DBracket(self):
        """[[ ]] mode: unary operators like -z get their own token kind."""
        lex = _InitLexer('-z foo')
        t = lex.Read(lex_mode_e.DBracket)
        self.assertTokensEqual(FakeTok(Id.BoolUnary_z, '-z'), t)
        self.assertEqual(Kind.BoolUnary, consts.GetKind(t.id))

    def testMode_DollarSq(self):
        """$'...' mode: literal runs and backslash escapes are separate tokens."""
        lexer = _InitLexer(r'foo bar\n \x00 \000 \u0065')

        t = lexer.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo bar'), t)

        t = lexer.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(FakeTok(Id.Char_OneChar, r'\n'), t)

    def testMode_Backtick(self):
        """Smoke test: Backtick mode tokenizes several inputs without error."""
        CASES = [
            r'echo \" \\ hi`',
            r'`',
            r'',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.Backtick)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testMode_Printf(self):
        """Smoke test for the two printf lexer modes."""
        CASES = [
            r'hello %s\n',
            r'%% percent %%\377',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.PrintfOuter)
                print(t)
                if t.id == Id.Eof_Real:
                    break

        # Now test the Printf_Percent mode
        CASES = [r'-3.3f', r'03d']

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.PrintfPercent)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testMode_Expr(self):
        """Smoke test: Expr mode tokenizes without error."""
        CASES = [
            r'@[ ]',
        ]

        for case in CASES:
            print()
            print('--- %s ---' % case)
            print()

            lexer = _InitLexer(case)

            while True:
                t = lexer.Read(lex_mode_e.Expr)
                print(t)
                if t.id == Id.Eof_Real:
                    break

    def testLookPastSpace(self):
        """LookPastSpace() should see the next non-space token's id."""
        # I think this is the usage pattern we care about.  Peek and Next() past
        # the function; then Peek() the next token.  Then Lookahead in that state.
        lexer = _InitLexer('fun()')

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Op_LParen, '('), t)

        self.assertEqual(Id.Op_RParen,
                         lexer.LookPastSpace(lex_mode_e.ShCommand))

        lexer = _InitLexer('fun ()')

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        self.assertEqual(Id.Op_LParen,
                         lexer.LookPastSpace(lex_mode_e.ShCommand))

    def testPushHint(self):
        """PushHint() translates the next matching token id on read."""
        # Extglob use case
        lexer = _InitLexer('@()')
        lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.ExtGlob_At, '@('), t)

        # The ')' comes back as Right_ExtGlob because of the hint.
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Right_ExtGlob, ')'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)

    def testEmitCompDummy(self):
        """EmitCompDummy() inserts a Lit_CompDummy token right before EOF."""
        lexer = _InitLexer('echo ')
        lexer.EmitCompDummy()

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'echo'), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)

        # Right before EOF
        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_CompDummy, ''), t)

        t = lexer.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)
|
| 295 |
|
| 296 |
|
class LineLexerTest(unittest.TestCase):
    """Tests for the LineLexer, which reads tokens from one physical line."""

    def setUp(self):
        self.arena = test_lib.MakeArena('<lex_test.py>')

    def assertTokensEqual(self, left, right):
        """Compare id and tval separately for clearer failure output."""
        self.assertEqual(left.id, right.id,
                         '%s != %s' % (Id_str(left.id), Id_str(right.id)))
        self.assertEqual(left.tval, right.tval)

    def testReadOuter(self):
        line_lex = test_lib.InitLineLexer('\n', self.arena)
        t = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(lexer.DummyToken(Id.Op_Newline, None), t)

    def testRead_VSub_ArgUnquoted(self):
        line_lex = test_lib.InitLineLexer("'hi'", self.arena)
        t = line_lex.Read(lex_mode_e.VSub_ArgUnquoted)
        self.assertEqual(Id.Left_SingleQuote, t.id)

    def testLookPastSpace(self):
        # Lines always end with '\n'
        line_lex = test_lib.InitLineLexer('', self.arena)
        self.assertEqual(Id.Unknown_Tok,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        line_lex = test_lib.InitLineLexer('foo', self.arena)
        t = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)
        self.assertEqual(Id.Unknown_Tok,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        line_lex = test_lib.InitLineLexer('foo bar', self.arena)
        t = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'foo'), t)
        self.assertEqual(Id.Lit_Chars,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        # No lookahead; using the cursor!
        line_lex = test_lib.InitLineLexer('fun(', self.arena)
        t = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)
        self.assertEqual(Id.Op_LParen,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))

        line_lex = test_lib.InitLineLexer('fun (', self.arena)
        t = line_lex.Read(lex_mode_e.ShCommand)
        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'fun'), t)
        self.assertEqual(Id.Op_LParen,
                         line_lex.LookPastSpace(lex_mode_e.ShCommand))
|
| 345 |
|
| 346 |
|
class RegexTest(unittest.TestCase):
    """Direct regex checks against patterns defined in lexer_def."""

    def testNul(self):
        # A [\0] character class matches NUL but not other characters.
        nul_pat = re.compile(r'[\0]')
        self.assertFalse(nul_pat.match('x'))
        self.assertTrue(nul_pat.match('\0'))

        # The final ECHO_E_DEF pattern matches 'x' but must NOT match NUL.
        _, pat, _ = lexer_def.ECHO_E_DEF[-1]
        print('P %r' % pat)
        last_echo_e_pat = re.compile(pat)
        self.assertTrue(last_echo_e_pat.match('x'))
        self.assertFalse(last_echo_e_pat.match('\0'))
|
| 359 |
|
| 360 |
|
class OtherLexerTest(unittest.TestCase):
    """Tests for the special-purpose lexers in frontend/match.py."""

    def testEchoLexer(self):
        cases = [
            r'newline \n NUL \0 octal \0377 hex \x00',
            r'unicode \u0065 \U00000065',
            r'\d \e \f \g',
        ]
        for case in cases:
            print(match.EchoLexer(case).Tokens())

    def testPS1Lexer(self):
        for prompt in (r'foo', r'\h \w \$'):
            print(list(match.Ps1Tokens(prompt)))

    def testHistoryLexer(self):
        def _TokenTypes(s):
            """Tokenize s, print the tokens, and return the list of type ids."""
            toks = list(match.HistoryTokens(s))
            print(toks)
            return [tok_type for tok_type, _ in toks]

        print(list(match.HistoryTokens(r'echo hi')))

        print(list(match.HistoryTokens(r'echo !! !* !^ !$')))

        # No history operator with \ escape
        self.assertTrue(Id.History_Op not in _TokenTypes(r'echo \!!'))

        print(list(match.HistoryTokens(r'echo !3...')))
        print(list(match.HistoryTokens(r'echo !-5...')))
        print(list(match.HistoryTokens(r'echo !x/foo.py bar')))

        print('---')

        # No history operator in single quotes
        self.assertTrue(Id.History_Op not in _TokenTypes(r"echo '!!' $'!!' "))

        # No history operator in incomplete single quotes
        self.assertTrue(Id.History_Op not in _TokenTypes(r"echo '!! "))

        # Quoted single quote, and then a History operator
        # YES operator
        self.assertTrue(Id.History_Op in _TokenTypes(r"echo \' !! "))

    def testHistoryDoesNotConflict(self):
        # https://github.com/oilshell/oil/issues/264
        #
        # Bash has a bunch of hacks to suppress the conflict between ! for history
        # and:
        #
        # 1. [!abc] globbing
        # 2. ${!foo} indirect expansion
        # 3. $!x -- the PID
        # 4. !(foo|bar) -- extended glob
        #
        # I guess [[ a != b ]] doesn't match the pattern in bash.

        three_other = [Id.History_Other] * 3
        two_other = [Id.History_Other] * 2
        cases = [
            (r'[!abc]', three_other),
            (r'${!indirect}', three_other),
            (r'$!x', three_other),  # didn't need a special case
            (r'!(foo|bar)', two_other),  # didn't need a special case
        ]

        for s, expected_types in cases:
            tokens = list(match.HistoryTokens(s))
            print(tokens)
            actual_types = [id_ for id_, val in tokens]

            self.assertTrue(Id.History_Search not in actual_types, tokens)

            self.assertEqual(expected_types, actual_types)

    def testBraceRangeLexer(self):
        cases = [
            'a..z',
            '100..300',
            '-300..-100..1',
            '1.3',  # invalid
            'aa',
        ]
        for case in cases:
            print(match.BraceRangeLexer(case).Tokens())
|
| 451 |
|
| 452 |
|
if __name__ == '__main__':
    # Allow running this test file directly: python lexer_def_test.py
    unittest.main()
|