OILS / spec / regex.test.sh View on Github | oilshell.org

356 lines, 157 significant
1#
2# Only bash and zsh seem to implement [[ foo =~ '' ]]
3#
4# ^(a b)$ is a regex that should match 'a b' in a group.
5#
6# Not sure what bash is doing here... I think I have to just be empirical.
7# Might need "compat" switch for parsing the regex. It should be an opaque
8# string like zsh, not sure why it isn't.
9#
10# I think this is just papering over bugs...
11# https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
12#
13# Storing the regular expression in a shell variable is often a useful way to
14# avoid problems with quoting characters that are special to the shell. It is
15# sometimes difficult to specify a regular expression literally without using
16# quotes, or to keep track of the quoting used by regular expressions while
17# paying attention to the shell’s quote removal. Using a shell variable to
18# store the pattern decreases these problems. For example, the following is
19# equivalent to the above:
20#
21# pattern='[[:space:]]*(a)?b'
22# [[ $line =~ $pattern ]]
23#
24# If you want to match a character that’s special to the regular expression
25# grammar, it has to be quoted to remove its special meaning. This means that in
26# the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
27# regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
28# literal ‘.’. Shell programmers should take special care with backslashes, since
29# backslashes are used both by the shell and regular expressions to remove the
30# special meaning from the following character. The following two sets of
31# commands are not equivalent: (the manual's example is omitted here)
32#
33# From bash code: ( | ) are treated special. Normally they must be quoted, but
34# they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!
35
36#### BASH_REMATCH
# After a successful match the array holds the whole match followed by one
# element per parenthesized group, as the three-element expectation shows.
37[[ foo123 =~ ([a-z]+)([0-9]+) ]]
38echo status=$?
39argv.py "${BASH_REMATCH[@]}"
40
41[[ failed =~ ([a-z]+)([0-9]+) ]]
42echo status=$?
43argv.py "${BASH_REMATCH[@]}" # default expectation below: empty after a failed match
44
45## STDOUT:
46status=0
47['foo123', 'foo', '123']
48status=1
49[]
50## END
51## N-I zsh STDOUT:
52status=0
53['']
54status=1
55['']
56## END
57
58#### Match is unanchored at both ends
# The single letter a sits in the middle of bar; the match is expected to
# succeed even though the pattern has no anchors and covers neither end.
59[[ 'bar' =~ a ]] && echo true
60## stdout: true
61
62#### Failed match
# X does not occur in bar, so the echo is skipped: the expectation is empty
# stdout and exit status 1.
63[[ 'bar' =~ X ]] && echo true
64## status: 1
65## stdout-json: ""
66
67#### Regex quoted with \ -- preferred in bash
# Only the space is escaped. The anchors and parens stay unquoted, and the
# three-character string a b is expected to match.
68[[ 'a b' =~ ^(a\ b)$ ]] && echo true
69## stdout: true
70
71#### Regex quoted with single quotes
# Default expectation: the fully quoted pattern does not match (no output,
# status 1). The zsh lines expect true and status 0 instead.
72# bash doesn't like the quotes
73[[ 'a b' =~ '^(a b)$' ]] && echo true
74## stdout-json: ""
75## status: 1
76## OK zsh stdout: true
77## OK zsh status: 0
78
79#### Regex quoted with double quotes
# Same check as the single-quoted case but with double quotes. Default
# expectation: no output and status 1; the zsh lines expect true and status 0.
80# bash doesn't like the quotes
81[[ 'a b' =~ "^(a b)$" ]] && echo true
82## stdout-json: ""
83## status: 1
84## OK zsh stdout: true
85## OK zsh status: 0
86
87#### Fix single quotes by storing in variable
# The pattern is assigned with single quotes and then expanded unquoted on
# the right-hand side; the match is expected to succeed.
88pat='^(a b)$'
89[[ 'a b' =~ $pat ]] && echo true
90## stdout: true
91
92#### Fix double quotes by storing in variable
# Here pat is assigned with double quotes and then expanded unquoted on the
# right-hand side; the match is expected to succeed.
93pat="^(a b)$"
94[[ 'a b' =~ $pat ]] && echo true
95## stdout: true
96
97#### Double quoting pat variable -- again bash doesn't like it.
# The expansion of pat is double-quoted on the right-hand side. Default
# expectation: no output and status 1; the zsh lines expect true and status 0.
98pat="^(a b)$"
99[[ 'a b' =~ "$pat" ]] && echo true
100## stdout-json: ""
101## status: 1
102## OK zsh stdout: true
103## OK zsh status: 0
104
105#### Mixing quoted and unquoted parts
# Each pattern is one word built from adjacent quoted and unquoted pieces.
# Both matches are expected to succeed, giving two true lines.
106[[ 'a b' =~ 'a 'b ]] && echo true
107[[ "a b" =~ "a "'b' ]] && echo true
108## STDOUT:
109true
110true
111## END
112
113#### Regex with == and not =~ is parse error, different lexer mode required
# The operator is == rather than =~ while the right-hand side still has
# unquoted parens. Expected status is 2 by default and 1 for zsh; no stdout
# expectation is stated.
114# They both give a syntax error. This is lame.
115[[ '^(a b)$' == ^(a\ b)$ ]] && echo true
116## status: 2
117## OK zsh status: 1
118
119#### Omitting ( )
# With the parens removed the == comparison is expected to print true. The
# left operand is the literal text with a caret, a space and a dollar sign.
120[[ '^a b$' == ^a\ b$ ]] && echo true
121## stdout: true
122
123#### Malformed regex
# The parens appear in the wrong order (close before open). Default
# expectation: no output and status 2; the zsh line expects status 1.
124# Are they trying to PARSE the regex? Do they feed the buffer directly to
125# regcomp()?
126[[ 'a b' =~ ^)a\ b($ ]] && echo true
127## stdout-json: ""
128## status: 2
129## OK zsh status: 1
130
131#### Regex with char class containing space
# The bracket expression lists a, space and b, and is wrapped in parens. The
# match against the string of those characters is expected to print true.
132# For some reason it doesn't work without parens?
133[[ 'ba ba ' =~ ([a b]+) ]] && echo true
134## stdout: true
135
136#### Operators and space lose meaning inside ()
# The angle brackets and the space between them are written unquoted inside
# the parens. Default expectation: true; the zsh lines expect no output and
# status 1.
137[[ '< >' =~ (< >) ]] && echo true
138## stdout: true
139## N-I zsh stdout-json: ""
140## N-I zsh status: 1
141
142#### Regex with |
# An unquoted bar separates two alternatives and the string equals the second
# one. Default expectation: true; the zsh lines expect no output and status 1.
143[[ 'bar' =~ foo|bar ]] && echo true
144## stdout: true
145## N-I zsh stdout-json: ""
146## N-I zsh status: 1
147
148#### Regex to match literal brackets []
# The escaped bracket pair is tried twice: inline, and via the variable pat.
# Default expectation: both print true. The zsh expectation has a single true.
149
150# bash-completion relies on this, so we're making it match bash.
151# zsh understandably differs.
152[[ '[]' =~ \[\] ]] && echo true
153
154# Another way to write this.
155pat='\[\]'
156[[ '[]' =~ $pat ]] && echo true
157## STDOUT:
158true
159true
160## END
161## OK zsh STDOUT:
162true
163## END
164
165#### Regex to match literals . ^ $ etc.
# Four pairs of checks on backslash-escaped metacharacters. In each pair the
# first line uses a string of x characters and is expected to print false;
# the second uses the metacharacters themselves and is expected to print true.
# The zsh lines record different output and exit status 1.
166[[ 'x' =~ \. ]] || echo false
167[[ '.' =~ \. ]] && echo true
168
169[[ 'xx' =~ \^\$ ]] || echo false
170[[ '^$' =~ \^\$ ]] && echo true
171
172[[ 'xxx' =~ \+\*\? ]] || echo false
173[[ '*+?' =~ \*\+\? ]] && echo true
174
175[[ 'xx' =~ \{\} ]] || echo false
176[[ '{}' =~ \{\} ]] && echo true
177## STDOUT:
178false
179true
180false
181true
182false
183true
184false
185true
186## END
187## BUG zsh STDOUT:
188true
189false
190false
191false
192## END
193## BUG zsh status: 1
194
195#### Unquoted { is a regex parse error
# Default expectation: empty stdout and exit status 2, so the trailing echo is
# not expected to print. The bash and zsh lines instead expect the echo to
# run, printing status=2 and status=1 respectively, with exit status 0.
196[[ { =~ { ]] && echo true
197echo status=$?
198## stdout-json: ""
199## status: 2
200## BUG bash stdout-json: "status=2\n"
201## BUG bash status: 0
202## BUG zsh stdout-json: "status=1\n"
203## BUG zsh status: 0
204
205#### Fatal error inside [[ =~ ]]
# The pattern word is an arithmetic expansion that divides by zero. Default
# expectation: empty stdout and exit status 1, so the echo is not expected to
# print. The bash lines expect status=1 on stdout and exit status 0.
206
207# zsh and osh are stricter than bash. bash treats [[ like a command.
208
209[[ a =~ $(( 1 / 0 )) ]]
210echo status=$?
211## stdout-json: ""
212## status: 1
213## BUG bash stdout: status=1
214## BUG bash status: 0
215
216#### Quoted { and +
# First group: each regex metacharacter is matched against itself with the
# pattern written inside quotes; the default expectation prints a yes line for
# every one. Second group: a quoted dot matches a dot but not z. Third group:
# a quoted range inside a bracket expression. The zsh expectation prints
# fewer yes lines in the first two groups and the same third group.
217[[ { =~ "{" ]] && echo 'yes {'
218[[ + =~ "+" ]] && echo 'yes +'
219[[ * =~ "*" ]] && echo 'yes *'
220[[ ? =~ "?" ]] && echo 'yes ?'
221[[ ^ =~ "^" ]] && echo 'yes ^'
222[[ $ =~ "$" ]] && echo 'yes $'
223[[ '(' =~ '(' ]] && echo 'yes ('
224[[ ')' =~ ')' ]] && echo 'yes )'
225[[ '|' =~ '|' ]] && echo 'yes |'
226[[ '\' =~ '\' ]] && echo 'yes \'
227echo ---
228
229[[ . =~ "." ]] && echo 'yes .'
230[[ z =~ "." ]] || echo 'no .'
231echo ---
232
233# This rule is weird but all shells agree. I would expect that the - gets
234# escaped? It's an operator? but it behaves like a-z.
235[[ a =~ ["a-z"] ]]; echo "a $?"
236[[ - =~ ["a-z"] ]]; echo "- $?"
237[[ b =~ ['a-z'] ]]; echo "b $?"
238[[ z =~ ['a-z'] ]]; echo "z $?"
239
240echo status=$?
241## STDOUT:
242yes {
243yes +
244yes *
245yes ?
246yes ^
247yes $
248yes (
249yes )
250yes |
251yes \
252---
253yes .
254no .
255---
256a 0
257- 1
258b 0
259z 0
260status=0
261## END
262## N-I zsh STDOUT:
263yes ^
264yes $
265yes )
266yes |
267---
268yes .
269---
270a 0
271- 1
272b 0
273z 0
274status=0
275## END
276
277#### Escaped {
# Group 1 is an escaped dollar sign followed by an optional escaped open
# brace; group 2 is a run of name characters. The default expectation is the
# whole match, then a dollar sign, then PA. The zsh lines expect no output
# and status 1.
278# from bash-completion
279[[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
280## STDOUT:
281['$PA', '$', 'PA']
282## END
283## BUG zsh stdout-json: ""
284## BUG zsh status: 1
285
286#### Escaped { stored in variable first
# The regex is first stored in pat with single quotes and then expanded
# unquoted. The default expectation is the whole match, then a dollar sign,
# then PA; the zsh lines expect a single empty string.
287# from bash-completion
288pat='^(\$\{?)([A-Za-z0-9_]*)$'
289[[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
290## STDOUT:
291['$PA', '$', 'PA']
292## END
293## BUG zsh STDOUT:
294['']
295## END
296
297#### regex with ?
# The optional c is expected to match both the one-character string and the
# empty string, giving two true lines.
298[[ 'c' =~ c? ]] && echo true
299[[ '' =~ c? ]] && echo true
300## STDOUT:
301true
302true
303## END
304
305#### regex with unprintable characters
# pat is written with dollar-single-quote syntax and is the only pattern that
# is matched here: once against a string of 0x01 and 0x02 escapes (expected
# status 0) and once against a string starting with the letter a (expected
# status 1). pat2 is assigned with plain single quotes but never used.
306# can't have nul byte
307
308# This pattern has literal characters
309pat=$'^[\x01\x02]+$'
310
311[[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
312[[ $'a\x01' =~ $pat ]]; echo status=$?
313
314# NOTE: There doesn't appear to be any way to escape these!
315pat2='^[\x01\x02]+$'
316
317## STDOUT:
318status=0
319status=1
320## END
321
322#### pattern $f(x) -- regression
# A variable expansion is followed directly by a parenthesized group. With f
# set to fff, the string fffx is expected to match and ffx is not.
323f=fff
324[[ fffx =~ $f(x) ]]
325echo status=$?
326[[ ffx =~ $f(x) ]]
327echo status=$?
328## STDOUT:
329status=0
330status=1
331## END
332
333#### pattern a=(1)
# The pattern word has the shape of a name, an equals sign and a group. The
# default expectation is a match for a=x and no match for =x. The zsh lines
# expect only status=0 on stdout and exit status 1.
334[[ a=x =~ a=(x) ]]
335echo status=$?
336[[ =x =~ a=(x) ]]
337echo status=$?
338## STDOUT:
339status=0
340status=1
341## END
342## BUG zsh status: 1
343## BUG zsh STDOUT:
344status=0
345## END
346
347#### pattern @f(x)
# The parse_at option is switched on before the matches. The pattern starts
# with an at sign and f followed by a group; the string with the leading at
# sign is expected to match and the one without it is not.
348shopt -s parse_at
349[[ @fx =~ @f(x) ]]
350echo status=$?
351[[ fx =~ @f(x) ]]
352echo status=$?
353## STDOUT:
354status=0
355status=1
356## END