OILS / spec / regex.test.sh View on Github | oilshell.org

631 lines, 285 significant
## oils_failures_allowed: 0
## compare_shells: bash zsh

#
# Only bash and zsh seem to implement [[ foo =~ '' ]]
#
# ^(a b)$ is a regex that should match 'a b' in a group.
#
# Not sure what bash is doing here... I think I have to just be empirical.
# Might need a "compat" switch for parsing the regex. It should be an opaque
# string like in zsh; not sure why it isn't.
#
# I think this is just papering over bugs...
# https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
#
# Storing the regular expression in a shell variable is often a useful way to
# avoid problems with quoting characters that are special to the shell. It is
# sometimes difficult to specify a regular expression literally without using
# quotes, or to keep track of the quoting used by regular expressions while
# paying attention to the shell’s quote removal. Using a shell variable to
# store the pattern decreases these problems. For example, the following is
# equivalent to the above:
#
# pattern='[[:space:]]*(a)?b'
# [[ $line =~ $pattern ]]
#
# If you want to match a character that’s special to the regular expression
# grammar, it has to be quoted to remove its special meaning. This means that in
# the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
# regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
# literal ‘.’. Shell programmers should take special care with backslashes, since
# backslashes are used both by the shell and regular expressions to remove the
# special meaning from the following character. The following two sets of
# commands are not equivalent:
#
# pattern='\.'
#
# [[ . =~ $pattern ]]
# [[ . =~ \. ]]
#
# [[ . =~ "$pattern" ]]
# [[ . =~ '\.' ]]
#
# The first two matches will succeed, but the second two will not, because in
# the second two the backslash will be part of the pattern to be matched.
#
# From bash code: ( | ) are treated special. Normally they must be quoted, but
# they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!

#### BASH_REMATCH
# A successful match fills BASH_REMATCH with the whole match followed by one
# element per capture group.
[[ foo123 =~ ([a-z]+)([0-9]+) ]]
echo status=$?
argv.py "${BASH_REMATCH[@]}"

[[ failed =~ ([a-z]+)([0-9]+) ]]
echo status=$?
argv.py "${BASH_REMATCH[@]}"  # the failed match leaves the array empty

## STDOUT:
status=0
['foo123', 'foo', '123']
status=1
[]
## END
## N-I zsh STDOUT:
status=0
['']
status=1
['']
## END

#### Match is unanchored at both ends
# 'a' sits in the middle of 'bar'; no ^ or $ is needed for the regex to hit.
[[ 'bar' =~ a ]] && echo true
## stdout: true

#### Failed match
# 'bar' contains no 'X', so [[ returns 1 and nothing is printed.
[[ 'bar' =~ X ]] && echo true
## status: 1
## stdout-json: ""

#### Regex quoted with \ -- preferred in bash
# Only the space is escaped; ^ ( ) $ stay unquoted and keep their regex meaning.
[[ 'a b' =~ ^(a\ b)$ ]] && echo true
## stdout: true

#### Regex quoted with single quotes
# bash doesn't like the quotes: the match fails there, while zsh still
# succeeds.
[[ 'a b' =~ '^(a b)$' ]] && echo true
## stdout-json: ""
## status: 1
## OK zsh stdout: true
## OK zsh status: 0

#### Regex quoted with double quotes
# bash doesn't like double quotes either: same outcome as with single quotes.
[[ 'a b' =~ "^(a b)$" ]] && echo true
## stdout-json: ""
## status: 1
## OK zsh stdout: true
## OK zsh status: 0

#### Fix single quotes by storing in variable
# The quotes only protect the assignment; the unquoted $pat is used as a regex.
pat='^(a b)$'
[[ 'a b' =~ $pat ]] && echo true
## stdout: true

#### Fix double quotes by storing in variable
# Same as the single-quote case, but the pattern is assigned with double
# quotes; the unquoted $pat is still used as a regex.
pat="^(a b)$"
[[ 'a b' =~ $pat ]] && echo true
## stdout: true

#### Double quoting pat variable -- again bash doesn't like it.
# Quoting the expansion makes the match fail in bash; zsh still succeeds.
pat="^(a b)$"
[[ 'a b' =~ "$pat" ]] && echo true
## stdout-json: ""
## status: 1
## OK zsh stdout: true
## OK zsh status: 0

#### Mixing quoted and unquoted parts
# A pattern word may be built from adjacent quoted and unquoted pieces.
[[ 'a b' =~ 'a 'b ]] && echo true
[[ "a b" =~ "a "'b' ]] && echo true
## STDOUT:
true
true
## END

116#### Regex with == and not =~ is parse error, different lexer mode required
117# They both give a syntax error. This is lame.
118[[ '^(a b)$' == ^(a\ b)$ ]] && echo true
119## status: 2
120## OK zsh status: 1
121
#### Omitting ( )
# Without the parentheses the same text is accepted on the right of ==.
[[ '^a b$' == ^a\ b$ ]] && echo true
## stdout: true

126#### Malformed regex
127# Are they trying to PARSE the regex? Do they feed the buffer directly to
128# regcomp()?
129[[ 'a b' =~ ^)a\ b($ ]] && echo true
130## stdout-json: ""
131## status: 2
132## OK zsh status: 1
133
#### Regex with |
# An unquoted | is regex alternation here, not a shell pipe.
[[ 'bar' =~ foo|bar ]] && echo true
## stdout: true
## N-I zsh stdout-json: ""
## N-I zsh status: 1

#### Regex to match literal brackets []

# bash-completion relies on this, so we're making it match bash.
# zsh understandably differs.
[[ '[]' =~ \[\] ]] && echo true

# Another way to write this.
pat='\[\]'
[[ '[]' =~ $pat ]] && echo true
## STDOUT:
true
true
## END
## OK zsh STDOUT:
true
## END

#### Regex to match literals . ^ $ etc.
# Each pair checks that a backslash-escaped metacharacter matches only itself:
# first against a subject that must not match, then against the literal.
[[ 'x' =~ \. ]] || echo false
[[ '.' =~ \. ]] && echo true

[[ 'xx' =~ \^\$ ]] || echo false
[[ '^$' =~ \^\$ ]] && echo true

[[ 'xxx' =~ \+\*\? ]] || echo false
[[ '*+?' =~ \*\+\? ]] && echo true

[[ 'xx' =~ \{\} ]] || echo false
[[ '{}' =~ \{\} ]] && echo true
## STDOUT:
false
true
false
true
false
true
false
true
## END
## BUG zsh STDOUT:
true
false
false
false
## END
## BUG zsh status: 1

#### Unquoted { is a regex parse error
# bash and zsh report the bad regex through $? and keep running, so they still
# print the status line.
[[ { =~ { ]] && echo true
echo status=$?
## stdout-json: ""
## status: 2
## BUG bash stdout-json: "status=2\n"
## BUG bash status: 0
## BUG zsh stdout-json: "status=1\n"
## BUG zsh status: 0

#### Fatal error inside [[ =~ ]]

# zsh and osh are stricter than bash. bash treats [[ like a command.

[[ a =~ $(( 1 / 0 )) ]]
echo status=$?
## stdout-json: ""
## status: 1
## BUG bash stdout: status=1
## BUG bash status: 0

208#### Quoted { and +
209[[ { =~ "{" ]] && echo 'yes {'
210[[ + =~ "+" ]] && echo 'yes +'
211[[ * =~ "*" ]] && echo 'yes *'
212[[ ? =~ "?" ]] && echo 'yes ?'
213[[ ^ =~ "^" ]] && echo 'yes ^'
214[[ $ =~ "$" ]] && echo 'yes $'
215[[ '(' =~ '(' ]] && echo 'yes ('
216[[ ')' =~ ')' ]] && echo 'yes )'
217[[ '|' =~ '|' ]] && echo 'yes |'
218[[ '\' =~ '\' ]] && echo 'yes \'
219echo ---
220
221[[ . =~ "." ]] && echo 'yes .'
222[[ z =~ "." ]] || echo 'no .'
223echo ---
224
225# This rule is weird but all shells agree. I would expect that the - gets
226# escaped? It's an operator? but it behaves like a-z.
227[[ a =~ ["a-z"] ]]; echo "a $?"
228[[ - =~ ["a-z"] ]]; echo "- $?"
229[[ b =~ ['a-z'] ]]; echo "b $?"
230[[ z =~ ['a-z'] ]]; echo "z $?"
231
232echo status=$?
233## STDOUT:
234yes {
235yes +
236yes *
237yes ?
238yes ^
239yes $
240yes (
241yes )
242yes |
243yes \
244---
245yes .
246no .
247---
248a 0
249- 1
250b 0
251z 0
252status=0
253## END
254## N-I zsh STDOUT:
255yes ^
256yes $
257yes )
258yes |
259---
260yes .
261---
262a 0
263- 1
264b 0
265z 0
266status=0
267## END
268
#### Escaped {
# from bash-completion
[[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
## STDOUT:
['$PA', '$', 'PA']
## END
## BUG zsh stdout-json: ""
## BUG zsh status: 1

#### Escaped { stored in variable first
# from bash-completion
pat='^(\$\{?)([A-Za-z0-9_]*)$'
[[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
## STDOUT:
['$PA', '$', 'PA']
## END
## BUG zsh STDOUT:
['']
## END

#### regex with ?
# c? is optional, so it matches both 'c' and the empty string.
[[ 'c' =~ c? ]] && echo true
[[ '' =~ c? ]] && echo true
## STDOUT:
true
true
## END

#### regex with unprintable characters
# can't have nul byte

# This pattern has literal characters
pat=$'^[\x01\x02]+$'

[[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
[[ $'a\x01' =~ $pat ]]; echo status=$?

# NOTE: There doesn't appear to be any way to escape these!
# pat2 is the same text without $'' decoding; it is only defined, never
# matched against.
pat2='^[\x01\x02]+$'

## STDOUT:
status=0
status=1
## END

#### pattern $f(x) -- regression
# A variable immediately followed by a group: $f expands to fff, so the regex
# is fff(x).
f=fff
[[ fffx =~ $f(x) ]]
echo status=$?
[[ ffx =~ $f(x) ]]
echo status=$?
## STDOUT:
status=0
status=1
## END

#### pattern a=(1)
# The pattern a=(x) is a regex with a group, not an array assignment.
[[ a=x =~ a=(x) ]]
echo status=$?
[[ =x =~ a=(x) ]]
echo status=$?
## STDOUT:
status=0
status=1
## END
## BUG zsh status: 1
## BUG zsh STDOUT:
status=0
## END

#### pattern @f(x)
# Even with parse_at enabled, @f(x) on the right of =~ is a regex: a literal
# '@f' followed by the group (x).
shopt -s parse_at
[[ @fx =~ @f(x) ]]
echo status=$?
[[ fx =~ @f(x) ]]
echo status=$?
## STDOUT:
status=0
status=1
## END


#### Bug: Nix idiom with closing ) next to pattern

# The ) that closes the [[ ]] grouping directly follows the quoted pattern and
# must not be read as part of the regex.
if [[ ! (" ${params[*]} " =~ " -shared " || " ${params[*]} " =~ " -static ") ]]; then
  echo one
fi

# Reduced idiom
if [[ (foo =~ foo) ]]; then
  echo two
fi

## STDOUT:
one
two
## END

#### unquoted (a b) as pattern, (a b|c)

if [[ 'a b' =~ (a b) ]]; then
  echo one
fi

# Two spaces inside the group: this must NOT match the single-space subject,
# which is why BAD is absent from the expected output.
if [[ 'a b' =~ (a  b) ]]; then
  echo BAD
fi

if [[ 'a b' =~ (a b|c) ]]; then
  echo two
fi

# I think spaces are only allowed within ()

if [[ ' c' =~ (a| c) ]]; then
  echo three
fi

## STDOUT:
one
two
three
## END

#### Multiple adjacent () groups

# Spaces and shell operator characters (>> and ;) appear inside the groups.
if [[ 'a-b-c-d' =~ a-(b| >>)-c-( ;|[de])|ff|gg ]]; then
  argv.py "${BASH_REMATCH[@]}"
fi

# Matching through the ff alternative leaves both groups empty.
if [[ ff =~ a-(b| >>)-c-( ;|[de])|ff|gg ]]; then
  argv.py "${BASH_REMATCH[@]}"
fi

# empty group ()

if [[ zz =~ ([a-z]+)() ]]; then
  argv.py "${BASH_REMATCH[@]}"
fi

# nested empty group
if [[ zz =~ ([a-z]+)(()z) ]]; then
  argv.py "${BASH_REMATCH[@]}"
fi

## STDOUT:
['a-b-c-d', 'b', 'd']
['ff', '', '']
['zz', 'zz', '']
['zz', 'z', 'z', '']
## END

## BUG zsh status: 1
## BUG zsh STDOUT:
['']
['']
['']
['']
## END

#### unquoted [a b] as pattern, [a b|c]

# Each snippet runs in its own $SH process, so a failure is reported as that
# process's exit status and the later snippets still run.
$SH <<'EOF'
[[ a =~ [ab] ]] && echo yes
EOF
echo "[ab]=$?"

$SH <<'EOF'
[[ a =~ [a b] ]] && echo yes
EOF
echo "[a b]=$?"

$SH <<'EOF'
[[ a =~ ([a b]) ]] && echo yes
EOF
echo "[a b]=$?"

## STDOUT:
yes
[ab]=0
[a b]=2
yes
[a b]=0
## END

## OK zsh STDOUT:
yes
[ab]=0
[a b]=1
yes
[a b]=0
## END

#### c|a unquoted

# Unquoted alternation inside an if condition.
if [[ a =~ c|a ]]; then
  echo one
fi

## STDOUT:
one
## END
## N-I zsh status: 1

#### Operator chars ; & but not |

# Each snippet runs in its own $SH process, so a failure is reported as that
# process's exit status and the later snippets still run.

# Hm semicolon is still an operator in bash
$SH <<'EOF'
[[ ';' =~ ; ]] && echo semi
EOF
echo semi=$?

$SH <<'EOF'
[[ ';' =~ (;) ]] && echo semi paren
EOF
echo semi paren=$?

echo

$SH <<'EOF'
[[ '&' =~ & ]] && echo amp
EOF
echo amp=$?

# Oh I guess this is not a bug? regcomp doesn't reject this trivial regex?
$SH <<'EOF'
[[ '|' =~ | ]] && echo pipe1
[[ 'a' =~ | ]] && echo pipe2
EOF
echo pipe=$?

$SH <<'EOF'
[[ '|' =~ a| ]] && echo four
EOF
echo pipe=$?

# This is probably special because > operator is inside foo [[ a > b ]]
$SH <<'EOF'
[[ '<>' =~ <> ]] && echo angle
EOF
echo angle=$?

# Bug: OSH allowed this!
$SH <<'EOF'
[[ $'a\nb' =~ a
b ]] && echo newline
EOF
echo newline=$?

## STDOUT:
semi=2
semi paren
semi paren=0

amp=2
pipe1
pipe2
pipe=0
four
pipe=0
angle=2
newline=2
## END

## BUG zsh STDOUT:
semi=1
semi paren=1

amp=1
pipe=1
pipe=1
angle=1
newline=1
## END



#### Quotes '' "" $'' $"" in pattern

# All four quoting forms make | match a literal '|'.  Each snippet runs in its
# own $SH process.
$SH <<'EOF'
[[ '|' =~ '|' ]] && echo sq
EOF
echo sq=$?

$SH <<'EOF'
[[ '|' =~ "|" ]] && echo dq
EOF
echo dq=$?

$SH <<'EOF'
[[ '|' =~ $'|' ]] && echo dollar-sq
EOF
echo dollar-sq=$?

$SH <<'EOF'
[[ '|' =~ $"|" ]] && echo dollar-dq
EOF
echo dollar-dq=$?

## STDOUT:
sq
sq=0
dq
dq=0
dollar-sq
dollar-sq=0
dollar-dq
dollar-dq=0
## END


#### Unicode in pattern

# A non-ASCII character may appear unquoted on both sides of =~.
$SH <<'EOF'
[[ μ =~ μ ]] && echo mu
EOF
echo mu=$?

## STDOUT:
mu
mu=0
## END

592#### Parse error with 2 words
593
594if [[ a =~ c a ]]; then
595 echo one
596fi
597
598## status: 2
599## STDOUT:
600## END
601
602## BUG zsh status: 1
603## BUG zsh STDOUT:
604one
605## END
606
#### make a lisp example

# A long real-world pattern that mixes bracket expressions, escaped quotes and
# $'\n' in a single unquoted regex word.
str='(hi)'
[[ "${str}" =~ ^^([][{}\(\)^@])|^(~@)|(\"(\\.|[^\\\"])*\")|^(;[^$'\n']*)|^([~\'\`])|^([^][ ~\`\'\";{}\(\)^@\,]+)|^[,]|^[[:space:]]+ ]]
echo status=$?

m=${BASH_REMATCH[0]}
echo m=$m

## STDOUT:
status=0
m=(
## END

## BUG zsh STDOUT:
status=1
m=
## END

#### Operators and space lose meaning inside ()
# Inside a regex group, < > and the space between them are plain pattern
# characters.
[[ '< >' =~ (< >) ]] && echo true
## stdout: true
## N-I zsh stdout-json: ""
## N-I zsh status: 1
