1 ## oils_failures_allowed: 0
2 ## compare_shells: bash zsh
3
4 #
5 # Only bash and zsh seem to implement [[ foo =~ '' ]]
6 #
7 # ^(a b)$ is a regex that should match 'a b' in a group.
8 #
9 # Not sure what bash is doing here... I think I have to just be empirical.
10 # Might need "compat" switch for parsing the regex. It should be an opaque
11 # string like zsh, not sure why it isn't.
12 #
13 # I think this is just papering over bugs...
14 # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
15 #
16 # Storing the regular expression in a shell variable is often a useful way to
17 # avoid problems with quoting characters that are special to the shell. It is
18 # sometimes difficult to specify a regular expression literally without using
19 # quotes, or to keep track of the quoting used by regular expressions while
20 # paying attention to the shell’s quote removal. Using a shell variable to
21 # store the pattern decreases these problems. For example, the following is
22 # equivalent to the above:
23 #
24 # pattern='[[:space:]]*(a)?b'
25 # [[ $line =~ $pattern ]]
26 #
27 # If you want to match a character that’s special to the regular expression
28 # grammar, it has to be quoted to remove its special meaning. This means that in
29 # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
30 # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
31 # literal ‘.’. Shell programmers should take special care with backslashes, since
32 # backslashes are used both by the shell and regular expressions to remove the
33 # special meaning from the following character. The manual then shows two sets
34 # of commands that are not equivalent (omitted here; see the link above).
35 #
36 # From the bash code: ( | ) are treated specially. Normally they must be quoted,
37 # but they can be UNQUOTED in the BASH_REGEX state. In fact they can't be quoted!
38
39 #### BASH_REMATCH
40 [[ foo123 =~ ([a-z]+)([0-9]+) ]]
41 echo status=$?
42 argv.py "${BASH_REMATCH[@]}"
43
44 [[ failed =~ ([a-z]+)([0-9]+) ]]
45 echo status=$?
46 argv.py "${BASH_REMATCH[@]}" # bash clears it after a failed match
47
48 ## STDOUT:
49 status=0
50 ['foo123', 'foo', '123']
51 status=1
52 []
53 ## END
54 ## N-I zsh STDOUT:
55 status=0
56 ['']
57 status=1
58 ['']
59 ## END
60
61 #### Match is unanchored at both ends
62 [[ 'bar' =~ a ]] && echo true
63 ## stdout: true
64
65 #### Failed match
66 [[ 'bar' =~ X ]] && echo true
67 ## status: 1
68 ## stdout-json: ""
69
70 #### Regex quoted with \ -- preferred in bash
71 [[ 'a b' =~ ^(a\ b)$ ]] && echo true
72 ## stdout: true
73
74 #### Regex quoted with single quotes
75 # bash matches a quoted pattern literally, so ^ ( ) $ lose their regex meaning
76 [[ 'a b' =~ '^(a b)$' ]] && echo true
77 ## stdout-json: ""
78 ## status: 1
79 ## OK zsh stdout: true
80 ## OK zsh status: 0
81
82 #### Regex quoted with double quotes
83 # bash matches a quoted pattern literally, so ^ ( ) $ lose their regex meaning
84 [[ 'a b' =~ "^(a b)$" ]] && echo true
85 ## stdout-json: ""
86 ## status: 1
87 ## OK zsh stdout: true
88 ## OK zsh status: 0
89
90 #### Fix single quotes by storing in variable
91 pat='^(a b)$'
92 [[ 'a b' =~ $pat ]] && echo true
93 ## stdout: true
94
95 #### Fix double quotes by storing in variable
96 pat="^(a b)$"
97 [[ 'a b' =~ $pat ]] && echo true
98 ## stdout: true
99
100 #### Double quoting pat variable -- again bash doesn't like it.
101 pat="^(a b)$"
102 [[ 'a b' =~ "$pat" ]] && echo true
103 ## stdout-json: ""
104 ## status: 1
105 ## OK zsh stdout: true
106 ## OK zsh status: 0
107
108 #### Mixing quoted and unquoted parts
109 [[ 'a b' =~ 'a 'b ]] && echo true
110 [[ "a b" =~ "a "'b' ]] && echo true
111 ## STDOUT:
112 true
113 true
114 ## END
115
116 #### Regex with == and not =~ is parse error, different lexer mode required
117 # They both give a syntax error. This is lame.
118 [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
119 ## status: 2
120 ## OK zsh status: 1
121
122 #### Omitting ( )
123 [[ '^a b$' == ^a\ b$ ]] && echo true
124 ## stdout: true
125
126 #### Malformed regex
127 # Are they trying to PARSE the regex? Do they feed the buffer directly to
128 # regcomp()?
129 [[ 'a b' =~ ^)a\ b($ ]] && echo true
130 ## stdout-json: ""
131 ## status: 2
132 ## OK zsh status: 1
133
134 #### Regex with |
135 [[ 'bar' =~ foo|bar ]] && echo true
136 ## stdout: true
137 ## N-I zsh stdout-json: ""
138 ## N-I zsh status: 1
139
140 #### Regex to match literal brackets []
141
142 # bash-completion relies on this, so we're making it match bash.
143 # zsh understandably differs.
144 [[ '[]' =~ \[\] ]] && echo true
145
146 # Another way to write this.
147 pat='\[\]'
148 [[ '[]' =~ $pat ]] && echo true
149 ## STDOUT:
150 true
151 true
152 ## END
153 ## OK zsh STDOUT:
154 true
155 ## END
156
157 #### Regex to match literals . ^ $ etc.
158 [[ 'x' =~ \. ]] || echo false
159 [[ '.' =~ \. ]] && echo true
160
161 [[ 'xx' =~ \^\$ ]] || echo false
162 [[ '^$' =~ \^\$ ]] && echo true
163
164 [[ 'xxx' =~ \+\*\? ]] || echo false
165 [[ '*+?' =~ \*\+\? ]] && echo true
166
167 [[ 'xx' =~ \{\} ]] || echo false
168 [[ '{}' =~ \{\} ]] && echo true
169 ## STDOUT:
170 false
171 true
172 false
173 true
174 false
175 true
176 false
177 true
178 ## END
179 ## BUG zsh STDOUT:
180 true
181 false
182 false
183 false
184 ## END
185 ## BUG zsh status: 1
186
187 #### Unquoted { is a regex parse error
188 [[ { =~ { ]] && echo true
189 echo status=$?
190 ## stdout-json: ""
191 ## status: 2
192 ## BUG bash stdout-json: "status=2\n"
193 ## BUG bash status: 0
194 ## BUG zsh stdout-json: "status=1\n"
195 ## BUG zsh status: 0
196
197 #### Fatal error inside [[ =~ ]]
198
199 # zsh and osh are stricter than bash. bash treats [[ like a command.
200
201 [[ a =~ $(( 1 / 0 )) ]]
202 echo status=$?
203 ## stdout-json: ""
204 ## status: 1
205 ## BUG bash stdout: status=1
206 ## BUG bash status: 0
207
208 #### Quoted { and +
209 [[ { =~ "{" ]] && echo 'yes {'
210 [[ + =~ "+" ]] && echo 'yes +'
211 [[ * =~ "*" ]] && echo 'yes *'
212 [[ ? =~ "?" ]] && echo 'yes ?'
213 [[ ^ =~ "^" ]] && echo 'yes ^'
214 [[ $ =~ "$" ]] && echo 'yes $'
215 [[ '(' =~ '(' ]] && echo 'yes ('
216 [[ ')' =~ ')' ]] && echo 'yes )'
217 [[ '|' =~ '|' ]] && echo 'yes |'
218 [[ '\' =~ '\' ]] && echo 'yes \'
219 echo ---
220
221 [[ . =~ "." ]] && echo 'yes .'
222 [[ z =~ "." ]] || echo 'no .'
223 echo ---
224
225 # This rule is weird, but all shells agree: quoting does not make the - literal.
226 # Inside [] a quoted "a-z" still acts as the range a-z, so - itself doesn't match.
227 [[ a =~ ["a-z"] ]]; echo "a $?"
228 [[ - =~ ["a-z"] ]]; echo "- $?"
229 [[ b =~ ['a-z'] ]]; echo "b $?"
230 [[ z =~ ['a-z'] ]]; echo "z $?"
231
232 echo status=$?
233 ## STDOUT:
234 yes {
235 yes +
236 yes *
237 yes ?
238 yes ^
239 yes $
240 yes (
241 yes )
242 yes |
243 yes \
244 ---
245 yes .
246 no .
247 ---
248 a 0
249 - 1
250 b 0
251 z 0
252 status=0
253 ## END
254 ## N-I zsh STDOUT:
255 yes ^
256 yes $
257 yes )
258 yes |
259 ---
260 yes .
261 ---
262 a 0
263 - 1
264 b 0
265 z 0
266 status=0
267 ## END
268
269 #### Escaped {
270 # from bash-completion
271 [[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
272 ## STDOUT:
273 ['$PA', '$', 'PA']
274 ## END
275 ## BUG zsh stdout-json: ""
276 ## BUG zsh status: 1
277
278 #### Escaped { stored in variable first
279 # from bash-completion
280 pat='^(\$\{?)([A-Za-z0-9_]*)$'
281 [[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
282 ## STDOUT:
283 ['$PA', '$', 'PA']
284 ## END
285 ## BUG zsh STDOUT:
286 ['']
287 ## END
288
289 #### regex with ?
290 [[ 'c' =~ c? ]] && echo true
291 [[ '' =~ c? ]] && echo true
292 ## STDOUT:
293 true
294 true
295 ## END
296
297 #### regex with unprintable characters
298 # can't have nul byte
299
300 # This pattern has literal characters
301 pat=$'^[\x01\x02]+$'
302
303 [[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
304 [[ $'a\x01' =~ $pat ]]; echo status=$?
305
306 # NOTE: There doesn't appear to be any way to escape these!
307 pat2='^[\x01\x02]+$'
308
309 ## STDOUT:
310 status=0
311 status=1
312 ## END
313
314 #### pattern $f(x) -- regression
315 f=fff
316 [[ fffx =~ $f(x) ]]
317 echo status=$?
318 [[ ffx =~ $f(x) ]]
319 echo status=$?
320 ## STDOUT:
321 status=0
322 status=1
323 ## END
324
325 #### pattern a=(x)
326 [[ a=x =~ a=(x) ]]
327 echo status=$?
328 [[ =x =~ a=(x) ]]
329 echo status=$?
330 ## STDOUT:
331 status=0
332 status=1
333 ## END
334 ## BUG zsh status: 1
335 ## BUG zsh STDOUT:
336 status=0
337 ## END
338
339 #### pattern @f(x)
340 shopt -s parse_at
341 [[ @fx =~ @f(x) ]]
342 echo status=$?
343 [[ fx =~ @f(x) ]]
344 echo status=$?
345 ## STDOUT:
346 status=0
347 status=1
348 ## END
349
350
351 #### Bug: Nix idiom with closing ) next to pattern
352
353 if [[ ! (" ${params[*]} " =~ " -shared " || " ${params[*]} " =~ " -static ") ]]; then
354 echo one
355 fi
356
357 # Reduced idiom
358 if [[ (foo =~ foo) ]]; then
359 echo two
360 fi
361
362 ## STDOUT:
363 one
364 two
365 ## END
366
367 #### unquoted (a b) as pattern, (a b|c)
368
369 if [[ 'a  b' =~ (a  b) ]]; then
370 echo one
371 fi
372
373 if [[ 'a b' =~ (a  b) ]]; then
374 echo BAD
375 fi
376
377 if [[ 'a b' =~ (a b|c) ]]; then
378 echo two
379 fi
380
381 # I think spaces are only allowed within ()
382
383 if [[ ' c' =~ (a| c) ]]; then
384 echo three
385 fi
386
387 ## STDOUT:
388 one
389 two
390 three
391 ## END
392
393 #### Multiple adjacent () groups
394
395 if [[ 'a-b-c-d' =~ a-(b| >>)-c-( ;|[de])|ff|gg ]]; then
396 argv.py "${BASH_REMATCH[@]}"
397 fi
398
399 if [[ ff =~ a-(b| >>)-c-( ;|[de])|ff|gg ]]; then
400 argv.py "${BASH_REMATCH[@]}"
401 fi
402
403 # empty group ()
404
405 if [[ zz =~ ([a-z]+)() ]]; then
406 argv.py "${BASH_REMATCH[@]}"
407 fi
408
409 # nested empty group
410 if [[ zz =~ ([a-z]+)(()z) ]]; then
411 argv.py "${BASH_REMATCH[@]}"
412 fi
413
414 ## STDOUT:
415 ['a-b-c-d', 'b', 'd']
416 ['ff', '', '']
417 ['zz', 'zz', '']
418 ['zz', 'z', 'z', '']
419 ## END
420
421 ## BUG zsh status: 1
422 ## BUG zsh STDOUT:
423 ['']
424 ['']
425 ['']
426 ['']
427 ## END
428
429 #### unquoted [a b] as pattern, [a b|c]
430
431 $SH <<'EOF'
432 [[ a =~ [ab] ]] && echo yes
433 EOF
434 echo "[ab]=$?"
435
436 $SH <<'EOF'
437 [[ a =~ [a b] ]] && echo yes
438 EOF
439 echo "[a b]=$?"
440
441 $SH <<'EOF'
442 [[ a =~ ([a b]) ]] && echo yes
443 EOF
444 echo "[a b]=$?"
445
446 ## STDOUT:
447 yes
448 [ab]=0
449 [a b]=2
450 yes
451 [a b]=0
452 ## END
453
454 ## OK zsh STDOUT:
455 yes
456 [ab]=0
457 [a b]=1
458 yes
459 [a b]=0
460 ## END
461
462 #### c|a unquoted
463
464 if [[ a =~ c|a ]]; then
465 echo one
466 fi
467
468 ## STDOUT:
469 one
470 ## END
471 ## N-I zsh status: 1
472
473 #### Operator chars ; & but not |
474
475 # Hm semicolon is still an operator in bash
476 $SH <<'EOF'
477 [[ ';' =~ ; ]] && echo semi
478 EOF
479 echo semi=$?
480
481 $SH <<'EOF'
482 [[ ';' =~ (;) ]] && echo semi paren
483 EOF
484 echo semi paren=$?
485
486 echo
487
488 $SH <<'EOF'
489 [[ '&' =~ & ]] && echo amp
490 EOF
491 echo amp=$?
492
493 # Oh I guess this is not a bug? regcomp doesn't reject this trivial regex?
494 $SH <<'EOF'
495 [[ '|' =~ | ]] && echo pipe1
496 [[ 'a' =~ | ]] && echo pipe2
497 EOF
498 echo pipe=$?
499
500 $SH <<'EOF'
501 [[ '|' =~ a| ]] && echo four
502 EOF
503 echo pipe=$?
504
505 # < and > are probably special here because they are operators inside [[, as in [[ a > b ]]
506 $SH <<'EOF'
507 [[ '<>' =~ <> ]] && echo angle
508 EOF
509 echo angle=$?
510
511 # Bug: OSH allowed this!
512 $SH <<'EOF'
513 [[ $'a\nb' =~ a
514 b ]] && echo newline
515 EOF
516 echo newline=$?
517
518 ## STDOUT:
519 semi=2
520 semi paren
521 semi paren=0
522
523 amp=2
524 pipe1
525 pipe2
526 pipe=0
527 four
528 pipe=0
529 angle=2
530 newline=2
531 ## END
532
533 ## BUG zsh STDOUT:
534 semi=1
535 semi paren=1
536
537 amp=1
538 pipe=1
539 pipe=1
540 angle=1
541 newline=1
542 ## END
543
544
545
546 #### Quotes '' "" $'' $"" in pattern
547
548 $SH <<'EOF'
549 [[ '|' =~ '|' ]] && echo sq
550 EOF
551 echo sq=$?
552
553 $SH <<'EOF'
554 [[ '|' =~ "|" ]] && echo dq
555 EOF
556 echo dq=$?
557
558 $SH <<'EOF'
559 [[ '|' =~ $'|' ]] && echo dollar-sq
560 EOF
561 echo dollar-sq=$?
562
563 $SH <<'EOF'
564 [[ '|' =~ $"|" ]] && echo dollar-dq
565 EOF
566 echo dollar-dq=$?
567
568 ## STDOUT:
569 sq
570 sq=0
571 dq
572 dq=0
573 dollar-sq
574 dollar-sq=0
575 dollar-dq
576 dollar-dq=0
577 ## END
578
579
580 #### Unicode in pattern
581
582 $SH <<'EOF'
583 [[ μ =~ μ ]] && echo mu
584 EOF
585 echo mu=$?
586
587 ## STDOUT:
588 mu
589 mu=0
590 ## END
591
592 #### Parse error with 2 words
593
594 if [[ a =~ c a ]]; then
595 echo one
596 fi
597
598 ## status: 2
599 ## STDOUT:
600 ## END
601
602 ## BUG zsh status: 1
603 ## BUG zsh STDOUT:
604 one
605 ## END
606
607 #### make a lisp example
608
609 str='(hi)'
610 [[ "${str}" =~ ^^([][{}\(\)^@])|^(~@)|(\"(\\.|[^\\\"])*\")|^(;[^$'\n']*)|^([~\'\`])|^([^][ ~\`\'\";{}\(\)^@\,]+)|^[,]|^[[:space:]]+ ]]
611 echo status=$?
612
613 m=${BASH_REMATCH[0]}
614 echo m=$m
615
616 ## STDOUT:
617 status=0
618 m=(
619 ## END
620
621 ## BUG zsh STDOUT:
622 status=1
623 m=
624 ## END
625
626 #### Operators and space lose meaning inside ()
627 [[ '< >' =~ (< >) ]] && echo true
628 ## stdout: true
629 ## N-I zsh stdout-json: ""
630 ## N-I zsh status: 1
631