1 | #
|
2 | # Only bash and zsh seem to implement [[ foo =~ '' ]]
|
3 | #
|
4 | # ^(a b)$ is a regex that should match 'a b' in a group.
|
5 | #
|
6 | # Not sure what bash is doing here... I think I have to just be empirical.
|
7 | # Might need a "compat" switch for parsing the regex. It should be an opaque
|
8 | # string like zsh, not sure why it isn't.
|
9 | #
|
10 | # I think this is just papering over bugs...
|
11 | # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
|
12 | #
|
13 | # Storing the regular expression in a shell variable is often a useful way to
|
14 | # avoid problems with quoting characters that are special to the shell. It is
|
15 | # sometimes difficult to specify a regular expression literally without using
|
16 | # quotes, or to keep track of the quoting used by regular expressions while
|
17 | # paying attention to the shell’s quote removal. Using a shell variable to
|
18 | # store the pattern decreases these problems. For example, the following is
|
19 | # equivalent to the above:
|
20 | #
|
21 | # pattern='[[:space:]]*(a)?b'
|
22 | # [[ $line =~ $pattern ]]
|
23 | #
|
24 | # If you want to match a character that’s special to the regular expression
|
25 | # grammar, it has to be quoted to remove its special meaning. This means that in
|
26 | # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
|
27 | # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
|
28 | # literal ‘.’. Shell programmers should take special care with backslashes, since
|
29 | # backslashes are used both by the shell and regular expressions to remove the
|
30 | # special meaning from the following character. The following two sets of
|
31 | # commands are not equivalent: (the manual's examples are omitted here; see the link above)
|
32 | #
|
33 | # From bash code: ( | ) are treated specially. Normally they must be quoted, but
|
34 | # they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!
|
35 |
|
36 | #### BASH_REMATCH
|
37 | [[ foo123 =~ ([a-z]+)([0-9]+) ]]  # two groups: letters, then digits
|
38 | echo status=$?
|
39 | argv.py "${BASH_REMATCH[@]}"  # whole match first, then one element per group
|
40 |
|
41 | [[ failed =~ ([a-z]+)([0-9]+) ]]  # no digits in the subject, so the match fails
|
42 | echo status=$?
|
43 | argv.py "${BASH_REMATCH[@]}" # expected [] below: the array is empty after a failed match
|
44 |
|
45 | ## STDOUT:
|
46 | status=0
|
47 | ['foo123', 'foo', '123']
|
48 | status=1
|
49 | []
|
50 | ## END
|
51 | ## N-I zsh STDOUT:
|
52 | status=0
|
53 | ['']
|
54 | status=1
|
55 | ['']
|
56 | ## END
|
57 |
|
58 | #### Match is unanchored at both ends
|
59 | [[ 'bar' =~ a ]] && echo true  # the a is found in the middle of bar
|
60 | ## stdout: true
|
61 |
|
62 | #### Failed match
|
63 | [[ 'bar' =~ X ]] && echo true  # bar contains no X, so echo never runs
|
64 | ## status: 1
|
65 | ## stdout-json: ""
|
66 |
|
67 | #### Regex quoted with \ -- preferred in bash
|
68 | [[ 'a b' =~ ^(a\ b)$ ]] && echo true  # only the space is escaped; the anchors and the group stay unquoted
|
69 | ## stdout: true
|
70 |
|
71 | #### Regex quoted with single quotes
|
72 | # bash doesn't like the quotes: a quoted pattern is matched literally, so this fails there (zsh still matches).
|
73 | [[ 'a b' =~ '^(a b)$' ]] && echo true
|
74 | ## stdout-json: ""
|
75 | ## status: 1
|
76 | ## OK zsh stdout: true
|
77 | ## OK zsh status: 0
|
78 |
|
79 | #### Regex quoted with double quotes
|
80 | # bash doesn't like the quotes: same outcome as the single-quoted form (fails by default, matches in zsh).
|
81 | [[ 'a b' =~ "^(a b)$" ]] && echo true
|
82 | ## stdout-json: ""
|
83 | ## status: 1
|
84 | ## OK zsh stdout: true
|
85 | ## OK zsh status: 0
|
86 |
|
87 | #### Fix single quotes by storing in variable
|
88 | pat='^(a b)$'
|
89 | [[ 'a b' =~ $pat ]] && echo true  # the unquoted expansion is used as a regex
|
90 | ## stdout: true
|
91 |
|
92 | #### Fix double quotes by storing in variable
|
93 | pat="^(a b)$"  # the trailing $ stays a literal dollar sign inside the double quotes
|
94 | [[ 'a b' =~ $pat ]] && echo true  # the unquoted expansion is used as a regex
|
95 | ## stdout: true
|
96 |
|
97 | #### Double quoting pat variable -- again bash doesn't like it.
|
98 | pat="^(a b)$"
|
99 | [[ 'a b' =~ "$pat" ]] && echo true  # quoting the expansion makes the match fail by default; zsh still matches
|
100 | ## stdout-json: ""
|
101 | ## status: 1
|
102 | ## OK zsh stdout: true
|
103 | ## OK zsh status: 0
|
104 |
|
105 | #### Mixing quoted and unquoted parts
|
106 | [[ 'a b' =~ 'a 'b ]] && echo true  # quoted part followed directly by an unquoted part
|
107 | [[ "a b" =~ "a "'b' ]] && echo true  # two adjacent quoted parts with different quote styles
|
108 | ## STDOUT:
|
109 | true
|
110 | true
|
111 | ## END
|
112 |
|
113 | #### Regex with == and not =~ is parse error, different lexer mode required
|
114 | # With == the unquoted parens that =~ accepts are rejected: status 2 is expected (1 in zsh). This is lame.
|
115 | [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
|
116 | ## status: 2
|
117 | ## OK zsh status: 1
|
118 |
|
119 | #### Omitting ( )
|
120 | [[ '^a b$' == ^a\ b$ ]] && echo true  # with == the ^ and $ are ordinary characters, so the literal string matches
|
121 | ## stdout: true
|
122 |
|
123 | #### Malformed regex
|
124 | # Are they trying to PARSE the regex? Do they feed the buffer directly to
|
125 | # regcomp()? Either way, the unbalanced parens below are expected to give status 2 (1 in zsh).
|
126 | [[ 'a b' =~ ^)a\ b($ ]] && echo true
|
127 | ## stdout-json: ""
|
128 | ## status: 2
|
129 | ## OK zsh status: 1
|
130 |
|
131 | #### Regex with char class containing space
|
132 | # The unquoted space sits inside a group here; for some reason it doesn't work without parens?
|
133 | [[ 'ba ba ' =~ ([a b]+) ]] && echo true
|
134 | ## stdout: true
|
135 |
|
136 | #### Operators and space lose meaning inside ()
|
137 | [[ '< >' =~ (< >) ]] && echo true  # the redirect operators and the space are matched as ordinary characters
|
138 | ## stdout: true
|
139 | ## N-I zsh stdout-json: ""
|
140 | ## N-I zsh status: 1
|
141 |
|
142 | #### Regex with |
|
143 | [[ 'bar' =~ foo|bar ]] && echo true  # unquoted alternation; the second branch matches
|
144 | ## stdout: true
|
145 | ## N-I zsh stdout-json: ""
|
146 | ## N-I zsh status: 1
|
147 |
|
148 | #### Regex to match literal brackets []
|
149 |
|
150 | # bash-completion relies on this, so we're making it match bash.
|
151 | # zsh understandably differs: only one "true" is expected from it (see OK zsh below).
|
152 | [[ '[]' =~ \[\] ]] && echo true
|
153 |
|
154 | # Another way to write this: keep the backslash-escaped brackets in a variable.
|
155 | pat='\[\]'
|
156 | [[ '[]' =~ $pat ]] && echo true
|
157 | ## STDOUT:
|
158 | true
|
159 | true
|
160 | ## END
|
161 | ## OK zsh STDOUT:
|
162 | true
|
163 | ## END
|
164 |
|
165 | #### Regex to match literals . ^ $ etc.
|
166 | [[ 'x' =~ \. ]] || echo false  # each pair: first a subject that must NOT match the escaped metacharacters...
|
167 | [[ '.' =~ \. ]] && echo true  # ...then the literal characters themselves, which must match
|
168 |
|
169 | [[ 'xx' =~ \^\$ ]] || echo false
|
170 | [[ '^$' =~ \^\$ ]] && echo true
|
171 |
|
172 | [[ 'xxx' =~ \+\*\? ]] || echo false  # the escapes are in a different order than on the next line
|
173 | [[ '*+?' =~ \*\+\? ]] && echo true
|
174 |
|
175 | [[ 'xx' =~ \{\} ]] || echo false
|
176 | [[ '{}' =~ \{\} ]] && echo true
|
177 | ## STDOUT:
|
178 | false
|
179 | true
|
180 | false
|
181 | true
|
182 | false
|
183 | true
|
184 | false
|
185 | true
|
186 | ## END
|
187 | ## BUG zsh STDOUT:
|
188 | true
|
189 | false
|
190 | false
|
191 | false
|
192 | ## END
|
193 | ## BUG zsh status: 1
|
194 |
|
195 | #### Unquoted { is a regex parse error
|
196 | [[ { =~ { ]] && echo true
|
197 | echo status=$?  # expected not to run; bash and zsh carry on and print it, see the BUG lines
|
198 | ## stdout-json: ""
|
199 | ## status: 2
|
200 | ## BUG bash stdout-json: "status=2\n"
|
201 | ## BUG bash status: 0
|
202 | ## BUG zsh stdout-json: "status=1\n"
|
203 | ## BUG zsh status: 0
|
204 |
|
205 | #### Fatal error inside [[ =~ ]]
|
206 |
|
207 | # zsh and osh are stricter than bash: the division by zero ends the script with status 1. bash treats [[ like a command, so it goes on to print status=1.
|
208 |
|
209 | [[ a =~ $(( 1 / 0 )) ]]
|
210 | echo status=$?
|
211 | ## stdout-json: ""
|
212 | ## status: 1
|
213 | ## BUG bash stdout: status=1
|
214 | ## BUG bash status: 0
|
215 |
|
216 | #### Quoted { and +
|
217 | [[ { =~ "{" ]] && echo 'yes {'  # in this group each quoted metacharacter is expected to match itself
|
218 | [[ + =~ "+" ]] && echo 'yes +'
|
219 | [[ * =~ "*" ]] && echo 'yes *'
|
220 | [[ ? =~ "?" ]] && echo 'yes ?'
|
221 | [[ ^ =~ "^" ]] && echo 'yes ^'
|
222 | [[ $ =~ "$" ]] && echo 'yes $'
|
223 | [[ '(' =~ '(' ]] && echo 'yes ('
|
224 | [[ ')' =~ ')' ]] && echo 'yes )'
|
225 | [[ '|' =~ '|' ]] && echo 'yes |'
|
226 | [[ '\' =~ '\' ]] && echo 'yes \'
|
227 | echo ---
|
228 |
|
229 | [[ . =~ "." ]] && echo 'yes .'  # a quoted dot matches a literal dot...
|
230 | [[ z =~ "." ]] || echo 'no .'  # ...but is not expected to match any other character
|
231 | echo ---
|
232 |
|
233 | # This rule is weird but all shells agree. I would expect that the - gets
|
234 | # escaped? It's an operator? But it behaves like a-z: a, b and z match, while - itself does not.
|
235 | [[ a =~ ["a-z"] ]]; echo "a $?"
|
236 | [[ - =~ ["a-z"] ]]; echo "- $?"
|
237 | [[ b =~ ['a-z'] ]]; echo "b $?"
|
238 | [[ z =~ ['a-z'] ]]; echo "z $?"
|
239 |
|
240 | echo status=$?  # reports the status of the preceding echo, hence always 0
|
241 | ## STDOUT:
|
242 | yes {
|
243 | yes +
|
244 | yes *
|
245 | yes ?
|
246 | yes ^
|
247 | yes $
|
248 | yes (
|
249 | yes )
|
250 | yes |
|
251 | yes \
|
252 | ---
|
253 | yes .
|
254 | no .
|
255 | ---
|
256 | a 0
|
257 | - 1
|
258 | b 0
|
259 | z 0
|
260 | status=0
|
261 | ## END
|
262 | ## N-I zsh STDOUT:
|
263 | yes ^
|
264 | yes $
|
265 | yes )
|
266 | yes |
|
267 | ---
|
268 | yes .
|
269 | ---
|
270 | a 0
|
271 | - 1
|
272 | b 0
|
273 | z 0
|
274 | status=0
|
275 | ## END
|
276 |
|
277 | #### Escaped {
|
278 | # from bash-completion: group 1 is a dollar sign with an optional {, group 2 is a run of [A-Za-z0-9_]
|
279 | [[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
|
280 | ## STDOUT:
|
281 | ['$PA', '$', 'PA']
|
282 | ## END
|
283 | ## BUG zsh stdout-json: ""
|
284 | ## BUG zsh status: 1
|
285 |
|
286 | #### Escaped { stored in variable first
|
287 | # from bash-completion: same regex as the inline "Escaped {" case, but expanded from a variable
|
288 | pat='^(\$\{?)([A-Za-z0-9_]*)$'
|
289 | [[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
|
290 | ## STDOUT:
|
291 | ['$PA', '$', 'PA']
|
292 | ## END
|
293 | ## BUG zsh STDOUT:
|
294 | ['']
|
295 | ## END
|
296 |
|
297 | #### regex with ?
|
298 | [[ 'c' =~ c? ]] && echo true  # the optional c matches a single c...
|
299 | [[ '' =~ c? ]] && echo true  # ...and also the empty string
|
300 | ## STDOUT:
|
301 | true
|
302 | true
|
303 | ## END
|
304 |
|
305 | #### regex with unprintable characters
|
306 | # can't have nul byte
|
307 |
|
308 | # This pattern has literal 0x01 and 0x02 bytes in the bracket expression; the $'...' quoting expands the escapes
|
309 | pat=$'^[\x01\x02]+$'
|
310 |
|
311 | [[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
|
312 | [[ $'a\x01' =~ $pat ]]; echo status=$?
|
313 |
|
314 | # NOTE: There doesn't appear to be any way to escape these! pat2 keeps the backslash sequences as plain text and is not used by any match in this case.
|
315 | pat2='^[\x01\x02]+$'
|
316 |
|
317 | ## STDOUT:
|
318 | status=0
|
319 | status=1
|
320 | ## END
|
321 |
|
322 | #### pattern $f(x) -- regression
|
323 | f=fff
|
324 | [[ fffx =~ $f(x) ]]  # a variable expansion directly followed by a regex group
|
325 | echo status=$?
|
326 | [[ ffx =~ $f(x) ]]  # the subject has only two f characters, so no match is expected
|
327 | echo status=$?
|
328 | ## STDOUT:
|
329 | status=0
|
330 | status=1
|
331 | ## END
|
332 |
|
333 | #### pattern a=(1)
|
334 | [[ a=x =~ a=(x) ]]  # NOTE(review): presumably guards against the regex being lexed as an array assignment -- confirm
|
335 | echo status=$?
|
336 | [[ =x =~ a=(x) ]]  # the subject lacks the leading a, so no match is expected
|
337 | echo status=$?
|
338 | ## STDOUT:
|
339 | status=0
|
340 | status=1
|
341 | ## END
|
342 | ## BUG zsh status: 1
|
343 | ## BUG zsh STDOUT:
|
344 | status=0
|
345 | ## END
|
346 |
|
347 | #### pattern @f(x)
|
348 | shopt -s parse_at  # NOTE(review): presumably checks that the regex is still read as a plain pattern with this option on -- confirm
|
349 | [[ @fx =~ @f(x) ]]
|
350 | echo status=$?
|
351 | [[ fx =~ @f(x) ]]  # the subject lacks the leading @, so no match is expected
|
352 | echo status=$?
|
353 | ## STDOUT:
|
354 | status=0
|
355 | status=1
|
356 | ## END
|