| 1 | # |
| 2 | # Only bash and zsh seem to implement [[ foo =~ '' ]] |
| 3 | # |
| 4 | # ^(a b)$ is a regex that should match 'a b' in a group. |
| 5 | # |
| 6 | # Not sure what bash is doing here... I think I have to just be empirical. |
| 7 | # Might need "compat" switch for parsing the regex. It should be an opaque |
| 8 | # string like zsh, not sure why it isn't. |
| 9 | # |
| 10 | # I think this is just papering over bugs... |
| 11 | # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs |
| 12 | # The following is an excerpt from that section of the bash manual: |
| 13 | # Storing the regular expression in a shell variable is often a useful way to |
| 14 | # avoid problems with quoting characters that are special to the shell. It is |
| 15 | # sometimes difficult to specify a regular expression literally without using |
| 16 | # quotes, or to keep track of the quoting used by regular expressions while |
| 17 | # paying attention to the shell’s quote removal. Using a shell variable to |
| 18 | # store the pattern decreases these problems. For example, the following is |
| 19 | # equivalent to the above: |
| 20 | # |
| 21 | # pattern='[[:space:]]*(a)?b' |
| 22 | # [[ $line =~ $pattern ]] |
| 23 | # |
| 24 | # If you want to match a character that’s special to the regular expression |
| 25 | # grammar, it has to be quoted to remove its special meaning. This means that in |
| 26 | # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual |
| 27 | # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a |
| 28 | # literal ‘.’. Shell programmers should take special care with backslashes, since |
| 29 | # backslashes are used both by the shell and regular expressions to remove the |
| 30 | # special meaning from the following character. The following two sets of |
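| 31 | # commands are not equivalent, given pattern='\.': [[ . =~ $pattern ]] and |
| 32 | # [[ . =~ \. ]] match, but [[ . =~ "$pattern" ]] and [[ . =~ '\.' ]] do not. |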
| 33 | # From bash code: ( | ) are treated specially. Normally they must be quoted, but |
| 34 | # they can be UNQUOTED in BASH_REGEX state. In fact, quoting them makes them literal! |
| 35 | |
| 36 | #### BASH_REMATCH |
| 37 | [[ foo123 =~ ([a-z]+)([0-9]+) ]] |
| 38 | argv.py "${BASH_REMATCH[@]}" |
| 39 | ## STDOUT: |
| 40 | ['foo123', 'foo', '123'] |
| 41 | ## END |
| 42 | ## N-I zsh STDOUT: |
| 43 | [''] |
| 44 | ## END |
| 45 | |
| 46 | #### Match is unanchored at both ends |
| 47 | [[ 'bar' =~ a ]] && echo true |
| 48 | ## stdout: true |
| 49 | |
| 50 | #### Failed match |
| 51 | [[ 'bar' =~ X ]] && echo true |
| 52 | ## status: 1 |
| 53 | ## stdout-json: "" |
| 54 | |
| 55 | #### Regex quoted with \ -- preferred in bash |
| 56 | [[ 'a b' =~ ^(a\ b)$ ]] && echo true |
| 57 | ## stdout: true |
| 58 | |
| 59 | #### Regex quoted with single quotes
| 60 | # bash matches a quoted pattern literally, so ^ ( ) $ are not regex operators here; zsh still treats it as a regex.
| 61 | [[ 'a b' =~ '^(a b)$' ]] && echo true
| 62 | ## stdout-json: ""
| 63 | ## status: 1
| 64 | ## OK zsh stdout: true
| 65 | ## OK zsh status: 0
| 66 | |
| 67 | #### Regex quoted with double quotes
| 68 | # As with single quotes: bash matches the double-quoted pattern literally and fails; zsh treats it as a regex.
| 69 | [[ 'a b' =~ "^(a b)$" ]] && echo true
| 70 | ## stdout-json: ""
| 71 | ## status: 1
| 72 | ## OK zsh stdout: true
| 73 | ## OK zsh status: 0
| 74 | |
| 75 | #### Fix single quotes by storing in variable |
| 76 | pat='^(a b)$' |
| 77 | [[ 'a b' =~ $pat ]] && echo true |
| 78 | ## stdout: true |
| 79 | |
| 80 | #### Fix double quotes by storing in variable
| 81 | pat="^(a b)$"
| 82 | [[ 'a b' =~ $pat ]] && echo true
| 83 | ## stdout: true
| 84 | |
| 85 | #### Double quoting pat variable -- again bash doesn't like it. |
| 86 | pat="^(a b)$" |
| 87 | [[ 'a b' =~ "$pat" ]] && echo true |
| 88 | ## stdout-json: "" |
| 89 | ## status: 1 |
| 90 | ## OK zsh stdout: true |
| 91 | ## OK zsh status: 0 |
| 92 | |
| 93 | #### Mixing quoted and unquoted parts |
| 94 | [[ 'a b' =~ 'a 'b ]] && echo true |
| 95 | [[ "a b" =~ "a "'b' ]] && echo true |
| 96 | ## STDOUT: |
| 97 | true |
| 98 | true |
| 99 | ## END |
| 100 | |
| 101 | #### Regex with == and not =~ is parse error, different lexer mode required
| 102 | # Every shell rejects the unquoted ( ) after == as a syntax error (zsh exits 1, the others 2).
| 103 | [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
| 104 | ## status: 2
| 105 | ## OK zsh status: 1
| 106 | |
| 107 | #### Omitting ( ) |
| 108 | [[ '^a b$' == ^a\ b$ ]] && echo true |
| 109 | ## stdout: true |
| 110 | |
| 111 | #### Malformed regex
| 112 | # Open question: do the shells parse the regex themselves, or hand the raw text
| 113 | # directly to regcomp()? Either way the unbalanced ) ( must fail without output.
| 114 | [[ 'a b' =~ ^)a\ b($ ]] && echo true
| 115 | ## stdout-json: ""
| 116 | ## status: 2
| 117 | ## OK zsh status: 1
| 118 | |
| 119 | #### Regex with char class containing space
| 120 | # Doesn't work without the parens -- presumably an unparenthesized space would end the pattern word (TODO confirm).
| 121 | [[ 'ba ba ' =~ ([a b]+) ]] && echo true
| 122 | ## stdout: true
| 123 | |
| 124 | #### Operators and space lose meaning inside () |
| 125 | [[ '< >' =~ (< >) ]] && echo true |
| 126 | ## stdout: true |
| 127 | ## N-I zsh stdout-json: "" |
| 128 | ## N-I zsh status: 1 |
| 129 | |
| 130 | #### Regex with | |
| 131 | [[ 'bar' =~ foo|bar ]] && echo true |
| 132 | ## stdout: true |
| 133 | ## N-I zsh stdout-json: "" |
| 134 | ## N-I zsh status: 1 |
| 135 | |
| 136 | #### Regex to match literal brackets []
| 137 |
| 138 | # bash-completion relies on this, so we're making it match bash.
| 139 | # zsh understandably differs: only one of the two matches below prints 'true' there.
| 140 | [[ '[]' =~ \[\] ]] && echo true
| 141 |
| 142 | # Same pattern again, this time expanded from a single-quoted variable.
| 143 | pat='\[\]'
| 144 | [[ '[]' =~ $pat ]] && echo true
| 145 | ## STDOUT:
| 146 | true
| 147 | true
| 148 | ## END
| 149 | ## OK zsh STDOUT:
| 150 | true
| 151 | ## END
| 152 | |
| 153 | #### Regex to match literals . ^ $ etc.
| 154 | [[ 'x' =~ \. ]] || echo false
| 155 | [[ '.' =~ \. ]] && echo true
| 156 | # As with \. above, the escaped anchors \^ and \$ match only the literal characters.
| 157 | [[ 'xx' =~ \^\$ ]] || echo false
| 158 | [[ '^$' =~ \^\$ ]] && echo true
| 159 | # Escaped quantifiers; the negative case lists them as \+\*\? and the positive one as \*\+\?.
| 160 | [[ 'xxx' =~ \+\*\? ]] || echo false
| 161 | [[ '*+?' =~ \*\+\? ]] && echo true
| 162 | # Escaped braces.
| 163 | [[ 'xx' =~ \{\} ]] || echo false
| 164 | [[ '{}' =~ \{\} ]] && echo true
| 165 | ## STDOUT:
| 166 | false
| 167 | true
| 168 | false
| 169 | true
| 170 | false
| 171 | true
| 172 | false
| 173 | true
| 174 | ## END
| 175 | ## BUG zsh STDOUT:
| 176 | true
| 177 | false
| 178 | false
| 179 | false
| 180 | ## END
| 181 | ## BUG zsh status: 1
| 182 | |
| 183 | #### Unquoted { is a regex parse error |
| 184 | [[ { =~ { ]] && echo true |
| 185 | echo status=$? |
| 186 | ## stdout-json: "" |
| 187 | ## status: 2 |
| 188 | ## BUG bash stdout-json: "status=2\n" |
| 189 | ## BUG bash status: 0 |
| 190 | ## BUG zsh stdout-json: "status=1\n" |
| 191 | ## BUG zsh status: 0 |
| 192 | |
| 193 | #### Fatal error inside [[ =~ ]]
| 194 |
| 195 | # zsh and osh are stricter than bash: the division by zero aborts the script
| 196 | # (status 1, no output), whereas bash treats [[ like a command and runs the echo.
| 197 | [[ a =~ $(( 1 / 0 )) ]]
| 198 | echo status=$?
| 199 | ## stdout-json: ""
| 200 | ## status: 1
| 201 | ## BUG bash stdout: status=1
| 202 | ## BUG bash status: 0
| 203 | |
| 204 | #### Quoted { and +
| 205 | [[ { =~ "{" ]] && echo 'yes {'
| 206 | [[ + =~ "+" ]] && echo 'yes +'
| 207 | [[ * =~ "*" ]] && echo 'yes *'
| 208 | [[ ? =~ "?" ]] && echo 'yes ?'
| 209 | [[ ^ =~ "^" ]] && echo 'yes ^'
| 210 | [[ $ =~ "$" ]] && echo 'yes $'
| 211 | [[ '(' =~ '(' ]] && echo 'yes ('
| 212 | [[ ')' =~ ')' ]] && echo 'yes )'
| 213 | [[ '|' =~ '|' ]] && echo 'yes |'
| 214 | [[ '\' =~ '\' ]] && echo 'yes \'
| 215 | echo ---
| 216 |
| 217 | [[ . =~ "." ]] && echo 'yes .'
| 218 | [[ z =~ "." ]] || echo 'no .'
| 219 | echo ---
| 220 |
| 221 | # Quoting inside a bracket expression is weird, but all shells agree: the quoted
| 222 | # - is not taken literally, so ["a-z"] still acts as the range a-z.
| 223 | [[ a =~ ["a-z"] ]]; echo "a $?"
| 224 | [[ - =~ ["a-z"] ]]; echo "- $?"
| 225 | [[ b =~ ['a-z'] ]]; echo "b $?"
| 226 | [[ z =~ ['a-z'] ]]; echo "z $?"
| 227 | # $? here is the status of the echo just above, so this always prints status=0.
| 228 | echo status=$?
| 229 | ## STDOUT:
| 230 | yes {
| 231 | yes +
| 232 | yes *
| 233 | yes ?
| 234 | yes ^
| 235 | yes $
| 236 | yes (
| 237 | yes )
| 238 | yes |
| 239 | yes \
| 240 | ---
| 241 | yes .
| 242 | no .
| 243 | ---
| 244 | a 0
| 245 | - 1
| 246 | b 0
| 247 | z 0
| 248 | status=0
| 249 | ## END
| 250 | ## N-I zsh STDOUT:
| 251 | yes ^
| 252 | yes $
| 253 | yes )
| 254 | yes |
| 255 | ---
| 256 | yes .
| 257 | ---
| 258 | a 0
| 259 | - 1
| 260 | b 0
| 261 | z 0
| 262 | status=0
| 263 | ## END
| 264 | |
| 265 | #### Escaped {
| 266 | # Pattern from bash-completion: a literal $ with an optional {, then a run of [A-Za-z0-9_] characters.
| 267 | [[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
| 268 | ## STDOUT:
| 269 | ['$PA', '$', 'PA']
| 270 | ## END
| 271 | ## BUG zsh stdout-json: ""
| 272 | ## BUG zsh status: 1
| 273 | |
| 274 | #### Escaped { stored in variable first
| 275 | # Same bash-completion pattern, but expanded from a single-quoted variable.
| 276 | pat='^(\$\{?)([A-Za-z0-9_]*)$'
| 277 | [[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
| 278 | ## STDOUT:
| 279 | ['$PA', '$', 'PA']
| 280 | ## END
| 281 | ## BUG zsh STDOUT:
| 282 | ['']
| 283 | ## END
| 284 | |
| 285 | #### regex with ? |
| 286 | [[ 'c' =~ c? ]] && echo true |
| 287 | [[ '' =~ c? ]] && echo true |
| 288 | ## STDOUT: |
| 289 | true |
| 290 | true |
| 291 | ## END |
| 292 | |
| 293 | #### regex with unprintable characters
| 294 | # Tested with bytes 0x01 and 0x02; a NUL byte can't be used.
| 295 |
| 296 | # $'...' puts the literal bytes 0x01 and 0x02 inside the bracket expression.
| 297 | pat=$'^[\x01\x02]+$'
| 298 |
| 299 | [[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
| 300 | [[ $'a\x01' =~ $pat ]]; echo status=$?
| 301 |
| 302 | # NOTE: There doesn't appear to be any way to escape these! pat2 (backslash text, not bytes) is defined but never used.
| 303 | pat2='^[\x01\x02]+$'
| 304 |
| 305 | ## STDOUT:
| 306 | status=0
| 307 | status=1
| 308 | ## END
| 309 | |
| 310 | #### pattern $f(x) -- regression |
| 311 | f=fff |
| 312 | [[ fffx =~ $f(x) ]] |
| 313 | echo status=$? |
| 314 | [[ ffx =~ $f(x) ]] |
| 315 | echo status=$? |
| 316 | ## STDOUT: |
| 317 | status=0 |
| 318 | status=1 |
| 319 | ## END |
| 320 | |
| 321 | #### pattern a=(x)
| 322 | [[ a=x =~ a=(x) ]]
| 323 | echo status=$?
| 324 | [[ =x =~ a=(x) ]]
| 325 | echo status=$?
| 326 | ## STDOUT:
| 327 | status=0
| 328 | status=1
| 329 | ## END
| 330 | ## BUG zsh status: 1
| 331 | ## BUG zsh STDOUT:
| 332 | status=0
| 333 | ## END
| 334 | |
| 335 | #### pattern @f(x) |
| 336 | shopt -s parse_at |
| 337 | [[ @fx =~ @f(x) ]] |
| 338 | echo status=$? |
| 339 | [[ fx =~ @f(x) ]] |
| 340 | echo status=$? |
| 341 | ## STDOUT: |
| 342 | status=0 |
| 343 | status=1 |
| 344 | ## END |