1 |
# Test out Oil's regular expression syntax. |
2 |
|
3 |
#### /^.$/ |
4 |
shopt -s oil:all |
5 |
var pat = '' |
6 |
|
7 |
# Operator spelling of "exactly one character": ^ . $ inside an eggex literal. |
setvar pat = /^.$/ |
8 |
echo pat=$pat |
9 |
|
10 |
# Named spelling of the same pattern; STDOUT expects the identical ERE string ^.$ |
setvar pat = /%start dot %end/ |
11 |
echo pat=$pat |
12 |
|
13 |
# The empty string has no character for the dot, so 'no' is the expected output. |
if ('' ~ pat) { # ERE syntax |
14 |
echo yes |
15 |
} else { |
16 |
echo no |
17 |
} |
18 |
# $pat is same as pat |
19 |
# A one-character subject satisfies ^.$, so 'yes' is the expected output. |
if ('f' ~ pat) { # ERE syntax |
20 |
echo yes |
21 |
} else { |
22 |
echo no |
23 |
} |
24 |
|
25 |
## STDOUT: |
26 |
pat=^.$ |
27 |
pat=^.$ |
28 |
no |
29 |
yes |
30 |
## END |
31 |
|
32 |
|
33 |
#### /.+/ |
34 |
shopt -s oil:all |
35 |
|
36 |
var pat = /.+/ |
37 |
echo $pat |
38 |
|
39 |
var s = 'foo' |
40 |
if (s ~ pat) { # ERE syntax |
41 |
echo yes |
42 |
} |
43 |
var empty = '' |
44 |
if (empty ~ pat) { echo yes } else { echo no } |
45 |
## STDOUT: |
46 |
.+ |
47 |
yes |
48 |
no |
49 |
## END |
50 |
|
51 |
#### Positional captures with _match |
52 |
shopt -s oil:all |
53 |
|
54 |
var x = 'zz 2020-08-20' |
55 |
|
56 |
# Baseline with the bash-compatible [[ =~ ]] operator: the first STDOUT line is |
# its BASH_REMATCH (whole match followed by the two groups). |
if [[ $x =~ ([[:digit:]]+)-([[:digit:]]+) ]] { |
57 |
argv.py "${BASH_REMATCH[@]}" |
58 |
} |
59 |
|
60 |
# THIS IS A NO-OP. The variable is SHADOWED by the special name. |
61 |
# I think that's OK. |
62 |
setvar BASH_REMATCH = %(reset) |
63 |
|
64 |
# Eggex with two <d+> captures: both BASH_REMATCH and _match(0..2) are expected |
# to print the same three values as the bash baseline above. |
if (x ~ /<d+> '-' <d+>/) { |
65 |
argv.py "${BASH_REMATCH[@]}" |
66 |
argv.py $[_match(0)] $[_match(1)] $[_match(2)] |
67 |
|
68 |
argv.py $[_match()] # synonym for _match(0) |
69 |
|
70 |
# TODO: Also test _start() and _end() |
71 |
} |
72 |
## STDOUT: |
73 |
['2020-08', '2020', '08'] |
74 |
['2020-08', '2020', '08'] |
75 |
['2020-08', '2020', '08'] |
76 |
['2020-08'] |
77 |
## END |
78 |
|
79 |
#### s ~ regex and s !~ regex |
80 |
shopt -s oil:upgrade |
81 |
|
82 |
var s = 'foo' |
83 |
# A successful ~ populates _match(): group 0 is 'foo', group 1 is 'oo' (see STDOUT). |
if (s ~ '.([[:alpha:]]+)') { # ERE syntax |
84 |
echo matches |
85 |
argv.py $[_match(0)] $[_match(1)] |
86 |
} |
87 |
# A true !~ is expected to leave the earlier captures intact: STDOUT repeats |
# ['foo', 'oo'] after "does not match". |
if (s !~ '[[:digit:]]+') { |
88 |
echo "does not match" |
89 |
argv.py $[_match(0)] $[_match(1)] |
90 |
} |
91 |
|
92 |
# This ~ does not match ("matches" is absent from STDOUT); it is the step after |
# which the captures are expected to be cleared. |
if (s ~ '[[:digit:]]+') { |
93 |
echo "matches" |
94 |
} |
95 |
# Should be cleared now |
96 |
# The check below expects null for cleared groups; should this be Undef instead? |
97 |
var x = _match(0) |
98 |
var y = _match(1) |
99 |
if (x === null and y === null) { |
100 |
echo 'cleared' |
101 |
} |
102 |
|
103 |
## STDOUT: |
104 |
matches |
105 |
['foo', 'oo'] |
106 |
does not match |
107 |
['foo', 'oo'] |
108 |
cleared |
109 |
## END |
110 |
|
111 |
#### _start() and _end() |
112 |
shopt -s oil:upgrade |
113 |
|
114 |
var s = 'foo123bar' |
115 |
if (s ~ /digit+/) { |
116 |
echo start=$[_start()] end=$[_end()] |
117 |
} |
118 |
|
119 |
if (s ~ / word+ <digit+> /) { |
120 |
echo start=$[_start(1)] end=$[_end(1)] |
121 |
} |
122 |
## STDOUT: |
123 |
start=3 end=6 |
124 |
start=3 end=6 |
125 |
## END |
126 |
|
127 |
#### Repeat {1,3} etc. |
128 |
var pat = null |
129 |
|
130 |
setvar pat = /d{2}/ |
131 |
echo $pat |
132 |
setvar pat = /d{1,3}/ |
133 |
echo $pat |
134 |
setvar pat = /d{1,}/ |
135 |
echo $pat |
136 |
setvar pat = /d{,3}/ |
137 |
echo $pat |
138 |
|
139 |
|
140 |
## STDOUT: |
141 |
[[:digit:]]{2} |
142 |
[[:digit:]]{1,3} |
143 |
[[:digit:]]{1,} |
144 |
[[:digit:]]{,3} |
145 |
## END |
146 |
|
147 |
|
148 |
#### d+ digit+ !d+ !digit+ |
149 |
shopt -s oil:all |
150 |
|
151 |
var pat = '' |
152 |
|
153 |
setvar pat = /d+/ |
154 |
echo $pat |
155 |
if ('42' ~ pat) { echo yes } |
156 |
|
157 |
var empty = '' |
158 |
if (empty ~ pat) { echo yes } else { echo no } |
159 |
|
160 |
setvar pat = /digit+/ |
161 |
echo $pat |
162 |
setvar pat = /!d+/ |
163 |
echo $pat |
164 |
setvar pat = /!digit+/ |
165 |
echo $pat |
166 |
|
167 |
|
168 |
## STDOUT: |
169 |
[[:digit:]]+ |
170 |
yes |
171 |
no |
172 |
[[:digit:]]+ |
173 |
[^[:digit:]]+ |
174 |
[^[:digit:]]+ |
175 |
## END |
176 |
|
177 |
#### Alternation and sequence |
178 |
var pat = '' |
179 |
setvar pat = /s d+ | w*/ |
180 |
echo $pat |
181 |
setvar pat = /s d+ or w*/ |
182 |
echo $pat |
183 |
## STDOUT: |
184 |
[[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]* |
185 |
[[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]* |
186 |
## END |
187 |
|
188 |
#### Char Class Ranges |
189 |
shopt -s oil:all |
190 |
|
191 |
var pat = '' |
192 |
setvar pat = /[0-9 a-f]+/ |
193 |
echo $pat |
194 |
# This is equivalent |
195 |
setvar pat = /['0' - '9' 'a' - 'f']+/ |
196 |
echo $pat |
197 |
|
198 |
if ('0123' ~ pat) { echo yes } else { echo no } |
199 |
if ('zzz' ~ pat) { echo yes } else { echo no } |
200 |
if ('' ~ pat) { echo yes } else { echo no } |
201 |
## STDOUT: |
202 |
[0-9a-f]+ |
203 |
[0-9a-f]+ |
204 |
yes |
205 |
no |
206 |
no |
207 |
## END |
208 |
|
209 |
#### Char Class Set |
210 |
shopt -s oil:all |
211 |
var pat = '' |
212 |
|
213 |
# This is NOT allowed |
214 |
# setvar pat = /[a b c]+/ |
215 |
|
216 |
setvar pat = /['abc']+/ |
217 |
echo $pat |
218 |
|
219 |
if ('cbcb' ~ pat) { echo yes } else { echo no } |
220 |
if ('0123' ~ pat) { echo yes } else { echo no } |
221 |
if ('' ~ pat) { echo yes } else { echo no } |
222 |
## STDOUT: |
223 |
[abc]+ |
224 |
yes |
225 |
no |
226 |
no |
227 |
## END |
228 |
|
229 |
#### Range with escaped characters |
230 |
shopt -s oil:all |
231 |
|
232 |
var pat = null |
233 |
|
234 |
setvar pat = / [ \x00 - \x0f ] / |
235 |
echo $pat | od -A n -t x1 |
236 |
|
237 |
## STDOUT: |
238 |
5b 00 2d 0f 5d 0a |
239 |
## END |
240 |
|
241 |
|
242 |
#### Group () |
243 |
shopt -s oil:all |
244 |
var pat = '' |
245 |
|
246 |
setvar pat = /(%start s or d d)/ |
247 |
echo $pat |
248 |
|
249 |
if (' foo' ~ pat) { echo yes } else { echo no } |
250 |
if ('-00-' ~ pat) { echo yes } else { echo no } |
251 |
if ('foo' ~ pat) { echo yes } else { echo no } |
252 |
|
253 |
## STDOUT: |
254 |
(^[[:space:]]|[[:digit:]][[:digit:]]) |
255 |
yes |
256 |
yes |
257 |
no |
258 |
## END |
259 |
|
260 |
#### Capture is acceptable as a group |
261 |
shopt -s oil:all |
262 |
var pat = /<%start s | d d>/ |
263 |
echo $pat |
264 |
## STDOUT: |
265 |
(^[[:space:]]|[[:digit:]][[:digit:]]) |
266 |
## END |
267 |
|
268 |
#### Named captures with _match |
269 |
shopt -s oil:all |
270 |
|
271 |
var x = 'zz 2020-08-20' |
272 |
|
273 |
if (x ~ /<d+ : year> '-' <d+ : month>/) { |
274 |
argv.py $[_match('year')] $[_match('month')] |
275 |
} |
276 |
## STDOUT: |
277 |
['2020', '08'] |
278 |
## END |
279 |
|
280 |
#### Named Capture Decays Without Name |
281 |
shopt -s oil:all |
282 |
var pat = /<d+ : month>/ |
283 |
echo $pat |
284 |
|
285 |
if ('123' ~ pat) { |
286 |
echo yes |
287 |
} |
288 |
|
289 |
## STDOUT: |
290 |
([[:digit:]]+) |
291 |
yes |
292 |
## END |
293 |
|
294 |
#### Operator ~ assigns named variable |
295 |
shopt -s oil:all |
296 |
var pat = /<d+ : month>/ |
297 |
echo $pat |
298 |
|
299 |
if ('123' ~ pat) { |
300 |
echo yes |
301 |
= month |
302 |
} |
303 |
## STDOUT: |
304 |
([[:digit:]]+) |
305 |
yes |
306 |
TODO MONTH |
307 |
## END |
308 |
|
309 |
#### literal '' |
310 |
shopt -s oil:all |
311 |
var pat = '' |
312 |
|
313 |
setvar pat = /'abc' 'def'/ |
314 |
echo $pat |
315 |
|
316 |
#setvar pat = /'abc' '^ + * ?'/ |
317 |
#echo $pat |
318 |
|
319 |
if ('abcde' ~ pat) { echo yes } else { echo no } |
320 |
if ('abcdef' ~ pat) { echo yes } else { echo no } |
321 |
|
322 |
## STDOUT: |
323 |
abcdef |
324 |
no |
325 |
yes |
326 |
## END |
327 |
|
328 |
#### Single quotes and splicing (do what "foo $x ${x}" used to) |
329 |
shopt -s oil:all |
330 |
var pat = '' |
331 |
|
332 |
var x = 'x' |
333 |
var y = 'y' |
334 |
setvar pat = / @x @x 'abc' @x @y / |
335 |
echo $pat |
336 |
|
337 |
if ('xxabcx' ~ pat) { echo yes } else { echo no } |
338 |
if ('xxabcxyf' ~ pat) { echo yes } else { echo no } |
339 |
|
340 |
## STDOUT: |
341 |
xxabcxy |
342 |
no |
343 |
yes |
344 |
## END |
345 |
|
346 |
#### @splice |
347 |
shopt -s oil:all |
348 |
var d = /d+/; |
349 |
var ip = / @d '.' @d '.' @d '.' @d / |
350 |
echo $ip |
351 |
if ('0.0.0.0' ~ ip) { echo yes } else { echo no } |
352 |
if ('0.0.0' ~ ip) { echo yes } else { echo no } |
353 |
## STDOUT: |
354 |
[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+ |
355 |
yes |
356 |
no |
357 |
## END |
358 |
|
359 |
#### splice with capital letters |
360 |
shopt -s oil:all |
361 |
var D = /d+/; |
362 |
var ip = / D '.' D '.' D '.' D / |
363 |
echo $ip |
364 |
if ('0.0.0.0' ~ ip) { echo yes } else { echo no } |
365 |
if ('0.0.0' ~ ip) { echo yes } else { echo no } |
366 |
## STDOUT: |
367 |
[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+ |
368 |
yes |
369 |
no |
370 |
## END |
371 |
|
372 |
#### Matching escaped tab character |
373 |
shopt -s oil:all |
374 |
|
375 |
var lines=%($'aa\tbb' $'cc\tdd') |
376 |
|
377 |
var pat = / ('a' [\t] 'b') / |
378 |
write pat=$pat |
379 |
write @lines | egrep $pat |
380 |
|
381 |
## stdout-json: "pat=(a[\t]b)\naa\tbb\n" |
382 |
|
383 |
#### Match unicode char |
384 |
shopt -s oil:all |
385 |
var pat = / 'a' dot 'b' / |
386 |
|
387 |
if ('axb' ~ pat ) { echo yes } else { echo no } |
388 |
|
389 |
# mu character |
390 |
if ($'a\xce\xbcb' ~ pat ) { echo yes } else { echo no } |
391 |
|
392 |
if ('aZZb' ~ pat ) { echo yes } else { echo no } |
393 |
## STDOUT: |
394 |
yes |
395 |
yes |
396 |
no |
397 |
## END |
398 |
|
399 |
#### Match non-ASCII byte denoted using $'\xff' (TODO: LANG=C) |
400 |
|
401 |
# NOTE: This pattern doesn't work with en_US.UTF-8. I think the user should |
402 |
# set LANG=C or shopt --unset libc_utf8. |
403 |
|
404 |
shopt -s oil:all |
405 |
var pat = /[ $'\xff' ]/; |
406 |
|
407 |
echo $pat | od -A n -t x1 |
408 |
if ($'\xff' ~ pat) { echo yes } else { echo no } |
409 |
if ($'\xfe' ~ pat) { echo yes } else { echo no } |
410 |
|
411 |
## STDOUT: |
412 |
5b ff 5d 0a |
413 |
yes |
414 |
no |
415 |
## END |
416 |
|
417 |
#### Match non-ASCII byte denoted using \xff |
418 |
shopt -s oil:all |
419 |
var pat = /[ \xff ]/; |
420 |
|
421 |
# Show what it translates to |
422 |
echo $pat | od -A n -t x1 |
423 |
|
424 |
# TODO: This might require LANG=C to work |
425 |
#if ($'\xff' ~ pat) { echo yes } else { echo no } |
426 |
#if ($'\xfe' ~ pat) { echo yes } else { echo no } |
427 |
|
428 |
## STDOUT: |
429 |
5b ff 5d 0a |
430 |
## END |
431 |
|
432 |
#### ERE can express Unicode escapes that are in the ASCII range |
433 |
shopt -s oil:all |
434 |
var pat = /[ \u{7f} ]/; |
435 |
|
436 |
echo $pat | od -A n -t x1 |
437 |
if ($'\x7f' ~ pat) { echo yes } else { echo no } |
438 |
if ($'\x7e' ~ pat) { echo yes } else { echo no } |
439 |
|
440 |
var pat2 = /[ \u{7f} ]/; |
441 |
var pat3 = /[ \u{0007f} ]/; |
442 |
test "$pat2" = "$pat3" && echo 'equal' |
443 |
|
444 |
var range = / [ \u{70} - \u{7f} ] / |
445 |
if ($'\x70' ~ range) { echo yes } else { echo no } |
446 |
if ($'\x69' ~ range) { echo yes } else { echo no } |
447 |
|
448 |
## STDOUT: |
449 |
5b 7f 5d 0a |
450 |
yes |
451 |
no |
452 |
equal |
453 |
yes |
454 |
no |
455 |
## END |
456 |
|
457 |
#### ERE can't express higher Unicode escapes |
458 |
shopt -s oil:all |
459 |
var pat2 = /[ \u{00} - \u{ff} ]/; |
460 |
|
461 |
# This causes an error: the case expects status 1 and empty stdout, so |
# execution is expected to stop at this echo. |
462 |
echo $pat2 |
463 |
|
464 |
# This just prints it |
465 |
= pat2 |
466 |
|
467 |
var pat1 = /[ \u{ff} ]/; |
468 |
|
469 |
echo $pat1 | od -A n -t x1 |
470 |
# Match against pat1: no variable named 'pat' is defined in this case. |
if ($'\x7f' ~ pat1) { echo yes } else { echo no } |
471 |
if ($'\x7e' ~ pat1) { echo yes } else { echo no } |
472 |
|
473 |
## status: 1 |
474 |
## stdout-json: "" |
475 |
|
476 |
#### non-ASCII bytes must be singleton terms, e.g. '\x7f\xff' is disallowed |
477 |
var bytes = $'\x7f\xff' |
478 |
var pat = / [ @bytes ] / |
479 |
echo $pat |
480 |
## status: 1 |
481 |
## stdout-json: "" |
482 |
|
483 |
#### Matching escaped tab character |
484 |
shopt -s oil:all |
485 |
|
486 |
# NOTE(review): this case has the same name and body as the earlier |
# 'Matching escaped tab character' case -- confirm whether both are needed. |
# BUG: need C strings in array literal |
# NOTE(review): the expected output already shows the $'...' tab being matched, |
# so the BUG note above may be stale -- confirm. |
487 |
var lines=%($'aa\tbb' $'cc\tdd') |
488 |
|
489 |
# [\t] is expected to become a literal tab inside the bracket expression; |
# only the 'aa<TAB>bb' line should pass egrep (see stdout-json). |
var pat = / ('a' [\t] 'b') / |
490 |
write pat=$pat |
491 |
write @lines | egrep $pat |
492 |
|
493 |
## stdout-json: "pat=(a[\t]b)\naa\tbb\n" |
494 |
|
495 |
#### Repeated String Literal With Single Char |
496 |
shopt -s oil:all |
497 |
|
498 |
var literal = 'f' |
499 |
var pat = null |
500 |
|
501 |
setvar pat = / %start @literal+ %end / |
502 |
echo $pat |
503 |
setvar pat = / %start (@literal)+ %end / |
504 |
echo $pat |
505 |
|
506 |
if ('fff' ~ pat) { echo yes } |
507 |
if ('foo' !~ pat) { echo no } |
508 |
|
509 |
## STDOUT: |
510 |
^f+$ |
511 |
^(f)+$ |
512 |
yes |
513 |
no |
514 |
## END |
515 |
|
516 |
#### Error when unparenthesized string of more than one character is repeated |
517 |
shopt -s oil:all |
518 |
|
519 |
# A three-character literal: repeating it without parentheses is the error under test. |
var literal = 'foo' |
520 |
var pat = null |
521 |
|
522 |
# Expected to fail with status 1 before anything is printed (stdout is empty), |
# so the statements after the first echo presumably never run. |
setvar pat = / %start @literal+ %end / |
523 |
echo $pat |
524 |
setvar pat = / %start (@literal)+ %end / |
525 |
echo $pat |
526 |
|
527 |
if ('foofoo' ~ pat) { echo yes } |
528 |
if ('foof' !~ pat) { echo no } |
529 |
|
530 |
## status: 1 |
531 |
## stdout-json: "" |
532 |
|
533 |
#### Instead of $'foo\\bar' use 'foo' \\ 'bar' |
534 |
shopt -s oil:all |
535 |
var pat = /'foo' \\ 'bar'/ |
536 |
echo $pat |
537 |
|
538 |
if (r'foo\bar' ~ pat) { echo yes } |
539 |
if (r'foo.bar' !~ pat) { echo no } |
540 |
## STDOUT: |
541 |
foo\\bar |
542 |
yes |
543 |
no |
544 |
## END |
545 |
|
546 |
#### Negation of Character Class ![a-z] |
547 |
shopt -s oil:all |
548 |
|
549 |
var pat = / ![ a-z ] / |
550 |
echo $pat |
551 |
|
552 |
if ('0' ~ pat) { echo yes } |
553 |
if ('a' !~ pat) { echo no } |
554 |
|
555 |
## STDOUT: |
556 |
[^a-z] |
557 |
yes |
558 |
no |
559 |
## END |
560 |
|
561 |
#### Posix and Perl class in class literals |
562 |
shopt -s oil:all |
563 |
|
564 |
var pat = null |
565 |
|
566 |
setvar pat = / [ space 'z' ] / |
567 |
echo $pat |
568 |
#setvar pat = / [ ~space 'z' ] / |
569 |
#echo $pat |
570 |
|
571 |
# PROBLEM: can't negate individual POSIX classes. They would have to be a Perl |
572 |
# class to be \D or \S. |
573 |
# [[:space:]z] negates the whole thing! |
574 |
# [^[:space:]] |
575 |
|
576 |
setvar pat = / [ digit 'z' ] / |
577 |
echo $pat |
578 |
#setvar pat = / [ ~digit 'z' ] / |
579 |
#echo $pat |
580 |
|
581 |
## STDOUT: |
582 |
[[:space:]z] |
583 |
[[:digit:]z] |
584 |
## END |
585 |
|
586 |
#### [!d] can't be negated because it's a literal character |
587 |
setvar pat = / [ !d 'z' ] / |
588 |
echo $pat |
589 |
## status: 2 |
590 |
## stdout-json: "" |
591 |
|
592 |
#### [!digit] can't be negated in POSIX ERE (but yes in Perl) |
593 |
var pat = null |
594 |
setvar pat = / [ !digit 'z' ] / |
595 |
echo $pat |
596 |
## status: 1 |
597 |
## stdout-json: "" |
598 |
|
599 |
#### Long Python Example |
600 |
|
601 |
# https://docs.python.org/3/reference/lexical_analysis.html#integer-literals |
602 |
|
603 |
# integer ::= decinteger | bininteger | octinteger | hexinteger |
604 |
# decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")* |
605 |
# bininteger ::= "0" ("b" | "B") (["_"] bindigit)+ |
606 |
# octinteger ::= "0" ("o" | "O") (["_"] octdigit)+ |
607 |
# hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+ |
608 |
# nonzerodigit ::= "1"..."9" |
609 |
# digit ::= "0"..."9" |
610 |
# bindigit ::= "0" | "1" |
611 |
# octdigit ::= "0"..."7" |
612 |
# hexdigit ::= digit | "a"..."f" | "A"..."F" |
613 |
|
614 |
shopt -s oil:all |
615 |
|
616 |
const DecDigit = / [0-9] / |
617 |
const BinDigit = / [0-1] / |
618 |
const OctDigit = / [0-7] / |
619 |
const HexDigit = / [0-9 a-f A-F] / # note: not splicing Digit into character class |
620 |
|
621 |
const DecInt = / [1-9] ('_'? DecDigit)* | '0'+ ('_'? '0')* / |
622 |
const BinInt = / '0' [b B] ('_'? BinDigit)+ / |
623 |
const OctInt = / '0' [o O] ('_'? OctDigit)+ / |
624 |
const HexInt = / '0' [x X] ('_'? HexDigit)+ / |
625 |
|
626 |
const Integer = / %start (DecInt | BinInt | OctInt | HexInt) %end / |
627 |
|
628 |
#echo $Integer |
629 |
|
630 |
if ( '123' ~ Integer) { echo 'Y' } |
631 |
if ( 'zzz' !~ Integer) { echo 'N' } |
632 |
|
633 |
if ('123_000' ~ Integer) { echo 'Y decimal' } |
634 |
if ('000_123' !~ Integer) { echo 'N decimal' } |
635 |
|
636 |
if ( '0b100' ~ Integer) { echo 'Y binary' } |
637 |
if ( '0b102' !~ Integer) { echo 'N binary' } |
638 |
|
639 |
if ( '0o755' ~ Integer) { echo 'Y octal' } |
640 |
if ( '0o778' !~ Integer) { echo 'N octal' } |
641 |
|
642 |
if ( '0xFF' ~ Integer) { echo 'Y hex' } |
643 |
if ( '0xFG' !~ Integer) { echo 'N hex' } |
644 |
|
645 |
## STDOUT: |
646 |
Y |
647 |
N |
648 |
Y decimal |
649 |
N decimal |
650 |
Y binary |
651 |
N binary |
652 |
Y octal |
653 |
N octal |
654 |
Y hex |
655 |
N hex |
656 |
## END |
657 |
|
658 |
#### Invalid sh operation on eggex |
659 |
var pat = / d+ / |
660 |
#pat[invalid]=1 |
661 |
pat[invalid]+=1 |
662 |
## status: 1 |
663 |
## stdout-json: "" |
664 |
|
665 |
|
666 |
#### Regex in a loop (bug regression) |
667 |
|
668 |
shopt --set oil:all |
669 |
|
670 |
var content = [ 1, 2 ] |
671 |
var i = 0 |
672 |
while (i < len(content)) { |
673 |
var line = content[i] |
674 |
write $[content[i]] |
675 |
if (Str(line) ~ / s* 'imports' s* '=' s* .* /) { |
676 |
exit |
677 |
} |
678 |
setvar i += 1 |
679 |
} |
680 |
|
681 |
## STDOUT: |
682 |
1 |
683 |
2 |
684 |
## END |
685 |
|
686 |
|
687 |
#### Regex in a loop depending on var |
688 |
|
689 |
shopt --set oil:all |
690 |
|
691 |
var lines = ['foo', 'bar'] |
692 |
for line in (lines) { |
693 |
write "line $line" |
694 |
|
695 |
# = / $line / |
696 |
|
697 |
if ("x$line" ~ / dot @line /) { |
698 |
#if (line ~ / $line /) { |
699 |
write "matched $line" |
700 |
} |
701 |
} |
702 |
|
703 |
## STDOUT: |
704 |
line foo |
705 |
matched foo |
706 |
line bar |
707 |
matched bar |
708 |
## END |
709 |
|
710 |
|
711 |
#### Regex with [ (bug regression) |
712 |
shopt --set oil:all |
713 |
|
714 |
if ('[' ~ / '[' /) { |
715 |
echo 'sq' |
716 |
} |
717 |
|
718 |
if ('[' ~ / [ '[' ] /) { |
719 |
echo 'char class' |
720 |
} |
721 |
|
722 |
# User-reported string |
723 |
if ("a" ~ / s* 'imports' s* '=' s* '[' /) { |
724 |
echo "yes" |
725 |
} |
726 |
|
727 |
## STDOUT: |
728 |
sq |
729 |
char class |
730 |
## END |
731 |
|
732 |
#### Operator chars in char classes (bash-like) |
733 |
|
734 |
pat='[-]' |
735 |
[[ '-' =~ $pat ]] && echo hyphen |
736 |
[[ '\' =~ $pat ]] && echo FAIL |
737 |
|
738 |
pat='[\]' |
739 |
[[ '\' =~ $pat ]] && echo backslash |
740 |
[[ '-' =~ $pat ]] && echo FAIL |
741 |
|
742 |
pat='[]]' |
743 |
[[ ']' =~ $pat ]] && echo 'right bracket' |
744 |
[[ '[' =~ $pat ]] && echo FAIL |
745 |
|
746 |
pat='[[]' |
747 |
[[ '[' =~ $pat ]] && echo 'left bracket' |
748 |
[[ ']' =~ $pat ]] && echo FAIL |
749 |
|
750 |
pat='[.]' |
751 |
[[ '.' =~ $pat ]] && echo period |
752 |
[[ '\' =~ $pat ]] && echo FAIL |
753 |
|
754 |
pat='[\^]' |
755 |
[[ '^' =~ $pat ]] && echo caret |
756 |
[[ '\' =~ $pat ]] && echo 'no way to have [^]' |
757 |
|
758 |
## STDOUT: |
759 |
hyphen |
760 |
backslash |
761 |
right bracket |
762 |
left bracket |
763 |
period |
764 |
caret |
765 |
no way to have [^] |
766 |
## END |
767 |
|
768 |
#### Operator chars in char classes (eggex) |
769 |
shopt --set oil:upgrade |
770 |
|
771 |
var pat = / ['-'] / |
772 |
#echo PAT=$pat |
773 |
if ('-' ~ pat) { echo hyphen } |
774 |
if ($'\\' ~ pat) { echo FAIL } |
775 |
|
776 |
var pat = / [ \\ ] / |
777 |
[[ '\' =~ $pat ]] && echo backslash |
778 |
[[ '-' =~ $pat ]] && echo FAIL |
779 |
|
780 |
var pat = / [ ']' ] / |
781 |
[[ ']' =~ $pat ]] && echo 'right bracket' |
782 |
[[ '[' =~ $pat ]] && echo FAIL |
783 |
|
784 |
var pat = / [ '[' ] / |
785 |
[[ '[' =~ $pat ]] && echo 'left bracket' |
786 |
[[ ']' =~ $pat ]] && echo FAIL |
787 |
|
788 |
var pat = / [ '.' ] / |
789 |
[[ '.' =~ $pat ]] && echo period |
790 |
[[ '\' =~ $pat ]] && echo FAIL |
791 |
|
792 |
var pat = / [ \\ '^' ] / |
793 |
[[ '^' =~ $pat ]] && echo caret |
794 |
[[ '\' =~ $pat ]] && echo 'no way to have [^]' |
795 |
|
796 |
|
797 |
## STDOUT: |
798 |
hyphen |
799 |
backslash |
800 |
right bracket |
801 |
left bracket |
802 |
period |
803 |
caret |
804 |
no way to have [^] |
805 |
## END |
806 |
|
807 |
#### Matching ] and \ and ' and " in character classes |
808 |
shopt -s oil:all |
809 |
|
810 |
# BUG: need C strings in array literal |
811 |
var lines=%( |
812 |
'backslash \' |
813 |
'rbracket ]' |
814 |
'lbracket [' |
815 |
"sq '" |
816 |
'dq "' |
817 |
) |
818 |
|
819 |
# Weird GNU quirk: ] has to come first! |
820 |
# []abc] works. But [abc\]] does NOT work. Stupid rule! |
821 |
|
822 |
var pat = / [ ']' \\ \' \" ] / |
823 |
write pat=$pat |
824 |
write @lines | egrep $pat |
825 |
|
826 |
## STDOUT: |
827 |
pat=[]'"\\] |
828 |
backslash \ |
829 |
rbracket ] |
830 |
sq ' |
831 |
dq " |
832 |
## END |
833 |
|
834 |
#### Matching literal hyphen in character classes |
835 |
shopt -s oil:all |
836 |
|
837 |
var literal = '-' |
838 |
var pat = / [ 'a' 'b' @literal ] / |
839 |
write pat=$pat |
840 |
write 'c-d' 'ab' 'cd' | grep $pat |
841 |
## STDOUT: |
842 |
pat=[ab-] |
843 |
c-d |
844 |
ab |
845 |
## END |
846 |
|
847 |
#### Char class special: ^ - ] \ |
848 |
|
849 |
# See demo/ere-char-class-literals.sh |
850 |
# |
851 |
# \ is special because of gawk |
852 |
|
853 |
shopt -s oil:upgrade |
854 |
|
855 |
|
856 |
# Note: single caret disallowed |
857 |
var caret = / ['^' 'x'] / |
858 |
echo caret=$caret |
859 |
|
860 |
var caret2 = / [ \x5e 'x'] / |
861 |
echo caret2=$caret2 |
862 |
|
863 |
var caret3 = / [ \u{5e} 'x'] / |
864 |
echo caret3=$caret3 |
865 |
|
866 |
if ('x' ~ caret3) { |
867 |
echo 'match x' |
868 |
} |
869 |
if ('^' ~ caret3) { |
870 |
echo 'match ^' |
871 |
} |
872 |
|
873 |
echo --- |
874 |
|
875 |
var hyphen = / ['a' '-' 'b'] / |
876 |
echo hyphen=$hyphen |
877 |
|
878 |
var hyphen2 = / ['a' \x2d 'b' ] / |
879 |
echo hyphen2=$hyphen2 |
880 |
|
881 |
if ('-' ~ hyphen2) { |
882 |
echo 'match -' |
883 |
} |
884 |
|
885 |
if ('a' ~ hyphen2) { |
886 |
echo 'match a' |
887 |
} |
888 |
|
889 |
if ('c' ~ hyphen2) { |
890 |
echo 'match c' |
891 |
} |
892 |
|
893 |
echo --- |
894 |
|
895 |
var rbracket = / [ '[' ']' ] / |
896 |
echo rbracket=$rbracket |
897 |
|
898 |
var rbracket2 = / [ \x5b \x5d ] / |
899 |
echo rbracket2=$rbracket2 |
900 |
|
901 |
if ('[' ~ rbracket2) { |
902 |
echo 'match [' |
903 |
} |
904 |
|
905 |
if (']' ~ rbracket2) { |
906 |
echo 'match ]' |
907 |
} |
908 |
|
909 |
echo --- |
910 |
|
911 |
var backslash = / [ 'x' \\ 'n' ] / |
912 |
echo backslash=$backslash |
913 |
|
914 |
var backslash2 = / [ 'x' \x5c 'n' ] / |
915 |
echo backslash2=$backslash2 |
916 |
|
917 |
var backslash3 = / [ 'x' $'\\' 'n' ] / |
918 |
echo backslash3=$backslash3 |
919 |
|
920 |
if ('x' ~ backslash3) { |
921 |
echo 'match x' |
922 |
} |
923 |
|
924 |
if ('n' ~ backslash3) { |
925 |
echo 'match n' |
926 |
} |
927 |
|
928 |
if ($'\\' ~ backslash3) { |
929 |
echo 'match backslash' |
930 |
} |
931 |
|
932 |
if ($'\n' ~ backslash3) { |
933 |
echo 'match nnewline' |
934 |
} |
935 |
|
936 |
|
937 |
## STDOUT: |
938 |
caret=[x^] |
939 |
caret2=[x^] |
940 |
caret3=[x^] |
941 |
match x |
942 |
match ^ |
943 |
--- |
944 |
hyphen=[ab-] |
945 |
hyphen2=[ab-] |
946 |
match - |
947 |
match a |
948 |
--- |
949 |
rbracket=[][] |
950 |
rbracket2=[][] |
951 |
match [ |
952 |
match ] |
953 |
--- |
954 |
backslash=[xn\\] |
955 |
backslash2=[xn\\] |
956 |
backslash3=[xn\\] |
957 |
match x |
958 |
match n |
959 |
match backslash |
960 |
## END |
961 |
|