1 # Test out Oil's regular expression syntax.
2
3 #### /^.$/
4 shopt -s oil:all
5 var pat = ''
6
7 setvar pat = /^.$/
8 echo pat=$pat
9
10 setvar pat = /%start dot %end/
11 echo pat=$pat
12
13 if ('' ~ pat) { # ERE syntax; the empty string has no char for dot to match
14 echo yes
15 } else {
16 echo no
17 }
18 # Passing the bare name pat is the same as passing $pat
19 if ('f' ~ pat) { # ERE syntax; exactly one char, so this matches
20 echo yes
21 } else {
22 echo no
23 }
24
25 ## STDOUT:
26 pat=^.$
27 pat=^.$
28 no
29 yes
30 ## END
31
32
33 #### /.+/
34 shopt -s oil:all
35
36 var pat = /.+/
37 echo $pat
38
39 var s = 'foo'
40 if (s ~ pat) { # ERE syntax
41 echo yes
42 }
43 var empty = ''
44 if (empty ~ pat) { echo yes } else { echo no }
45 ## STDOUT:
46 .+
47 yes
48 no
49 ## END
50
51 #### Positional captures with _match
52 shopt -s oil:all
53
54 var x = 'zz 2020-08-20'
55
56 if [[ $x =~ ([[:digit:]]+)-([[:digit:]]+) ]] {
57 argv.py "${BASH_REMATCH[@]}"
58 }
59
60 # This assignment is a NO-OP: the variable is SHADOWED by the special name
61 # BASH_REMATCH. That is considered acceptable for now.
62 setvar BASH_REMATCH = %(reset)
63
64 if (x ~ /<d+> '-' <d+>/) {
65 argv.py "${BASH_REMATCH[@]}"
66 argv.py $_match(0) $_match(1) $_match(2)
67
68 argv.py $_match() # synonym for _match(0)
69
70 # TODO: Also test _start() and _end() here; they currently have a separate case
71 }
72 ## STDOUT:
73 ['2020-08', '2020', '08']
74 ['2020-08', '2020', '08']
75 ['2020-08', '2020', '08']
76 ['2020-08']
77 ## END
78
79 #### s ~ regex and s !~ regex
80 shopt -s oil:basic
81
82 var s = 'foo'
83 if (s ~ '.([[:alpha:]]+)') { # ERE syntax
84 echo matches
85 argv.py $_match(0) $_match(1)
86 }
87 if (s !~ '[[:digit:]]+') { # captures from the earlier match are still visible inside
88 echo "does not match"
89 argv.py $_match(0) $_match(1)
90 }
91
92 if (s ~ '[[:digit:]]+') {
93 echo "matches"
94 }
95 # The failed ~ above should have cleared the earlier captures.
96 # The check below expects null for them, rather than ''.
97 var x = _match(0)
98 var y = _match(1)
99 if (x === null and y === null) {
100 echo 'cleared'
101 }
102
103 ## STDOUT:
104 matches
105 ['foo', 'oo']
106 does not match
107 ['foo', 'oo']
108 cleared
109 ## END
110
111 #### _start() and _end()
112 shopt -s oil:basic
113
114 var s = 'foo123bar'
115 if (s ~ /digit+/) {
116 echo start=$_start() end=$_end()
117 }
118
119 if (s ~ / word+ <digit+> /) {
120 echo start=$_start(1) end=$_end(1)
121 }
122 ## STDOUT:
123 start=3 end=6
124 start=3 end=6
125 ## END
126
127 #### Repeat {1,3} etc.
128 var pat = null
129
130 setvar pat = /d{2}/
131 echo $pat
132 setvar pat = /d{1,3}/
133 echo $pat
134 setvar pat = /d{1,}/
135 echo $pat
136 setvar pat = /d{,3}/
137 echo $pat
138
139
140 ## STDOUT:
141 [[:digit:]]{2}
142 [[:digit:]]{1,3}
143 [[:digit:]]{1,}
144 [[:digit:]]{,3}
145 ## END
146
147
148 #### d+ digit+ !d+ !digit+
149 shopt -s oil:all
150
151 var pat = ''
152
153 setvar pat = /d+/
154 echo $pat
155 if ('42' ~ pat) { echo yes }
156
157 var empty = ''
158 if (empty ~ pat) { echo yes } else { echo no }
159
160 setvar pat = /digit+/
161 echo $pat
162 setvar pat = /!d+/
163 echo $pat
164 setvar pat = /!digit+/
165 echo $pat
166
167
168 ## STDOUT:
169 [[:digit:]]+
170 yes
171 no
172 [[:digit:]]+
173 [^[:digit:]]+
174 [^[:digit:]]+
175 ## END
176
177 #### Alternation and sequence
178 var pat = ''
179 setvar pat = /s d+ | w*/
180 echo $pat
181 setvar pat = /s d+ or w*/
182 echo $pat
183 ## STDOUT:
184 [[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
185 [[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
186 ## END
187
188 #### Char Class Ranges
189 shopt -s oil:all
190
191 var pat = ''
192 setvar pat = /[0-9 a-f]+/
193 echo $pat
194 # Equivalent: the same ranges written with quoted endpoints
195 setvar pat = /['0' - '9' 'a' - 'f']+/
196 echo $pat
197
198 if ('0123' ~ pat) { echo yes } else { echo no }
199 if ('zzz' ~ pat) { echo yes } else { echo no }
200 if ('' ~ pat) { echo yes } else { echo no }
201 ## STDOUT:
202 [0-9a-f]+
203 [0-9a-f]+
204 yes
205 no
206 no
207 ## END
208
209 #### Char Class Set
210 shopt -s oil:all
211 var pat = ''
212
213 # This is NOT allowed; the quoted form further down is used instead
214 # setvar pat = /[a b c]+/
215
216 setvar pat = /['abc']+/
217 echo $pat
218
219 if ('cbcb' ~ pat) { echo yes } else { echo no }
220 if ('0123' ~ pat) { echo yes } else { echo no }
221 if ('' ~ pat) { echo yes } else { echo no }
222 ## STDOUT:
223 [abc]+
224 yes
225 no
226 no
227 ## END
228
229 #### Range with escaped characters
230 shopt -s oil:all
231
232 var pat = null
233
234 setvar pat = / [ \x00 - \x0f ] /
235 echo $pat | od -A n -t x1
236
237 ## STDOUT:
238 5b 00 2d 0f 5d 0a
239 ## END
240
241
242 #### Group ()
243 shopt -s oil:all
244 var pat = ''
245
246 setvar pat = /(%start s or d d)/
247 echo $pat
248
249 if (' foo' ~ pat) { echo yes } else { echo no }
250 if ('-00-' ~ pat) { echo yes } else { echo no }
251 if ('foo' ~ pat) { echo yes } else { echo no }
252
253 ## STDOUT:
254 (^[[:space:]]|[[:digit:]][[:digit:]])
255 yes
256 yes
257 no
258 ## END
259
260 #### Capture is acceptable as a group
261 shopt -s oil:all
262 var pat = /<%start s | d d>/
263 echo $pat
264 ## STDOUT:
265 (^[[:space:]]|[[:digit:]][[:digit:]])
266 ## END
267
268 #### Named captures with _match
269 shopt -s oil:all
270
271 var x = 'zz 2020-08-20'
272
273 if (x ~ /<d+ : year> '-' <d+ : month>/) {
274 argv.py $_match('year') $_match('month')
275 }
276 ## STDOUT:
277 ['2020', '08']
278 ## END
279
280 #### Named Capture Decays Without Name
281 shopt -s oil:all
282 var pat = /<d+ : month>/
283 echo $pat
284
285 if ('123' ~ pat) {
286 echo yes
287 }
288
289 ## STDOUT:
290 ([[:digit:]]+)
291 yes
292 ## END
293
294 #### Named Capture With ~ Assigns Variable
295 shopt -s oil:all
296 var pat = /<d+ : month>/
297 echo $pat
298
299 if ('123' ~ pat) {
300 echo yes
301 = month
302 }
303 ## STDOUT:
304 ([[:digit:]]+)
305 yes
306 TODO MONTH
307 ## END
308
309 #### literal ''
310 shopt -s oil:all
311 var pat = ''
312
313 setvar pat = /'abc' 'def'/
314 echo $pat
315
316 #setvar pat = /'abc' '^ + * ?'/
317 #echo $pat
318
319 if ('abcde' ~ pat) { echo yes } else { echo no }
320 if ('abcdef' ~ pat) { echo yes } else { echo no }
321
322 ## STDOUT:
323 abcdef
324 no
325 yes
326 ## END
327
328 #### double quoted, $x, and ${x}
329 shopt -s oil:all
330 var pat = ''
331
332 var x = 'x'
333 var y = 'y'
334 setvar pat = / $x ${x} "abc" "$x${y}"/
335 echo $pat
336
337 if ('xxabcx' ~ pat) { echo yes } else { echo no }
338 if ('xxabcxyf' ~ pat) { echo yes } else { echo no }
339
340 ## STDOUT:
341 xxabcxy
342 no
343 yes
344 ## END
345
346 #### @splice
347 shopt -s oil:all
348 var d = /d+/;
349 var ip = / @d '.' @d '.' @d '.' @d /
350 echo $ip
351 if ('0.0.0.0' ~ ip) { echo yes } else { echo no }
352 if ('0.0.0' ~ ip) { echo yes } else { echo no }
353 ## STDOUT:
354 [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
355 yes
356 no
357 ## END
358
359 #### splice with capital letters
360 shopt -s oil:all
361 var D = /d+/;
362 var ip = / D '.' D '.' D '.' D /
363 echo $ip
364 if ('0.0.0.0' ~ ip) { echo yes } else { echo no }
365 if ('0.0.0' ~ ip) { echo yes } else { echo no }
366 ## STDOUT:
367 [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
368 yes
369 no
370 ## END
371
372 #### Matching escaped tab character
373 shopt -s oil:all
374
375 var lines=%($'aa\tbb' $'cc\tdd')
376
377 var pat = / ('a' [\t] 'b') /
378 write pat=$pat
379 write @lines | grep -E $pat # grep -E is the standard spelling; egrep is obsolescent
380
381 ## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
382
383 #### Match unicode char
384 shopt -s oil:all
385 var pat = / 'a' dot 'b' /
386
387 if ('axb' ~ pat ) { echo yes } else { echo no }
388
389 # mu character
390 if ($'a\xce\xbcb' ~ pat ) { echo yes } else { echo no }
391
392 if ('aZZb' ~ pat ) { echo yes } else { echo no }
393 ## STDOUT:
394 yes
395 yes
396 no
397 ## END
398
399 #### Match non-ASCII byte denoted using $'\xff' (TODO: LANG=C)
400
401 # NOTE: This pattern doesn't work with en_US.UTF-8. I think the user should
402 # set LANG=C or shopt --unset libc_utf8.
403
404 shopt -s oil:all
405 var pat = /[ $'\xff' ]/;
406
407 echo $pat | od -A n -t x1
408 if ($'\xff' ~ pat) { echo yes } else { echo no }
409 if ($'\xfe' ~ pat) { echo yes } else { echo no }
410
411 ## STDOUT:
412 5b ff 5d 0a
413 yes
414 no
415 ## END
416
417 #### Match non-ASCII byte denoted using \xff (TODO: LANG=C)
418 shopt -s oil:all
419 var pat = /[ \xff ]/;
420
421 echo $pat | od -A n -t x1
422 if ($'\xff' ~ pat) { echo yes } else { echo no }
423 if ($'\xfe' ~ pat) { echo yes } else { echo no }
424
425 ## STDOUT:
426 5b ff 5d 0a
427 yes
428 no
429 ## END
430
431 #### ERE can express Unicode escapes that are in the ASCII range
432 shopt -s oil:all
433 var pat = /[ \u{7f} ]/;
434
435 echo $pat | od -A n -t x1
436 if ($'\x7f' ~ pat) { echo yes } else { echo no }
437 if ($'\x7e' ~ pat) { echo yes } else { echo no }
438
439 var pat2 = /[ \u{7f} ]/;
440 var pat3 = /[ \u{0007f} ]/;
441 test "$pat2" = "$pat3" && echo 'equal'
442
443 ## STDOUT:
444 5b 7f 5d 0a
445 yes
446 no
447 equal
448 ## END
449
450 #### ERE can't express higher Unicode escapes
451 shopt -s oil:all
452 var pat = /[ \u{ff} ]/;
453
454 echo $pat | od -A n -t x1
455 if ($'\x7f' ~ pat) { echo yes } else { echo no }
456 if ($'\x7e' ~ pat) { echo yes } else { echo no }
457
458 ## status: 1
459 ## stdout-json: ""
460
461 #### non-ASCII bytes must be singleton terms, e.g. '\x7f\xff' is disallowed
462 var bytes = $'\x7f\xff'
463 var pat = / [ $bytes ] /
464 echo $pat
465 ## status: 1
466 ## stdout-json: ""
467
468 #### Matching escaped tab character
469 shopt -s oil:all
470
471 # Was 'BUG: need C strings in array literal'; $'...' is used below -- TODO confirm stale
472 var lines=%($'aa\tbb' $'cc\tdd')
473
474 var pat = / ('a' [\t] 'b') /
475 write pat=$pat
476 write @lines | grep -E $pat # grep -E is the standard spelling; egrep is obsolescent
477
478 ## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
479
480 #### Matching ] and \ and ' and " in character classes
481 shopt -s oil:all
482
483 # BUG: need C strings in array literal
484 var lines=%(
485 'backslash \'
486 'rbracket ]'
487 'lbracket ['
488 "sq '"
489 'dq "'
490 )
491
492 # POSIX bracket-expression rule (not only GNU): a literal ] has to come first!
493 # []abc] works. But [abc\]] does NOT work, because \ is literal inside [...].
494
495 var pat = / [ ']' \\ \' \" ] /
496 write pat=$pat
497 write @lines | grep -E $pat # grep -E is the standard spelling; egrep is obsolescent
498
499 ## STDOUT:
500 pat=[]\\'"]
501 backslash \
502 rbracket ]
503 sq '
504 dq "
505 ## END
506
507 #### Matching literal hyphen in character classes
508 shopt -s oil:all
509
510 var literal = '-'
511 var pat = / [ 'a' $literal 'b' ${literal} "-" ] /
512 write pat=$pat
513 write 'c-d' 'ab' 'cd' | grep $pat
514 ## STDOUT:
515 pat=[a\-b\-\-]
516 c-d
517 ab
518 ## END
519
520 #### Repeated String Literal With Single Char
521 shopt -s oil:all
522
523 var literal = 'f'
524 var pat = null
525
526 setvar pat = / %start $literal+ %end /
527 echo $pat
528 setvar pat = / %start ($literal)+ %end /
529 echo $pat
530
531 if ('fff' ~ pat) { echo yes }
532 if ('foo' !~ pat) { echo no }
533
534 ## STDOUT:
535 ^f+$
536 ^(f)+$
537 yes
538 no
539 ## END
540
541 #### Error when unparenthesized string of more than one character is repeated
542 shopt -s oil:all
543
544 var literal = 'foo'
545 var pat = null
546
547 setvar pat = / %start $literal+ %end /
548 echo $pat
549 setvar pat = / %start ($literal)+ %end /
550 echo $pat
551
552 if ('foofoo' ~ pat) { echo yes }
553 if ('foof' !~ pat) { echo no }
554
555 ## status: 1
556 ## stdout-json: ""
557
558 #### Instead of $'foo\\bar' use 'foo' \\ 'bar'
559 shopt -s oil:all
560 var pat = /'foo' \\ 'bar'/
561 echo $pat
562
563 if (r'foo\bar' ~ pat) { echo yes }
564 if (r'foo.bar' !~ pat) { echo no }
565 ## STDOUT:
566 foo\\bar
567 yes
568 no
569 ## END
570
571 #### Negation of Character Class ![a-z]
572 shopt -s oil:all
573
574 var pat = / ![ a-z ] /
575 echo $pat
576
577 if ('0' ~ pat) { echo yes }
578 if ('a' !~ pat) { echo no }
579
580 ## STDOUT:
581 [^a-z]
582 yes
583 no
584 ## END
585
586 #### Posix and Perl class in class literals
587 shopt -s oil:all
588
589 var pat = null
590
591 setvar pat = / [ space 'z' ] /
592 echo $pat
593 #setvar pat = / [ ~space 'z' ] /
594 #echo $pat
595
596 # PROBLEM: can't negate individual POSIX classes inside a set. They would have
597 # to be a Perl class to be \D or \S.
598 # [^[:space:]z] negates the whole thing (z included)!
599 # [^[:space:]] is the only way to negate just the class, and it stands alone.
600
601 setvar pat = / [ digit 'z' ] /
602 echo $pat
603 #setvar pat = / [ ~digit 'z' ] /
604 #echo $pat
605
606 ## STDOUT:
607 [[:space:]z]
608 [[:digit:]z]
609 ## END
610
611 #### [!d] can't be negated because it's a literal character
612 setvar pat = / [ !d 'z' ] /
613 echo $pat
614 ## status: 2
615 ## stdout-json: ""
616
617 #### [!digit] can't be negated in POSIX ERE (but yes in Perl)
618 var pat = null
619 setvar pat = / [ !digit 'z' ] /
620 echo $pat
621 ## status: 1
622 ## stdout-json: ""
623
624 #### Long Python Example
625
626 # https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
627
628 # integer ::= decinteger | bininteger | octinteger | hexinteger
629 # decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")*
630 # bininteger ::= "0" ("b" | "B") (["_"] bindigit)+
631 # octinteger ::= "0" ("o" | "O") (["_"] octdigit)+
632 # hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+
633 # nonzerodigit ::= "1"..."9"
634 # digit ::= "0"..."9"
635 # bindigit ::= "0" | "1"
636 # octdigit ::= "0"..."7"
637 # hexdigit ::= digit | "a"..."f" | "A"..."F"
638
639 shopt -s oil:all
640
641 DecDigit = / [0-9] /
642 BinDigit = / [0-1] /
643 OctDigit = / [0-7] /
644 HexDigit = / [0-9 a-f A-F] / # note: not splicing Digit into character class
645
646 DecInt = / [1-9] ('_'? DecDigit)* | '0'+ ('_'? '0')* /
647 BinInt = / '0' [b B] ('_'? BinDigit)+ /
648 OctInt = / '0' [o O] ('_'? OctDigit)+ /
649 HexInt = / '0' [x X] ('_'? HexDigit)+ /
650
651 Integer = / %start (DecInt | BinInt | OctInt | HexInt) %end /
652
653 #echo $Integer
654
655 if ( '123' ~ Integer) { echo 'Y' }
656 if ( 'zzz' !~ Integer) { echo 'N' }
657
658 if ('123_000' ~ Integer) { echo 'Y decimal' }
659 if ('000_123' !~ Integer) { echo 'N decimal' }
660
661 if ( '0b100' ~ Integer) { echo 'Y binary' }
662 if ( '0b102' !~ Integer) { echo 'N binary' }
663
664 if ( '0o755' ~ Integer) { echo 'Y octal' }
665 if ( '0o778' !~ Integer) { echo 'N octal' }
666
667 if ( '0xFF' ~ Integer) { echo 'Y hex' }
668 if ( '0xFG' !~ Integer) { echo 'N hex' }
669
670 ## STDOUT:
671 Y
672 N
673 Y decimal
674 N decimal
675 Y binary
676 N binary
677 Y octal
678 N octal
679 Y hex
680 N hex
681 ## END
682
683 #### Invalid sh operation on eggex
684 var pat = / d+ /
685 #pat[invalid]=1
686 pat[invalid]+=1
687 ## status: 1
688 ## stdout-json: ""
689