1 ## oils_failures_allowed: 0
2
3 #### s ~ regex and s !~ regex
4 shopt -s ysh:upgrade
5 # The pattern operands in this case are ERE strings, not Eggexes
6 var s = 'foo'
7 if (s ~ '.([[:alpha:]]+)') { # ERE syntax
8 echo matches
9 argv.py $[_group(0)] $[_group(1)]
10 }
11 if (s !~ '[[:digit:]]+') {
12 echo "does not match"
13 argv.py $[_group(0)] $[_group(1)] # expected output: still the groups of the match above
14 }
15
16 if (s ~ '[[:digit:]]+') {
17 echo "matches"
18 }
19 # The match state should be cleared now:
20 # _group() is expected to fail with status 3 rather than return a value
21 try {
22 var x = _group(0)
23 }
24 if (_status === 3) {
25 echo 'got expected status 3'
26 }
27
28 try {
29 var y = _group(1)
30 }
31 if (_status === 3) {
32 echo 'got expected status 3'
33 }
34
35 ## STDOUT:
36 matches
37 ['foo', 'oo']
38 does not match
39 ['foo', 'oo']
40 got expected status 3
41 got expected status 3
42 ## END
43
44 #### Invalid regex has libc error message
45
46 shopt -s ysh:upgrade
47
48 # The message text itself is hard to test: can we capture YSH's own stderr from within YSH?
49 #fopen 2>err.txt {
50 # if ('abc' ~ '+') {
51 # echo 'bad'
52 # }
53 #}
54 # So only the exit status (2) and the empty stdout are checked for the invalid pattern '+'
55 if ('abc' ~ '+') {
56 echo 'bad'
57 }
58
59 ## status: 2
60 ## STDOUT:
61 ## END
62
63 #### Eggex flags to ignore case are respected
64 shopt -s ysh:upgrade
65 # Both spellings of the ignore-case flag are used below: 'i' and 'reg_icase'
66 # 'i' is based on Python's spelling
67 var pat = / 'abc' ; i /
68 var pat2 = / @pat 'def' ; reg_icase / # this is allowed
69
70 if ('-abcdef-' ~ pat2) {
71 echo 'yes'
72 }
73
74 if ('-ABCDEF-' ~ pat2) {
75 echo 'yes'
76 }
77
78 if ('ABCDE' ~ pat2) { # the trailing 'F' is missing, so this must not match
79 echo 'BUG'
80 }
81
82 ## STDOUT:
83 yes
84 yes
85 ## END
86
87 #### Eggex flags to treat newlines as special are respected
88 shopt -s ysh:upgrade
89 # Default flags: %end / %start are not expected to match next to an embedded newline
90 if (u'abc123\n' ~ / digit %end /) {
91 echo 'BUG'
92 }
93 if (u'abc\n123' ~ / %start digit /) {
94 echo 'BUG'
95 }
96 # With reg_newline the same anchors are expected to match around the newline
97 if (u'abc123\n' ~ / digit %end ; reg_newline /) {
98 echo 'yes'
99 }
100 if (u'abc\n123' ~ / %start digit ; reg_newline /) {
101 echo 'yes'
102 }
103 # Default flags: . and !digit are expected to match a newline character
104 if (u'\n' ~ / . /) {
105 echo 'yes'
106 }
107 if (u'\n' ~ / !digit /) {
108 echo 'yes'
109 }
110 # With reg_newline, . and !digit must no longer match a newline
111 if (u'\n' ~ / . ; reg_newline /) {
112 echo 'BUG'
113 }
114 if (u'\n' ~ / !digit ; reg_newline /) {
115 echo 'BUG'
116 }
117
118 ## STDOUT:
119 yes
120 yes
121 yes
122 yes
123 ## END
124
125 #### Positional captures with _group
126 shopt -s ysh:upgrade
127 # Compares bash-style BASH_REMATCH with _group() for the same two positional captures
128 var x = 'zz 2020-08-20'
129
130 if [[ $x =~ ([[:digit:]]+)-([[:digit:]]+) ]] {
131 argv.py "${BASH_REMATCH[@]}"
132 }
133
134 # THIS IS A NO-OP. The variable is SHADOWED by the special name.
135 # I think that's OK.
136 setvar BASH_REMATCH = :| reset |
137
138 if (x ~ /<capture d+> '-' <capture d+>/) {
139 argv.py "${BASH_REMATCH[@]}" # expected to show the new match, not 'reset'
140 argv.py $[_group(0)] $[_group(1)] $[_group(2)]
141
142 # _start() and _end() are exercised by the separate '_start() and _end()' case
143 }
144 ## STDOUT:
145 ['2020-08', '2020', '08']
146 ['2020-08', '2020', '08']
147 ['2020-08', '2020', '08']
148 ## END
149
150 #### _group() returns null when group doesn't match
151 shopt -s ysh:upgrade
152 # Only the second alternative matches 'b', so group 1 is expected to print as 'null'
153 var pat = / <capture 'a'> | <capture 'b'> /
154 if ('b' ~ pat) {
155 echo "$[_group(1)] $[_group(2)]"
156 }
157 ## STDOUT:
158 null b
159 ## END
160
161 #### _start() and _end()
162 shopt -s ysh:upgrade
163 # _start(i) / _end(i) report where group i matched within s (group 0 is the whole match)
164 var s = 'foo123bar'
165 if (s ~ /digit+/) {
166 echo start=$[_start(0)] end=$[_end(0)]
167 }
168 echo ---
169
170 if (s ~ / <capture [a-z]+> <capture digit+> /) {
171 echo start=$[_start(1)] end=$[_end(1)]
172 echo start=$[_start(2)] end=$[_end(2)]
173 }
174 echo ---
175 # Alternation: the first alternative matches, so group 2 is expected to report -1 / -1
176 if (s ~ / <capture [a-z]+> | <capture digit+> /) {
177 echo start=$[_start(1)] end=$[_end(1)]
178 echo start=$[_start(2)] end=$[_end(2)]
179 }
180
181 ## STDOUT:
182 start=3 end=6
183 ---
184 start=0 end=3
185 start=3 end=6
186 ---
187 start=0 end=3
188 start=-1 end=-1
189 ## END
190
191 #### Str->search() method returns value.Match object
192 # The result of search() is queried with start() / end() / group() per group index
193 var s = '= Hi5- Bye6-'
194
195 var m = s => search(/ <capture [a-z]+ > <capture d+> '-' ; i /)
196 echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]"
197 echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
198 echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
199
200 echo ---
201 # Same pattern again, starting at the end of the first match
202 var pos = m => end(0) # search from end position
203 var m = s => search(/ <capture [a-z]+ > <capture d+> '-' ; i /, pos=pos)
204 echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]"
205 echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
206 echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
207
208 ## STDOUT:
209 g0 2 6 Hi5-
210 g1 2 4 Hi
211 g2 4 5 5
212 ---
213 g0 7 12 Bye6-
214 g1 7 10 Bye
215 g2 10 11 6
216 ## END
217
218 #### Str->search() only matches %start ^ when pos == 0
219
220 shopt -s ysh:upgrade
221 # Each pattern is searched repeatedly, resuming at the end of the previous match
222 var anchored = / %start <capture d+> '-' /
223 var free = / <capture d+> '-' /
224
225 var s = '12-34-'
226
227 for pat in ([anchored, free]) {
228 echo "pat=$pat"
229
230 var pos = 0
231 while (true) {
232 var m = s => search(pat, pos=pos)
233 if (not m) { # no further match: the anchored pattern is expected to stop after '12-'
234 break
235 }
236 echo $[m => group(0)]
237 setvar pos = m => end(0)
238 }
239
240 }
241
242 ## STDOUT:
243 pat=^([[:digit:]]+)-
244 12-
245 pat=([[:digit:]]+)-
246 12-
247 34-
248 ## END
249
250
251 #### search() and leftMatch() accept ERE string
252 # Both methods are given a plain ERE string here instead of an Eggex
253 var s = '= hi5- bye6-'
254
255 var m = s => search('([[:alpha:]]+)([[:digit:]]+)-')
256 echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]"
257 echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
258 echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
259 echo ---
260 # leftMatch() runs on the slice s[2:], so the expected positions are relative to the slice
261 var m = s[2:] => leftMatch('([[:alpha:]]+)([[:digit:]]+)-')
262 echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]"
263 echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
264 echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
265
266 ## STDOUT:
267 g0 2 6 hi5-
268 g1 2 4 hi
269 g2 4 5 5
270 ---
271 g0 0 4 hi5-
272 g1 0 2 hi
273 g2 2 3 5
274 ## END
275
276 #### Str=>leftMatch() can implement lexer pattern
277
278 shopt -s ysh:upgrade
279 # One capture group per token kind; the group that isn't null shows which kind matched
280 var lexer = / <capture d+> | <capture [a-z]+> | <capture s+> /
281 #echo $lexer
282
283 proc show-tokens (s) {
284 var pos = 0
285 # Print the position and the three groups for each token until nothing matches at pos
286 while (true) {
287 echo "pos=$pos"
288
289 var m = s=>leftMatch(lexer, pos=pos)
290 if (not m) {
291 break
292 }
293 # TODO: add groups()
294 #var groups = [m => group(1), m => group(2), m => group(3)]
295 echo "$[m => group(1)]/$[m => group(2)]/$[m => group(3)]/"
296
297 echo
298
299 setvar pos = m => end(0) # continue right after the token just matched
300 }
301 }
302
303 show-tokens 'ab 12'
304
305 echo '==='
306
307 # '+' isn't matched by any lexer alternative, so the loop is expected to stop at pos=2
308 show-tokens 'ab+12'
309
310 ## STDOUT:
311 pos=0
312 null/ab/null/
313
314 pos=2
315 null/null/ /
316
317 pos=3
318 12/null/null/
319
320 pos=5
321 ===
322 pos=0
323 null/ab/null/
324
325 pos=2
326 ## END
327
328 #### Named captures with m => group()
329 shopt -s ysh:all
330 # group() / start() / end() on the Match object accept a capture name
331 var s = 'zz 2020-08-20'
332 var pat = /<capture d+ as year> '-' <capture d+ as month>/
333
334 var m = s => search(pat)
335 argv.py $[m => group('year')] $[m => group('month')]
336 echo $[m => start('year')] $[m => end('year')]
337 echo $[m => start('month')] $[m => end('month')]
338 # 'oops' is not a capture name in pat: the script is expected to exit with status 3 here
339 argv.py $[m => group('oops')]
340 echo 'error' # must not be reached (absent from the expected output)
341
342 ## status: 3
343 ## STDOUT:
344 ['2020', '08']
345 3 7
346 8 10
347 ## END
348
349 #### Named captures with _group() _start() _end()
350 shopt -s ysh:all
351 # _group() / _start() / _end() accept a capture name after a successful ~ match
352 var x = 'zz 2020-08-20'
353
354 if (x ~ /<capture d+ as year> '-' <capture d+ as month>/) {
355 argv.py $[_group('year')] $[_group('month')]
356 echo $[_start('year')] $[_end('year')]
357 echo $[_start('month')] $[_end('month')]
358 }
359 # 'oops' is not a capture name in the pattern: expected to exit with status 3
360 argv.py $[_group('oops')]
361
362 ## status: 3
363 ## STDOUT:
364 ['2020', '08']
365 3 7
366 8 10
367 ## END
368
369 #### Named Capture Decays Without Name
370 shopt -s ysh:all
371 var pat = /<capture d+ as month>/
372 echo $pat # expected: a plain ( ) group, with no trace of the name 'month'
373 # The pattern must still match after being stringified
374 if ('123' ~ pat) {
375 echo yes
376 }
377
378 ## STDOUT:
379 ([[:digit:]]+)
380 yes
381 ## END
382
383 #### Nested Named Capture Uses ( ordering
384
385 shopt -s ysh:upgrade
386 # Date and Time carry their own named captures and are nested inside captures of pat
387 var Date = /<capture d+ as year> '-' <capture d+ as month>/
388 var Time = /<capture d+ as hour> ':' <capture d+ as minute> (':' <capture d+ as secs>)? /
389
390 var pat = / 'when: ' (<capture Date> | <capture Time as two>) /
391 #echo $pat
392 # Prints groups 0-2 by number, then every named group, for the Match m
393 proc show-groups (; m) {
394 echo 0 $[m => group(0)]
395 echo 1 $[m => group(1)] # this is everything except when
396 echo 2 $[m => group(2)]
397 echo
398 echo $[m => group('two')]
399 echo $[m => group('year')] $[m => group('month')]
400 echo $[m => group('hour')] $[m => group('minute')] $[m => group('secs')]
401 }
402
403 var m = 'when: 2023-10' => leftMatch(pat)
404
405 show-groups (m)
406
407 var m = 'when: 23:30' => leftMatch(pat)
408
409 echo ---
410 show-groups (m)
411
412 var m = 'when: 23:30:59' => leftMatch(pat)
413
414 echo ---
415 show-groups (m)
416
417 ## STDOUT:
418 0 when: 2023-10
419 1 2023-10
420 2 2023-10
421
422 null
423 2023 10
424 null null null
425 ---
426 0 when: 23:30
427 1 23:30
428 2 null
429
430 23:30
431 null null
432 23 30 null
433 ---
434 0 when: 23:30:59
435 1 23:30:59
436 2 null
437
438 23:30:59
439 null null
440 23 30 59
441 ## END
442
443 #### Capture with Type Conversion Func
444 shopt -s ysh:upgrade
445 # ': int' / ': float' after a capture convert the matched text; checked via _group() and Match group()
446 var s = 'hi 42-3.14'
447 var pat = / <capture d+: int> '-' <capture d+ '.' d+ : float> /
448
449 if (s ~ pat) {
450 var g1 = _group(1) # Int
451 var g2 = _group(2) # Float
452 echo $[type(g1)] $[type(g2)]
453 }
454
455 var m = s => search(pat)
456 if (m) {
457 echo $[m => group(1) => type()] $[m => group(2) => type()]
458 }
459
460 ## STDOUT:
461 Int Float
462 Int Float
463 ## END
464
465
466 #### Named Capture with Type Conversion Func
467 shopt -s ysh:upgrade
468 # A user-defined func is used as the conversion function of a named capture
469 func floatNegate(x) {
470 return (-float(x))
471 }
472
473 var s = 'hi 42-3.14'
474 var pat = / <capture d+ as left: int> '-' <capture d+ '.' d+ as right: floatNegate> /
475
476 if (s ~ pat) {
477 var g1 = _group('left') # Int
478 var g2 = _group('right') # Float, negated by floatNegate
479 echo $g2
480 echo $[type(g1)] $[type(g2)]
481 }
482
483 var m = s => search(pat)
484 if (m) {
485 echo $[m => group('right')]
486 echo $[m => group('left') => type()] $[m => group('right') => type()]
487 }
488
489 ## STDOUT:
490 -3.14
491 Int Float
492 -3.14
493 Int Float
494 ## END
495
496 #### Can't splice eggex with different flags
497 shopt -s ysh:upgrade
498
499 var pat = / 'abc' ; i /
500 var pat2 = / @pat 'def' ; reg_icase / # this is allowed
501 # pat has the 'i' flag but pat3 has no flags: expected to fail with status 1
502 var pat3 = / @pat 'def' /
503 = pat3
504
505 ## status: 1
506 ## STDOUT:
507 ## END
508
509 #### Eggex with translation preference has arbitrary flags
510 shopt -s ysh:upgrade
511
512 # TODO: can provide introspection so users can translate it?
513 # This is kind of a speculative corner of the language.
514 # 'ignorecase' is accepted here together with the 'PCRE' translation preference
515 var pat = / d+ ; ignorecase ; PCRE /
516
517 # The match below still uses ERE, as a test
518 if ('ab 12' ~ pat) {
519 echo yes
520 }
521
522 ## STDOUT:
523 yes
524 ## END
525
526
527 #### Invalid sh operation on eggex
528 var pat = / d+ / # pat holds an Eggex
529 #pat[invalid]=1
530 pat[invalid]+=1
531 ## status: 1
532 ## stdout-json: ""
533
534 #### Long Python Example
535 # The grammar quoted below (from the Python reference) is rebuilt from small Eggexes
536 # https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
537
538 # integer ::= decinteger | bininteger | octinteger | hexinteger
539 # decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")*
540 # bininteger ::= "0" ("b" | "B") (["_"] bindigit)+
541 # octinteger ::= "0" ("o" | "O") (["_"] octdigit)+
542 # hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+
543 # nonzerodigit ::= "1"..."9"
544 # digit ::= "0"..."9"
545 # bindigit ::= "0" | "1"
546 # octdigit ::= "0"..."7"
547 # hexdigit ::= digit | "a"..."f" | "A"..."F"
548
549 shopt -s ysh:all
550
551 const DecDigit = / [0-9] /
552 const BinDigit = / [0-1] /
553 const OctDigit = / [0-7] /
554 const HexDigit = / [0-9 a-f A-F] / # note: not splicing Digit into character class
555 # One Eggex per integer form, each built from the digit classes above
556 const DecInt = / [1-9] ('_'? DecDigit)* | '0'+ ('_'? '0')* /
557 const BinInt = / '0' [b B] ('_'? BinDigit)+ /
558 const OctInt = / '0' [o O] ('_'? OctDigit)+ /
559 const HexInt = / '0' [x X] ('_'? HexDigit)+ /
560 # Anchored at both ends, so the whole string has to be an integer literal
561 const Integer = / %start (DecInt | BinInt | OctInt | HexInt) %end /
562
563 #echo $Integer
564 # Y lines must match; N lines must not match (tested with !~)
565 if ( '123' ~ Integer) { echo 'Y' }
566 if ( 'zzz' !~ Integer) { echo 'N' }
567
568 if ('123_000' ~ Integer) { echo 'Y decimal' }
569 if ('000_123' !~ Integer) { echo 'N decimal' }
570
571 if ( '0b100' ~ Integer) { echo 'Y binary' }
572 if ( '0b102' !~ Integer) { echo 'N binary' }
573
574 if ( '0o755' ~ Integer) { echo 'Y octal' }
575 if ( '0o778' !~ Integer) { echo 'N octal' }
576
577 if ( '0xFF' ~ Integer) { echo 'Y hex' }
578 if ( '0xFG' !~ Integer) { echo 'N hex' }
579
580 ## STDOUT:
581 Y
582 N
583 Y decimal
584 N decimal
585 Y binary
586 N binary
587 Y octal
588 N octal
589 Y hex
590 N hex
591 ## END
592
593 #### Regex in a loop (bug regression)
594 # Neither element matches the pattern, so the loop is expected to print both and finish
595 shopt --set ysh:all
596
597 var content = [ 1, 2 ]
598 var i = 0
599 while (i < len(content)) {
600 var line = content[i]
601 write $[content[i]]
602 if (str(line) ~ / s* 'imports' s* '=' s* .* /) {
603 exit
604 }
605 setvar i += 1
606 }
607
608 ## STDOUT:
609 1
610 2
611 ## END
612
613
614 #### Regex in a loop depending on var
615
616 shopt --set ysh:all
617 # The Eggex splices the loop variable, so each iteration must see the current value of line
618 var lines = ['foo', 'bar']
619 for line in (lines) {
620 write "line $line"
621
622 # = / $line /
623
624 if ("x$line" ~ / dot @line /) {
625 #if (line ~ / $line /) {
626 write "matched $line"
627 }
628 }
629
630 ## STDOUT:
631 line foo
632 matched foo
633 line bar
634 matched bar
635 ## END
636
637
638 #### Regex with [ (bug regression)
639 shopt --set ysh:all
640 # A literal '[' is tested as a quoted string and as a member of a character class
641 if ('[' ~ / '[' /) {
642 echo 'sq'
643 }
644
645 if ('[' ~ / [ '[' ] /) {
646 echo 'char class'
647 }
648
649 # User-reported pattern; "a" is not expected to match it, so nothing is printed
650 if ("a" ~ / s* 'imports' s* '=' s* '[' /) {
651 echo "yes"
652 }
653
654 ## STDOUT:
655 sq
656 char class
657 ## END
658
659 #### Str => replace(Str, Str)
660 shopt --set ysh:all
661 # Plain-string pattern and replacement; every occurrence is expected to be replaced
662 var mystr = 'abca'
663 write $[mystr => replace('a', 'A')] # Two matches
664 write $[mystr => replace('b', 'B')] # One match
665 write $[mystr => replace('x', 'y')] # No matches
666
667 write $[mystr => replace('abc', '')] # Empty substitution
668 write $[mystr => replace('', 'new')] # Empty substring: inserted before every char and at the end
669 ## STDOUT:
670 AbcA
671 aBca
672 abca
673 a
674 newanewbnewcnewanew
675 ## END
676
677 #### Str => replace(Eggex, Str)
678 shopt --set ysh:all
679 # Eggex pattern with a Str replacement that is shorter, then longer, than the match
680 var mystr = 'mangled----kebab--case'
681 write $[mystr => replace(/ '-'+ /, '-')]
682
683 setvar mystr = 'smaller-to-bigger'
684 write $[mystr => replace(/ '-'+ /, '---')]
685 ## STDOUT:
686 mangled-kebab-case
687 smaller---to---bigger
688 ## END
689
690 #### Str => replace(Eggex, Expr)
691 shopt --set ysh:all
692 # In the ^"..." replacement, $1 is expected to be capture 1 and $0 the whole match
693 var mystr = 'name: Bob'
694 write $[mystr => replace(/ 'name: ' <capture dot+> /, ^"Hello $1")]
695 write $[mystr => replace(/ 'name: ' <capture dot+> /, ^"Hello $1 (extracted from '$0')")]
696 ## STDOUT:
697 Hello Bob
698 Hello Bob (extracted from 'name: Bob')
699 ## END
700
701 #### Str => replace(*, Expr), $0
702 shopt --set ysh:all
703
704 # Functionality: $0 in the ^"..." replacement is the matched text, for Str and Eggex patterns
705 var mystr = 'class Foo: # this class is called Foo'
706 write $[mystr => replace("Foo", ^"$0Bar")]
707 write $[mystr => replace(/ 'Foo' /, ^"$0Bar")]
708
709 # Edge-cases: "$0" in an ordinary Str, or inside a func body, must keep its usual value
710 var dollar0 = "$0"
711 func f() { return ("$0") }
712 write $["foo" => replace("o", "$0") === "f$dollar0$dollar0"]
713 write $["foo" => replace("o", ^[f()]) === "f$dollar0$dollar0"]
714 write $[f() === "$dollar0"]
715 ## STDOUT:
716 class FooBar: # this class is called FooBar
717 class FooBar: # this class is called FooBar
718 true
719 true
720 true
721 ## END
722
723 #### Str => replace(Eggex, Expr), scopes
724 shopt --set ysh:all
725
726 var mystr = '123'
727 # The replacement expression can read an outer variable
728 var anotherVar = 'surprise!'
729 write $[mystr => replace(/ <capture d+> /, ^"Hello $1 ($anotherVar)")]
730 # A capture named globalName is expected to shadow the variable inside the replacement
731 var globalName = '456'
732 write $[mystr => replace(/ <capture d+ as globalName> /, ^"Hello $globalName")]
733 # With a differently named capture, globalName is expected to be '456' again
734 write $[mystr => replace(/ <capture d+ as localName> /, ^"Hello $localName, $globalName")]
735 ## STDOUT:
736 Hello 123 (surprise!)
737 Hello 123
738 Hello 123, 456
739 ## END
740
741 #### Str => replace(Eggex, *, count)
742 shopt --set ysh:all
743 # Per the expected output: negative counts replace everything, 0 replaces nothing
744 var mystr = '1abc2abc3abc'
745
746 for count in (-2..4) {
747 write $[mystr => replace('abc', "-", count=count)] # Str pattern, Str replacement
748 write $[mystr => replace('abc', ^"-", count=count)] # Str pattern, Expr replacement
749 write $[mystr => replace(/ [a-z]+ /, "-", count=count)] # Eggex pattern, Str replacement
750 write $[mystr => replace(/ [a-z]+ /, ^"-", count=count)] # Eggex pattern, Expr replacement
751 }
752 ## STDOUT:
753 1-2-3-
754 1-2-3-
755 1-2-3-
756 1-2-3-
757 1-2-3-
758 1-2-3-
759 1-2-3-
760 1-2-3-
761 1abc2abc3abc
762 1abc2abc3abc
763 1abc2abc3abc
764 1abc2abc3abc
765 1-2abc3abc
766 1-2abc3abc
767 1-2abc3abc
768 1-2abc3abc
769 1-2-3abc
770 1-2-3abc
771 1-2-3abc
772 1-2-3abc
773 1-2-3-
774 1-2-3-
775 1-2-3-
776 1-2-3-
777 ## END
778
779 #### Str => replace(Str, Str), empty new/old strings
780 var mystr = 'abca'
781 write $[mystr => replace('abc', '')] # Empty substitution
782 write $[mystr => replace('', 'new')] # Empty substring: inserted before every char and at the end
783 write $[mystr => replace('', 'new', count=1)] # Empty substring, count != -1: one insertion, at the start
784 write $[mystr => replace('', 'new', count=10)] # Empty substring, count too large: same result as no count
785 ## STDOUT:
786 a
787 newanewbnewcnewanew
788 newabca
789 newanewbnewcnewanew
790 ## END
791
792 #### Str => replace(Eggex, Lazy), convert_func
793 shopt --set ysh:all
794
795 var mystr = '123'
796 # The capture is converted with int before the lazy replacement runs, so n + 1 is arithmetic
797 write $[mystr => replace(/ <capture d+ as n : int> /, ^"$[n + 1]")]
798
799 # values automatically get stringified
800 write $[mystr => replace(/ <capture d+ as n : int> /, ^"$1")]
801 # Conversion func whose result is a Dict instead of a Str
802 func not_str(inp) {
803 return ({ "value": inp })
804 }
805
806 # should fail to stringify $1; the failure is expected to surface as status 3
807 try { call mystr => replace(/ <capture d+ : not_str> /, ^"$1") }
808 write status=$_status
809 ## STDOUT:
810 124
811 123
812 status=3
813 ## END
814
815 #### Str => replace(Eggex, *), eflags
816 shopt --set ysh:all
817 # Per the expected output, ^ matches after the embedded newline only with reg_newline
818 var mystr = $'1-2-3\n4-5'
819 write $[mystr => replace(/ d+ /, ^"[$0]")]
820 write $[mystr => replace(/ ^ d+ /, ^"[$0]")]
821 write $[mystr => replace(/ ^ d+ ; reg_newline /, ^"[$0]")]
822 ## STDOUT:
823 [1]-[2]-[3]
824 [4]-[5]
825 [1]-2-3
826 4-5
827 [1]-2-3
828 [4]-5
829 ## END