OILS / spec / ysh-regex-api.test.sh View on Github | oilshell.org

829 lines, 504 significant
1## oils_failures_allowed: 0
2
3#### s ~ regex and s !~ regex
4shopt -s ysh:upgrade
5
6var s = 'foo'
7if (s ~ '.([[:alpha:]]+)') { # ERE syntax
8 echo matches
9 argv.py $[_group(0)] $[_group(1)]
10}
11if (s !~ '[[:digit:]]+') {
12 echo "does not match"
13 argv.py $[_group(0)] $[_group(1)]
14}
15
16if (s ~ '[[:digit:]]+') {
17 echo "matches"
18}
19# The failed match above should have cleared the match state
20# _group() now raises an error (status 3) rather than returning '' or Undef
21try {
22 var x = _group(0)
23}
24if (_status === 3) {
25 echo 'got expected status 3'
26}
27
28try {
29 var y = _group(1)
30}
31if (_status === 3) {
32 echo 'got expected status 3'
33}
34
35## STDOUT:
36matches
37['foo', 'oo']
38does not match
39['foo', 'oo']
40got expected status 3
41got expected status 3
42## END
43
44#### Invalid regex has libc error message
45
46shopt -s ysh:upgrade
47
48# Hm, the message itself is hard to test: can we capture YSH's stderr from within YSH?
49#fopen 2>err.txt {
50# if ('abc' ~ '+') {
51# echo 'bad'
52# }
53#}
54
55if ('abc' ~ '+') {
56 echo 'bad'
57}
58
59## status: 2
60## STDOUT:
61## END
62
63#### Eggex flags to ignore case are respected
64shopt -s ysh:upgrade
65
66# based on Python's spelling
67var pat = / 'abc' ; i /
68var pat2 = / @pat 'def' ; reg_icase / # this is allowed
69
70if ('-abcdef-' ~ pat2) {
71 echo 'yes'
72}
73
74if ('-ABCDEF-' ~ pat2) {
75 echo 'yes'
76}
77
78if ('ABCDE' ~ pat2) {
79 echo 'BUG'
80}
81
82## STDOUT:
83yes
84yes
85## END
86
87#### Eggex flags to treat newlines as special are respected
88shopt -s ysh:upgrade
89
90if (u'abc123\n' ~ / digit %end /) {
91 echo 'BUG'
92}
93if (u'abc\n123' ~ / %start digit /) {
94 echo 'BUG'
95}
96
97if (u'abc123\n' ~ / digit %end ; reg_newline /) {
98 echo 'yes'
99}
100if (u'abc\n123' ~ / %start digit ; reg_newline /) {
101 echo 'yes'
102}
103
104if (u'\n' ~ / . /) {
105 echo 'yes'
106}
107if (u'\n' ~ / !digit /) {
108 echo 'yes'
109}
110
111if (u'\n' ~ / . ; reg_newline /) {
112 echo 'BUG'
113}
114if (u'\n' ~ / !digit ; reg_newline /) {
115 echo 'BUG'
116}
117
118## STDOUT:
119yes
120yes
121yes
122yes
123## END
124
125#### Positional captures with _group
126shopt -s ysh:upgrade
127
128var x = 'zz 2020-08-20'
129
130if [[ $x =~ ([[:digit:]]+)-([[:digit:]]+) ]] {
131 argv.py "${BASH_REMATCH[@]}"
132}
133
134# THIS IS A NO-OP. The variable is SHADOWED by the special name.
135# I think that's OK.
136setvar BASH_REMATCH = :| reset |
137
138if (x ~ /<capture d+> '-' <capture d+>/) {
139 argv.py "${BASH_REMATCH[@]}"
140 argv.py $[_group(0)] $[_group(1)] $[_group(2)]
141
142 # TODO: Also test _start() and _end()
143}
144## STDOUT:
145['2020-08', '2020', '08']
146['2020-08', '2020', '08']
147['2020-08', '2020', '08']
148## END
149
150#### _group() returns null when group doesn't match
151shopt -s ysh:upgrade
152
153var pat = / <capture 'a'> | <capture 'b'> /
154if ('b' ~ pat) {
155 echo "$[_group(1)] $[_group(2)]"
156}
157## STDOUT:
158null b
159## END
160
161#### _start() and _end()
162shopt -s ysh:upgrade
163
164var s = 'foo123bar'
165if (s ~ /digit+/) {
166 echo start=$[_start(0)] end=$[_end(0)]
167}
168echo ---
169
170if (s ~ / <capture [a-z]+> <capture digit+> /) {
171 echo start=$[_start(1)] end=$[_end(1)]
172 echo start=$[_start(2)] end=$[_end(2)]
173}
174echo ---
175
176if (s ~ / <capture [a-z]+> | <capture digit+> /) {
177 echo start=$[_start(1)] end=$[_end(1)]
178 echo start=$[_start(2)] end=$[_end(2)]
179}
180
181## STDOUT:
182start=3 end=6
183---
184start=0 end=3
185start=3 end=6
186---
187start=0 end=3
188start=-1 end=-1
189## END
190
191#### Str->search() method returns value.Match object
192
193var s = '= Hi5- Bye6-'
194
195var m = s => search(/ <capture [a-z]+ > <capture d+> '-' ; i /)
196echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]"
197echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
198echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
199
200echo ---
201
202var pos = m => end(0) # search from end position
203var m = s => search(/ <capture [a-z]+ > <capture d+> '-' ; i /, pos=pos)
204echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]"
205echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
206echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
207
208## STDOUT:
209g0 2 6 Hi5-
210g1 2 4 Hi
211g2 4 5 5
212---
213g0 7 12 Bye6-
214g1 7 10 Bye
215g2 10 11 6
216## END
217
218#### Str->search() only matches %start ^ when pos == 0
219
220shopt -s ysh:upgrade
221
222var anchored = / %start <capture d+> '-' /
223var free = / <capture d+> '-' /
224
225var s = '12-34-'
226
227for pat in ([anchored, free]) {
228 echo "pat=$pat"
229
230 var pos = 0
231 while (true) {
232 var m = s => search(pat, pos=pos)
233 if (not m) {
234 break
235 }
236 echo $[m => group(0)]
237 setvar pos = m => end(0)
238 }
239
240}
241
242## STDOUT:
243pat=^([[:digit:]]+)-
24412-
245pat=([[:digit:]]+)-
24612-
24734-
248## END
249
250
251#### search() and leftMatch() accept ERE string
252
253var s = '= hi5- bye6-'
254
255var m = s => search('([[:alpha:]]+)([[:digit:]]+)-')
256echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]"
257echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
258echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
259echo ---
260
261var m = s[2:] => leftMatch('([[:alpha:]]+)([[:digit:]]+)-')
262echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]"
263echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
264echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
265
266## STDOUT:
267g0 2 6 hi5-
268g1 2 4 hi
269g2 4 5 5
270---
271g0 0 4 hi5-
272g1 0 2 hi
273g2 2 3 5
274## END
275
276#### Str=>leftMatch() can implement lexer pattern
277
278shopt -s ysh:upgrade
279
280var lexer = / <capture d+> | <capture [a-z]+> | <capture s+> /
281#echo $lexer
282
283proc show-tokens (s) { # Tokenize s by calling leftMatch() with the global 'lexer' eggex at successive positions
284 var pos = 0
285
286 while (true) {
287 echo "pos=$pos" # printed before every attempt, so the position of a failed match also shows up
288
289 var m = s=>leftMatch(lexer, pos=pos)
290 if (not m) { # no alternative of the lexer matches at pos: stop
291 break
292 }
293 # TODO: add groups()
294 #var groups = [m => group(1), m => group(2), m => group(3)]
295 echo "$[m => group(1)]/$[m => group(2)]/$[m => group(3)]/" # one slot per lexer alternative; non-participating groups print as null
296
297 echo
298
299 setvar pos = m => end(0) # continue right after the token just matched
300 }
301}
302
303show-tokens 'ab 12'
304
305echo '==='
306
307# There's a token here that doesn't leftMatch()
308show-tokens 'ab+12'
309
310## STDOUT:
311pos=0
312null/ab/null/
313
314pos=2
315null/null/ /
316
317pos=3
31812/null/null/
319
320pos=5
321===
322pos=0
323null/ab/null/
324
325pos=2
326## END
327
328#### Named captures with m => group()
329shopt -s ysh:all
330
331var s = 'zz 2020-08-20'
332var pat = /<capture d+ as year> '-' <capture d+ as month>/
333
334var m = s => search(pat)
335argv.py $[m => group('year')] $[m => group('month')]
336echo $[m => start('year')] $[m => end('year')]
337echo $[m => start('month')] $[m => end('month')]
338
339argv.py $[m => group('oops')]
340echo 'error'
341
342## status: 3
343## STDOUT:
344['2020', '08']
3453 7
3468 10
347## END
348
349#### Named captures with _group() _start() _end()
350shopt -s ysh:all
351
352var x = 'zz 2020-08-20'
353
354if (x ~ /<capture d+ as year> '-' <capture d+ as month>/) {
355 argv.py $[_group('year')] $[_group('month')]
356 echo $[_start('year')] $[_end('year')]
357 echo $[_start('month')] $[_end('month')]
358}
359
360argv.py $[_group('oops')]
361
362## status: 3
363## STDOUT:
364['2020', '08']
3653 7
3668 10
367## END
368
369#### Named Capture Decays Without Name
370shopt -s ysh:all
371var pat = /<capture d+ as month>/
372echo $pat
373
374if ('123' ~ pat) {
375 echo yes
376}
377
378## STDOUT:
379([[:digit:]]+)
380yes
381## END
382
383#### Nested Named Capture Uses ( ordering
384
385shopt -s ysh:upgrade
386
387var Date = /<capture d+ as year> '-' <capture d+ as month>/
388var Time = /<capture d+ as hour> ':' <capture d+ as minute> (':' <capture d+ as secs>)? /
389
390var pat = / 'when: ' (<capture Date> | <capture Time as two>) /
391#echo $pat
392
393proc show-groups (; m) { # Print positional groups 0-2 of Match m, then its named groups
394 echo 0 $[m => group(0)]
395 echo 1 $[m => group(1)] # group 1 is everything after the 'when: ' prefix
396 echo 2 $[m => group(2)]
397 echo
398 echo $[m => group('two')] # named groups follow; groups that did not participate print as null
399 echo $[m => group('year')] $[m => group('month')]
400 echo $[m => group('hour')] $[m => group('minute')] $[m => group('secs')]
401}
402
403var m = 'when: 2023-10' => leftMatch(pat)
404
405show-groups (m)
406
407var m = 'when: 23:30' => leftMatch(pat)
408
409echo ---
410show-groups (m)
411
412var m = 'when: 23:30:59' => leftMatch(pat)
413
414echo ---
415show-groups (m)
416
417## STDOUT:
4180 when: 2023-10
4191 2023-10
4202 2023-10
421
422null
4232023 10
424null null null
425---
4260 when: 23:30
4271 23:30
4282 null
429
43023:30
431null null
43223 30 null
433---
4340 when: 23:30:59
4351 23:30:59
4362 null
437
43823:30:59
439null null
44023 30 59
441## END
442
443#### Capture with Type Conversion Func
444shopt -s ysh:upgrade
445
446var s = 'hi 42-3.14'
447var pat = / <capture d+: int> '-' <capture d+ '.' d+ : float> /
448
449if (s ~ pat) {
450 var g1 = _group(1) # Int
451 var g2 = _group(2) # Float
452 echo $[type(g1)] $[type(g2)]
453}
454
455var m = s => search(pat)
456if (m) {
457 echo $[m => group(1) => type()] $[m => group(2) => type()]
458}
459
460## STDOUT:
461Int Float
462Int Float
463## END
464
465
466#### Named Capture with Type Conversion Func
467shopt -s ysh:upgrade
468
469func floatNegate(x) { # Conversion func used in a capture below: convert x with float() and negate the result
470 return (-float(x))
471}
472
473var s = 'hi 42-3.14'
474var pat = / <capture d+ as left: int> '-' <capture d+ '.' d+ as right: floatNegate> /
475
476if (s ~ pat) {
477 var g1 = _group('left') # Int
478 var g2 = _group('right') # Float
479 echo $g2
480 echo $[type(g1)] $[type(g2)]
481}
482
483var m = s => search(pat)
484if (m) {
485 echo $[m => group('right')]
486 echo $[m => group('left') => type()] $[m => group('right') => type()]
487}
488
489## STDOUT:
490-3.14
491Int Float
492-3.14
493Int Float
494## END
495
496#### Can't splice eggex with different flags
497shopt -s ysh:upgrade
498
499var pat = / 'abc' ; i /
500var pat2 = / @pat 'def' ; reg_icase / # this is allowed
501
502var pat3 = / @pat 'def' /
503= pat3
504
505## status: 1
506## STDOUT:
507## END
508
509#### Eggex with translation preference has arbitrary flags
510shopt -s ysh:upgrade
511
512# TODO: can provide introspection so users can translate it?
513# This is kind of a speculative corner of the language.
514
515var pat = / d+ ; ignorecase ; PCRE /
516
517# This uses ERE, as a test
518if ('ab 12' ~ pat) {
519 echo yes
520}
521
522## STDOUT:
523yes
524## END
525
526
527#### Invalid sh operation on eggex
528var pat = / d+ /
529#pat[invalid]=1
530pat[invalid]+=1
531## status: 1
532## stdout-json: ""
533
534#### Long Python Example
535
536# https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
537
538# integer ::= decinteger | bininteger | octinteger | hexinteger
539# decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")*
540# bininteger ::= "0" ("b" | "B") (["_"] bindigit)+
541# octinteger ::= "0" ("o" | "O") (["_"] octdigit)+
542# hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+
543# nonzerodigit ::= "1"..."9"
544# digit ::= "0"..."9"
545# bindigit ::= "0" | "1"
546# octdigit ::= "0"..."7"
547# hexdigit ::= digit | "a"..."f" | "A"..."F"
548
549shopt -s ysh:all
550
551const DecDigit = / [0-9] /
552const BinDigit = / [0-1] /
553const OctDigit = / [0-7] /
554const HexDigit = / [0-9 a-f A-F] / # note: not splicing Digit into character class
555
556const DecInt = / [1-9] ('_'? DecDigit)* | '0'+ ('_'? '0')* /
557const BinInt = / '0' [b B] ('_'? BinDigit)+ /
558const OctInt = / '0' [o O] ('_'? OctDigit)+ /
559const HexInt = / '0' [x X] ('_'? HexDigit)+ /
560
561const Integer = / %start (DecInt | BinInt | OctInt | HexInt) %end /
562
563#echo $Integer
564
565if ( '123' ~ Integer) { echo 'Y' }
566if ( 'zzz' !~ Integer) { echo 'N' }
567
568if ('123_000' ~ Integer) { echo 'Y decimal' }
569if ('000_123' !~ Integer) { echo 'N decimal' }
570
571if ( '0b100' ~ Integer) { echo 'Y binary' }
572if ( '0b102' !~ Integer) { echo 'N binary' }
573
574if ( '0o755' ~ Integer) { echo 'Y octal' }
575if ( '0o778' !~ Integer) { echo 'N octal' }
576
577if ( '0xFF' ~ Integer) { echo 'Y hex' }
578if ( '0xFG' !~ Integer) { echo 'N hex' }
579
580## STDOUT:
581Y
582N
583Y decimal
584N decimal
585Y binary
586N binary
587Y octal
588N octal
589Y hex
590N hex
591## END
592
593#### Regex in a loop (bug regression)
594
595shopt --set ysh:all
596
597var content = [ 1, 2 ]
598var i = 0
599while (i < len(content)) {
600 var line = content[i]
601 write $[content[i]]
602 if (str(line) ~ / s* 'imports' s* '=' s* .* /) {
603 exit
604 }
605 setvar i += 1
606}
607
608## STDOUT:
6091
6102
611## END
612
613
614#### Regex in a loop depending on var
615
616shopt --set ysh:all
617
618var lines = ['foo', 'bar']
619for line in (lines) {
620 write "line $line"
621
622 # = / $line /
623
624if ("x$line" ~ / dot @line /) {
625 #if (line ~ / $line /) {
626 write "matched $line"
627 }
628}
629
630## STDOUT:
631line foo
632matched foo
633line bar
634matched bar
635## END
636
637
638#### Regex with [ (bug regression)
639shopt --set ysh:all
640
641if ('[' ~ / '[' /) {
642 echo 'sq'
643}
644
645if ('[' ~ / [ '[' ] /) {
646 echo 'char class'
647}
648
649# User-reported string
650if ("a" ~ / s* 'imports' s* '=' s* '[' /) {
651 echo "yes"
652}
653
654## STDOUT:
655sq
656char class
657## END
658
659#### Str => replace(Str, Str)
660shopt --set ysh:all
661
662var mystr = 'abca'
663write $[mystr => replace('a', 'A')] # Two matches
664write $[mystr => replace('b', 'B')] # One match
665write $[mystr => replace('x', 'y')] # No matches
666
667write $[mystr => replace('abc', '')] # Empty substitution
668write $[mystr => replace('', 'new')] # Empty substring
669## STDOUT:
670AbcA
671aBca
672abca
673a
674newanewbnewcnewanew
675## END
676
677#### Str => replace(Eggex, Str)
678shopt --set ysh:all
679
680var mystr = 'mangled----kebab--case'
681write $[mystr => replace(/ '-'+ /, '-')]
682
683setvar mystr = 'smaller-to-bigger'
684write $[mystr => replace(/ '-'+ /, '---')]
685## STDOUT:
686mangled-kebab-case
687smaller---to---bigger
688## END
689
690#### Str => replace(Eggex, Expr)
691shopt --set ysh:all
692
693var mystr = 'name: Bob'
694write $[mystr => replace(/ 'name: ' <capture dot+> /, ^"Hello $1")]
695write $[mystr => replace(/ 'name: ' <capture dot+> /, ^"Hello $1 (extracted from '$0')")]
696## STDOUT:
697Hello Bob
698Hello Bob (extracted from 'name: Bob')
699## END
700
701#### Str => replace(*, Expr), $0
702shopt --set ysh:all
703
704# Functionality
705var mystr = 'class Foo: # this class is called Foo'
706write $[mystr => replace("Foo", ^"$0Bar")]
707write $[mystr => replace(/ 'Foo' /, ^"$0Bar")]
708
709# Edge-cases
710var dollar0 = "$0"
711func f() { return ("$0") }
712write $["foo" => replace("o", "$0") === "f$dollar0$dollar0"]
713write $["foo" => replace("o", ^[f()]) === "f$dollar0$dollar0"]
714write $[f() === "$dollar0"]
715## STDOUT:
716class FooBar: # this class is called FooBar
717class FooBar: # this class is called FooBar
718true
719true
720true
721## END
722
723#### Str => replace(Eggex, Expr), scopes
724shopt --set ysh:all
725
726var mystr = '123'
727
728var anotherVar = 'surprise!'
729write $[mystr => replace(/ <capture d+> /, ^"Hello $1 ($anotherVar)")]
730
731var globalName = '456'
732write $[mystr => replace(/ <capture d+ as globalName> /, ^"Hello $globalName")]
733
734write $[mystr => replace(/ <capture d+ as localName> /, ^"Hello $localName, $globalName")]
735## STDOUT:
736Hello 123 (surprise!)
737Hello 123
738Hello 123, 456
739## END
740
741#### Str => replace(Eggex, *, count)
742shopt --set ysh:all
743
744var mystr = '1abc2abc3abc'
745
746for count in (-2..4) { # every pattern/replacement combination must print the same result for a given count
747 write $[mystr => replace('abc', "-", count=count)] # Str pattern, Str replacement
748 write $[mystr => replace('abc', ^"-", count=count)] # Str pattern, Expr replacement
749 write $[mystr => replace(/ [a-z]+ /, "-", count=count)] # Eggex pattern, Str replacement
750 write $[mystr => replace(/ [a-z]+ /, ^"-", count=count)] # Eggex pattern, Expr replacement
751}
752## STDOUT:
7531-2-3-
7541-2-3-
7551-2-3-
7561-2-3-
7571-2-3-
7581-2-3-
7591-2-3-
7601-2-3-
7611abc2abc3abc
7621abc2abc3abc
7631abc2abc3abc
7641abc2abc3abc
7651-2abc3abc
7661-2abc3abc
7671-2abc3abc
7681-2abc3abc
7691-2-3abc
7701-2-3abc
7711-2-3abc
7721-2-3abc
7731-2-3-
7741-2-3-
7751-2-3-
7761-2-3-
777## END
778
779#### Str => replace(Str, Str), empty new/old strings
780var mystr = 'abca'
781write $[mystr => replace('abc', '')] # Empty substitution
782write $[mystr => replace('', 'new')] # Empty substring
783write $[mystr => replace('', 'new', count=1)] # Empty substring, count != -1
784write $[mystr => replace('', 'new', count=10)] # Empty substring, count too large
785## STDOUT:
786a
787newanewbnewcnewanew
788newabca
789newanewbnewcnewanew
790## END
791
792#### Str => replace(Eggex, Lazy), convert_func
793shopt --set ysh:all
794
795var mystr = '123'
796
797write $[mystr => replace(/ <capture d+ as n : int> /, ^"$[n + 1]")]
798
799# values automatically get stringified
800write $[mystr => replace(/ <capture d+ as n : int> /, ^"$1")]
801
802func not_str(inp) { # Conversion func that wraps its argument in a Dict; the replace() call below is expected to fail when stringifying it
803 return ({ "value": inp })
804}
805
806# should fail to stringify $1
807try { call mystr => replace(/ <capture d+ : not_str> /, ^"$1") }
808write status=$_status
809## STDOUT:
810124
811123
812status=3
813## END
814
815#### Str => replace(Eggex, *), eflags
816shopt --set ysh:all
817
818var mystr = $'1-2-3\n4-5'
819write $[mystr => replace(/ d+ /, ^"[$0]")]
820write $[mystr => replace(/ ^ d+ /, ^"[$0]")]
821write $[mystr => replace(/ ^ d+ ; reg_newline /, ^"[$0]")]
822## STDOUT:
823[1]-[2]-[3]
824[4]-[5]
825[1]-2-3
8264-5
827[1]-2-3
828[4]-5
829## END