OILS / spec / ysh-regex-api.test.sh View on GitHub | oilshell.org

830 lines, 504 significant
1## oils_failures_allowed: 0
2
3#### s ~ regex and s !~ regex
4shopt -s ysh:upgrade
5
6var s = 'foo'
7if (s ~ '.([[:alpha:]]+)') { # ERE syntax; group 1 is the run of letters after the first character
8 echo matches
9 argv.py $[_group(0)] $[_group(1)] # whole match, then group 1
10}
11if (s !~ '[[:digit:]]+') { # s has no digits, so the negated match is true
12 echo "does not match"
13 argv.py $[_group(0)] $[_group(1)] # expected output shows the groups of the earlier successful match are still readable here
14}
15
16if (s ~ '[[:digit:]]+') { # a failed ~ match; nothing is echoed
17 echo "matches"
18}
19# Should be cleared now
20# should this be Undef rather than ''?
21try {
22 var x = _group(0) # expected to fail; the status is checked right below
23}
24if (_status === 3) {
25 echo 'got expected status 3'
26}
27
28try {
29 var y = _group(1) # same check for group 1
30}
31if (_status === 3) {
32 echo 'got expected status 3'
33}
34
35## STDOUT:
36matches
37['foo', 'oo']
38does not match
39['foo', 'oo']
40got expected status 3
41got expected status 3
42## END
43
44#### Invalid regex has libc error message
45
46shopt -s ysh:upgrade
47
48# Hm it's hard to test this, we can't get stderr of YSH from within YSH?
49#fopen 2>err.txt {
50# if ('abc' ~ '+') {
51# echo 'bad'
52# }
53#}
54
55if ('abc' ~ '+') { # '+' is the invalid regex under test; the case expects exit status 2 and empty stdout
56 echo 'bad' # must not be reached
57}
58
59## status: 2
60## STDOUT:
61## END
62
63#### Eggex flags to ignore case are respected
64shopt -s ysh:upgrade
65
66# based on Python's spelling
67var pat = / 'abc' ; i / # short flag spelling
68var pat2 = / @pat 'def' ; reg_icase / # this is allowed: pat (flag i) spliced under flag reg_icase -- presumably the two spellings are equivalent
69
70if ('-abcdef-' ~ pat2) { # lowercase subject
71 echo 'yes'
72}
73
74if ('-ABCDEF-' ~ pat2) { # uppercase subject; should still match with the flag set
75 echo 'yes'
76}
77
78if ('ABCDE' ~ pat2) { # trailing F is missing, so no match is expected even ignoring case
79 echo 'BUG'
80}
81
82## STDOUT:
83yes
84yes
85## END
86
87#### Eggex flags to treat newlines as special are respected
88shopt -s ysh:upgrade
89
90if (u'abc123\n' ~ / digit %end /) { # no flag: %end is not expected to match before the trailing newline
91 echo 'BUG'
92}
93if (u'abc\n123' ~ / %start digit /) { # no flag: %start is not expected to match after the embedded newline
94 echo 'BUG'
95}
96
97if (u'abc123\n' ~ / digit %end ; reg_newline /) { # with reg_newline the same pattern is expected to match
98 echo 'yes'
99}
100if (u'abc\n123' ~ / %start digit ; reg_newline /) { # likewise for %start after the newline
101 echo 'yes'
102}
103
104if (u'\n' ~ / . /) { # no flag: dot matches a newline
105 echo 'yes'
106}
107if (u'\n' ~ / !digit /) { # no flag: a negated class matches a newline
108 echo 'yes'
109}
110
111if (u'\n' ~ / . ; reg_newline /) { # with reg_newline, dot must not match a newline
112 echo 'BUG'
113}
114if (u'\n' ~ / !digit ; reg_newline /) { # with reg_newline, a negated class must not match a newline
115 echo 'BUG'
116}
117
118## STDOUT:
119yes
120yes
121yes
122yes
123## END
124
125#### Positional captures with _group
126shopt -s ysh:all
127
128var x = 'zz 2020-08-20'
129
130if [[ $x =~ ([[:digit:]]+)-([[:digit:]]+) ]] { # bash-style match; results are read from BASH_REMATCH
131 argv.py "${BASH_REMATCH[@]}"
132}
133
134# THIS IS A NO-OP. The variable is SHADOWED by the special name.
135# I think that's OK.
136setvar BASH_REMATCH = :| reset |
137
138if (x ~ /<capture d+> '-' <capture d+>/) { # eggex form of the ERE used above
139 argv.py "${BASH_REMATCH[@]}" # expected output shows BASH_REMATCH reflects the ~ match, not 'reset'
140 argv.py $[_group(0)] $[_group(1)] $[_group(2)] # same three values via _group()
141
142 # TODO: Also test _start() and _end()
143}
144## STDOUT:
145['2020-08', '2020', '08']
146['2020-08', '2020', '08']
147['2020-08', '2020', '08']
148## END
149
150#### _group() returns null when group doesn't match
151shopt -s ysh:upgrade
152
153var pat = / <capture 'a'> | <capture 'b'> / # two alternatives, each with its own capture group
154if ('b' ~ pat) { # only the second alternative participates in the match
155 echo "$[_group(1)] $[_group(2)]" # expected: "null b"
156}
157## STDOUT:
158null b
159## END
160
161#### _start() and _end()
162shopt -s ysh:upgrade
163
164var s = 'foo123bar'
165if (s ~ /digit+/) {
166 echo start=$[_start(0)] end=$[_end(0)] # span of the whole match; expected 3..6 for '123'
167}
168echo ---
169
170if (s ~ / <capture [a-z]+> <capture digit+> /) { # both groups participate
171 echo start=$[_start(1)] end=$[_end(1)]
172 echo start=$[_start(2)] end=$[_end(2)]
173}
174echo ---
175
176if (s ~ / <capture [a-z]+> | <capture digit+> /) { # alternation: only group 1 participates
177 echo start=$[_start(1)] end=$[_end(1)]
178 echo start=$[_start(2)] end=$[_end(2)] # expected -1/-1 for the group that did not match
179}
180
181## STDOUT:
182start=3 end=6
183---
184start=0 end=3
185start=3 end=6
186---
187start=0 end=3
188start=-1 end=-1
189## END
190
191#### Str->search() method returns value.Match object
192
193var s = '= Hi5- Bye6-'
194
195var m = s => search(/ <capture [a-z]+ > <capture d+> '-' ; i /) # flag i lets [a-z]+ match the capital in 'Hi'
196echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]"
197echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
198echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
199
200echo ---
201
202var pos = m => end(0) # search from end position
203var m = s => search(/ <capture [a-z]+ > <capture d+> '-' ; i /, pos=pos) # same pattern, resumed at pos
204echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]" # expected offsets (7..12) are relative to the whole string
205echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
206echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
207
208## STDOUT:
209g0 2 6 Hi5-
210g1 2 4 Hi
211g2 4 5 5
212---
213g0 7 12 Bye6-
214g1 7 10 Bye
215g2 10 11 6
216## END
217
218#### Str->search() only matches %start ^ when pos == 0
219
220shopt -s ysh:upgrade
221
222var anchored = / %start <capture d+> '-' /
223var free = / <capture d+> '-' /
224
225var s = '12-34-'
226
227for pat in ([anchored, free]) {
228 echo "pat=$pat" # prints the eggex in its translated ERE form (see expected output)
229
230 var pos = 0
231 while (true) {
232 var m = s => search(pat, pos=pos)
233 if (not m) { # no further match from pos: stop scanning
234 break
235 }
236 echo $[m => group(0)]
237 setvar pos = m => end(0) # continue after the end of this match
238 }
239
240}
241
242## STDOUT:
243pat=^([[:digit:]]+)-
24412-
245pat=([[:digit:]]+)-
24612-
24734-
248## END
249
250
251#### search() and leftMatch() accept ERE string
252
253var s = '= hi5- bye6-'
254
255var m = s => search('([[:alpha:]]+)([[:digit:]]+)-') # pattern given as a plain ERE string instead of an eggex
256echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]"
257echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
258echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
259echo ---
260
261var m = s[2:] => leftMatch('([[:alpha:]]+)([[:digit:]]+)-') # match at the left edge of the slice starting at index 2
262echo "g0 $[m => start(0)] $[m => end(0)] $[m => group(0)]" # expected offsets are relative to the slice (0..4)
263echo "g1 $[m => start(1)] $[m => end(1)] $[m => group(1)]"
264echo "g2 $[m => start(2)] $[m => end(2)] $[m => group(2)]"
265
266## STDOUT:
267g0 2 6 hi5-
268g1 2 4 hi
269g2 4 5 5
270---
271g0 0 4 hi5-
272g1 0 2 hi
273g2 2 3 5
274## END
275
276#### Str->leftMatch() can implement lexer pattern
277
278shopt -s ysh:upgrade
279
280var lexer = / <capture d+> | <capture [a-z]+> | <capture s+> / # one capture group per token kind: digits, lowercase letters, whitespace
281#echo $lexer
282
283proc show-tokens (s) { # print the tokens of s by calling leftMatch() repeatedly at the current position
284 var pos = 0
285
286 while (true) {
287 echo "pos=$pos"
288
289 var m = s->leftMatch(lexer, pos=pos)
290 if (not m) { # no token starts at pos: stop
291 break
292 }
293 # TODO: add groups()
294 #var groups = [m => group(1), m => group(2), m => group(3)]
295 #json write --pretty=F (groups)
296 echo "$[m => group(1)]/$[m => group(2)]/$[m => group(3)]/" # per the expected output, the groups that did not match print as null
297
298 echo
299
300 setvar pos = m => end(0) # advance past the token just printed
301 }
302}
303
304show-tokens 'ab 12' # every character belongs to some token; the loop ends at pos=5
305
306echo '==='
307
308# There's a token here that doesn't leftMatch()
309show-tokens 'ab+12' # the loop stops at pos=2, where '+' is
310
311## STDOUT:
312pos=0
313null/ab/null/
314
315pos=2
316null/null/ /
317
318pos=3
31912/null/null/
320
321pos=5
322===
323pos=0
324null/ab/null/
325
326pos=2
327## END
328
329#### Named captures with m => group()
330shopt -s ysh:all
331
332var s = 'zz 2020-08-20'
333var pat = /<capture d+ as year> '-' <capture d+ as month>/
334
335var m = s => search(pat)
336argv.py $[m => group('year')] $[m => group('month')] # groups looked up by name
337echo $[m => start('year')] $[m => end('year')] # start/end also accept a group name
338echo $[m => start('month')] $[m => end('month')]
339
340argv.py $[m => group('oops')] # unknown group name: the case expects the script to stop here with status 3
341echo 'error' # must not be reached
342
343## status: 3
344## STDOUT:
345['2020', '08']
3463 7
3478 10
348## END
349
350#### Named captures with _group() _start() _end()
351shopt -s ysh:all
352
353var x = 'zz 2020-08-20'
354
355if (x ~ /<capture d+ as year> '-' <capture d+ as month>/) {
356 argv.py $[_group('year')] $[_group('month')] # groups looked up by name
357 echo $[_start('year')] $[_end('year')]
358 echo $[_start('month')] $[_end('month')]
359}
360
361argv.py $[_group('oops')] # unknown group name: the case expects status 3
362
363## status: 3
364## STDOUT:
365['2020', '08']
3663 7
3678 10
368## END
369
370#### Named Capture Decays Without Name
371shopt -s ysh:all
372var pat = /<capture d+ as month>/
373echo $pat # expected output is a plain ( ) group; the name does not appear in the printed ERE
374
375if ('123' ~ pat) { # the pattern still matches as usual
376 echo yes
377}
378
379## STDOUT:
380([[:digit:]]+)
381yes
382## END
383
384#### Nested Named Capture Uses ( ordering
385
386shopt -s ysh:upgrade
387
388var Date = /<capture d+ as year> '-' <capture d+ as month>/
389var Time = /<capture d+ as hour> ':' <capture d+ as minute> (':' <capture d+ as secs>)? /
390
391var pat = / 'when: ' (<capture Date> | <capture Time as two>) / # per the expected output, the plain ( ) grouping is also numbered as a group
392#echo $pat
393
394proc show-groups (; m) { # print positional groups 0-2 of Match m, then its named groups
395 echo 0 $[m => group(0)]
396 echo 1 $[m => group(1)] # this is everything except when
397 echo 2 $[m => group(2)]
398 echo
399 echo $[m => group('two')]
400 echo $[m => group('year')] $[m => group('month')]
401 echo $[m => group('hour')] $[m => group('minute')] $[m => group('secs')]
402}
403
404var m = 'when: 2023-10' => leftMatch(pat) # Date alternative
405
406show-groups (m)
407
408var m = 'when: 23:30' => leftMatch(pat) # Time alternative without seconds
409
410echo ---
411show-groups (m)
412
413var m = 'when: 23:30:59' => leftMatch(pat) # Time alternative with the optional seconds
414
415echo ---
416show-groups (m)
417
418## STDOUT:
4190 when: 2023-10
4201 2023-10
4212 2023-10
422
423null
4242023 10
425null null null
426---
4270 when: 23:30
4281 23:30
4292 null
430
43123:30
432null null
43323 30 null
434---
4350 when: 23:30:59
4361 23:30:59
4372 null
438
43923:30:59
440null null
44123 30 59
442## END
443
444#### Capture with Type Conversion Func
445shopt -s ysh:upgrade
446
447var s = 'hi 42-3.14'
448var pat = / <capture d+: int> '-' <capture d+ '.' d+ : float> / # ': int' and ': float' name the conversion applied to each captured group
449
450if (s ~ pat) {
451 var g1 = _group(1) # Int
452 var g2 = _group(2) # Float
453 echo $[type(g1)] $[type(g2)]
454}
455
456var m = s => search(pat)
457if (m) {
458 echo $[m => group(1) => type()] $[m => group(2) => type()] # same conversion is expected through the Match object
459}
460
461## STDOUT:
462Int Float
463Int Float
464## END
465
466
467#### Named Capture with Type Conversion Func
468shopt -s ysh:upgrade
469
470func floatNegate(x) { # user-defined conversion func: convert x with float() and negate it
471 return (-float(x))
472}
473
474var s = 'hi 42-3.14'
475var pat = / <capture d+ as left: int> '-' <capture d+ '.' d+ as right: floatNegate> / # a capture can carry both a name and a conversion func
476
477if (s ~ pat) {
478 var g1 = _group('left') # Int
479 var g2 = _group('right') # Float, negated by floatNegate
480 echo $g2 # expected -3.14
481 echo $[type(g1)] $[type(g2)]
482}
483
484var m = s => search(pat)
485if (m) {
486 echo $[m => group('right')] # expected -3.14 again, via the Match object
487 echo $[m => group('left') => type()] $[m => group('right') => type()]
488}
489
490## STDOUT:
491-3.14
492Int Float
493-3.14
494Int Float
495## END
496
497#### Can't splice eggex with different flags
498shopt -s ysh:upgrade
499
500var pat = / 'abc' ; i /
501var pat2 = / @pat 'def' ; reg_icase / # this is allowed
502
503var pat3 = / @pat 'def' / # no flags here, unlike pat; the case expects failure with status 1 and no stdout
504= pat3 # evaluates and prints pat3
505
506## status: 1
507## STDOUT:
508## END
509
510#### Eggex with translation preference has arbitrary flags
511shopt -s ysh:upgrade
512
513# TODO: can provide introspection so users can translate it?
514# This is kind of a speculative corner of the language.
515
516var pat = / d+ ; ignorecase ; PCRE / # flags section, then a translation preference (per the case title)
517
518# This uses ERE, as a test
519if ('ab 12' ~ pat) { # expected to match despite the PCRE preference and the non-ERE flag name
520 echo yes
521}
522
523## STDOUT:
524yes
525## END
526
527
528#### Invalid sh operation on eggex
529var pat = / d+ /
530#pat[invalid]=1
531pat[invalid]+=1 # sh-style indexed append on a variable holding an Eggex; the case expects status 1 and no stdout
532## status: 1
533## stdout-json: ""
534
535#### Long Python Example
536
537# https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
538
539# integer ::= decinteger | bininteger | octinteger | hexinteger
540# decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")*
541# bininteger ::= "0" ("b" | "B") (["_"] bindigit)+
542# octinteger ::= "0" ("o" | "O") (["_"] octdigit)+
543# hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+
544# nonzerodigit ::= "1"..."9"
545# digit ::= "0"..."9"
546# bindigit ::= "0" | "1"
547# octdigit ::= "0"..."7"
548# hexdigit ::= digit | "a"..."f" | "A"..."F"
549
550shopt -s ysh:all
551
552const DecDigit = / [0-9] /
553const BinDigit = / [0-1] /
554const OctDigit = / [0-7] /
555const HexDigit = / [0-9 a-f A-F] / # note: not splicing Digit into character class
556
557const DecInt = / [1-9] ('_'? DecDigit)* | '0'+ ('_'? '0')* / # mirrors the decinteger rule quoted above
558const BinInt = / '0' [b B] ('_'? BinDigit)+ /
559const OctInt = / '0' [o O] ('_'? OctDigit)+ /
560const HexInt = / '0' [x X] ('_'? HexDigit)+ /
561
562const Integer = / %start (DecInt | BinInt | OctInt | HexInt) %end / # anchored at both ends: the whole string must be one literal
563
564#echo $Integer
565
566if ( '123' ~ Integer) { echo 'Y' }
567if ( 'zzz' !~ Integer) { echo 'N' }
568
569if ('123_000' ~ Integer) { echo 'Y decimal' } # underscore between digits is accepted
570if ('000_123' !~ Integer) { echo 'N decimal' } # zeros followed by nonzero digits are rejected
571
572if ( '0b100' ~ Integer) { echo 'Y binary' }
573if ( '0b102' !~ Integer) { echo 'N binary' } # 2 is not a binary digit
574
575if ( '0o755' ~ Integer) { echo 'Y octal' }
576if ( '0o778' !~ Integer) { echo 'N octal' } # 8 is not an octal digit
577
578if ( '0xFF' ~ Integer) { echo 'Y hex' }
579if ( '0xFG' !~ Integer) { echo 'N hex' } # G is not a hex digit
580
581## STDOUT:
582Y
583N
584Y decimal
585N decimal
586Y binary
587N binary
588Y octal
589N octal
590Y hex
591N hex
592## END
593
594#### Regex in a loop (bug regression)
595
596shopt --set ysh:all
597
598var content = [ 1, 2 ]
599var i = 0
600while (i < len(content)) {
601 var line = content[i]
602 write $[content[i]]
603 if (str(line) ~ / s* 'imports' s* '=' s* .* /) { # the eggex literal sits inside the loop body; neither item is expected to match
604 exit # not reached for this input (both items are printed in the expected output)
605 }
606 setvar i += 1
607}
608
609## STDOUT:
6101
6112
612## END
613
614
615#### Regex in a loop depending on var
616
617shopt --set ysh:all
618
619var lines = ['foo', 'bar']
620for line in (lines) {
621 write "line $line"
622
623 # = / $line /
624
625if ("x$line" ~ / dot @line /) { # @line splices the loop variable, so the pattern differs on each iteration
626 #if (line ~ / $line /) {
627 write "matched $line" # expected once per item
628 }
629}
630
631## STDOUT:
632line foo
633matched foo
634line bar
635matched bar
636## END
637
638
639#### Regex with [ (bug regression)
640shopt --set ysh:all
641
642if ('[' ~ / '[' /) { # '[' as a quoted literal
643 echo 'sq'
644}
645
646if ('[' ~ / [ '[' ] /) { # '[' inside a character class
647 echo 'char class'
648}
649
650# User-reported string
651if ("a" ~ / s* 'imports' s* '=' s* '[' /) { # expected to evaluate without error and simply not match
652 echo "yes"
653}
654
655## STDOUT:
656sq
657char class
658## END
659
660#### Str => replace(Str, Str)
661shopt --set ysh:all
662
663var mystr = 'abca'
664write $[mystr => replace('a', 'A')] # Two matches, both replaced
665write $[mystr => replace('b', 'B')] # One match
666write $[mystr => replace('x', 'y')] # No matches: the string is returned unchanged
667
668write $[mystr => replace('abc', '')] # Empty substitution: the match is deleted
669write $[mystr => replace('', 'new')] # Empty substring: 'new' is inserted before every character and at the end
670## STDOUT:
671AbcA
672aBca
673abca
674a
675newanewbnewcnewanew
676## END
677
678#### Str => replace(Eggex, Str)
679shopt --set ysh:all
680
681var mystr = 'mangled----kebab--case'
682write $[mystr => replace(/ '-'+ /, '-')] # each run of dashes collapses to a single dash
683
684setvar mystr = 'smaller-to-bigger'
685write $[mystr => replace(/ '-'+ /, '---')] # replacement longer than the matched text
686## STDOUT:
687mangled-kebab-case
688smaller---to---bigger
689## END
690
691#### Str => replace(Eggex, Expr)
692shopt --set ysh:all
693
694var mystr = 'name: Bob'
695write $[mystr => replace(/ 'name: ' <capture dot+> /, ^"Hello $1")] # $1 inside the ^"..." expression is capture group 1
696write $[mystr => replace(/ 'name: ' <capture dot+> /, ^"Hello $1 (extracted from '$0')")] # $0 is the whole match
697## STDOUT:
698Hello Bob
699Hello Bob (extracted from 'name: Bob')
700## END
701
702#### Str => replace(*, Expr), $0
703shopt --set ysh:all
704
705# Functionality
706var mystr = 'class Foo: # this class is called Foo'
707write $[mystr => replace("Foo", ^"$0Bar")] # Str pattern: $0 is the matched text
708write $[mystr => replace(/ 'Foo' /, ^"$0Bar")] # Eggex pattern: same result expected
709
710# Edge-cases
711var dollar0 = "$0" # "$0" evaluated outside of any replace() expression
712func f() { return ("$0") }
713write $["foo" => replace("o", "$0") === "f$dollar0$dollar0"] # eager "$0" string: not rebound to the match
714write $["foo" => replace("o", ^[f()]) === "f$dollar0$dollar0"] # "$0" inside a func called from the lazy expression: not rebound either
715write $[f() === "$dollar0"]
716## STDOUT:
717class FooBar: # this class is called FooBar
718class FooBar: # this class is called FooBar
719true
720true
721true
722## END
723
724#### Str => replace(Eggex, Expr), scopes
725shopt --set ysh:all
726
727var mystr = '123'
728
729var anotherVar = 'surprise!'
730write $[mystr => replace(/ <capture d+> /, ^"Hello $1 ($anotherVar)")] # the expression can read ordinary variables
731
732var globalName = '456'
733write $[mystr => replace(/ <capture d+ as globalName> /, ^"Hello $globalName")] # the named capture wins over the global of the same name (expected 'Hello 123')
734
735write $[mystr => replace(/ <capture d+ as localName> /, ^"Hello $localName, $globalName")] # named capture next to the global, which still reads 456
736## STDOUT:
737Hello 123 (surprise!)
738Hello 123
739Hello 123, 456
740## END
741
742#### Str => replace(Eggex, *, count)
743shopt --set ysh:all
744
745var mystr = '1abc2abc3abc'
746
747for count in (-2..4) { # per the expected output: negative counts replace everything, 0 replaces nothing
748 write $[mystr => replace('abc', "-", count=count)] # Str pattern, Str replacement
749 write $[mystr => replace('abc', ^"-", count=count)] # Str pattern, Expr replacement
750 write $[mystr => replace(/ [a-z]+ /, "-", count=count)] # Eggex pattern, Str replacement
751 write $[mystr => replace(/ [a-z]+ /, ^"-", count=count)] # Eggex pattern, Expr replacement (was a duplicate of the line above)
752}
753## STDOUT:
7541-2-3-
7551-2-3-
7561-2-3-
7571-2-3-
7581-2-3-
7591-2-3-
7601-2-3-
7611-2-3-
7621abc2abc3abc
7631abc2abc3abc
7641abc2abc3abc
7651abc2abc3abc
7661-2abc3abc
7671-2abc3abc
7681-2abc3abc
7691-2abc3abc
7701-2-3abc
7711-2-3abc
7721-2-3abc
7731-2-3abc
7741-2-3-
7751-2-3-
7761-2-3-
7771-2-3-
778## END
779
780#### Str => replace(Str, Str), empty new/old strings
781var mystr = 'abca'
782write $[mystr => replace('abc', '')] # Empty substitution: the match is deleted
783write $[mystr => replace('', 'new')] # Empty substring: inserted before every character and at the end
784write $[mystr => replace('', 'new', count=1)] # Empty substring, count != -1: only the first insertion happens
785write $[mystr => replace('', 'new', count=10)] # Empty substring, count too large: same result as no count
786## STDOUT:
787a
788newanewbnewcnewanew
789newabca
790newanewbnewcnewanew
791## END
792
793#### Str => replace(Eggex, Lazy), convert_func
794shopt --set ysh:all
795
796var mystr = '123'
797
798write $[mystr => replace(/ <capture d+ as n : int> /, ^"$[n + 1]")] # n is converted by int, so n + 1 is arithmetic (expected 124)
799
800# values automatically get stringified
801write $[mystr => replace(/ <capture d+ as n : int> /, ^"$1")]
802
803func not_str(inp) { # conversion func that returns a dict literal instead of a string-like value
804 return ({ "value": inp })
805}
806
807# should fail to stringify $1
808try { call mystr => replace(/ <capture d+ : not_str> /, ^"$1") }
809write status=$_status # expected status=3
810## STDOUT:
811124
812123
813status=3
814## END
815
816#### Str => replace(Eggex, *), eflags
817shopt --set ysh:all
818
819var mystr = $'1-2-3\n4-5'
820write $[mystr => replace(/ d+ /, ^"[$0]")] # unanchored: every number on both lines is wrapped
821write $[mystr => replace(/ ^ d+ /, ^"[$0]")] # no flag: ^ matches only at the start of the string
822write $[mystr => replace(/ ^ d+ ; reg_newline /, ^"[$0]")] # reg_newline: ^ also matches right after the newline
823## STDOUT:
824[1]-[2]-[3]
825[4]-[5]
826[1]-2-3
8274-5
828[1]-2-3
829[4]-5
830## END