OILS / demo / url-search-params.ysh View on Github | oilshell.org

275 lines, 121 significant
1#!bin/ysh
2#
3# Usage:
4# demo/url-search-params.ysh <function name>
5#
6# Tested against JavaScript's URLSearchParams. Differences:
7#
8# - JS strings can't represent bytes, so %ff turns into the Unicode replacement char.
9# - YSH turns this into the 0xff byte, denoted as b'\yff'
10# - JS accepts '==' as key="" value="="
11# - In YSH, this is a syntax error.
12# - On the other hand, both JS and YSH agree that =&=&= is 3 empty key value pairs:
13# [["", ""],
14# ["", ""],
15# ["", ""]]
16#
17# Evaluation of "the YSH experience":
18#
19# GOOD:
20#
21# - Eggex is elegant
22# - This code is structured better than the Python stdlib urlparse.py!
23# - This problem is also hard/ugly in JavaScript. They use an extra
24# s=>replace() on top of decodeURIComponent()!
25# - Task files in YSH basically work!
26# - I think this file has a nice structure
27# - It's nice to mix INTERIOR YSH testing and EXTERIOR comparison to node.js
28# - Triple quoted multiline strings are nice!
29#
30# NEEDS WORK:
31#
32# - need Vim syntax highlighting!
33# - e.g. multiline '' strings aren't highlighted
34# - task files need completion
35#
36# - Eggex could use multiline /// syntax, though for now you can use \ for line continuation
37# - Eggex could use "which" match
38# - m=>group('lit') sorta bothers me, it should be
39# - m.group('lit')
40# - $lit - probably!
41# - with vars(m.groupDict()) { ... }
42# - Alternative to printf -v probably needed, or at least wrap it in the YSH
43# stdlib
44#
45# - ERROR messages for URL parsing should bubble up to the user!
46# - USER code should be able to point to location info for bad escapes
47# like %f or %0z
48# - I guess we just need an idiom for this?
49
50source $LIB_OSH/task-five.sh
51#source $LIB_YSH/yblocks.ysh
52
func strFromTwoHex(two_hex) {
  ### Return the string printf produces for the escape \xHH, HH = two_hex

  # TODO: provide an alternative to this old OSH-style idiom!
  #
  # A Python-style version would include something like this:
  #   var i = int(two_hex, 16)

  # printf -v stores the formatted output in the named variable instead of
  # writing it to stdout.
  var byte_str
  printf -v byte_str "\\x$two_hex"
  return (byte_str)
}
63
# A single hexadecimal digit, in either case
const Hex = / [0-9 a-f A-F] /

# The three kinds of token that can appear in a percent-encoded string.
# Each one sets its own named capture group.
const QuotedLit = / <capture !['%+']+ as lit> /
const QuotedPlus = / <capture '+' as plus> /
const QuotedHex = / '%' <capture Hex Hex as two_hex> /

# One token: a literal run, a '+', or a %HH escape
const Quoted = / QuotedLit | QuotedPlus | QuotedHex /
71
func unquote(s) {
  ### Turn strings with %20 into space, etc.

  # Decodes s one Quoted token at a time:
  #   lit     -> copied through unchanged
  #   plus    -> a single space
  #   two_hex -> the result of strFromTwoHex()
  #
  # Returns the decoded parts joined into one string.  Raises an error when
  # tokenizing stops before the end of s, e.g. at a malformed escape such as
  # %f or %0z.

  var pos = 0
  var parts = []
  while (true) {
    var m = s => leftMatch(Quoted, pos=pos)
    if (not m) {
      break
    }

    # Quoted is an alternation, so only the group belonging to the
    # alternative that matched is truthy.
    var lit = m => group('lit')
    var plus = m => group('plus')
    var two_hex = m => group('two_hex')

    var part
    if (lit) {
      setvar part = lit
    } elif (plus) {
      setvar part = ' '
    } elif (two_hex) {
      setvar part = strFromTwoHex(two_hex)
    }
    call parts->append(part)

    # Resume right after the token that was just consumed
    setvar pos = m => end(0)
  }
  if (pos !== len(s)) {
    # Say where decoding stopped, in the same format URLSearchParams() uses
    error "Unexpected trailing input in unquote $pos != $[len(s)]"
  }

  return (join(parts))
}
114
proc js-decode-part(s) {
  ### Decode s with node.js and write the result to stdout as a JSON string

  # s reaches the script as process.argv[1].  The output has no trailing
  # newline, because it's written with process.stdout.write().
  nodejs -e '''

  var encoded = process.argv[1];

  // decodeURIComponent() does not handle +, maybe because + is only for
  // query params, not components?
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent
  encoded = encoded.replace(/\+/g, " ");

  var j = JSON.stringify(decodeURIComponent(encoded));
  process.stdout.write(j);

  ''' $s
}
129
# Encoded strings that test-part decodes with both node.js and unquote()
const PART_CASES = [
  'foo+bar',
  'foo%23%40',

  # TODO: empty key, empty value, invalid % , etc.
]
135
proc test-part() {
  ### Assert that unquote() agrees with node.js on every PART_CASES entry

  echo hi

  #_check ('foo bar' === unquote('foo+bar'))

  for encoded in (PART_CASES) {
    # Reference answer: node.js prints JSON, which is read into 'want'
    js-decode-part $encoded | json read (&want)
    echo 'JS'
    pp line (want)

    # Answer from the YSH implementation
    echo 'YSH'
    var got = unquote(encoded)
    pp line (got)

    assert [got === want]

    echo
  }
}
156
157#
158# Query
159#
160
# A key or a value: any run of characters other than '&', '=' and space.
# The run may be empty, since JavaScript accepts an empty string on either
# side of k=v and we match that.
const Tok = / !['&= ']* /

# key=value, with each side captured under its own name
const Pair = / <capture Tok as key> '=' <capture Tok as value> /

# A Pair plus the optional '&' that separates it from the next one
const Pairs = / Pair <capture '&' as sep>? /
167
func URLSearchParams(s) {
  ### Parse a query string like k=v&foo=spam+eggs&k=v into [key, value] pairs

  # Each key and value is decoded with unquote().  Raises an error if
  # matching stops before the end of s.

  var pairs = []
  var pos = 0

  # Match the first pair up front, then re-match at the bottom of the loop
  var m = s => leftMatch(Pairs, pos=pos)
  while (m) {
    var raw_key = m => group('key')
    var raw_value = m => group('value')
    call pairs->append([unquote(raw_key), unquote(raw_value)])

    setvar pos = m => end(0)

    # Without a '&' after this pair, there can't be another one
    var sep = m => group('sep')
    if (not sep) {
      break
    }

    setvar m = s => leftMatch(Pairs, pos=pos)
  }

  if (pos !== len(s)) {
    error "Unexpected trailing input in URLSearchParams $pos != $[len(s)]"
  }

  return (pairs)
}
205
proc js-decode-query(s) {
  ### Parse s with node.js URLSearchParams; write the pairs to stdout as JSON

  # s reaches the script as process.argv[1].  The output is a JSON list of
  # [key, value] lists with no trailing newline.
  nodejs -e '''

  const u = new URLSearchParams(process.argv[1]);
  //console.log(JSON.stringify(u));

  var pairs = [];
  // Declare the loop variable, so it is not an implicit global
  for (const pair of u) {
    pairs.push(pair);
  }

  var j = JSON.stringify(pairs);

  //console.log(j);
  process.stdout.write(j);
  ''' $s
}
223
# Query strings that test-query parses with both node.js and URLSearchParams()
const QUERY_CASES = [
  'k=foo+bar',
  'key=foo%23%40',
  'k=v&foo%23=bar+baz+%24%25&k=v',
  'foo+bar=z',

  # Empty values
  'missing_val=&k=',

  # Empty keys
  '=missing_key&=m2',

  # Both sides empty - this is valid
  '=&=',
  '=&=&',
]
239
# Inputs on which JavaScript and YSH are known to differ
const OTHER_CASES = [
  # JavaScript strings can't represent bytes, so it converts %ff to the
  # Unicode replacement char
  'foo%ffbar=z',

  # JavaScript treats the second = as a literal - that seems wrong.
  # YSH treating this as an error seems right.
  '==',
]
249
250
proc test-query() {
  ### Assert that URLSearchParams() agrees with node.js on every QUERY_CASES entry

  # Swap in the commented-out loop header to try the cases that are known
  # to differ.
  for s in (QUERY_CASES) {
  #for s in (OTHER_CASES) {
    echo 'INPUT'
    echo " $s"

    # Reference answer: node.js prints JSON, which is read into 'want'
    js-decode-query $s | json read (&want)
    echo 'JS'
    pp line (want)

    # Answer from the YSH implementation
    echo 'YSH'
    var got = URLSearchParams(s)
    pp line (got)

    assert [got === want]

    echo
  }
}
270
proc run-tests() {
  ### Hand this script ($0) to devtools/byo.sh in 'test' mode

  devtools/byo.sh test $0
}

# Entry point: pass the command line to task-five, per the usage at the top
# of this file:
#   demo/url-search-params.ysh <function name>
task-five "$@"