1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Usage:
|
4 | # data_lang/json-survey.sh <function name>
|
5 |
|
# Strict mode: abort on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset -o pipefail -o errexit
|
9 |
|
10 | source build/dev-shell.sh # python3 in $PATH
|
11 |
|
decode-int-float() {
  # Print the type and value each decoder produces for JSON number literals.
  local literal

  # The first three literals are noted as decoding to float, 42 as int.
  for literal in 1e6 1e-6 0.5 42; do
    python2 -c "import json; val = json.loads(\"$literal\"); print(type(val)); print(val)"
  done

  python3 -c 'import json; val = json.loads("1e6"); print(type(val)); print(val)'

  printf '\n\n'

  # JavaScript only has 'number', no Int and Float
  nodejs -e 'var val = JSON.parse("1e6"); console.log(typeof(val)); console.log(val)'
}
|
29 |
|
big-int() {
  # Print the digits 1234567890 repeated 1000 times (10,000 digits in total),
  # with no trailing newline.
  #
  # printf re-applies its format once per argument, and '%.0s' consumes an
  # argument while printing nothing.  This avoids forking seq and, unlike a
  # 'for i in ...' loop, does not leave a global variable 'i' behind.
  printf '1234567890%.0s' {1..1000}
}
|
35 |
|
36 | # Hm, decoding integers and floats doesn't have overflow cases
|
37 |
|
decode-huge-int() {
  # Feed the very long integer literal produced by big-int to each decoder.
  local digits
  digits=$(big-int)
  echo "$digits"

  # really big integer causes 100% CPU usage in Python 3
  #
  # Python releases that enforce the integer string conversion length limit
  # raise ValueError here instead.  Tolerate that failure, as the other survey
  # functions do for expected errors; otherwise errexit/pipefail would abort
  # before the node.js comparison below runs.
  echo "$digits" | python3 -c 'import json, sys; val = json.load(sys.stdin); print(type(val)); print(val)' || true

  # decodes to "Infinity"
  echo "$digits" | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'
}
|
49 |
|
decode-huge-float() {
  # Feed a very long decimal literal (the big-int digits followed by ".99") to
  # each decoder.
  local f
  f=$(big-int).99
  # Quoted so the value is printed verbatim, never word-split or globbed.
  echo "$f"

  # decodes to "inf"
  echo "$f" | python3 -c 'import json, sys; val = json.load(sys.stdin); print(type(val)); print(val)'

  # decodes to "Infinity"
  echo "$f" | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'
}
|
61 |
|
decode-syntax-errors() {
  # Show the error each decoder reports for malformed JSON.  Every decode is
  # allowed to fail ('|| true') so the survey keeps going under errexit.
  local doc

  for doc in '{3:4}' '[3:4]'; do
    python2 -c "import json; val = json.loads(\"$doc\"); print(type(val)); print(val)" || true
    echo
  done
  echo

  # This has good position information
  # It prints the line number, the line, and points to the token in the line
  # where the problem happened
  local -a js_literals=(
    '"{3: 4}"'
    '"[\n 3: 4\n]"'
    '"[\n\n \"hello "'
  )
  local js_literal
  for js_literal in "${js_literals[@]}"; do
    nodejs -e "var val = JSON.parse($js_literal); console.log(typeof(val)); console.log(val)" || true
  done
}
|
81 |
|
decode-empty-input() {
  # Decode the empty string with each decoder; failures are tolerated.
  local py_script='import json; val = json.loads(""); print(type(val)); print(val)'
  local js_script='var val = JSON.parse(""); console.log(typeof(val)); console.log(val)'

  python3 -c "$py_script" || true

  printf '\n\n'

  nodejs -e "$js_script" || true
}
|
90 |
|
decode-trailing-data() {
  # Decode a document with an extra ']' after a complete value; failures are
  # tolerated.
  local py_script='import json; val = json.loads("[]]"); print(type(val)); print(val)'
  local js_script='var val = JSON.parse("[]]"); console.log(typeof(val)); console.log(val)'

  # Extra data
  python3 -c "$py_script" || true

  printf '\n\n'

  nodejs -e "$js_script" || true
}
|
100 |
|
101 |
|
decode-invalid-escape() {
  # Decode a string containing the escape \' with each decoder; failures are
  # tolerated.  The document is passed as a command-line argument, so it needs
  # no extra quoting inside the Python / JS source.

  # single quoted escape not valid

  # Make sure the scratch directory exists; otherwise the redirect below fails
  # in a fresh checkout.
  mkdir -p _tmp
  cat >_tmp/json.txt <<'EOF'
"\'"
EOF
  local json
  json=$(cat _tmp/json.txt)

  python3 -c 'import json, sys; val = json.loads(sys.argv[1]); print(type(val)); print(val)' \
    "$json" || true

  echo
  echo

  nodejs -e 'var val = JSON.parse(process.argv[1]); console.log(typeof(val)); console.log(val)' \
    "$json" || true
}
|
119 |
|
encode-list-dict-indent() {
  # Encode small dicts and lists with indent 4 in Python and in JS.  The same
  # literals are valid source in both languages.
  local -a samples=('{}' '{"a": 42}' '{"a": 42, "b": 43}' '[]' '[42]')
  local sample

  echo 'PYTHON'
  for sample in "${samples[@]}"; do
    python3 -c "import json; val = $sample; print(json.dumps(val, indent=4))"
  done
  echo

  echo 'JS'
  for sample in "${samples[@]}"; do
    nodejs -e "var val = $sample; console.log(JSON.stringify(val, null, 4))"
  done
  echo
}
|
137 |
|
encode-no-indent() {
  # Encode one nested value with several "no indentation" settings.
  local py_head='import json; val = {"a": 42, "b": [1, 2, 3]}; print(json.dumps(val, '
  local js_head='var val = {"a": 42, "b": [1, 2, 3]}; console.log(JSON.stringify(val, null, '
  local opt

  echo 'PYTHON'

  # indent=None:          has a space
  # separators=[...]:     you control it like this
  # indent=-1, indent=0:  -1 and 0 are the same in Python
  for opt in 'indent=None' 'separators=[",", ":"]' 'indent=-1' 'indent=0'; do
    python3 -c "${py_head}${opt}))"
  done
  echo

  echo 'JS'
  # The same two indent values, -1 and 0, passed to JSON.stringify
  for opt in -1 0; do
    nodejs -e "${js_head}${opt}))"
  done
}
|
156 |
|
encode-obj-cycles() {
  # Try to encode a dict and a list that contain themselves; failures are
  # tolerated.
  local setup

  for setup in 'val = {}; val["k"] = val' 'val = []; val.append(val)'; do
    python3 -c "import json; $setup; print(json.dumps(val))" || true
    echo
  done

  # Better error message than Python!
  # TypeError: Converting circular structure to JSON
  # --> starting at object with constructor 'Object'
  # --- property 'k' closes the circle
  for setup in 'var val = {}; val["k"] = val' 'var val = []; val.push(val)'; do
    nodejs -e "$setup; console.log(JSON.stringify(val))" || true
    echo
  done
}
|
174 |
|
multiple-refs() {
  # Encode a list that holds the same inner list twice (shared reference, no
  # cycle) with Python, node.js and Oils.
  local py_script='import json; mylist = [1,2,3]; val = [mylist, mylist]; print(repr(val)); print(json.dumps(val))'
  local js_script='var mylist = [1,2,3]; var val = [mylist, mylist]; console.log(val); console.log(JSON.stringify(val))'
  local oils_script='var mylist = [1,2,3]; var val = [mylist, mylist]; = val; json write (val); pp asdl (val)'

  # Python prints a tree
  python3 -c "$py_script"
  echo

  # Same with node.js
  nodejs -e "$js_script"
  echo

  # Same with Oils
  bin/osh -c "$oils_script"
  echo
}
|
188 |
|
oils-cycles() {
  # Build a dict that contains itself, then print it with '=', 'pp line',
  # 'pp asdl' and 'json write' in YSH.
  local ysh_script='var d = {}; setvar d.key = d; = d; pp line (d); pp asdl (d); json write (d)'

  bin/ysh -c "$ysh_script"
}
|
192 |
|
surrogate-pair() {
  # Round-trip a JSON string through each decoder and encoder.
  #
  # Args:
  #   $1 - JSON text to decode (default: an escaped surrogate pair).  It is
  #        spliced into a single-quoted Python / JS string literal, so it must
  #        not contain a single quote.
  local json=${1:-'"\ud83e\udd26"'}

  # Build each program in a double-quoted string so $json is never word-split
  # or glob-expanded; the program always reaches the interpreter as one
  # argument.
  local py_script="import json; s = json.loads(r'$json'); print(json.dumps(s))"
  # NOTE: a JS string literal processes \uXXXX escapes itself, before
  # JSON.parse sees the text.
  local js_script="var s = JSON.parse('$json'); console.log(JSON.stringify(s))"

  # Hm it actually escapes.  I thought it would use raw UTF-8
  python2 -c "$py_script"
  echo

  python3 -c "$py_script"
  echo

  # This doesn't escape
  nodejs -e "$js_script"
  echo
}
|
207 |
|
surrogate-half() {
  # Run the surrogate-pair round trip on only the first \u escape of the pair.
  # Round trips correctly!
  surrogate-pair '"\ud83e"'
}
|
214 |
|
encode-nan() {
  # Encode NaN and decode the result again; failures are tolerated.
  #
  # Wow Python doesn't conform to spec!!
  # https://docs.python.org/3.8/library/json.html#infinite-and-nan-number-values
  #
  # allow_nan=False and parse_constant alter the behavior
  local lenient='import json; val = float("nan"); s = json.dumps(val); print(s); print(json.loads(s))'
  local strict='import json; val = float("nan"); s = json.dumps(val, allow_nan=False); print(s); print(json.loads(s))'
  local py

  for py in python2 python3; do
    "$py" -c "$lenient" || true
    echo
  done

  python3 -c "$strict" || true
  echo

  # nodejs uses null
  nodejs -e 'var val = NaN; var s = JSON.stringify(val); console.log(s); console.log(JSON.parse(s));' || true
  echo
}
|
234 |
|
encode-inf() {
  # Encode negative infinity and decode the result again; failures are
  # tolerated.
  #
  # Again, Python doesn't conform to spec
  local lenient='import json; val = float("-inf"); print(val); s = json.dumps(val); print(s); print(json.loads(s))'
  local strict='import json; val = float("-inf"); print(val); s = json.dumps(val, allow_nan=False); print(s); print(json.loads(s))'
  local py

  for py in python2 python3; do
    "$py" -c "$lenient" || true
    echo
  done

  python3 -c "$strict" || true
  echo

  # nodejs uses null again
  nodejs -e 'var val = Number.NEGATIVE_INFINITY; console.log(val); var s = JSON.stringify(val); console.log(s); console.log(JSON.parse(s));' || true
  echo
}
|
251 |
|
encode-bad-type() {
  # Try to encode values that are not JSON data (a module, the JSON object, a
  # function); failures are tolerated.
  python3 -c 'import json; print(json.dumps(json))' || true
  echo

  # {} or undefined - BAD!
  local js_script
  for js_script in \
    'console.log(JSON.stringify(JSON));' \
    'function f() { return 42; }; console.log(JSON.stringify(f));'; do
    nodejs -e "$js_script" || true
  done
  echo
}
|
261 |
|
encode-binary-data() {
  # Try to encode the single byte 0xff; failures are tolerated.
  local py_script='import json; print(json.dumps(b"\xff"))'
  local py

  # python2: utf-8 codec can't decode byte -- so it does UTF-8 decoding during
  #          encoding, which makes sense
  # python3: can't serialize bytes type
  for py in python2 python3; do
    "$py" -c "$py_script" || true
    echo
  done

  # there is no bytes type?  \xff is a code point in JS
  local js_literal
  for js_literal in '"\xff"' '"\u{ff}"'; do
    nodejs -e "console.log(JSON.stringify($js_literal));" || true
  done
  echo
}
|
277 |
|
decode-utf8-in-surrogate-range() {
  # Decode the byte sequence ed a0 be as UTF-8 in each runtime.
  python2 -c 'b = "\xed\xa0\xbe"; print(repr(b.decode("utf-8")))'
  echo

  # Hm Python 3 gives an error here!
  python3 -c 'b = b"\xed\xa0\xbe"; print(repr(b.decode("utf-8")))' || true
  echo

  # First byte list: valid
  # Second byte list: can't decode!
  local byte_list
  for byte_list in '0xce, 0xbc' '0xed, 0xa0, 0xbe'; do
    nodejs -e "var u = new Uint8Array([$byte_list]); var string = new TextDecoder(\"utf-8\").decode(u); console.log(string);"
    echo
  done
}
|
294 |
|
pairs() {
  # Print a deeply nested JSON array, with no trailing newline.
  #
  # Args:
  #   $1 - number of extra nesting levels around the innermost [42,43]
  #
  # Example:
  #   pairs 2  ->  [[42,[42,43]]]
  local depth=$1
  local pad=''

  # Make a string of $depth spaces, then turn every space into one opening or
  # one closing fragment.  This needs no seq fork and no loop variable, so
  # nothing leaks into the caller's scope.  A depth <= 0 adds no nesting.
  if (( depth > 0 )); then
    printf -v pad '%*s' "$depth" ''
  fi

  printf '[%s43]%s' "${pad// /[42,}" "${pad// /]}"
}
|
308 |
|
decode-deeply-nested() {
  # Feed a 40200-level nested array to every decoder; failures are tolerated.
  local nested
  nested=$(pairs 40200)

  local py_script='import json, sys; print(repr(json.load(sys.stdin)))'

  # RuntimeError
  python2 -c "$py_script" <<<"$nested" || true

  # RecursionError
  python3 -c "$py_script" <<<"$nested" || true

  # Hm node.js handles it fine?  Probably doesn't have a stackful parser.
  # [ [ [ [Array] ] ] ]
  nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));' \
    <<<"$nested" || true

  bin/osh -c 'json read; = _reply' <<<"$nested" || true

  # Hm this works past 40K in C++!  Then segmentation fault.  We could put an
  # artificial limit on it.
  local native_osh=_bin/cxx-opt/osh
  ninja "$native_osh"
  "$native_osh" -c 'json read; = _reply; echo $[len(_reply)]' <<<"$nested" || true
}
|
331 |
|
332 | "$@"
|