pea/TEST.sh

OILS / pea / TEST.sh View on Github | oilshell.org

336 lines, 128 significant

1	#!/usr/bin/env bash
2	#
3	# Quick test for a potential rewrite of mycpp.
4	#
5	# Usage:
6	# pea/TEST.sh <function name>
7
8	: ${LIB_OSH=stdlib/osh}
9	source $LIB_OSH/bash-strict.sh
10	source $LIB_OSH/no-quotes.sh
11
12	source test/common.sh # run-test-funcs
13	source devtools/common.sh
14
15	source build/dev-shell.sh # find python3 in /wedge PATH component
16
# This is just like the yapf problem in devtools/format.sh !
# Pea needs a newer version of MyPy -- one that supports 'math'
#
# So drop whatever PYTHONPATH was inherited and export one that contains only
# the current directory.
unset PYTHONPATH
PYTHONPATH=.
export PYTHONPATH

# Location of the virtualenv that install-mypy creates.
MYPY_VENV=_tmp/mypy-venv
readonly MYPY_VENV
23
install-mypy() {
  ### Recreate the virtualenv at $MYPY_VENV from scratch and install mypy in it

  local venv_dir=$MYPY_VENV

  # Throw away any previous virtualenv, then build a fresh one
  rm -r -f -v "$venv_dir"
  python3 -m venv "$venv_dir"

  # Activate it in this shell before running pip
  source "$venv_dir/bin/activate"
  python3 -m pip install mypy

  # Says 1.5.1 (compiled: yes)
  mypy-version
}
38
mypy-version() {
  ### Print the mypy version, after activating the virtualenv at $MYPY_VENV

  source "$MYPY_VENV/bin/activate"
  python3 -m mypy --version
}
43
44	#
45	# Run Pea
46	#
47
pea-main() {
  ### Run pea/pea_main.py, forwarding every argument unchanged

  pea/pea_main.py "$@"
}
51
parse-one() {
  ### Run the 'parse' action of pea-main on the given arguments

  pea-main parse "$@"
}
55
translate-cpp() {
  ### Used by mycpp/NINJA-steps.sh
  #
  # Runs the 'cpp' action of pea-main on the given arguments.

  pea-main cpp "$@"
}
61
all-files() {
  ### Print the paths that Pea operates on, one per line
  #
  # Output is the contents of pea/oils-typecheck.txt, followed by every .pyi
  # file that sits exactly one directory below the current directory.

  # Can't run this on Soil because we only have build/py.sh py-source, not
  # 'minimal'

  # Update this file with build/dynamic-deps.sh pea-hack

  cat pea/oils-typecheck.txt

  # Glob over the subdirectories; a literal '/.pyi' would match no stub files
  local path
  for path in */*.pyi; do
    echo "$path"
  done
}
74
parse-all() {
  ### Parse every file listed by all-files, and time it
  #
  # The paths are piped to xargs, which re-invokes this script ($0) with
  # 'pea-main parse' followed by the paths.

  time all-files | xargs --verbose -- "$0" pea-main parse
}
78
79	# Good illustration of "distributing your overhead"
80	#
81	# Total work goes up, while latency goes down. To a point. Then it goes back
82	# up.
83
84	# batch size 30
85	#
86	# real 0m0.342s
87	# user 0m0.735s
88	# sys 0m0.059s
89	#
90	# batch size 20
91	#
92	# real 0m0.305s
93	# user 0m0.993s
94	# sys 0m0.081s
95	#
96	# batch size 15
97	#
98	# real 0m0.299s
99	# user 0m1.110s
100	# sys 0m0.123s
101	#
102	# batch size 10
103	#
104	# real 0m0.272s
105	# user 0m1.362s
106	# sys 0m0.145s
107
batch-size() {
  ### Print "<num_procs> <files_per_process>" for spreading files over CPUs
  #
  # Args:
  #   $1: total number of files to process
  #
  # num_procs is whatever 'nproc' reports.  files_per_process is always at
  # least 1.

  local num_files=$1

  local num_procs
  num_procs=$(nproc)

  # Use (p-1) as a fudge so we don't end up with more batches than processors.
  # With a single processor p-1 is 0, so clamp it to avoid dividing by zero.
  local divisor=$(( num_procs - 1 ))
  if (( divisor < 1 )); then
    divisor=1
  fi

  # Callers pass this to 'xargs -n', which rejects 0
  local files_per_process=$(( num_files / divisor ))
  if (( files_per_process < 1 )); then
    files_per_process=1
  fi

  echo "$num_procs $files_per_process"
}
119
demo-par() {
  ### Demo parallelism of Python processes
  #
  # Times parse-all (everything in one process), then times parsing with
  # xargs -P at several batch sizes, ending with the size batch-size suggests.

  local num_files
  num_files=$(all-files | wc -l)

  # 103 files

  # lastpipe runs 'read' in this shell, so the variables it sets are visible
  # after the pipeline
  shopt -s lastpipe
  local num_procs optimal
  batch-size $num_files | read -r num_procs optimal

  echo "Parsing $num_files files with $num_procs parallel processes"
  echo "Optimal batch size is $optimal"

  echo

  echo 'All at once:'
  time parse-all > /dev/null 2>&1
  echo

  # 5 is meant to be suboptimal
  local n
  for n in 50 30 20 10 5 $optimal; do
    echo "batch size $n"
    time all-files | xargs --verbose -P $num_procs -n $n -- \
      "$0" parse-one > /dev/null 2>&1
    echo
  done
}
148
149	# - 0.40 secs to parse
150	# - 0.56 secs pickle, so that's 160 ms
151	# Then
152	#
153	# - 0.39 secs load pickle
154	#
155	# That's definitely slower than I want. It's 6.6 MB of data.
156	#
157	# So
158	# - parallel parsing can be done in <300 ms
159	# - parallel pickling
160	# - serial unpickling (reduce) in 390 ms
161	#
162	# So now we're at ~700 ms or so. Can we type check in 300 ms in pure Python?
163	#
164	# What if we compress the generated ASDL? Those are very repetitive.
165
166	# Problem statement:
167
_serial-pickle() {
  ### Dump pickles for all files into _tmp/serial, then load them back
  #
  # Both steps are timed.  The dump goes through xargs, which re-invokes this
  # script ($0) with 'pea-main dump-pickles' followed by the paths.

  mkdir -p _tmp
  local tmp=_tmp/serial

  time all-files | xargs --verbose -- "$0" pea-main dump-pickles > $tmp

  ls -l -h $tmp

  echo 'loading'
  time pea-main load-pickles < $tmp
}
179
180	# 1.07 seconds
serial-pickle() {
  ### Time _serial-pickle, run in a fresh invocation of this script

  time $0 _serial-pickle
}
182
pickle-one() {
  ### Dump pickles for the given arguments into _tmp/p/<PID of this shell>

  local out_path=_tmp/p/$$
  pea-main dump-pickles "$@" > "$out_path"
}
186
_par-pickle() {
  ### Dump pickles in parallel into _tmp/p, then load them all back serially
  #
  # The number of processes and the files per process come from batch-size.

  local num_files
  num_files=$(all-files | wc -l)

  # lastpipe runs 'read' in this shell, so the variables it sets are visible
  # after the pipeline
  shopt -s lastpipe
  local num_procs optimal
  batch-size $num_files | read -r num_procs optimal

  # Start with an empty output directory
  local dir=_tmp/p
  rm -r -f -v $dir
  mkdir -p $dir

  time all-files | xargs --verbose -P $num_procs -n $optimal -- "$0" pickle-one

  ls -l -h $dir

  # This takes 410-430 ms?  Wow that's slow.
  time cat $dir/* | pea-main load-pickles
}
205
206	# Can get this down to ~700 ms
207	#
208	# Note parsing serially in a single process is 410 ms !!! So this is NOT a win
209	# unless we have more work besides parsing to parallelize.
210	#
211	# We can extract constants and forward declarations in parallel I suppose.
212	#
# BUT immutable string constants have to be de-duplicated! Though I guess that
214	# is a natural 'reduce' step.
215	#
216	# And we can even do implementation and prototypes in parallel too?
217	#
218	# I think the entire algorithm can be OPTIMISTIC without serialized type
219	# checking?
220	#
221	# I think
222	#
223	# a = 5
224	# b = a # do not know the type without a global algorithm
225	#
226	# Or I guess you can do type checking within a function. Functions require
227	# signatures. So yes let's do that in parallel.
228	#
229	# --
230	#
231	# The ideal way to do this would be to split Oils up into MODULES, like
232	#
233	# _debuild/
234	# builtin/
235	# core/
236	# data_lang/
237	# frontend/
238	# osh/
239	# ysh/
240	# Smaller: pgen2/ pylib/ tools/
241	#
242	# And modules are acyclic, and can compile on their own with dependencies. If
243	# you pick random .py files and spit out header files, I think they won't compile.
244	# The forward declarations and constants will work, but the prototype won't.
245
par-pickle() {
  ### Time _par-pickle, run in a fresh invocation of this script

  time $0 _par-pickle
}
247
sum1() {
  ### Print the sum of the first field of every line on stdin
  #
  # Prints 0 when stdin is empty.

  # 'sum + 0' forces a number, so an untouched sum prints as 0 rather than as
  # an empty line
  awk '{ sum += $1 } END { print sum + 0 }'
}
251
sum-sizes() {
  ### Read paths from stdin, one per line, and print the sum of their sizes
  #
  # find prints '<size> <path>' for each path; sum1 adds up the first column.

  xargs -I {} -- find {} -printf '%s %p\n' | sum1
}
255
size-ratio() {
  ### Print the total size of the source files, then of the files in _tmp/p

  # all-files
  # echo _tmp/p/*

  # 1.96 MB of source code
  all-files | sum-sizes

  # 7.13 MB of pickle files
  # Weirdly echo _tmp/p/* doesn't work here
  local f
  for f in _tmp/p/*; do
    echo "$f"
  done | sum-sizes
}
267
268	# Only 47 ms!
269	# I want the overhead to be less than 1 second:
270	# 1. parallel parsing + pickle
271	# 2. serial unpickle + type check
272	# 3. starting the process
273	#
274	# So unpickling is slow.
275
osh-overhead() {
  ### Time how long bin/osh takes to run a trivial command

  time bin/osh -c 'echo hi'
}
279
280
281	# MyPy dev version takes 10.2 seconds the first time (without their mypyc
282	# speedups)
283	#
284	# 0.150 seconds the second time, WITHOUT code changes
285	# 0.136 seconds
286
287	# 4.1 seconds: whitespace change
288	# 3.9 seconds: again, and this is on my fast hoover machine
289
290	# 5.0 seconds - Invalid type!
291	# 4.9 seconds - again invalid
292
293
mypy-compare() {
  ### Run 'devtools/types.sh check-oils'

  devtools/types.sh check-oils
}
297
check-types() {
  ### Time 'mypy --strict' on pea/pea_main.py

  # install-mypy creates this.  May not be present in CI machine.
  local activate_script=$MYPY_VENV/bin/activate
  if [[ -f $activate_script ]]; then
    source "$activate_script"
  fi

  time python3 -m mypy --strict pea/pea_main.py
}
308
test-translate() {
  ### Run translate-cpp on bin/oils_for_unix.py

  translate-cpp bin/oils_for_unix.py
}
312
test-syntax-error() {
  ### Assert that parse-one exits with status 1 on each erroneous test file

  # parse-one is expected to fail below, so errexit is turned off
  set +o errexit

  # In order: error in Python syntax, error in signature, error in assignment
  local which
  for which in py_err sig_err assign_err; do
    parse-one pea/testdata/$which.py
    nq-assert $? -eq 1
  done
}
328
run-tests() {
  ### Announce the run, then run the test functions via run-test-funcs
  #
  # Making this separate for soil/worker.sh

  echo 'Running test functions'
  run-test-funcs
}
335
336	"$@"