OILS / pea / TEST.sh View on Github | oilshell.org

336 lines, 128 significant
1#!/usr/bin/env bash
2#
3# Quick test for a potential rewrite of mycpp.
4#
5# Usage:
6# pea/TEST.sh <function name>
7
# Strict mode: unset variables, failing pipeline stages and failing commands
# are all fatal.
set -o nounset -o pipefail -o errexit

source test/common.sh  # run-test-funcs
source devtools/common.sh

# find python3 in /wedge PATH component
source build/dev-shell.sh

# This is just like the yapf problem in devtools/format.sh !
# Pea needs a newer version of MyPy -- one that supports 'math' -- so drop any
# inherited PYTHONPATH and point it at the current directory only.
unset PYTHONPATH
export PYTHONPATH=.

# Location of the virtualenv that install-mypy creates
readonly MYPY_VENV=_tmp/mypy-venv
23
install-mypy() {
  ### Recreate the virtualenv at $MYPY_VENV and pip-install mypy into it

  local venv_dir=$MYPY_VENV

  # Start from scratch: remove any previous venv before creating a new one
  rm -rfv "$venv_dir"
  python3 -m venv "$venv_dir"

  # Activate first, so the pip below is the venv's pip
  source "$venv_dir/bin/activate"
  python3 -m pip install mypy

  # Says 1.5.1 (compiled: yes)
  mypy-version
}
38
mypy-version() {
  ### Activate the venv at $MYPY_VENV and print the mypy version

  source "$MYPY_VENV/bin/activate"
  python3 -m mypy --version
}
43
44#
45# Run Pea
46#
47
pea-main() {
  ### Run pea/pea_main.py, forwarding every argument unchanged

  local pea_bin=pea/pea_main.py
  "$pea_bin" "$@"
}
51
parse-one() {
  ### Run the Pea 'parse' action on the given arguments

  local -a argv=(parse "$@")
  pea-main "${argv[@]}"
}
55
translate-cpp() {
  ### Used by mycpp/NINJA-steps.sh
  #
  # Runs the Pea 'cpp' action on the given arguments.

  local action=cpp
  pea-main "$action" "$@"
}
61
all-files() {
  ### Print every path Pea should process, one per line
  #
  # Output is the contents of pea/oils-typecheck.txt, followed by each .pyi
  # file found exactly one directory below the current directory.
  #
  # Can't run this on Soil because we only have build/py.sh py-source, not
  # 'minimal'
  #
  # Update pea/oils-typecheck.txt with build/dynamic-deps.sh pea-hack

  cat pea/oils-typecheck.txt

  local path
  for path in */*.pyi; do
    # When nothing matches, bash leaves the pattern itself in the list.  Skip
    # it rather than emitting '*/*.pyi' as if it were a file name.
    if ! test -e "$path"; then
      continue
    fi
    printf '%s\n' "$path"
  done
}
74
parse-all() {
  ### Feed every path from all-files to 'pea-main parse', timing the pipeline
  #
  # xargs re-invokes this script ($0) with 'pea-main parse <paths...>'.

  # "$0" is quoted so that a script path containing whitespace still reaches
  # xargs as a single word.
  time all-files | xargs --verbose -- "$0" pea-main parse
}
78
79# Good illustration of "distributing your overhead"
80#
81# Total work goes up, while latency goes down. To a point. Then it goes back
82# up.
83
84# batch size 30
85#
86# real 0m0.342s
87# user 0m0.735s
88# sys 0m0.059s
89#
90# batch size 20
91#
92# real 0m0.305s
93# user 0m0.993s
94# sys 0m0.081s
95#
96# batch size 15
97#
98# real 0m0.299s
99# user 0m1.110s
100# sys 0m0.123s
101#
102# batch size 10
103#
104# real 0m0.272s
105# user 0m1.362s
106# sys 0m0.145s
107
batch-size() {
  ### Print "<num_procs> <files_per_process>" for a workload of $1 files
  #
  # Args:
  #   $1: number of files to divide among processes
  #
  # num_procs is whatever nproc reports.  Both the divisor and the resulting
  # batch size are clamped to at least 1, so this works on a single-CPU
  # machine (where nproc - 1 is 0) and never produces a batch size of 0, which
  # 'xargs -n' rejects.

  local num_files=$1

  local num_procs
  num_procs=$(nproc)

  # Use (p-1) as a fudge so we don't end up with more batches than processors
  local divisor=$(( num_procs - 1 ))
  if test "$divisor" -lt 1; then
    divisor=1
  fi

  local files_per_process=$(( num_files / divisor ))
  if test "$files_per_process" -lt 1; then
    files_per_process=1
  fi

  echo "$num_procs $files_per_process"
}
119
demo-par() {
  ### Demo parallelism of Python processes
  #
  # Times parse-all (everything in one go), then times parallel xargs runs of
  # 'parse-one' at several batch sizes, ending with the size batch-size
  # computed.

  # Declare everything this function assigns, so nothing leaks into the
  # caller's scope.
  local num_files num_procs optimal n

  num_files=$(all-files | wc -l)

  # 103 files

  # With lastpipe, 'read' runs in this shell instead of a subshell, so the
  # variables it assigns are still set after the pipeline.
  shopt -s lastpipe
  batch-size "$num_files" | read -r num_procs optimal

  echo "Parsing $num_files files with $num_procs parallel processes"
  echo "Optimal batch size is $optimal"

  echo

  echo 'All at once:'
  time parse-all > /dev/null 2>&1
  echo

  # 5 is meant to be suboptimal
  for n in 50 30 20 10 5 "$optimal"; do
    echo "batch size $n"
    time all-files | xargs --verbose -P "$num_procs" -n "$n" -- \
      "$0" parse-one > /dev/null 2>&1
    echo
  done
}
148
149# - 0.40 secs to parse
150# - 0.56 secs pickle, so that's 160 ms
151# Then
152#
153# - 0.39 secs load pickle
154#
155# That's definitely slower than I want. It's 6.6 MB of data.
156#
157# So
158# - parallel parsing can be done in <300 ms
159# - parallel pickling
160# - serial unpickling (reduce) in 390 ms
161#
162# So now we're at ~700 ms or so. Can we type check in 300 ms in pure Python?
163#
164# What if we compress the generated ASDL? Those are very repetitive.
165
166# Problem statement:
167
_serial-pickle() {
  ### Dump pickles for all files into _tmp/serial, then time loading them back
  #
  # xargs re-invokes this script ($0) with 'pea-main dump-pickles <paths...>'
  # and its stdout is collected in one file.

  mkdir -p _tmp
  local out=_tmp/serial

  # "$0" and "$out" are quoted so paths with whitespace stay single words
  time all-files | xargs --verbose -- "$0" pea-main dump-pickles > "$out"

  ls -l -h "$out"

  echo 'loading'
  time pea-main load-pickles < "$out"
}
179
180# 1.07 seconds
serial-pickle() {
  ### Time a complete _serial-pickle run, executed by re-invoking this script

  # "$0" is quoted so a script path containing whitespace is not split
  time "$0" _serial-pickle
}
182
pickle-one() {
  ### Dump pickles for the given files into _tmp/p/<PID of this shell>

  local out=_tmp/p/$$
  pea-main dump-pickles "$@" > "$out"
}
186
_par-pickle() {
  ### Dump pickles in parallel into _tmp/p, then time loading all of them
  #
  # xargs re-invokes this script ($0) as 'pickle-one <paths...>', using the
  # process count and batch size that batch-size reports.

  # Declare everything this function assigns, so nothing leaks into the
  # caller's scope.
  local num_files num_procs optimal

  num_files=$(all-files | wc -l)

  # With lastpipe, 'read' runs in this shell instead of a subshell, so the
  # variables it assigns are still set after the pipeline.
  shopt -s lastpipe
  batch-size "$num_files" | read -r num_procs optimal

  # Start with an empty output directory
  local dir=_tmp/p
  rm -r -f -v "$dir"
  mkdir -p "$dir"

  time all-files | xargs --verbose -P "$num_procs" -n "$optimal" -- \
    "$0" pickle-one

  ls -l -h "$dir"

  # This takes 410-430 ms?  Wow that's slow.
  time cat "$dir"/* | pea-main load-pickles
}
205
206# Can get this down to ~700 ms
207#
208# Note parsing serially in a single process is 410 ms !!! So this is NOT a win
209# unless we have more work besides parsing to parallelize.
210#
211# We can extract constants and forward declarations in parallel I suppose.
212#
# BUT immutable string constants have to be de-duplicated!  Though I guess that
# is a natural 'reduce' step.
215#
216# And we can even do implementation and prototypes in parallel too?
217#
218# I think the entire algorithm can be OPTIMISTIC without serialized type
219# checking?
220#
221# I think
222#
223# a = 5
224# b = a # do not know the type without a global algorithm
225#
226# Or I guess you can do type checking within a function. Functions require
227# signatures. So yes let's do that in parallel.
228#
229# --
230#
231# The ideal way to do this would be to split Oils up into MODULES, like
232#
233# _debuild/
234# builtin/
235# core/
236# data_lang/
237# frontend/
238# osh/
239# ysh/
240# Smaller: pgen2/ pylib/ tea/ tools/
241#
242# And modules are acyclic, and can compile on their own with dependencies. If
243# you pick random .py files and spit out header files, I think they won't compile.
244# The forward declarations and constants will work, but the prototype won't.
245
par-pickle() {
  ### Time a complete _par-pickle run, executed by re-invoking this script

  # "$0" is quoted so a script path containing whitespace is not split
  time "$0" _par-pickle
}
247
sum1() {
  ### Sum the first field of every line on stdin and print the total
  #
  # 'sum + 0' forces a numeric result, so empty input prints 0 rather than a
  # blank line.

  awk '{ sum += $1 } END { print sum + 0 }'
}
251
sum-sizes() {
  ### Read one path per stdin line; print the sum of the sizes find reports
  #
  # Each input line is handed to its own 'find' invocation, which prints
  # "<size> <path>" for everything it visits.  sum1 then adds up the sizes.

  local fmt='%s %p\n'
  xargs -I {} -- find {} -printf "$fmt" \
    | sum1
}
255
size-ratio() {
  ### Print the total size of the source files, then of the files in _tmp/p

  # 1.96 MB of source code
  all-files | sum-sizes

  # 7.13 MB of pickle files
  # The paths must arrive one per line, because sum-sizes passes each input
  # line to find as a single path.  That is why 'echo _tmp/p/*', which puts
  # them all on one line, doesn't work here.
  printf '%s\n' _tmp/p/* | sum-sizes
}
267
268# Only 47 ms!
269# I want the overhead to be less than 1 second:
270# 1. parallel parsing + pickle
271# 2. serial unpickle + type check
272# 3. starting the process
273#
274# So unpickling is slow.
275
osh-overhead() {
  ### Time bin/osh running a trivial 'echo hi' command

  local -a cmd=(bin/osh -c 'echo hi')
  time "${cmd[@]}"
}
279
280
281# MyPy dev version takes 10.2 seconds the first time (without their mypyc
282# speedups)
283#
284# 0.150 seconds the second time, WITHOUT code changes
285# 0.136 seconds
286
287# 4.1 seconds: whitespace change
288# 3.9 seconds: again, and this is on my fast hoover machine
289
290# 5.0 seconds - Invalid type!
291# 4.9 seconds - again invalid
292
293
mypy-compare() {
  ### Invoke the check-oils task of devtools/types.sh

  local checker=devtools/types.sh
  "$checker" check-oils
}
297
check-types() {
  ### Time 'mypy --strict' on pea/pea_main.py, using the venv if it exists

  # install-mypy creates this.  May not be present in CI machine, in which
  # case python3 is run without activating anything.
  local venv_activate="$MYPY_VENV/bin/activate"
  if test -f "$venv_activate"; then
    source "$venv_activate"
  fi

  time python3 -m mypy --strict pea/pea_main.py
}
308
test-translate() {
  ### Run translate-cpp on bin/oils_for_unix.py

  local entry_point=bin/oils_for_unix.py
  translate-cpp "$entry_point"
}
312
test-syntax-error() {
  ### Check that parse-one exits with status 1 on each kind of bad input
  #
  # The status is captured with '|| status=$?' instead of turning errexit off
  # for the whole shell, so whatever runs after this function still has
  # errexit enabled.

  local status

  # error in Python syntax
  status=0
  parse-one pea/testdata/py_err.py || status=$?
  assert "$status" -eq 1

  # error in signature
  status=0
  parse-one pea/testdata/sig_err.py || status=$?
  assert "$status" -eq 1

  # error in assignment
  status=0
  parse-one pea/testdata/assign_err.py || status=$?
  assert "$status" -eq 1
}
328
run-tests() {
  ### Announce the run, then delegate to run-test-funcs
  #
  # Making this separate for soil/worker.sh

  printf '%s\n' 'Running test functions'
  run-test-funcs
}
335
336"$@"