OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

438 lines, 263 significant
#!/usr/bin/env bash
#
# Test scripts found in the wild for both correctness and performance.
#
# Usage:
#   benchmarks/osh-runtime.sh <function name>
#
# The 'source' lines and the $PWD-based constants below are resolved against
# the current directory, so invoke this from the directory that contains
# benchmarks/, soil/ and test/.

set -o nounset
set -o pipefail
set -o errexit

# Quote $0 so a checkout path containing spaces works, and chain with && so a
# failed 'cd' can't silently produce the wrong directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)

source benchmarks/common.sh  # tsv-concat
source benchmarks/id.sh  # print-job-id
source soil/common.sh  # find-dir-html
source test/common.sh
source test/tsv-lib.sh  # tsv-row

readonly BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
readonly TAR_DIR=$PWD/_deps/osh-runtime  # Make it absolute

#
# Dependencies
#

readonly PY27_DIR=$PWD/Python-2.7.13
30
# Print the file names of the tarballs to fetch, one per line.
#
# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  # Quoted delimiter: the list is literal, nothing should be expanded.
  cat <<'EOF'
tcc-0.9.26.tar.bz2
yash-2.46.tar.xz
ocaml-4.06.0.tar.xz
openvswitch-3.3.0.tar.gz
EOF
}
40
# Download every tarball listed by 'tarballs' into $TAR_DIR.
#
# Globals: TAR_DIR (read)
# Files that already exist are not fetched again (wget --no-clobber).
download() {
  mkdir -p "$TAR_DIR"

  # -I already runs one command per input line; combining it with -n is
  # rejected as mutually exclusive by GNU xargs, so -n 1 is dropped.
  # -t echoes each command to stderr before running it.
  tarballs | xargs -I {} -t -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
    'https://www.oilshell.org/blob/testdata/{}'
}
46
# Unpack every .gz, .bz2 and .xz archive found in $TAR_DIR into $TAR_DIR,
# tracing the commands and timing the whole loop, then list the directory.
#
# Globals: TAR_DIR (read)
extract() {
  set -x
  local f
  time for f in "$TAR_DIR"/*.{gz,bz2,xz}; do
    # A glob with no match is left as the literal pattern; don't hand that
    # to tar.
    test -e "$f" || continue
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
#
# Computation
#
60
# Run every task read from stdin under time-tsv and save its output.
#
# Args:
#   $1: raw output dir, relative to the current directory
#
# Stdin: one task per line, three whitespace-separated fields:
#   host_name sh_path workload
#
# Globals: TAR_DIR, PY27_DIR (read)
#
# Written under the raw output dir:
#   times.tsv         passed to time-tsv with --append
#   files-$task_id/   STDOUT.txt of the task; also the working directory for
#                     configure.cpython
#   gc-$task_id.txt   only for shells whose path matches *_bin/*/osh
#
# task_id counts input lines starting at 0.  An unknown workload aborts
# via 'die'.
run-tasks() {
  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  # Keep the fields filled in by 'read' out of the global scope.
  local host_name sh_path workload

  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Never set to a non-empty value in this function; flip it by hand to
    # copy files created by the workload into $files_out_dir.
    local save_new_files=''

    local -a argv=()
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.openvswitch)
            conf_dir='openvswitch-3.3.0'
            ;;
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.
    #
    # stdin is /dev/null because this loop's own stdin is the task list: a
    # workload that reads stdin would otherwise swallow the remaining tasks.
    case $sh_path in
      *_bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" \
          < /dev/null > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" < /dev/null > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      # -exec instead of a pipe to xargs, so odd file names survive
      find . -type f -newer __TIMESTAMP \
        -exec cp --verbose {} "$files_out_dir" \;
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
179
# Print one row per (shell, workload) pair via tsv-row:
#   host_name  sh_path  workload
#
# Args:
#   $1: host name to put in the first column
#   $2: path of the native osh binary, added after bash, dash and bin/osh
#
# Env:
#   QUICKLY: if non-empty, only configure.openvswitch is emitted per shell
print-tasks() {
  local host_name=$1
  local osh_native=$2

  local -a workloads=(
    hello-world
    abuild-print-help

    configure.cpython
    configure.openvswitch
    configure.ocaml
    configure.tcc
    configure.yash
  )

  if test -n "${QUICKLY:-}"; then
    # Just do a single workload
    workloads=(
      configure.openvswitch
      #hello-world
      #abuild-print-help
    )
  fi

  local sh_path workload
  # $osh_native stays unquoted to keep the existing behavior: an empty value
  # contributes no shell rather than a row with an empty sh_path.
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
210
# Run all tasks for one host and collect the raw results.
#
# Args:
#   $1: host name, e.g. 'no-host' or 'lenny'
#   $2: raw output dir (created if missing)
#   $3: native osh binary, $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA
#   $4: accepted for compatibility with existing callers; not used here
#
# Leaves in the raw output dir: times.tsv (header plus one row per task),
# the per-task files written by run-tasks, gc_stats.tsv, and a copy of
# _tmp/provenance.tsv.
measure() {
  local host_name=$1
  local raw_out_dir=$2
  local osh_native=$3

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  print-tasks "$host_name" "$osh_native" | run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
239
# Concatenate the most recent raw results of each machine into
# $BASE_DIR/stage1/{times,gc_stats,provenance}.tsv.
#
# Args:
#   $1: dir holding the raw.HOST.* directories; defaults to $BASE_DIR
#       (_tmp/osh-runtime or ../benchmark-data/osh-runtime)
#   $2: optional single machine name.  If empty, $MACHINE1 and $MACHINE2 are
#       used, in that order.
#
# Aborts via 'die' when a machine has no raw.HOST.* directory.
stage1() {
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # always under _tmp/osh-runtime
  mkdir -p "$out_dir"

  local -a hosts
  if test -n "$single_machine"; then
    hosts=( "$single_machine" )
  else
    hosts=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local host latest n
  for host in "${hosts[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the newest run.
    local -a candidates=( "$base_dir"/raw."$host".* )
    n=${#candidates[@]}

    # Without nullglob an unmatched pattern stays literal, so also check that
    # the last entry really is a directory.
    if test "$n" -eq 0 || ! test -d "${candidates[n-1]}"; then
      die "No raw results matching $base_dir/raw.$host.*"
    fi
    latest=${candidates[n-1]}

    raw_times+=( "$latest/times.tsv" )
    raw_gc_stats+=( "$latest/gc_stats.tsv" )
    raw_provenance+=( "$latest/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
275
# Print the HTML report to stdout.
#
# Args:
#   $1: dir containing elapsed.tsv, max_rss.tsv, gc_stats.tsv, details.tsv,
#       shells.tsv and hosts.tsv
#
# Markdown fragments go through cmark and tables through tsv2html.  A link to
# files.html is added only if that file exists in the parent of $1.
print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<'EOF'
  <body class="width60">
    <p id="home-link">
      <a href="/">oilshell.org</a>
    </p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [oil/benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.  Parse time is included.

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<'EOF'
### Memory Usage (Max Resident Set Size in MB)
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<'EOF'
### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<'EOF'
### Details of All Tasks
EOF
  tsv2html "$in_dir/details.tsv"


  cmark <<'EOF'

### Shell and Host Details
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  # Only show files.html link on a single machine
  if test -f "$(dirname "$in_dir")/files.html"; then
    cmark <<'EOF'
---

[raw files](files.html)
EOF
  fi

  cat <<'EOF'
  </body>
</html>
EOF
}
339
# Run the benchmark on just this machine and build the report.
#
# Globals: BASE_DIR, OSH_CPP_NINJA_BUILD (read)
#
# Steps: wipe $BASE_DIR, download and extract the test data, build the native
# osh with ninja, record provenance, measure, then run stage1 and
# benchmarks/report.sh stage2 / stage3.
soil-run() {
  ### Run it on just this machine, and make a report

  # :? refuses to run 'rm -r' with an empty path if BASE_DIR is ever unset
  rm -r -f -- "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  # (unquoted on purpose: the value is split into one array entry per word)
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make $BASE_DIR/files.html, so index.html can potentially link to it
  find-dir-html "$BASE_DIR" files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
380
#
# Debugging
#
384
# Debugging aid: compare the files left by the configure.cpython task under
# bash against those left under dash, osh-py and osh-cpp, using the last
# ../benchmark-data/osh-runtime/*.hoover.2024* directory.
#
# Prints the chosen dir, the header and configure.cpython rows of times.tsv,
# and a diffstat per comparison.  Full diffs are written to
# _tmp/cpython-configure/bash-vs-*.txt.
#
# Turns errexit off (and leaves it off), because diff exits non-zero when the
# trees differ.
compare-cpython() {
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  # An unmatched glob stays literal; fail clearly instead of diffing nothing.
  if ! test -d "$dir"; then
    echo "compare-cpython: no such directory: $dir" >&2
    return 1
  fi

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task IDs of configure.cpython under each shell; files-$id holds the
  # corresponding output.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  local entry other other_id report
  for entry in "dash:$dash_id" "osh-py:$osh_py_id" "osh-cpp:$osh_cpp_id"; do
    other=${entry%%:*}
    other_id=${entry##*:}
    report=$out_dir/bash-vs-$other.txt

    echo "bash vs. $other"
    diff -u --recursive "$dir/files-$bash_id" "$dir/files-$other_id" > "$report"
    diffstat "$report"
    echo
  done

  return

  # Not reached: more detailed diffs.  Remove the 'return' above to enable.
  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
437
# Dispatch: run the function named by the first argument, passing it the rest.
"$@"