OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

479 lines, 274 significant
1#!/usr/bin/env bash
2#
3# Test scripts found in the wild for both correctness and performance.
4#
5# Usage:
6# benchmarks/osh-runtime.sh <function name>
7
# Strict mode: fail on unset variables, on any failing pipeline stage, and on
# any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory containing this script.
# "$0" is quoted so a path with whitespace works, and '&&' makes a failed 'cd'
# abort the script instead of silently recording the wrong directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
13
14source benchmarks/common.sh # tsv-concat
15source benchmarks/id.sh # print-job-id
16source soil/common.sh # find-dir-html
17source test/common.sh
18source test/tsv-lib.sh # tsv-row
19
# Relative directory that this script's results are written under.
BASE_DIR=_tmp/osh-runtime
readonly BASE_DIR

# TODO: Move to ../oil_DEPS
# Anchored at $PWD to make it absolute.
TAR_DIR="$PWD/_deps/osh-runtime"
readonly TAR_DIR

#
# Dependencies
#

# Python 2.7 source tree; its ./configure script is one of the workloads.
PY27_DIR="$PWD/Python-2.7.13"
readonly PY27_DIR
30
31# NOTE: Same list in oilshell.org/blob/run.sh.
# Print the file names of the testdata tarballs, one per line.
tarballs() {
  printf '%s\n' \
    tcc-0.9.26.tar.bz2 \
    yash-2.46.tar.xz \
    ocaml-4.06.0.tar.xz \
    util-linux-2.40.tar.xz
}
40
# Download every tarball listed by 'tarballs' into $TAR_DIR.
#
# Globals: TAR_DIR (read)
# Files that already exist are not fetched again (wget --no-clobber).
download() {
  mkdir -p "$TAR_DIR"

  # -I {} already runs one command per input line.  It is mutually exclusive
  # with -n, so -n is not passed (GNU xargs warns when both are given).
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
    'https://www.oilshell.org/blob/testdata/{}'
}
46
# Unpack every .bz2 and .xz archive found in $TAR_DIR into $TAR_DIR, then list
# the directory.
#
# Globals: TAR_DIR (read)
# The extraction loop is traced (set -x) and timed.
extract() {
  local f

  set -x
  # Paths are quoted so that a $TAR_DIR containing whitespace still works.
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
57#
58# Computation
59#
60
# Run every benchmark task read from stdin and record its timing.
#
# Stdin:     one task per line with three whitespace-separated fields:
#            host_name, sh_path, workload (measure pipes print-tasks into this).
# Arguments: $1 - output directory, relative to the current directory
# Globals:   PY27_DIR, TAR_DIR (read)
# Outputs:   under $1:
#              times.tsv         - one row appended per task by time-tsv
#              files-<id>/       - per-task directory holding STDOUT.txt
#              gc-<id>.txt       - GC stats, only for shells matching *_bin/*/osh
# Uses the helpers log, die and time-tsv, which are defined outside this block.
run-tasks() {
  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  local task_id=0
  local host_name sh_path workload
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Manual switch: when set to a non-empty value, files the workload creates
    # in its working directory are copied to $files_out_dir afterwards.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.
    #
    # stdin is redirected from /dev/null because the loop's own stdin is the
    # task list: a workload that read stdin would otherwise consume the
    # remaining tasks.
    case $sh_path in
      *_bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" \
          < /dev/null > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" < /dev/null > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
184
# Print one TSV row (host_name, sh_path, workload) per benchmark task.
#
# Arguments: $1 - host name to put in the first column
#            $2 - path of the native osh binary; may be empty
# Globals:   QUICKLY (read) - when non-empty, only the two fastest workloads
#            are emitted
# Rows are produced by tsv-row, which is defined outside this block.
print-tasks() {
  local host_name=$1
  local osh_native=$2

  local -a workloads=(
    hello-world
    abuild-print-help

    configure.cpython
    configure.util-linux
    configure.ocaml
    configure.tcc
    configure.yash
  )

  if test -n "${QUICKLY:-}"; then
    # Just do the first two
    workloads=(
      #configure.util-linux
      hello-world
      abuild-print-help
    )
  fi

  local sh_path workload
  # $osh_native is left unquoted on purpose: an empty value adds no shell, and
  # a space-separated list adds several.
  # shellcheck disable=SC2086
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      # Quoted so each value stays exactly one TSV cell.
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
215
# Run all tasks for one host and collect the raw results in one directory.
#
# Arguments: $1 - host name, e.g. 'no-host' or 'lenny'
#            $2 - directory for raw output (created if missing)
#            $3 - native osh binary: $OSH_CPP_NINJA_BUILD or
#                 $OSH_CPP_BENCHMARK_DATA
#            $4 - optional; accepted but ignored (the original stored it in a
#                 local that was never read)
# Outputs:   in $2: times.tsv, gc_stats.tsv, a copy of _tmp/provenance.tsv,
#            plus whatever run-tasks writes there.
measure() {
  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2
  local osh_native=$3  # $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  print-tasks "$host_name" "$osh_native" | run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host.
  # Only the directory part is quoted, so the gc-*.txt glob still expands.
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
245
# Concatenate the newest raw results of each machine into $BASE_DIR/stage1.
#
# Arguments: $1 - directory holding raw.<machine>.<job> subdirectories;
#                 defaults to $BASE_DIR when empty or missing
#                 (_tmp/osh-runtime or ../benchmark-data/osh-runtime)
#            $2 - optional machine name; when given, only that machine is used,
#                 otherwise $MACHINE1 and $MACHINE2 are used
# Globals:   BASE_DIR, MACHINE1, MACHINE2 (read)
# Outputs:   $BASE_DIR/stage1/{times,gc_stats,provenance}.tsv
stage1() {
  local base_dir=${1:-$BASE_DIR}  # _tmp/osh-runtime or ../benchmark-data/osh-runtime
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p "$out_dir"

  local -a machines
  if test -n "$single_machine"; then
    machines=( "$single_machine" )
  else
    machines=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local machine latest
  local -a raw_dirs
  for machine in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the most recent run.  Only the '*' is left unquoted.
    raw_dirs=( "$base_dir/raw.$machine."* )
    latest=${raw_dirs[-1]}

    raw_times+=( "$latest/times.tsv" )
    raw_gc_stats+=( "$latest/gc_stats.tsv" )
    raw_provenance+=( "$latest/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
281
# Print the HTML report page to stdout.
#
# Arguments: $1 - directory containing the TSV tables to render
#                 (elapsed, page_faults, max_rss, gc_stats, details,
#                 details_io, shells, hosts)
# A link to files.html is added only when that file exists in the parent of $1.
# Uses benchmark-html-head, cmark and tsv2html, defined outside this block.
# All heredocs use a quoted delimiter: none of the text needs expansion.
print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<'EOF'
  <body class="width60">
    <p id="home-link">
      <a href="/">oilshell.org</a>
    </p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

- [Elapsed Time](#elapsed-time)
- [Minor Page Faults](#page-faults)
- [Memory Usage](#memory-usage)
- [GC Stats](#gc-stats)
- [rusage Details](#rusage-details)
- [More Details](#more-details)
- [Shell and Host](#shell-and-host)

<a name="elapsed-time" />

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<'EOF'
<a name="page-faults" />

### Minor Page Faults
EOF

  tsv2html "$in_dir/page_faults.tsv"

  cmark <<'EOF'
<a name="memory-usage" />

### Memory Usage (Max Resident Set Size in MB)

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<'EOF'
<a name="gc-stats" />

### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<'EOF'
<a name="rusage-details" />

### rusage Details
EOF
  tsv2html "$in_dir/details.tsv"

  cmark <<'EOF'
<a name="more-details" />

### More Details
EOF
  tsv2html "$in_dir/details_io.tsv"

  cmark <<'EOF'
<a name="shell-and-host" />

### Shell and Host
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  # Only show files.html link on a single machine
  if test -f "$(dirname "$in_dir")/files.html"; then
    cmark <<'EOF'
---

[raw files](files.html)
EOF
  fi

  cat <<'EOF'
  </body>
</html>
EOF
}
376
# Print a fixed greeting line that identifies this script.
test-oils-run() {
  printf '%s\n' 'Hello from benchmarks/osh-runtime.sh'
}
380
# Run the whole benchmark on this machine only and build the report.
#
# Globals: BASE_DIR, OSH_CPP_NINJA_BUILD (read)
# Steps:   wipe $BASE_DIR, fetch and unpack testdata, build the native osh,
#          record provenance, measure, then run report stages 1-3.
soil-run() {
  ### Run it on just this machine, and make a report

  # ${BASE_DIR:?} aborts instead of running 'rm -r -f' with an empty path.
  rm -r -f -- "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  # Unquoted on purpose, so the variable may hold several build targets.
  # shellcheck disable=SC2206
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make $BASE_DIR/files.html, so index.html can potentially link to it
  find-dir-html "$BASE_DIR" files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
421
422#
423# Debugging
424#
425
# Debugging aid: compare what the configure.cpython workload produced under
# each shell, using the newest matching run in ../benchmark-data/osh-runtime.
#
# Outputs: prints the chosen directory, the times.tsv header and the
#          configure.cpython rows; writes recursive diffs against the bash
#          results to _tmp/cpython-configure/ and prints a diffstat for each.
# Note:    turns errexit off (diff exits non-zero when files differ) and does
#          not turn it back on.
compare-cpython() {
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  printf '%s\n' "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task ids of the configure.cpython run for each shell; they select the
  # files-<id> directories compared below.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  # Everything below is skipped by this return; delete it to get the
  # file-by-file diffs as well.
  return

  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
478
# Dispatch: run the command line as a command, i.e. call the function named by
# the first argument and pass it the remaining arguments (see Usage at the top).
479"$@"