OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

577 lines, 330 significant
1#!/usr/bin/env bash
2#
3# Test scripts found in the wild for both correctness and performance.
4#
5# Usage:
6# benchmarks/osh-runtime.sh <function name>
7
# Strict mode: fail on unset variables, on any failing pipeline stage, and on
# unhandled non-zero exit statuses.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of this script's directory.
# "$0" is quoted so a checkout path containing whitespace survives, and '&&'
# keeps 'pwd' from silently reporting the wrong directory when 'cd' fails.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
13
14source benchmarks/common.sh # tsv-concat
15source benchmarks/id.sh # print-job-id
16source soil/common.sh # find-dir-html
17source test/common.sh
18source test/tsv-lib.sh # tsv-row
19
# Where this benchmark writes its output, relative to the current directory.
readonly BASE_DIR='_tmp/osh-runtime'

# TODO: Move to ../oil_DEPS
# Absolute, because run-tasks changes directories before using it.
readonly TAR_DIR="$PWD/_deps/osh-runtime"

#
# Dependencies
#

# Source tree whose ./configure is run by the configure.cpython workload.
readonly PY27_DIR="$PWD/Python-2.7.13"
30
31# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  ### Print the file name of each testdata tarball, one per line

  printf '%s\n' \
    tcc-0.9.26.tar.bz2 \
    yash-2.46.tar.xz \
    ocaml-4.06.0.tar.xz \
    util-linux-2.40.tar.xz
}
40
download() {
  ### Fetch every tarball listed by 'tarballs' into $TAR_DIR
  #
  # Files that already exist are not fetched again (wget --no-clobber).

  mkdir -p "$TAR_DIR"

  # -I {} already runs wget once per input line.  GNU xargs treats -n and -I
  # as mutually exclusive (it warns and ignores -n), so '-n 1' is not passed.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
    'https://www.oilshell.org/blob/testdata/{}'
}
46
extract() {
  ### Unpack every .bz2 and .xz archive found in $TAR_DIR, in place
  #
  # Traces each tar command, times the whole loop, then lists $TAR_DIR.

  local archive

  set -x
  # Only the directory part is quoted: the glob and brace expansion must stay
  # active, but a $TAR_DIR containing whitespace must not be split.
  time for archive in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$archive"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
57#
58# Computation
59#
60
run-tasks() {
  ### Run every task read from stdin, writing results under raw_out_dir
  #
  # Args:
  #   $1: output directory, relative to the current directory
  #
  # Stdin: one task per line, whitespace-separated:
  #   host_name sh_path workload
  #
  # Outputs, all under raw_out_dir:
  #   times.tsv          one row appended per task by time-tsv
  #   files-$task_id/    STDOUT.txt; also the working dir of some workloads
  #   gc-$task_id.txt    whatever the shell writes to OILS_GC_STATS_FD, only
  #                      for sh_path matching *_bin/*/osh
  #
  # Dies on an unknown workload.

  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  local host_name sh_path workload
  local sh_run_path working_dir files_out_dir save_new_files conf_dir
  local stdout_file gc_stats_file
  local -a argv time_argv

  # NOTE(review): the commands run below inherit this loop's stdin, i.e. the
  # task list.  A workload that reads stdin would swallow the remaining
  # tasks -- confirm none of them do.
  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    # Absolute paths and bare names like 'dash' are used as is.  A relative
    # path containing a slash is made absolute, since we may change dirs.
    sh_run_path=$sh_path
    case $sh_path in
      /*)
        ;;
      */*)
        sh_run_path=$PWD/$sh_path
        ;;
    esac

    working_dir=''
    files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Set to a non-empty value to copy the files a task creates in its
    # working dir into files_out_dir.
    save_new_files=''

    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      bin-true)
        argv=( testdata/osh-runtime/bin_true.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    stdout_file="$files_out_dir/STDOUT.txt"
    gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.  The redirect targets are quoted: they
    # derive from $PWD, and an unquoted path with whitespace is an
    # "ambiguous redirect" error.
    case $sh_path in
      *_bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP -print0 \
        | xargs -0 -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
188
# Sorted by priority for test-oils.sh osh-runtime --num-shells 3

declare -r -a ALL_WORKLOADS=(
  # scripts under testdata/osh-runtime/
  hello-world bin-true

  # configure scripts
  configure.cpython configure.util-linux
  configure.ocaml configure.tcc configure.yash

  abuild-print-help
)
203
print-workloads() {
  ### for help
  #
  # Prints each entry of ALL_WORKLOADS on its own line, indented by a space.

  # Declared local so the loop variable does not leak into the caller's scope.
  local w
  for w in "${ALL_WORKLOADS[@]}"; do
    echo " $w"
  done
}
211
print-tasks() {
  ### Print one task row (via tsv-row) per shell and workload
  #
  # Args:
  #   $1: host name, first column of every row
  #   $2: native osh binary, listed after bash, dash and bin/osh
  #
  # Env:
  #   QUICKLY: if non-empty, only hello-world and bin-true are emitted;
  #            otherwise every entry of ALL_WORKLOADS is

  local host_name=$1
  local osh_native=$2

  # Locals, so nothing leaks into the caller's scope.
  local -a workloads
  local sh_path workload

  if test -n "${QUICKLY:-}"; then
    workloads=(
      hello-world
      bin-true
      #configure.util-linux
      #abuild-print-help
    )
  else
    workloads=( "${ALL_WORKLOADS[@]}" )
  fi

  # $osh_native stays unquoted to keep the existing behavior: an empty value
  # adds no shell to the list.
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
233
print-tasks-xshar() {
  ### Print task rows (via tsv-row) for a limited number of shells/workloads
  #
  # Args:
  #   $1: host name, first column of every row
  #   $2: native osh binary, listed before bash and dash
  #   $3: number of iterations over the whole task list (default 1)
  #   $4: number of shells per iteration (default 1)
  #   $5: number of workloads per shell, taken from the front of
  #       ALL_WORKLOADS (default 1)
  #
  # A limit that is never reached (e.g. 0, or larger than the list) means the
  # whole list is used.

  local host_name=$1
  local osh_native=$2

  local num_iters=${3:-1}
  local num_shells=${4:-1}
  local num_workloads=${5:-1}

  local i s w sh_path workload

  for (( i = 0; i < num_iters; i++ )); do

    # The counters are reset on every pass.  They used to be initialized once,
    # so the '-eq' cutoffs could only fire for the first shell of the first
    # iteration; every later pass printed the full list.
    s=0
    for sh_path in $osh_native bash dash; do

      w=0
      for workload in "${ALL_WORKLOADS[@]}"; do
        tsv-row "$host_name" "$sh_path" "$workload"

        w=$(( w + 1 ))  # cut off at specified workloads
        if test "$w" -eq "$num_workloads"; then
          break
        fi
      done

      s=$(( s + 1 ))  # cut off at specified shells
      if test "$s" -eq "$num_shells"; then
        break
      fi

    done
  done
}
266
run-tasks-wrapper() {
  ### reads tasks from stdin
  #
  # Args:
  #   $1: host name ('no-host' or 'lenny'), added as a column to gc_stats.tsv
  #   $2: output directory; created if missing
  #
  # Writes into the output directory: times.tsv (header here, rows from
  # run-tasks), gc_stats.tsv, and a copy of _tmp/provenance.tsv.

  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # reads tasks from stdin
  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host.
  # Only the directory is quoted, so the gc-*.txt glob still expands.
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
297
measure() {
  ### For release and CI
  #
  # Pipes the task list from print-tasks into run-tasks-wrapper.

  local host_name=$1    # 'no-host' or 'lenny'
  local raw_out_dir=$2  # _tmp/osh-runtime or ../../benchmark-data/osh-runtime
  local osh_native=$3   # $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA

  # Quoted so each value reaches the callee as exactly one argument.
  print-tasks "$host_name" "$osh_native" \
    | run-tasks-wrapper "$host_name" "$raw_out_dir"
}
306
stage1() {
  ### Concatenate the newest raw results into $BASE_DIR/stage1
  #
  # Args:
  #   $1: directory holding raw.<machine>.* result dirs
  #       (default $BASE_DIR; an empty string also selects the default)
  #   $2: if non-empty, use only this machine's results; otherwise use
  #       $MACHINE1 and $MACHINE2
  #
  # Writes times.tsv, gc_stats.tsv and provenance.tsv.  Output always goes
  # under $BASE_DIR, whatever $1 is.

  local base_dir=${1:-$BASE_DIR}  # _tmp/osh-runtime or ../benchmark-data/osh-runtime
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p "$out_dir"

  # Globs are in lexicographical order, which works for our dates.  So the
  # last match is the newest run.  Only the literal parts are quoted; the
  # trailing * must stay a glob.
  local -a raw_dirs=()
  if test -n "$single_machine"; then
    local -a a=( "$base_dir/raw.$single_machine."* )
    raw_dirs+=( "${a[-1]}" )
  else
    local -a a=( "$base_dir/raw.$MACHINE1."* )
    local -a b=( "$base_dir/raw.$MACHINE2."* )
    raw_dirs+=( "${a[-1]}" "${b[-1]}" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local d
  for d in "${raw_dirs[@]}"; do
    raw_times+=( "$d/times.tsv" )
    raw_gc_stats+=( "$d/gc_stats.tsv" )
    raw_provenance+=( "$d/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
342
print-report() {
  ### Print the HTML report for the TSV files in a directory
  #
  # Args:
  #   $1: directory containing elapsed.tsv, page_faults.tsv, max_rss.tsv,
  #       gc_stats.tsv, details.tsv, details_io.tsv, shells.tsv, hosts.tsv
  #
  # Headings go through cmark and tables through tsv2html.  A link to
  # files.html is added only if that file exists next to the input dir.
  #
  # All here-doc delimiters are quoted: none of the text needs expansion.

  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<'EOF'
 <body class="width60">
 <p id="home-link">
 <a href="/">oilshell.org</a>
 </p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

- [Elapsed Time](#elapsed-time)
- [Minor Page Faults](#page-faults)
- [Memory Usage](#memory-usage)
- [GC Stats](#gc-stats)
- [rusage Details](#rusage-details)
- [More Details](#more-details)
- [Shell and Host](#shell-and-host)

<a name="elapsed-time" />

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<'EOF'
<a name="page-faults" />

### Minor Page Faults
EOF

  tsv2html "$in_dir/page_faults.tsv"

  cmark <<'EOF'
<a name="memory-usage" />

### Memory Usage (Max Resident Set Size in MB)

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<'EOF'
<a name="gc-stats" />

### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<'EOF'
<a name="rusage-details" />

### rusage Details
EOF
  tsv2html "$in_dir/details.tsv"

  cmark <<'EOF'
<a name="more-details" />

### More Details
EOF
  tsv2html "$in_dir/details_io.tsv"

  cmark <<'EOF'
<a name="shell-and-host" />

### Shell and Host
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  # Only show files.html link on a single machine
  if test -f "$(dirname "$in_dir")/files.html"; then
    cmark <<'EOF'
---

[raw files](files.html)
EOF
  fi

  cat <<'EOF'
 </body>
</html>
EOF
}
437
test-oils-run() {
  ### Smoke-test an osh binary, then benchmark it and record the results
  #
  # Args:
  #   $1: osh binary to measure
  #   $2: number of iterations (default 1)
  #   $3: number of shells (default 1)
  #   $4: number of workloads (default 1)
  #
  # Env:
  #   XSHAR_DIR: directory containing benchmarks/time_.py
  #
  # Results go to $BASE_DIR/<job id>.<host name>.

  local osh=$1

  # flags passed by caller
  local num_iters=${2:-1}
  local num_shells=${3:-1}
  local num_workloads=${4:-1}

  local time_py=$XSHAR_DIR/benchmarks/time_.py
  "$time_py" --tsv --rusage -- \
    "$osh" -c 'echo "smoke test: hi from benchmarks/osh-runtime.sh"'

  local host_name
  host_name=$(hostname)

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$host_name" "$job_id" _tmp \
    bash dash "$osh"

  # e.g. 2024-05-01__10-11-12.ci-vm-name
  local raw_out_dir="$BASE_DIR/$job_id.$host_name"
  mkdir -p "$raw_out_dir"

  # Similar to 'measure', for soil-run and release
  print-tasks-xshar "$host_name" "$osh" \
    "$num_iters" "$num_shells" "$num_workloads" \
    | run-tasks-wrapper "$host_name" "$raw_out_dir"

  # Note: 'stage1' in soil-run is a trivial concatenation, so we can create input for
  # benchmarks/report.R. We don't need that here

  # TODO: upload
  # _tmp/
  #   osh-runtime/
  #   shell-id/
  #   host-id/
}
478
soil-run() {
  ### Run it on just this machine, and make a report
  #
  # Env:
  #   OSH_CPP_NINJA_BUILD: ninja target / path of the native osh binary
  #
  # Deletes and recreates $BASE_DIR, downloads and extracts the testdata,
  # builds osh, measures, then runs report stages 1 to 3.

  # ':?' aborts instead of running 'rm -r -f' on an empty path.
  rm -r -f "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a osh_bin=( "$OSH_CPP_NINJA_BUILD" )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make $BASE_DIR/files.html, so index.html can potentially link to it
  find-dir-html "$BASE_DIR" files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
519
520#
521# Debugging
522#
523
compare-cpython() {
  ### Debugging aid: diff the configure.cpython output of each shell
  #
  # Uses the last directory matching the glob below.  Prints its times.tsv
  # header and configure.cpython rows, then writes recursive diffs of bash's
  # output dir against each other shell's into _tmp/cpython-configure/.
  #
  # Turns errexit off (diff exits non-zero when files differ) and does not
  # turn it back on.

  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  # 'grep -F' replaces fgrep, which GNU grep flags as obsolescent.
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task IDs of the configure.cpython run for each shell.  The diffs below
  # use these rather than repeating the literal numbers.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  return

  # Not reached: kept for ad hoc debugging; remove the 'return' to use.
  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
576
577"$@"