OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

458 lines, 274 significant
1#!/usr/bin/env bash
2#
3# Test scripts found in the wild for both correctness and performance.
4#
5# Usage:
6# benchmarks/osh-runtime.sh <function name>
7
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory above the one containing this script.
# "$0" is quoted so a path with spaces works.  '&&' is used instead of ';'
# because errexit is not active inside the command substitution: with ';' a
# failed cd would silently produce the caller's working directory instead.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)

# These paths are resolved relative to the current directory.
source benchmarks/common.sh  # tsv-concat
source benchmarks/id.sh  # print-job-id
source soil/common.sh  # find-dir-html
source test/common.sh
source test/tsv-lib.sh  # tsv-row

readonly BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
readonly TAR_DIR=$PWD/_deps/osh-runtime  # Make it absolute

#
# Dependencies
#

readonly PY27_DIR=$PWD/Python-2.7.13
30
31# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  ### Print the name of each testdata tarball, one per line

  local -a names=(
    tcc-0.9.26.tar.bz2
    yash-2.46.tar.xz
    ocaml-4.06.0.tar.xz
    util-linux-2.40.tar.xz
  )
  printf '%s\n' "${names[@]}"
}
40
download() {
  ### Fetch each tarball listed by 'tarballs' into $TAR_DIR
  #
  # wget runs with --no-clobber, so files already present are not fetched
  # again.

  mkdir -p "$TAR_DIR"

  # -I {} already runs one wget per input line.  GNU xargs treats -n and -I as
  # mutually exclusive, so the former '-n 1' only produced a warning.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory "$TAR_DIR" \
    'https://www.oilshell.org/blob/testdata/{}'
}
46
extract() {
  ### Unpack every .bz2 and .xz archive in $TAR_DIR into $TAR_DIR, then list it

  local archive

  set -x
  # The expansions are quoted so a $TAR_DIR containing spaces still globs and
  # extracts correctly; the glob itself stays outside the quotes.
  time for archive in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$archive"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
57#
58# Computation
59#
60
run-tasks() {
  ### Run each task read from stdin under time-tsv, saving its output
  #
  # Args:
  #   $1: directory for raw output; a relative path is resolved against $PWD
  # Stdin:
  #   one task per line, as whitespace-separated fields:
  #   host_name  sh_path  workload
  # Writes, for task number N (counting from 0):
  #   $1/times.tsv             row appended by time-tsv
  #   $1/files-N/STDOUT.txt    stdout of the workload
  #   $1/gc-N.txt              GC stats, only when sh_path matches *_bin/*/osh

  local raw_out_dir=$1
  case $raw_out_dir in
    /*)  # Already absolute
      ;;
    *)  # Make it absolute, because we change dirs
      raw_out_dir="$PWD/$raw_out_dir"
      ;;
  esac

  local task_id=0
  local host_name sh_path workload
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Nothing sets this to a non-empty value right now, so the two
    # 'save_new_files' sections below are currently inactive.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.  stdin is /dev/null so that the workload
    # cannot consume the rest of the task list, which the enclosing
    # 'while read' loop is still reading from this function's stdin.
    case $sh_path in
      *_bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" \
          < /dev/null > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" < /dev/null > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
184
print-tasks() {
  ### Call tsv-row once per (shell, workload) pair: host_name sh_path workload
  #
  # Args:
  #   $1: host name passed as the first field of every row
  #   $2: native osh binary.  It is expanded unquoted in the shell list below,
  #       so it is word-split: it may name several shells, or none.
  # Env:
  #   QUICKLY: if set and non-empty, only hello-world and abuild-print-help
  #            are emitted
  #
  # Shells are iterated in the order: bash, dash, bin/osh, then $2.

  local host_name=$1
  local osh_native=$2

  local -a workloads=(
    hello-world
    abuild-print-help

    configure.cpython
    configure.util-linux
    configure.ocaml
    configure.tcc
    configure.yash
  )

  if test -n "${QUICKLY:-}"; then
    # Just do the first two
    workloads=(
      #configure.util-linux
      hello-world
      abuild-print-help
    )
  fi

  # Declared local so the loop variables don't leak into the caller's scope.
  local sh_path workload
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
215
measure() {
  ### Run all tasks on this machine and collect the raw results in $2
  #
  # Args:
  #   $1: host name, e.g. 'no-host' or 'lenny'
  #   $2: directory that receives times.tsv, gc_stats.tsv, provenance.tsv and
  #       whatever run-tasks writes
  #   $3: native osh binary: $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA
  #   $4: still accepted so existing callers keep working, but not used

  local host_name=$1
  local raw_out_dir=$2
  local osh_native=$3

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  print-tasks "$host_name" "$osh_native" | run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host.  The directory is quoted
  # while the gc-*.txt glob is left outside the quotes so it still expands.
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
245
stage1() {
  ### Concatenate the latest raw results of each machine into $BASE_DIR/stage1
  #
  # Args:
  #   $1: directory holding the raw.$HOST.* dirs (default: $BASE_DIR), e.g.
  #       _tmp/osh-runtime or ../benchmark-data/osh-runtime
  #   $2: if non-empty, use only this host; otherwise use $MACHINE1 followed
  #       by $MACHINE2
  #
  # Writes times.tsv, gc_stats.tsv and provenance.tsv.  The output directory
  # is always $BASE_DIR/stage1, regardless of $1.

  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p "$out_dir"

  local -a hosts
  if test -n "$single_machine"; then
    hosts=( "$single_machine" )
  else
    hosts=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local host latest
  local -a matches
  for host in "${hosts[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the one we want.  Only the variable parts are quoted; the
    # trailing '*' must stay unquoted to expand.
    matches=( "$base_dir"/raw."$host".* )
    latest=${matches[-1]}

    raw_times+=( "$latest/times.tsv" )
    raw_gc_stats+=( "$latest/gc_stats.tsv" )
    raw_provenance+=( "$latest/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
281
print-report() {
  ### Print the HTML report to stdout, built from the TSV files in $1
  #
  # Args:
  #   $1: directory containing elapsed.tsv, page_faults.tsv, max_rss.tsv,
  #       gc_stats.tsv, details.tsv, details_io.tsv, shells.tsv and hosts.tsv
  #
  # A link to files.html is appended only if that file exists in the parent
  # directory of $1.

  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oilshell.org</a>
    </p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [oil/benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.  Parse time is included.

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<EOF
### Minor Page Faults
EOF

  tsv2html "$in_dir/page_faults.tsv"

  cmark <<EOF
### Memory Usage (Max Resident Set Size in MB)
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<EOF
### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<EOF
### Details of All Tasks
EOF
  tsv2html "$in_dir/details.tsv"

  cmark <<EOF
### I/O Details
EOF
  tsv2html "$in_dir/details_io.tsv"

  cmark <<'EOF'

### Shell and Host Details
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  # Only show files.html link on a single machine
  if test -f "$(dirname "$in_dir")/files.html"; then
    cmark <<'EOF'
---

[raw files](files.html)
EOF
  fi

  cat <<EOF
  </body>
</html>
EOF
}
355
test-oils-run() {
  ### Print a fixed greeting that names this script

  printf '%s\n' 'Hello from benchmarks/osh-runtime.sh'
}
359
soil-run() {
  ### Run it on just this machine, and make a report
  #
  # Steps: wipe and recreate $BASE_DIR, download and extract the testdata,
  # build the native osh with ninja, record provenance, measure all tasks,
  # then run stage1 and the stage2 / stage3 steps of benchmarks/report.sh.
  #
  # Reads $OSH_CPP_NINJA_BUILD and $BASE_DIR.

  rm -r -f "$BASE_DIR"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a osh_bin=( "$OSH_CPP_NINJA_BUILD" )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make files.html under $BASE_DIR (_tmp/osh-runtime), so index.html can
  # potentially link to it
  find-dir-html "$BASE_DIR" files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
400
401#
402# Debugging
403#
404
compare-cpython() {
  ### Debugging aid: diff the configure.cpython output trees between shells
  #
  # Uses the last directory matching the glob below, and writes recursive
  # diffs of bash vs. each other shell to _tmp/cpython-configure/, printing a
  # diffstat of each.
  #
  # Args:
  #   $1: optional; if non-empty, also print diffs of STDOUT.txt, pyconfig.h
  #       and config.log for bash vs. osh-cpp.  Default: skip them.
  #
  # NOTE: turns errexit off for the rest of the shell, because diff exits
  # non-zero whenever the trees differ.

  local show_details=${1:-}

  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Numbers of the files-N dirs to compare.  NOTE(review): presumably the
  # task_id of configure.cpython for each shell in that run -- verify against
  # the times.tsv rows printed above.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  # The per-file diffs below used to sit after an unconditional 'return'.
  # They are still skipped by default, but can now be requested with $1.
  if test -z "$show_details"; then
    return
  fi

  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
457
458"$@"