OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

442 lines, 264 significant
1#!/usr/bin/env bash
2#
3# Test scripts found in the wild for both correctness and performance.
4#
5# Usage:
6# benchmarks/osh-runtime.sh <function name>
7
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory that holds this script.
# "$0" is quoted so that a checkout path containing spaces works, and '&&'
# makes a failed 'cd' fail the whole assignment (and therefore the script,
# under errexit) instead of silently recording the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
13
14source benchmarks/common.sh # tsv-concat
15source benchmarks/id.sh # print-job-id
16source soil/common.sh # find-dir-html
17source test/common.sh
18source test/tsv-lib.sh # tsv-row
19
# Relative directory that receives this benchmark's results.
BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
# Anchored at $PWD to make it absolute.
TAR_DIR="$PWD/_deps/osh-runtime"

#
# Dependencies
#

PY27_DIR="$PWD/Python-2.7.13"

# Freeze all three so later code cannot reassign them.
readonly BASE_DIR TAR_DIR PY27_DIR
30
# NOTE: Same list in oilshell.org/blob/run.sh.
# Print the file name of each benchmark tarball, one per line, on stdout.
tarballs() {
  local -a names=(
    tcc-0.9.26.tar.bz2
    yash-2.46.tar.xz
    ocaml-4.06.0.tar.xz
    util-linux-2.40.tar.xz
  )
  printf '%s\n' "${names[@]}"
}
40
# Download every tarball named by 'tarballs' into $TAR_DIR, creating the
# directory if needed.  wget's --no-clobber skips files that already exist.
#
# Globals: TAR_DIR (read)
download() {
  mkdir -p "$TAR_DIR"
  # -I {} already runs one wget per input line.  Passing -n 1 as well makes
  # GNU xargs warn that the two options are mutually exclusive, so it is
  # omitted here.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory "$TAR_DIR" 'https://www.oilshell.org/blob/testdata/{}'
}
46
# Unpack every *.bz2 and *.xz archive found in $TAR_DIR into $TAR_DIR itself,
# tracing and timing the loop, then list the directory.
#
# Globals: TAR_DIR (read)
# Note: shell tracing is switched off on return, even if the caller had it on.
extract() {
  local f  # keep the loop variable out of the caller's scope

  set -x
  # All expansions are quoted so a $TAR_DIR containing spaces is handled.
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
57#
58# Computation
59#
60
# Run every benchmark task read from stdin and record its results.
#
# Args:
#   $1: directory for raw results.  A relative path is resolved against $PWD;
#       an absolute path is used as is.
# Stdin:
#   One task per line, as three whitespace-separated fields:
#   host_name, sh_path, workload.
# Files written under $1 (N is the zero-based task number):
#   times.tsv           passed to time-tsv as its --output, with --append
#   files-N/STDOUT.txt  stdout of task N
#   gc-N.txt            whatever the shell writes to descriptor 99; only for
#                       shells whose path matches *_bin/*/osh
# Globals: PY27_DIR, TAR_DIR (read)
run-tasks() {
  local raw_out_dir=$1
  case $raw_out_dir in
    /*)  # Already absolute; prefixing $PWD would produce a bogus path
      ;;
    *)   # Make it absolute, because we change dirs
      raw_out_dir="$PWD/$raw_out_dir"
      ;;
  esac

  # Declared local so the loop does not leak them into the caller's scope.
  local host_name sh_path workload

  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Never set to a non-empty value here, so the file-copying steps below
    # are currently disabled.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.
    # stdin comes from /dev/null: otherwise the workload shares the loop's
    # stdin and anything it reads would swallow the remaining task lines.
    case $sh_path in
      *_bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" \
          < /dev/null > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" < /dev/null > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
183
# Print the task list: one tsv-row call per (shell, workload) pair, with the
# fields host_name, sh_path, workload.
#
# Args:
#   $1: host name to put in every row
#   $2: path of the native osh binary, benchmarked after bash, dash and bin/osh
# Env:
#   QUICKLY: when non-empty, only a reduced workload list is used
print-tasks() {
  local host_name=$1
  local osh_native=$2

  local -a workloads=(
    hello-world
    abuild-print-help

    configure.cpython
    configure.util-linux
    configure.ocaml
    configure.tcc
    configure.yash
  )

  if test -n "${QUICKLY:-}"; then
    # Quick mode: a single workload
    workloads=(
      configure.util-linux
      #hello-world
      #abuild-print-help
    )
  fi

  # Loop variables are local so they do not clobber the caller's variables.
  local sh_path workload

  # $osh_native is left unquoted, as before, so an empty value adds no entry.
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
214
# Run all tasks for one host and collect the raw results in one directory.
#
# Args:
#   $1: host name, e.g. 'no-host' or 'lenny'
#   $2: directory for raw results (created if missing)
#   $3: native osh binary ($OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA)
#   $4: accepted for backward compatibility; it was never used
# Files written under $2: times.tsv, gc_stats.tsv, a copy of
# _tmp/provenance.tsv, plus whatever run-tasks writes there.
measure() {
  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2
  local osh_native=$3  # $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  print-tasks "$host_name" "$osh_native" | run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host.
  # Only the directory part is quoted; the gc-*.txt glob must still expand.
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
243
# Concatenate the newest raw results of each machine into $BASE_DIR/stage1.
#
# Args:
#   $1: directory holding the raw.<machine>.<job> dirs; defaults to $BASE_DIR
#       (_tmp/osh-runtime or ../benchmark-data/osh-runtime)
#   $2: optional machine name.  When given, only that machine is used;
#       otherwise $MACHINE1 and $MACHINE2 are combined, in that order.
# Files written: $BASE_DIR/stage1/{times,gc_stats,provenance}.tsv
stage1() {
  local base_dir=${1:-$BASE_DIR}  # _tmp/osh-runtime or ../benchmark-data/osh-runtime
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p "$out_dir"

  local -a machines
  if test -n "$single_machine"; then
    machines=( "$single_machine" )
  else
    machines=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local machine latest
  local -a raw_dirs
  for machine in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the newest run.  Only the '*' is left unquoted.
    raw_dirs=( "$base_dir/raw.$machine."* )
    latest=${raw_dirs[-1]}

    raw_times+=( "$latest/times.tsv" )
    raw_gc_stats+=( "$latest/gc_stats.tsv" )
    raw_provenance+=( "$latest/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
279
# Print the HTML report for this benchmark on stdout.
#
# Args:
#   $1: directory containing elapsed.tsv, max_rss.tsv, gc_stats.tsv,
#       details.tsv, shells.tsv and hosts.tsv
# The "raw files" link is only emitted when files.html exists in the parent
# of $1.
print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<EOF
 <body class="width60">
 <p id="home-link">
 <a href="/">oilshell.org</a>
 </p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [oil/benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself. Parse time is included.

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<EOF
### Memory Usage (Max Resident Set Size in MB)
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<EOF
### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<EOF
### Details of All Tasks
EOF
  tsv2html "$in_dir/details.tsv"


  cmark <<'EOF'

### Shell and Host Details
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  # Only show files.html link on a single machine.
  # Both expansions are quoted so a path with spaces stays one 'test' operand.
  if test -f "$(dirname "$in_dir")/files.html"; then
    cmark <<'EOF'
---

[raw files](files.html)
EOF
  fi

  cat <<EOF
 </body>
</html>
EOF
}
343
# Run the benchmark on just this machine, and make a report.
#
# Steps: wipe and recreate $BASE_DIR, download and extract the tarballs,
# build the native osh with ninja, record provenance, measure, then run
# stage1, stage2 and stage3.
#
# Globals: BASE_DIR, OSH_CPP_NINJA_BUILD (read)
soil-run() {
  # ':?' aborts rather than expanding to nothing, so an empty BASE_DIR can
  # never turn this into 'rm -r -f' with no operand or the wrong one.
  rm -r -f "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a osh_bin=( "$OSH_CPP_NINJA_BUILD" )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make $BASE_DIR/files.html (_tmp/osh-runtime/files.html), so index.html
  # can potentially link to it
  find-dir-html "$BASE_DIR" files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
384
385#
386# Debugging
387#
388
# Debugging aid: compare the files left by the configure.cpython workload
# under different shells, using the last matching run in
# ../benchmark-data/osh-runtime.
#
# Prints the times.tsv header and the configure.cpython rows, then writes one
# recursive diff per shell pair to _tmp/cpython-configure/ and summarizes each
# with diffstat.
#
# Note: errexit is switched off (diff exits non-zero when files differ) and
# is not restored.
compare-cpython() {
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  # 'grep -F' replaces the obsolescent 'fgrep', which newer GNU grep warns
  # about.
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task ids of the configure.cpython rows; they name the files-N dirs below.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  return

  # Unreachable: kept as ready-made commands for closer inspection; delete
  # the 'return' above to enable them.
  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
441
442"$@"