OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

432 lines, 260 significant
#!/usr/bin/env bash
#
# Test scripts found in the wild for both correctness and performance.
#
# Usage:
#   benchmarks/osh-runtime.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of this script's directory.  "$0" is quoted so a
# checkout path containing whitespace still resolves, and '&&' stops 'pwd' from
# reporting an unrelated directory when the 'cd' fails.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)

source benchmarks/common.sh  # tsv-concat
source benchmarks/id.sh  # print-job-id
source soil/common.sh  # find-dir-html
source test/common.sh
source test/tsv-lib.sh  # tsv-row

# Default directory for everything this script writes.
readonly BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
readonly TAR_DIR=$PWD/_deps/osh-runtime  # Make it absolute

#
# Dependencies
#

# Source tree whose ./configure is run by the 'configure.cpython' workload.
readonly PY27_DIR=$PWD/Python-2.7.13
30
# NOTE: Same list in oilshell.org/blob/run.sh.
#
# Print the file names of the test-data tarballs, one per line.
tarballs() {
  printf '%s\n' \
    tcc-0.9.26.tar.bz2 \
    yash-2.46.tar.xz \
    ocaml-4.06.0.tar.xz
}
39
# Download every tarball listed by 'tarballs' into $TAR_DIR, creating the
# directory if needed.  Files that already exist are left alone
# (wget --no-clobber).
download() {
  mkdir -p "$TAR_DIR"

  # -I {} already makes xargs run one wget per input line.  The former
  # '-n 1' was redundant and is mutually exclusive with -I in GNU xargs,
  # so it is dropped.
  # --directory-prefix is the full spelling of the abbreviated '--directory'.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
      'https://www.oilshell.org/blob/testdata/{}'
}
45
# Unpack every .bz2 and .xz archive in $TAR_DIR into $TAR_DIR itself, tracing
# and timing the loop, then list the directory.
#
# Paths are quoted so a $TAR_DIR containing whitespace works, and the loop
# variable is local so it does not leak into the caller.
extract() {
  local f

  set -x
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
55
#
# Computation
#
59
# Run every benchmark task read from stdin, recording timing and output.
#
# Args:
#   $1: output directory, relative to the current directory
#
# Stdin: one task per line, as whitespace-separated fields
#   host_name sh_path workload
#
# For task N (numbered from 0 in input order) this produces:
#   <out>/times.tsv            one row appended via time-tsv
#   <out>/files-N/STDOUT.txt   stdout of the workload
#   <out>/gc-N.txt             data the shell writes to fd 99; only for shells
#                              whose path matches *_bin/*/osh
#
# Dies on an unknown workload name.
run-tasks() {
  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  local host_name sh_path workload
  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Debugging switch: set to a non-empty value to copy files created by the
    # workload into $files_out_dir.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        # Runs out of tree: the per-task output dir is the working directory.
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.
    #
    # stdin comes from /dev/null: otherwise the workload inherits this loop's
    # stdin and anything it reads would swallow the remaining task lines.
    case $sh_path in
      *_bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" \
          < /dev/null > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" < /dev/null > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
175
# Print the task list as rows (via tsv-row): host_name, sh_path, workload.
#
# Args:
#   $1: host name to put in every row
#   $2: path of the native OSH binary to benchmark alongside bash, dash and
#       bin/osh
#
# Env:
#   QUICKLY: if non-empty, only the two fastest workloads are listed
#
# Rows are grouped by shell, with workloads in a fixed order inside each
# group.
print-tasks() {
  local host_name=$1
  local osh_native=$2

  local -a workloads=(
    hello-world
    abuild-print-help

    configure.cpython
    configure.ocaml
    configure.tcc
    configure.yash
  )

  if test -n "${QUICKLY:-}"; then
    # Just do the first two
    workloads=(
      hello-world
      abuild-print-help
    )
  fi

  # Loop variables are local so they don't leak into the caller.
  local sh_path workload

  # $osh_native is deliberately left unquoted: an empty value then adds no
  # shell at all instead of an empty sh_path row.
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
204
# Run all tasks for one host and collect the raw results in one directory.
#
# Args:
#   $1: host name, e.g. 'no-host' or 'lenny'
#   $2: directory for raw output (created if missing)
#   $3: native OSH binary: $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA
#   $4: optional and ignored.  It used to be stored in a local 'out_dir' that
#       was never read, so it is still accepted but has no effect.
#
# Writes into $2: times.tsv, per-task files-N/ and gc-N.txt, gc_stats.tsv, and
# a copy of _tmp/provenance.tsv.
measure() {
  local host_name=$1
  local raw_out_dir=$2
  local osh_native=$3

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  print-tasks "$host_name" "$osh_native" | run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host.  Only the directory part is
  # quoted, so the gc-*.txt glob still expands.
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
233
# Concatenate the newest raw results of each machine into $BASE_DIR/stage1.
#
# Args:
#   $1: directory holding raw.<machine>.<job id> dirs; defaults to $BASE_DIR
#       (_tmp/osh-runtime or ../benchmark-data/osh-runtime)
#   $2: optional machine name.  If given, only that machine is used;
#       otherwise $MACHINE1 and $MACHINE2 are.
#
# Output always goes to $BASE_DIR/stage1, whatever $1 is:
#   times.tsv, gc_stats.tsv, provenance.tsv
#
# Dies if a machine has no raw directory under $1.
stage1() {
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p "$out_dir"

  local -a machines
  if test -n "$single_machine"; then
    machines=( "$single_machine" )
  else
    machines=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local machine latest
  local -a candidates
  for machine in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the most recent run.
    candidates=( "$base_dir"/raw."$machine".* )
    latest=${candidates[-1]}

    # An unmatched glob stays a literal pattern; catch that here rather than
    # failing later on a confusing path.
    test -e "$latest" || die "No raw results for '$machine' in $base_dir"

    raw_times+=( "$latest/times.tsv" )
    raw_gc_stats+=( "$latest/gc_stats.tsv" )
    raw_provenance+=( "$latest/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
269
# Print the HTML report for the OSH runtime benchmarks to stdout.
#
# Args:
#   $1: directory containing elapsed.tsv, max_rss.tsv, gc_stats.tsv,
#       details.tsv, shells.tsv and hosts.tsv
#
# A "raw files" link is added only when files.html exists in the parent of $1.
# Paths are quoted so a directory containing whitespace works.  The static
# heredocs use a quoted delimiter, since none of them needs expansion.
print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<'EOF'
  <body class="width60">
    <p id="home-link">
      <a href="/">oilshell.org</a>
    </p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [oil/benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself. Parse time is included.

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<'EOF'
### Memory Usage (Max Resident Set Size in MB)
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<'EOF'
### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<'EOF'
### Details of All Tasks
EOF
  tsv2html "$in_dir/details.tsv"


  cmark <<'EOF'

### Shell and Host Details
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  # Only show files.html link on a single machine
  if test -f "$(dirname "$in_dir")/files.html"; then
    cmark <<'EOF'
---

[raw files](files.html)
EOF
  fi

  cat <<'EOF'
  </body>
</html>
EOF
}
333
# Run the whole benchmark on this machine only and build the report.
#
# Steps: wipe $BASE_DIR, fetch and unpack the test data, build the native OSH
# binary with ninja, record provenance, measure, then run the report stages.
#
# Reads $OSH_CPP_NINJA_BUILD from the environment or a sourced file.
soil-run() {
  ### Run it on just this machine, and make a report

  # ':?' aborts instead of running 'rm -r -f' on an empty path.
  rm -r -f "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  # (unquoted on purpose: the value is split into one array element per word)
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make the 'files' listing for $BASE_DIR, so index.html can potentially link
  # to it.  (The earlier comment said _tmp/osh-parser, a copy-paste slip.)
  find-dir-html "$BASE_DIR" files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
374
#
# Debugging
#
378
# Debugging aid: compare the files produced by the 'configure.cpython'
# workload under each shell, using the newest matching run in
# ../benchmark-data/osh-runtime.
#
# Writes recursive diffs against bash to _tmp/cpython-configure/ and prints a
# diffstat for each.  Turns errexit off, because diff exits non-zero when
# files differ; errexit is not turned back on afterwards.
compare-cpython() {
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  # Globs sort lexicographically, so the last match is the newest run.
  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  # grep -F replaces the obsolescent fgrep.
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task IDs of configure.cpython for each shell.  With the full task list
  # from print-tasks (6 workloads per shell, configure.cpython third), these
  # are 2, 8, 14 and 20.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  # The per-file diffs below are switched off by this early return; remove it
  # to see them.
  return

  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
431
# Dispatch: run the function named by the first argument, passing the
# remaining arguments to it.  With no arguments this is a no-op.
"$@"