1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Test scripts found in the wild for both correctness and performance.
|
4 | #
|
5 | # Usage:
|
6 | # benchmarks/osh-runtime.sh <function name>
|
7 |
|
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the repository root (the parent of this script's directory).
# "$0" is quoted so a checkout path containing spaces still resolves, and '&&'
# keeps a failed 'cd' from silently yielding the current directory instead.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)

# These paths are relative, so the script must be invoked from the repo root.
source benchmarks/common.sh  # tsv-concat
source benchmarks/id.sh  # print-job-id
source soil/common.sh  # find-dir-html
source test/common.sh
source test/tsv-lib.sh  # tsv-row

# Default directory for results produced by this script.
readonly BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
readonly TAR_DIR=$PWD/_deps/osh-runtime  # Make it absolute

#
# Dependencies
#

# Python source tree whose ./configure is used by the configure.cpython workload.
readonly PY27_DIR=$PWD/Python-2.7.13
|
30 |
|
31 | # NOTE: Same list in oilshell.org/blob/run.sh.
|
tarballs() {
  ### Print the name of each testdata tarball, one per line

  local -a names=(
    tcc-0.9.26.tar.bz2
    yash-2.46.tar.xz
    ocaml-4.06.0.tar.xz
  )
  printf '%s\n' "${names[@]}"
}
|
39 |
|
download() {
  ### Fetch every tarball listed by 'tarballs' into $TAR_DIR
  #
  # Files that already exist are not downloaded again (wget --no-clobber).

  mkdir -p "$TAR_DIR"

  # -I {} already runs one command per input line.  GNU xargs treats -n and -I
  # as mutually exclusive (the earlier one is ignored, with a warning on newer
  # findutils), so the former '-n 1' is dropped.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory "$TAR_DIR" \
    'https://www.oilshell.org/blob/testdata/{}'
}
|
45 |
|
extract() {
  ### Unpack every .bz2 and .xz archive found in $TAR_DIR into $TAR_DIR
  #
  # Traces the tar commands (set -x), reports the total time taken, and then
  # lists the directory.  If one of the globs matches nothing, the literal
  # pattern is handed to tar, which fails.

  local f  # keep the loop variable out of the caller's scope

  set -x
  # Quoted so that a $TAR_DIR or archive name containing spaces is not split.
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
|
55 |
|
56 | #
|
57 | # Computation
|
58 | #
|
59 |
|
run-tasks() {
  ### Run each task read from stdin under time-tsv, saving timings and output
  #
  # Args:
  #   $1: directory for raw results; relative to $PWD, or absolute
  #
  # Stdin: one task per line, as whitespace-separated fields
  #   host_name sh_path workload
  #
  # Written under the raw results directory:
  #   times.tsv        one row appended per task (time-tsv --append)
  #   files-$task_id/  per-task directory holding STDOUT.txt
  #   gc-$task_id.txt  GC stats, only for shells matching *_bin/*/osh
  #
  # An unknown workload aborts via 'die'.

  local raw_out_dir=$1
  case $raw_out_dir in
    /*)  # Already absolute; prefixing $PWD would point somewhere else
      ;;
    *)  # because we change dirs
      raw_out_dir="$PWD/$raw_out_dir"
      ;;
  esac

  local task_id=0
  local host_name sh_path workload
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Never set to a non-empty value below, so the __TIMESTAMP snapshot
    # branches are currently inert.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        # Run from the per-task directory, invoking configure by its full path.
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These run inside the extracted source tree.
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.  Redirect targets are quoted: an unquoted
    # path containing a space is an "ambiguous redirect" error in bash.
    case $sh_path in
      *_bin/*/osh)
        # The shell is told to write GC stats to descriptor 99, which is
        # pointed at the per-task stats file.
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      # NUL-delimited so file names with spaces or newlines survive xargs.
      find . -type f -newer __TIMESTAMP -print0 \
        | xargs -0 -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
|
175 |
|
print-tasks() {
  ### Print one TSV row (host_name, sh_path, workload) per task to stdout
  #
  # Args:
  #   $1: host name recorded in every row
  #   $2: path of the native osh binary, benchmarked after bash, dash, bin/osh
  #
  # Env:
  #   QUICKLY: if set and non-empty, only the first two workloads are emitted
  #
  # Rows are grouped by shell, with workloads in the order listed below.

  local host_name=$1
  local osh_native=$2

  local -a workloads=(
    hello-world
    abuild-print-help

    configure.cpython
    configure.ocaml
    configure.tcc
    configure.yash
  )

  if test -n "${QUICKLY:-}"; then
    # Just do the first two
    workloads=(
      hello-world
      abuild-print-help
    )
  fi

  # Loop variables are local so they don't leak into the caller's scope.
  local sh_path workload

  # $osh_native is deliberately left unquoted, as before: an empty value then
  # contributes no shell instead of an empty sh_path.
  # shellcheck disable=SC2086
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      # Quoted so a field containing whitespace stays a single TSV cell.
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
|
204 |
|
measure() {
  ### Run all tasks for one host and write the raw results into a directory
  #
  # Args:
  #   $1: host name, e.g. 'no-host' or 'lenny'
  #   $2: directory that receives times.tsv, gc_stats.tsv, provenance.tsv and
  #       the per-task files
  #   $3: native osh binary ($OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA)
  #   $4: optional and ignored.  It used to be read into an 'out_dir' local
  #       that nothing referenced; extra arguments are still accepted.

  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2
  local osh_native=$3  # $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  print-tasks "$host_name" "$osh_native" | run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host.  Only the directory part is
  # quoted so that gc-*.txt still globs.
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
|
233 |
|
stage1() {
  ### Concatenate the latest raw results of each machine into $BASE_DIR/stage1
  #
  # Args:
  #   $1: directory holding raw.$machine.* result dirs; defaults to $BASE_DIR
  #       when omitted or empty (_tmp/osh-runtime or
  #       ../benchmark-data/osh-runtime)
  #   $2: optional single machine name.  When omitted or empty, $MACHINE1 and
  #       $MACHINE2 are used instead.
  #
  # Writes times.tsv, gc_stats.tsv and provenance.tsv under $BASE_DIR/stage1.
  # The output always goes under $BASE_DIR, regardless of $1.

  local base_dir=${1:-$BASE_DIR}  # _tmp/osh-runtime or ../benchmark-data/osh-runtime
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p "$out_dir"

  local -a machines
  if test -n "$single_machine"; then
    machines=( "$single_machine" )
  else
    # Only expanded on this path, so single-machine runs don't need them set.
    machines=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local m latest
  local -a runs
  for m in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the most recent run.  Variables are quoted; the trailing
    # '*' stays outside the quotes so it still globs.
    runs=( "$base_dir"/raw."$m".* )
    latest=${runs[-1]}

    raw_times+=( "$latest/times.tsv" )
    raw_gc_stats+=( "$latest/gc_stats.tsv" )
    raw_provenance+=( "$latest/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
|
269 |
|
print-report() {
  ### Print the HTML report for the TSV tables in a directory to stdout
  #
  # Args:
  #   $1: directory holding elapsed.tsv, max_rss.tsv, gc_stats.tsv,
  #       details.tsv, shells.tsv and hosts.tsv
  #
  # A link to files.html is added only when that file exists in the parent of
  # the input directory.

  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<EOF
<body class="width60">
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [oil/benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself. Parse time is included.

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<EOF
### Memory Usage (Max Resident Set Size in MB)
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<EOF
### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<EOF
### Details of All Tasks
EOF
  tsv2html "$in_dir/details.tsv"


  cmark <<'EOF'

### Shell and Host Details
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  # Only show files.html link on a single machine.  Both expansions are quoted:
  # unquoted, a path with spaces makes 'test' fail with "too many arguments".
  if test -f "$(dirname "$in_dir")/files.html"; then
    cmark <<'EOF'
---

[raw files](files.html)
EOF
  fi

  cat <<EOF
</body>
</html>
EOF
}
|
333 |
|
soil-run() {
  ### Run it on just this machine, and make a report
  #
  # Wipes $BASE_DIR, downloads and extracts the testdata, builds
  # $OSH_CPP_NINJA_BUILD with ninja, measures all shells under the host name
  # 'no-host', and then runs the stage1 / stage2 / stage3 report steps.

  # ':?' aborts rather than expanding to nothing if BASE_DIR is ever empty.
  rm -r -f -- "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a osh_bin=( "$OSH_CPP_NINJA_BUILD" )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  # Declared separately so a failing print-job-id is not masked by 'local'.
  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine.  The empty first argument makes
  # stage1 fall back to $BASE_DIR.
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make files.html for this benchmark's directory, so index.html can
  # potentially link to it.  (The old comment named _tmp/osh-parser, although
  # the directory passed is _tmp/osh-runtime, i.e. $BASE_DIR.)
  find-dir-html "$BASE_DIR" files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
|
374 |
|
375 | #
|
376 | # Debugging
|
377 | #
|
378 |
|
compare-cpython() {
  ### Debugging aid: diff the configure.cpython output files between shells
  #
  # Uses the last directory matching the glob below, relative to the current
  # directory, and writes the recursive diffs to _tmp/cpython-configure/.
  # Turns errexit off, since diff exits non-zero when the inputs differ.

  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"  # fgrep is deprecated

  # Task IDs of configure.cpython per shell.  With the six workloads and four
  # shells emitted by print-tasks, it is the third task of each group of six.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  # Compare bash's output files against each other shell's.
  local pair name other_id
  for pair in "dash $dash_id" "osh-py $osh_py_id" "osh-cpp $osh_cpp_id"; do
    read -r name other_id <<< "$pair"

    echo "bash vs. $name"
    diff -u --recursive "$dir/files-$bash_id" "$dir/files-$other_id" \
      > "$out_dir/bash-vs-$name.txt"
    diffstat "$out_dir/bash-vs-$name.txt"
    echo
  done

  return

  # Not reached: more detailed diffs, kept for manual debugging.
  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
|
431 |
|
432 | "$@"
|