1 | #!/usr/bin/env bash
2 | #
3 | # Measure the time it takes to build a binary with different compilers on
4 | # different machines, and measure the binary size.
5 | #
6 | # Usage:
7 | # benchmarks/ovm-build.sh <function name>
8 | #
9 | # Run on its own:
10 | # 1. Follow common instructions in benchmarks/osh-parser.sh
11 | # 2. benchmarks/auto.sh measure-builds
12 | # 3. benchmarks/report.sh ovm-build
13 |
14 | # Directories used:
15 | #
16 | # oilshell.org/blob/
17 | # ovm-build/
18 | #
19 | # ~/git/oilshell/
20 | # oil/
21 | # _deps/
22 | # ovm-build # tarballs and extracted source
23 | # _tmp/
24 | # ovm-build/
25 | # raw/ # output CSV
26 | # stage1
27 | # benchmark-data/
28 | # ovm-build/
29 | # raw/
30 | # compiler-id/
31 | # host-id/
32 |
# Strict mode: abort on unset variables, on a failure anywhere in a pipeline,
# and on any unhandled failing command.
set -o nounset
set -o pipefail
set -o errexit

# These paths are relative, so they are resolved against the current
# directory -- presumably the repo root (see the usage line at the top).
source benchmarks/common.sh  # for log, etc.
source benchmarks/id.sh  # print-job-id
source build/common.sh  # for $CLANG

# Parent of the directory holding this script.  $0 and the dirname result are
# quoted so a checkout path with whitespace still works, and '&&' keeps 'pwd'
# from reporting the wrong directory when 'cd' fails.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
source test/tsv-lib.sh  # uses REPO_ROOT

# Scratch/output directory, relative to the current directory.
readonly BASE_DIR=_tmp/ovm-build
# Tarballs and extracted source trees.
readonly TAR_DIR=$PWD/_deps/ovm-build  # Make it absolute
46 |
47 | #
48 | # Dependencies
49 | #
50 |
# Source directories under $TAR_DIR that other-shell-tasks emits
# 'configure' and 'make' tasks for.
#
# NOTE(review): the dash entry was the truncated string 'dash-', which can
# never name an extracted source tree.  It is restored here to dash-0.5.9.1;
# confirm it matches the tarball listed by tarballs().
readonly -a TAR_SUBDIRS=(
  dash-0.5.9.1
  bash-4.4
)
55 |
56 | # NOTE: Same list in oilshell.org/blob/run.sh.
# Print the tarball file names to download, one per line.
#
# NOTE(review): the dash entry was the truncated string 'dash-', which has no
# version or extension and is not matched by the '*gz' glob in extract-other.
# It is restored here to dash-0.5.9.1.tar.gz; confirm against the blob
# directory that 'download' fetches from.
tarballs() {
  cat <<EOF
bash-4.4.tar.gz
dash-0.5.9.1.tar.gz
mksh-R56c.tgz
EOF
}
64 |
# Download every tarball printed by 'tarballs' into $TAR_DIR.
#
# wget --no-clobber keeps files that already exist, so re-running only
# fetches what is missing.
download() {
  mkdir -p "$TAR_DIR"

  # -I {} already runs one wget per input line.  Combining it with -n 1 is
  # redundant and makes GNU xargs warn that the options are mutually
  # exclusive, so -n is dropped.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
    'https://www.oilshell.org/blob/ovm-build/{}'
}
70 |
71 | # Done MANUALLY.
# Extract every archive in $TAR_DIR whose name ends in 'gz' (which covers
# both .tar.gz and .tgz) into $TAR_DIR itself, and report the total time.
extract-other() {
  local f  # keep the loop variable out of the global scope

  # Paths are quoted so a $TAR_DIR containing whitespace still works.
  time for f in "$TAR_DIR"/*gz; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
}
77 |
78 | # Done automatically by 'measure' function.
79 |
80 | # TODO: CI should download this from previous
# Copy the oils-for-unix source tree for $OIL_VERSION from the sibling
# benchmark-data checkout into $TAR_DIR.
#
# To run on multiple machines, use the copy in the benchmark-data repo.
extract-oils() {
  local name="oils-for-unix-$OIL_VERSION"

  # The destination's parent may not exist yet if 'download' was never run.
  mkdir -p "$TAR_DIR"

  # --no-target-directory copies the tree AS the destination, rather than
  # nesting it inside the destination when that directory already exists.
  cp --recursive --no-target-directory \
    "../benchmark-data/src/$name/" \
    "$TAR_DIR/$name/"
}
87 |
88 | #
89 | # Measure Size of Binaries.
90 | #
91 |
92 | # Other tools:
93 | # - bloaty to look inside elf file
94 | # - nm? Just a flat list of symbols? Counting them would be nice.
95 | # - zipfile.py to look inside bytecode.zip
96 |
# Print a TSV table (host_label, num_bytes, path) with one row per path
# given as an argument.
#
# Arguments: paths to measure
sizes-tsv() {
  # host_label matches the times.tsv file output by report.R
  tsv-row host_label num_bytes path

  # Declared separately from the assignment so a failing 'hostname' is not
  # masked by the exit status of 'local'.
  local host
  host=$(hostname)

  # -maxdepth 0 limits find to the named paths themselves; %s is the size in
  # bytes and %p the path as given.
  find "$@" -maxdepth 0 -printf "$host\t%s\t%p\n"
}
103 |
104 | # NOTE: This should be the same on all x64 machines. But I want to run it on
105 | # x64 machines.
# Write the sizes of the built binaries under $BASE_DIR/bin into two TSV
# files in the given directory.
#
# Arguments: $1 - output directory for native-sizes.tsv and
#                 other-shell-sizes.tsv
measure-sizes() {
  local raw_out_dir=$1

  # PROBLEM: Do I need provenance for gcc/clang here?  I can just join it later
  # in R.

  # clang/oils-for-unix
  # clang/oils-for-unix.stripped
  # gcc/oils-for-unix
  # gcc/oils-for-unix.stripped
  #
  # Only the fixed prefix is quoted, so the glob and brace parts still expand.
  sizes-tsv "$BASE_DIR"/bin/*/{oils-for-unix,oils-for-unix.stripped} \
    > "$raw_out_dir/native-sizes.tsv"

  # Not used - we're not stripping these, etc.
  sizes-tsv "$BASE_DIR"/bin/*/*sh \
    > "$raw_out_dir/other-shell-sizes.tsv"

  log "Wrote ${raw_out_dir}/*.tsv"
}
125 |
126 | #
127 | # Unused Demos
128 | #
129 |
# Demo: show the size of the bytecode archive two ways -- the summary line of
# its listing, and the size of the file on disk.
bytecode-size() {
  local -r archive='_build/oil/bytecode.zip'

  # Summary line of the listing: 242 files, 1.85 MB
  unzip -l "$archive" | tail -n 1

  # On disk it is 1.88 MB, so there's 30K of header overhead.
  ls -l "$archive"
}
139 |
140 | # 6.8 seconds for debug build, instead of 8 seconds.
# Demo: clean, then build the debug OVM target with $CLANG as the C compiler.
clang-oil-dbg() {
  local dbg_target='_build/oil/ovm-dbg'

  make clean
  # CC is set only in the environment of this one make invocation.
  CC="$CLANG" make "$dbg_target"
}
145 |
146 | #
147 | # Measure Elapsed Time
148 | #
149 |
150 | # Add --target-size? Add that functionality to benchmarks/time.py?
151 | #
152 | # Should we add explicit targets?
153 | # - ovm-clang, ovm-clang-dbg
154 | # - ovm-gcc, ovm-gcc-dbg
155 | #
156 | # It would be possible, but it complicates the makefile.
157 |
# Run one timed build step and copy any resulting binary to
# $BASE_DIR/bin/<compiler basename>/.
#
# Arguments:
#   $1 - raw output dir, relative to the current directory; a row is appended
#        to its times.tsv
#   $2 - job id (accepted so the positional fields line up; not used here)
#   $3 - host name            $4 - host hash
#   $5 - C compiler path      $6 - compiler hash
#   $7 - source directory to build in
#   $8 - action: 'configure', 'make', 'oils-for-unix',
#        'oils-for-unix.stripped', or any other make target
build-task() {
  local raw_out_dir=$1
  local job_id=$2
  local host=$3
  local host_hash=$4
  local compiler_path=$5
  local compiler_hash=$6
  local src_dir=$7
  local action=$8

  # Definitions that depend on $PWD must be computed before the pushd below.
  local times_out="$PWD/$raw_out_dir/times.tsv"

  local -a TIME_PREFIX=(
    time-tsv
    --append
    --output "$times_out"
    --field "$host" --field "$host_hash"
    --field "$compiler_path" --field "$compiler_hash"
    --field "$src_dir" --field "$action"
  )

  # Assigned separately so a failing 'basename' is not masked by 'local'.
  local bin_dir
  bin_dir="$PWD/$BASE_DIR/bin/$(basename "$compiler_path")"
  mkdir -p "$bin_dir"

  pushd "$src_dir" >/dev/null

  # NOTE: We're not saving the output anywhere.  We save the status, which
  # protects against basic errors.

  case $action in
    (configure)
      "${TIME_PREFIX[@]}" -- ./configure

      # Cleaning here relies on the ORDER of tasks.txt.  configure happens
      # before build.  The Clang build shouldn't reuse GCC objects!
      # It has to be done after configure, because the Makefile must exist!
      make clean
      ;;

    (make)
      # Work out which binary the build produces BEFORE building, so an
      # unsupported source tree fails right away instead of leaving 'target'
      # unset for the strip/cp below.
      local target
      case $src_dir in
        (*/bash*)
          target=bash
          ;;
        (*/dash*)
          target=src/dash
          ;;
        (*)
          die "Don't know which binary 'make' produces in $src_dir"
          ;;
      esac

      "${TIME_PREFIX[@]}" -- make CC="$compiler_path"

      strip "$target"
      cp -v "$target" "$bin_dir"
      ;;

    (oils-for-unix*)
      local variant
      case $action in
        (oils-for-unix)
          variant='dbg'
          ;;
        (oils-for-unix.stripped)
          variant='opt'
          ;;
        (*)
          die "Invalid target"
          ;;
      esac

      # Change the C compiler into the corresponding C++ compiler
      local compiler
      case $compiler_path in
        (*gcc)
          # note: we take provenance of /usr/bin/gcc, but the shell script
          # runs 'c++'
          compiler='cxx'
          ;;
        (*clang)
          # Note on slight mess: benchmarks/id.sh takes the provenance of
          # $CLANG.  We translate that to 'clang' here, and
          # _build/oils.sh uses $CLANGXX.
          compiler='clang'
          ;;
        (*)
          die "Invalid compiler"
          ;;
      esac

      "${TIME_PREFIX[@]}" -- _build/oils.sh "$compiler" "$variant"

      # e.g. cp _bin/clang-opt-sh/oils-for-unix.stripped _tmp/ovm-build/bin/clang/
      # The action name doubles as the file name of the binary.
      cp -v "_bin/$compiler-$variant-sh/$action" "$bin_dir"
      ;;

    (*)
      local target=$action  # Assume it's a target like _bin/oil.ovm

      "${TIME_PREFIX[@]}" -- make CC="$compiler_path" "$target"

      cp -v "$target" "$bin_dir"
      ;;
  esac

  popd >/dev/null

  # $? here is the status of the popd just above.
  log "DONE BUILD TASK $action $src_dir __ status=$?"
}
267 |
# Print the oils-for-unix build tasks: for every line of the provenance file,
# two task lines with the source dir and the action appended.
#
# Arguments: $1 - provenance file, one compiler per line
oils-tasks() {
  local provenance=$1

  local ofu_dir="$TAR_DIR/oils-for-unix-$OIL_VERSION"

  # Append 2 fields (source dir, action) to the provenance fields.
  # read -r keeps backslashes literal; the '|| [[ -n ... ]]' clause keeps a
  # final line that has no trailing newline.
  local line
  while read -r line || [[ -n "$line" ]]; do
    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix
    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix.stripped
  done < "$provenance"
}
279 |
# Print the build tasks for the comparison shells: for every non-clang line
# of the provenance file, a 'configure' and a 'make' task per TAR_SUBDIRS
# entry.
#
# Arguments: $1 - provenance file, one compiler per line
other-shell-tasks() {
  local provenance=$1

  # Append 2 fields (source dir, action) to the provenance fields.
  # read -r keeps backslashes literal; the '|| [[ -n ... ]]' clause keeps a
  # final line that has no trailing newline.
  local line dir
  while read -r line || [[ -n "$line" ]]; do
    # Skip clang for now.
    if [[ "$line" == *clang* ]]; then
      continue
    fi

    for dir in "${TAR_SUBDIRS[@]}"; do
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" configure
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" make
    done
  done < "$provenance"
}
298 |
299 | # 5 releases: 0.0.0 to 0.4.0. For now, just do the 0.5.alpha1 release, and
300 | # show the drop.
# Placeholder: no historical tasks are generated; it only prints one empty
# line.
oil-historical-tasks() {
  printf '\n'
}
304 |
305 | # action is 'configure', a target name, etc.
306 | readonly NUM_COLUMNS=7 # 5 from provenence, then tarball/target
307 |
# Print all build tasks, one per line: the oils-for-unix tasks followed by
# the other-shell tasks.  Both lists are also saved under $BASE_DIR.
#
# Arguments: $1 - compiler provenance file
# Globals:   QUICKLY - if set and non-empty, print only the first 2 tasks of
#            each list
print-tasks() {
  local build_prov=$1

  local t1="$BASE_DIR/oils-tasks.txt"
  local t2="$BASE_DIR/other-shell-tasks.txt"

  # Redirect targets are quoted: an unquoted target containing whitespace
  # fails with "ambiguous redirect".
  oils-tasks "$build_prov" > "$t1"
  other-shell-tasks "$build_prov" > "$t2"

  if [[ -n "${QUICKLY:-}" ]]; then
    head -n 2 "$t1"  # debug and opt binary
    head -n 2 "$t2"  # do dash configure make
  else
    cat "$t1" "$t2"
  fi
}
324 |
# Run every build task, recording elapsed times in $raw_out_dir/times.tsv,
# then record the sizes of the resulting binaries.
#
# Arguments:
#   $1 - provenance file, from benchmarks/id.sh compiler-provenance
#   $2 - raw output dir: _tmp/ovm-build/$X or ../../benchmark-data/ovm-build/$X
measure() {
  local build_prov=$1
  local raw_out_dir=$2

  extract-oils

  local times_out="$raw_out_dir/times.tsv"
  # NOTE: Do we need two raw dirs?
  mkdir -p "$BASE_DIR"/{stage1,bin} "$raw_out_dir"

  # TODO: the $times_out calculation is duplicated in build-task()

  # Write header of the TSV file that is appended to.
  tsv-row \
    status elapsed_secs \
    host_name host_hash compiler_path compiler_hash \
    src_dir action > "$times_out"

  # TODO: remove xargs
  # - print-tasks | run-tasks with a loop
  # - exit code is more reliable, and we're not running in parallel anyway

  # errexit is switched off around the pipeline so a failing task reaches the
  # 'die' below with its status, instead of aborting the script right here.
  # Each task re-invokes this script ($0) as 'build-task', with
  # $NUM_COLUMNS fields taken from the task list.
  local status=0
  set +o errexit
  time print-tasks "$build_prov" \
    | xargs --verbose -n "$NUM_COLUMNS" -- "$0" build-task "$raw_out_dir"
  status=$?
  set -o errexit

  if (( status != 0 )); then
    die "*** Some tasks failed. (xargs status=$status) ***"
  fi

  measure-sizes "$raw_out_dir"
}
359 |
360 | #
361 | # Data Preparation and Analysis
362 | #
363 |
# Concatenate the most recent raw results per host into
# $BASE_DIR/stage1/{times,native-sizes}.tsv.
#
# Arguments:
#   $1 - directory holding raw.<host>.<job id> dirs (default: $BASE_DIR),
#        e.g. _tmp/ovm-build or ../benchmark-data/ovm-build
#   $2 - optional host label; when given, only that host is used, otherwise
#        $MACHINE1 and $MACHINE2 are used
stage1() {
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir="$BASE_DIR/stage1"
  mkdir -p "$out_dir"

  local -a hosts
  if [[ -n "$single_machine" ]]; then
    hosts=( "$single_machine" )
  else
    hosts=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_sizes=()

  local host
  for host in "${hosts[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the newest run.
    local -a dirs=( "$base_dir"/raw."$host".* )

    # Without a match the glob stays a literal pattern (or expands to nothing
    # under nullglob); fail clearly instead of passing a bogus path on.
    if (( ${#dirs[@]} == 0 )) || [[ ! -d "${dirs[-1]}" ]]; then
      die "No raw data directory matching $base_dir/raw.$host.*"
    fi

    raw_times+=( "${dirs[-1]}/times.tsv" )
    raw_sizes+=( "${dirs[-1]}/native-sizes.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"
  tsv-concat "${raw_sizes[@]}" > "$out_dir/native-sizes.tsv"
}
407 |
# Print the HTML report for the OVM build benchmark to stdout.
#
# Arguments: $1 - directory containing times.tsv, native-sizes.tsv,
#                 hosts.tsv and compilers.tsv
print-report() {
  local in_dir=$1

  benchmark-html-head 'OVM Build Performance'

  cat <<EOF
<body class="width60">
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
EOF

  # The source link names this script; it previously pointed at
  # osh-parser.sh, apparently copied from that benchmark's report.
  cmark << 'EOF'
## OVM Build Performance

Source code: [oil/benchmarks/ovm-build.sh](https://github.com/oilshell/oil/tree/master/benchmarks/ovm-build.sh)

### Time in Seconds by Host and Compiler

We measure the build speed of `bash` and `dash` for comparison.
EOF

  # Highlighting one compiler makes this table easier to read.
  # NOTE(review): the pattern selects 'gcc', although the earlier comment
  # here spoke of highlighting clang -- confirm which is intended.
  tsv2html \
    --css-class-pattern 'special ^gcc' \
    "$in_dir/times.tsv"

  cmark << 'EOF'
### Native Binary Size

EOF
  tsv2html --css-class-pattern 'special ^gcc' "$in_dir/native-sizes.tsv"

  cmark << 'EOF'

### Host and Compiler Details
EOF
  tsv2html "$in_dir/hosts.tsv"
  tsv2html "$in_dir/compilers.tsv"

  cat <<EOF
</body>
</html>
EOF
}
454 |
# Run the whole benchmark on a single machine: download and extract the
# sources, build the shell binaries with ninja, measure, then produce the
# report stages.
soil-run() {
  # ':?' aborts instead of running 'rm -r -f' on an empty path should
  # BASE_DIR ever be unset or empty.
  rm -r -f -- "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  download
  extract-other

  # Copied from benchmarks/osh-runtime.sh soil-run

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  # Deliberately unquoted so the value is split into words -- presumably it
  # can name several ninja targets.
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  # Host label used in the names of the raw.<host>.<job id> directories.
  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Like benchmarks/auto.sh
  #local build_prov
  #build_prov=$(benchmarks/id.sh compiler-provenance $job_id)

  compiler-provenance-2 \
    "$single_machine" "$job_id" _tmp

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure _tmp/compiler-provenance.txt "$raw_out_dir"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  benchmarks/report.sh stage3 "$BASE_DIR"
}
495 |
496 | "$@"