OILS / benchmarks / ovm-build.sh View on Github | oilshell.org

496 lines, 256 significant
#!/usr/bin/env bash
#
# Measure the time it takes to build a binary with different compilers on
# different machines, and measure the binary size.
#
# Usage:
#   benchmarks/ovm-build.sh <function name>
#
# Run on its own:
#   1. Follow common instructions in benchmarks/osh-parser.sh
#   2. benchmarks/auto.sh measure-builds
#   3. benchmarks/report.sh ovm-build

# Directories used:
#
# oilshell.org/blob/
#   ovm-build/
#
# ~/git/oilshell/
#   oil/
#     _deps/
#       ovm-build  # tarballs and extracted source
#     _tmp/
#       ovm-build/
#         raw/     # output TSV
#         stage1
#   benchmark-data/
#     ovm-build/
#       raw/
#     compiler-id/
#     host-id/

set -o nounset
set -o pipefail
set -o errexit

source benchmarks/common.sh  # for log, etc.
source benchmarks/id.sh  # print-job-id
source build/common.sh  # for $CLANG

# Absolute path of the directory one level above this script's directory.
# The expansions are quoted so a checkout path containing spaces works, and
# the commands are chained with && so that a failed 'cd' aborts the script
# instead of silently yielding the caller's working directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
source test/tsv-lib.sh  # uses REPO_ROOT

readonly BASE_DIR=_tmp/ovm-build
readonly TAR_DIR=$PWD/_deps/ovm-build  # Make it absolute
46
47#
48# Dependencies
49#
50
# Names of the extracted source directories of the other shells that get
# built for comparison.
TAR_SUBDIRS=(dash-0.5.9.1 bash-4.4)
readonly -a TAR_SUBDIRS
55
# Print the file names of the tarballs to download, one per line.
# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  printf '%s\n' \
    bash-4.4.tar.gz \
    dash-0.5.9.1.tar.gz \
    mksh-R56c.tgz
}
64
# Download every tarball printed by 'tarballs' into $TAR_DIR.  Files that are
# already present are left alone (wget --no-clobber).
download() {
  mkdir -p "$TAR_DIR"

  # -I {} already makes xargs run one wget per input line, so the former
  # '-n 1' was redundant (GNU xargs warns that the two options are mutually
  # exclusive and ignores it).
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
    'https://www.oilshell.org/blob/ovm-build/{}'
}
70
# Done MANUALLY.
# Unpack every archive in $TAR_DIR whose name ends in 'gz' (.tar.gz and .tgz
# alike) into $TAR_DIR, and report the total elapsed time.
extract-other() {
  local archive  # keep the loop variable out of the global scope
  time for archive in "$TAR_DIR"/*gz; do
    tar -x --directory "$TAR_DIR" --file "$archive"
  done
}
77
78# Done automatically by 'measure' function.
79
# TODO: CI should download this from previous
#
# Copy the oils-for-unix source tree for $OIL_VERSION from
# ../benchmark-data/src/ (relative to the current directory) into $TAR_DIR.
extract-oils() {
  local name="oils-for-unix-$OIL_VERSION"

  # To run on multiple machines, use the one in the benchmarks-data repo.
  # --no-target-directory treats the destination as the copy itself, so an
  # existing destination directory is overlaid rather than nested into.
  cp --recursive --no-target-directory \
    "../benchmark-data/src/$name/" \
    "$TAR_DIR/$name/"
}
87
88#
89# Measure Size of Binaries.
90#
91
92# Other tools:
93# - bloaty to look inside elf file
94# - nm? Just a flat list of symbols? Counting them would be nice.
95# - zipfile.py to look inside bytecode.zip
96
# Print a TSV table with the columns host_label, num_bytes and path: a header
# row followed by one row for each path given as an argument.
sizes-tsv() {
  # Declared separately from the assignment so that a failing 'hostname' is
  # reported instead of being masked by the exit status of 'local'.
  local host
  host=$(hostname) || return

  # host_label matches the times.tsv file output by report.R
  tsv-row host_label num_bytes path

  # -maxdepth 0 limits find to the arguments themselves; in -printf, %s is
  # the size in bytes and %p is the path.
  find "$@" -maxdepth 0 -printf "$host\t%s\t%p\n"
}
103
# NOTE: This should be the same on all x64 machines. But I want to run it on
# x64 machines.
#
# Write two size tables for the binaries under $BASE_DIR/bin/<compiler>/ into
# the directory given as $1: native-sizes.tsv and other-shell-sizes.tsv.
measure-sizes() {
  local raw_out_dir=$1

  # PROBLEM: Do I need provenance for gcc/clang here? I can just join it later
  # in R.

  # clang/oils-for-unix
  # clang/oils-for-unix.stripped
  # gcc/oils-for-unix
  # gcc/oils-for-unix.stripped
  #
  # The output path is quoted so a raw_out_dir containing spaces works.
  sizes-tsv "$BASE_DIR"/bin/*/{oils-for-unix,oils-for-unix.stripped} \
    > "$raw_out_dir/native-sizes.tsv"

  # Not used - we're not stripping these, etc.
  sizes-tsv "$BASE_DIR"/bin/*/*sh \
    > "$raw_out_dir/other-shell-sizes.tsv"

  log "Wrote ${raw_out_dir}/*.tsv"
}
125
126#
127# Unused Demos
128#
129
# Show the summary line of the bytecode zip listing, then the size of the zip
# file itself.
bytecode-size() {
  local -r archive=_build/oil/bytecode.zip

  # Last line of the listing is the summary: 242 files, 1.85 MB
  unzip -l "$archive" | tail -n 1

  # The file is 1.88 MB, so there's 30K of header overhead.
  ls -l "$archive"
}
139
# 6.8 seconds for debug build, instead of 8 seconds.
#
# Clean the tree, then build the OVM debug target with CC set to $CLANG.
clang-oil-dbg() {
  local -r target=_build/oil/ovm-dbg

  make clean
  CC=$CLANG make "$target"
}
145
146#
147# Measure Elapsed Time
148#
149
150# Add --target-size? Add that functionality to benchmarks/time.py?
151#
152# Should we add explicit targets?
153# - ovm-clang, ovm-clang-dbg
154# - ovm-gcc, ovm-gcc-dbg
155#
156# It would be possible, but it complicates the makefile.
157
# Run one timed build step inside a source tree and, for build actions, copy
# the resulting binary to $BASE_DIR/bin/<basename of compiler_path>/.
#
# Arguments:
#   $1 raw_out_dir    directory (relative to $PWD) whose times.tsv is
#                     appended to
#   $2 job_id         not used here; it occupies a position in the argument
#                     list
#   $3 host           \
#   $4 host_hash       | recorded as fields of the times.tsv row
#   $5 compiler_path   |
#   $6 compiler_hash  /
#   $7 src_dir        source tree to run the step in
#   $8 action         'configure', 'make', 'oils-for-unix',
#                     'oils-for-unix.stripped', or any other make target
build-task() {
  local raw_out_dir=$1
  local job_id=$2
  local host=$3
  local host_hash=$4
  local compiler_path=$5
  local compiler_hash=$6
  local src_dir=$7
  local action=$8

  local times_out="$PWD/$raw_out_dir/times.tsv"

  # Definitions that depend on $PWD.  They must be computed before the pushd
  # below changes the working directory.
  local -a TIME_PREFIX=(
    time-tsv
    --append
    --output "$times_out"
    --field "$host" --field "$host_hash"
    --field "$compiler_path" --field "$compiler_hash"
    --field "$src_dir" --field "$action"
  )
  local bin_base_dir="$PWD/$BASE_DIR/bin"

  local bin_dir
  bin_dir="$bin_base_dir/$(basename "$compiler_path")"
  mkdir -p "$bin_dir"

  pushd "$src_dir" >/dev/null

  # NOTE: We're not saving the output anywhere.  We save the status, which
  # protects against basic errors.

  case $action in
    (configure)
      "${TIME_PREFIX[@]}" -- ./configure

      # Cleaning here relies on the ORDER of tasks.txt.  configure happens
      # before build.  The Clang build shouldn't reuse GCC objects!
      # It has to be done after configure, because the Makefile must exist!
      make clean
      ;;

    (make)
      "${TIME_PREFIX[@]}" -- make CC="$compiler_path"

      local target
      case $src_dir in
        (*/bash*)
          target=bash
          ;;
        (*/dash*)
          target=src/dash
          ;;
        (*)
          # Without this branch $target stays unset and the failure shows up
          # as an unrelated "unbound variable" error further down.
          die "Don't know which binary 'make' produces in $src_dir"
          ;;
      esac

      strip "$target"
      cp -v "$target" "$bin_dir"
      ;;

    (oils-for-unix*)
      local variant
      case $action in
        (oils-for-unix)
          variant='dbg'
          ;;
        (oils-for-unix.stripped)
          variant='opt'
          ;;
        (*)
          die "Invalid target"
          ;;
      esac

      # Change the C compiler into the corresponding C++ compiler
      local compiler
      case $compiler_path in
        (*gcc)
          # note: we take provenance of /usr/bin/gcc, but the shell script
          # runs 'c++'
          compiler='cxx'
          ;;
        (*clang)
          # Note on slight mess: benchmarks/id.sh takes the provenance of
          # $CLANG.  We translate that to 'clang' here, and
          # _build/oils.sh uses $CLANGXX.
          compiler='clang'
          ;;
        (*)
          die "Invalid compiler"
          ;;
      esac

      "${TIME_PREFIX[@]}" -- _build/oils.sh "$compiler" "$variant"

      # e.g. cp _bin/clang-opt-sh/oils-for-unix.stripped _tmp/ovm-build/bin/clang/
      local filename=$action
      cp -v "_bin/$compiler-$variant-sh/$filename" "$bin_dir"
      ;;

    (*)
      local target=$action  # Assume it's a target like _bin/oil.ovm

      "${TIME_PREFIX[@]}" -- make CC="$compiler_path" "$target"

      cp -v "$target" "$bin_dir"
      ;;
  esac
  # Status of the last command run in the selected branch.  It is captured
  # here because 'popd' below would otherwise overwrite $? and the log line
  # would always report popd's status.
  local status=$?

  popd >/dev/null

  log "DONE BUILD TASK $action $src_dir __ status=$status"
}
267
# For every line of the provenance file $1, print two task lines: the line
# followed by the oils-for-unix source directory and one of the two actions
# (oils-for-unix, then oils-for-unix.stripped).
oils-tasks() {
  local provenance=$1

  local src_tree="$TAR_DIR/oils-for-unix-$OIL_VERSION"
  local row action

  # Add 1 field for each of 5 fields.
  cat $provenance | while read row; do
    for action in oils-for-unix oils-for-unix.stripped; do
      echo "$row" $src_tree $action
    done
  done
}
279
# For every line of the provenance file $1 that does not mention clang, print
# a 'configure' and a 'make' task line for each directory in TAR_SUBDIRS.
other-shell-tasks() {
  local provenance=$1
  local row subdir step

  # Add 1 field for each of 5 fields.
  cat $provenance | while read row; do
    # Skip clang for now.
    if [[ $row == *clang* ]]; then
      continue
    fi

    for subdir in "${TAR_SUBDIRS[@]}"; do
      for step in configure make; do
        echo "$row" $TAR_DIR/$subdir $step
      done
    done
  done
}
298
# 5 releases: 0.0.0 to 0.4.0. For now, just do the 0.5.alpha1 release, and
# show the drop.
#
# Placeholder: currently prints a single empty line.
oil-historical-tasks() {
  printf '\n'
}
304
# Number of fields in one task line: 5 from provenance, then the source
# directory (tarball) and the action.
# action is 'configure', a target name, etc.
NUM_COLUMNS=7
readonly NUM_COLUMNS
307
# Write the Oils and other-shell task lists for the provenance file $1 to
# files under $BASE_DIR, then print them.  When $QUICKLY is non-empty, only
# the first two lines of each list are printed.
print-tasks() {
  local build_prov=$1

  local oils_list=$BASE_DIR/oils-tasks.txt
  local shells_list=$BASE_DIR/other-shell-tasks.txt

  oils-tasks $build_prov > $oils_list
  other-shell-tasks $build_prov > $shells_list

  if test -z "${QUICKLY:-}"; then
    cat $oils_list $shells_list
    return
  fi

  head -n 2 $oils_list  # debug and opt binary
  head -n 2 $shells_list  # do dash configure make
}
324
# Run every build task for the given compiler provenance file, recording one
# row per task in <raw_out_dir>/times.tsv, then record binary sizes.
#
# Arguments:
#   $1 build_prov   from benchmarks/id.sh compiler-provenance
#   $2 raw_out_dir  _tmp/ovm-build/$X or ../../benchmark-data/ovm-build/$X
#
# Calls 'die' if the xargs pipeline that runs the tasks exits non-zero.
measure() {
  local build_prov=$1
  local raw_out_dir=$2

  extract-oils

  local times_out="$raw_out_dir/times.tsv"
  # NOTE: Do we need two raw dirs?
  mkdir -p "$BASE_DIR"/{stage1,bin} "$raw_out_dir"

  # TODO: the $times_out calculation is duplicated in build-task()

  # Write header of the TSV file that is appended to.
  tsv-row \
    status elapsed_secs \
    host_name host_hash compiler_path compiler_hash \
    src_dir action > "$times_out"

  # TODO: remove xargs
  # - print-tasks | run-tasks with a loop
  # - exit code is more reliable, and we're not running in parallel anyway

  # errexit is switched off around the pipeline so that its status can be
  # inspected and reported below instead of aborting the script here.
  # "$0" and "$raw_out_dir" are fixed leading arguments of the xargs command,
  # so quoting them lets paths with spaces through intact.
  set +o errexit
  time print-tasks "$build_prov" \
    | xargs --verbose -n "$NUM_COLUMNS" -- "$0" build-task "$raw_out_dir"
  local status=$?
  set -o errexit

  if test "$status" -ne 0; then
    die "*** Some tasks failed. (xargs status=$status) ***"
  fi

  measure-sizes "$raw_out_dir"
}
359
360#
361# Data Preparation and Analysis
362#
363
# Concatenate the newest raw results into $BASE_DIR/stage1/times.tsv and
# $BASE_DIR/stage1/native-sizes.tsv.
#
# Arguments:
#   $1 base_dir        directory containing raw.<host>.<job> directories;
#                      defaults to $BASE_DIR when empty or omitted
#                      (_tmp/ovm-build or ../benchmark-data/ovm-build)
#   $2 single_machine  optional host name; when given, only that host's
#                      newest raw directory is used, otherwise the newest
#                      ones of $MACHINE1 and $MACHINE2
#
# The former trailing "unused" section was unreachable (it followed an
# unconditional return) and referenced an undefined variable; it is removed.
stage1() {
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p "$out_dir"

  local -a hosts
  if test -n "$single_machine"; then
    hosts=( "$single_machine" )
  else
    hosts=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_sizes=()
  local -a candidates
  local host latest

  for host in "${hosts[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the most recent run.
    candidates=( "$base_dir"/raw."$host".* )
    latest=${candidates[-1]}

    raw_times+=( "$latest/times.tsv" )
    raw_sizes+=( "$latest/native-sizes.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"
  tsv-concat "${raw_sizes[@]}" > "$out_dir/native-sizes.tsv"
}
407
# Print the HTML report to stdout, built from the TSV files in the directory
# given as $1: times.tsv, native-sizes.tsv, hosts.tsv and compilers.tsv.
print-report() {
  local in_dir=$1

  benchmark-html-head 'OVM Build Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oilshell.org</a>
    </p>
EOF

  # The source link names this script; it previously pointed at
  # osh-parser.sh, apparently copied from that report.
  cmark << 'EOF'
## OVM Build Performance

Source code: [oil/benchmarks/ovm-build.sh](https://github.com/oilshell/oil/tree/master/benchmarks/ovm-build.sh)

### Time in Seconds by Host and Compiler

We measure the build speed of `bash` and `dash` for comparison.
EOF

  # Highlighting one compiler makes this table easier to read.
  # NOTE(review): the earlier comment said clang is highlighted, but the
  # pattern passed here is '^gcc' - confirm which is intended.
  tsv2html \
    --css-class-pattern 'special ^gcc' \
    "$in_dir/times.tsv"

  cmark << 'EOF'
### Native Binary Size

EOF
  tsv2html --css-class-pattern 'special ^gcc' "$in_dir/native-sizes.tsv"

  cmark << 'EOF'

### Host and Compiler Details
EOF
  tsv2html "$in_dir/hosts.tsv"
  tsv2html "$in_dir/compilers.tsv"

  cat <<EOF
  </body>
</html>
EOF
}
454
# Run the whole benchmark on a single machine: start from an empty $BASE_DIR,
# fetch and unpack the other shells, build the binaries named by
# $OSH_CPP_NINJA_BUILD, measure, and generate the report stages.
soil-run() {
  rm -r -f "$BASE_DIR"
  mkdir -p "$BASE_DIR"

  download
  extract-other

  # Copied from benchmarks/osh-runtime.sh soil-run

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  # Deliberately unquoted: the variable is split into one ninja target per
  # word.
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  # (This declaration used to appear twice; once is enough.)
  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Like benchmarks/auto.sh
  #local build_prov
  #build_prov=$(benchmarks/id.sh compiler-provenance $job_id)

  compiler-provenance-2 \
    "$single_machine" "$job_id" _tmp

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure _tmp/compiler-provenance.txt "$raw_out_dir"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  benchmarks/report.sh stage3 "$BASE_DIR"
}
495
496"$@"