benchmarks/ovm-build.sh

OILS / benchmarks / ovm-build.sh View on Github | oilshell.org

496 lines, 256 significant

1	#!/usr/bin/env bash
2	#
3	# Measure the time it takes to build a binary with different compilers on
4	# different machines, and measure the binary size.
5	#
6	# Usage:
7	# benchmarks/ovm-build.sh <function name>
8	#
9	# Run on its own:
10	# 1. Follow common instructions in benchmarks/osh-parser.sh
11	# 2. benchmarks/auto.sh measure-builds
12	# 3. benchmarks/report.sh ovm-build
13
14	# Directories used:
15	#
16	# oilshell.org/blob/
17	# ovm-build/
18	#
19	# ~/git/oilshell/
20	# oil/
21	# _deps/
22	# ovm-build # tarballs and extracted source
23	# _tmp/
24	# ovm-build/
25	# raw/ # output CSV
26	# stage1
27	# benchmark-data/
28	# ovm-build/
29	# raw/
30	# compiler-id/
31	# host-id/
32
# Strict mode: fail on unset variables, on any failing pipeline stage, and on
# any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

source benchmarks/common.sh  # for log, etc.
source benchmarks/id.sh  # print-job-id
source build/common.sh  # for $CLANG

# Absolute path of the directory above this script's directory.
# Everything is quoted so a checkout path containing spaces works, and '&&' is
# used so a failed 'cd' aborts instead of silently yielding the current dir.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
source test/tsv-lib.sh  # uses REPO_ROOT

readonly BASE_DIR=_tmp/ovm-build
readonly TAR_DIR="$PWD/_deps/ovm-build"  # Make it absolute
46
47	#
48	# Dependencies
49	#
50
# Source directories under $TAR_DIR for which other-shell-tasks emits
# 'configure' and 'make' tasks, in this order.
readonly -a TAR_SUBDIRS=(dash-0.5.9.1 bash-4.4)
55
56	# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  # Print the tarball file names, one per line.  'download' substitutes each
  # name into the blob URL.
  printf '%s\n' \
    bash-4.4.tar.gz \
    dash-0.5.9.1.tar.gz \
    mksh-R56c.tgz
}
64
# Download every tarball printed by 'tarballs' into $TAR_DIR, skipping files
# that already exist there (wget --no-clobber).
download() {
  mkdir -p "$TAR_DIR"

  # -I {} already runs one wget per input line.  It is mutually exclusive with
  # -n, so the former '-n 1' was ignored (GNU xargs prints a warning for it).
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
      'https://www.oilshell.org/blob/ovm-build/{}'
}
70
71	# Done MANUALLY.
# Extract every archive matching $TAR_DIR/*gz into $TAR_DIR and report the
# total elapsed time.
extract-other() {
  # Local, so the loop variable doesn't leak into the caller's scope.
  local tarball

  # Quoted so that a $TAR_DIR containing spaces (it is derived from $PWD) is
  # passed to tar as a single argument.
  time for tarball in "$TAR_DIR"/*gz; do
    tar -x --directory "$TAR_DIR" --file "$tarball"
  done
}
77
78	# Done automatically by 'measure' function.
79
80	# TODO: CI should download this from previous
# Copy the oils-for-unix-$OIL_VERSION source tree from ../benchmark-data/src/
# into $TAR_DIR.  The source path is relative to the current directory.
extract-oils() {
  # To run on multiple machines, use the one in the benchmarks-data repo.
  local name="oils-for-unix-$OIL_VERSION"

  # --no-target-directory: copy the *contents* onto the destination directory
  # rather than nesting the source directory inside an existing destination.
  # Paths are quoted because $TAR_DIR is derived from $PWD and may have spaces.
  cp --recursive --no-target-directory \
    "../benchmark-data/src/$name/" \
    "$TAR_DIR/$name/"
}
87
88	#
89	# Measure Size of Binaries.
90	#
91
92	# Other tools:
93	# - bloaty to look inside elf file
94	# - nm? Just a flat list of symbols? Counting them would be nice.
95	# - zipfile.py to look inside bytecode.zip
96
# Print a TSV table (host_label, num_bytes, path) with one row per path given
# as an argument.
#
# Arguments: paths to measure; they are passed straight to 'find'.
sizes-tsv() {
  # host_label matches the times.tsv file output by report.R
  tsv-row host_label num_bytes path

  # Declared separately from the assignment so that a failing 'hostname' is
  # not masked by the exit status of 'local'.
  local host
  host=$(hostname)

  # -maxdepth 0: only report the named paths themselves, never descend.
  find "$@" -maxdepth 0 -printf "$host\t%s\t%p\n"
}
103
104	# NOTE: This should be the same on all x64 machines. But I want to run it on
105	# x64 machines.
# Write the sizes of the built binaries under $BASE_DIR/bin as TSV files.
#
# Arguments:
#   $1 - directory that receives native-sizes.tsv and other-shell-sizes.tsv
measure-sizes() {
  local raw_out_dir=$1

  # PROBLEM: Do I need provenance for gcc/clang here?  I can just join it later
  # in R.

  # clang/oils-for-unix
  # clang/oils-for-unix.stripped
  # gcc/oils-for-unix
  # gcc/oils-for-unix.stripped
  sizes-tsv "$BASE_DIR"/bin/*/{oils-for-unix,oils-for-unix.stripped} \
    > "${raw_out_dir}/native-sizes.tsv"

  # Not used - we're not stripping these, etc.
  # Matches the other shells copied into the per-compiler bin dirs, e.g.
  # bin/gcc/bash and bin/gcc/dash.  (The wildcards in this pattern had been
  # lost, leaving the non-existent path 'bin//sh'.)
  sizes-tsv "$BASE_DIR"/bin/*/*sh \
    > "${raw_out_dir}/other-shell-sizes.tsv"

  log "Wrote ${raw_out_dir}/*.tsv"
}
125
126	#
127	# Unused Demos
128	#
129
# Demo: show the size of the bytecode archive two ways.
bytecode-size() {
  local archive='_build/oil/bytecode.zip'

  # Summary line of the listing.  242 files, 1.85 MB
  unzip -l "$archive" | tail -n 1

  # Size on disk.  1.88 MB, so there's 30K of header overhead.
  ls -l "$archive"
}
139
140	# 6.8 seconds for debug build, instead of 8 seconds.
# Demo: rebuild the debug OVM binary from scratch with $CLANG as the C compiler.
clang-oil-dbg() {
  make clean

  # CC is set in the environment of this one 'make' invocation only.
  CC="$CLANG" make _build/oil/ovm-dbg
}
145
146	#
147	# Measure Elapsed Time
148	#
149
150	# Add --target-size? Add that functionality to benchmarks/time.py?
151	#
152	# Should we add explicit targets?
153	# - ovm-clang, ovm-clang-dbg
154	# - ovm-gcc, ovm-gcc-dbg
155	#
156	# It would be possible, but it complicates the makefile.
157
# Run one timed build step and copy any resulting binary into
# $BASE_DIR/bin/<compiler basename>/.  'measure' invokes this through xargs,
# once per task line.
#
# Arguments:
#   $1 raw_out_dir    - directory (relative to $PWD) containing times.tsv
#   $2 job_id         - not used in this function
#   $3 host           - recorded in times.tsv
#   $4 host_hash      - recorded in times.tsv
#   $5 compiler_path  - e.g. a path ending in 'gcc' or 'clang'
#   $6 compiler_hash  - recorded in times.tsv
#   $7 src_dir        - source tree to build in
#   $8 action         - 'configure', 'make', 'oils-for-unix',
#                       'oils-for-unix.stripped', or a make target
build-task() {
  local raw_out_dir=$1
  local job_id=$2
  local host=$3
  local host_hash=$4
  local compiler_path=$5
  local compiler_hash=$6
  local src_dir=$7
  local action=$8

  local times_out="$PWD/$raw_out_dir/times.tsv"

  # Definitions that depend on $PWD.  They must be computed before the pushd
  # below changes the working directory.
  local -a TIME_PREFIX=(
    time-tsv
    --append
    --output "$times_out"
    --field "$host" --field "$host_hash"
    --field "$compiler_path" --field "$compiler_hash"
    --field "$src_dir" --field "$action"
  )
  local bin_base_dir=$PWD/$BASE_DIR/bin

  local bin_dir
  bin_dir="$bin_base_dir/$(basename "$compiler_path")"
  mkdir -p "$bin_dir"

  pushd "$src_dir" >/dev/null

  # NOTE: We're not saving the output anywhere.  We save the status, which
  # protects against basic errors.

  case $action in
    (configure)
      "${TIME_PREFIX[@]}" -- ./configure

      # Cleaning here relies on the ORDER of tasks.txt.  configure happens
      # before build.  The Clang build shouldn't reuse GCC objects!
      # It has to be done after configure, because the Makefile must exist!
      make clean
      ;;

    (make)
      "${TIME_PREFIX[@]}" -- make CC="$compiler_path"

      # Pick the built binary based on which source tree this is, e.g.
      # $TAR_DIR/bash-4.4 or $TAR_DIR/dash-0.5.9.1.
      local target
      case $src_dir in
        (*/bash*)
          target=bash
          ;;
        (*/dash*)
          target=src/dash
          ;;
        (*)
          # Fail with a clear message instead of an unbound-variable error.
          die "Invalid src_dir $src_dir"
          ;;
      esac

      strip "$target"
      cp -v "$target" "$bin_dir"
      ;;

    (oils-for-unix*)
      local variant
      case $action in
        (oils-for-unix)
          variant='dbg'
          ;;
        (oils-for-unix.stripped)
          variant='opt'
          ;;
        (*)
          die "Invalid target"
          ;;
      esac

      # Change the C compiler into the corresponding C++ compiler
      local compiler
      case $compiler_path in
        (*gcc)
          # note: we take provenance of /usr/bin/gcc, but the shell script
          # runs 'c++'
          compiler='cxx'
          ;;
        (*clang)
          # Note on slight mess: benchmarks/id.sh takes the provenance of
          # $CLANG.  We translate that to 'clang' here, and
          # _build/oils.sh uses $CLANGXX.
          compiler='clang'
          ;;
        (*)
          die "Invalid compiler"
          ;;
      esac

      "${TIME_PREFIX[@]}" -- _build/oils.sh "$compiler" "$variant"

      # e.g. cp _bin/clang-opt-sh/oils-for-unix.stripped _tmp/ovm-build/bin/clang/
      local filename=$action
      cp -v "_bin/$compiler-$variant-sh/$filename" "$bin_dir"
      ;;

    (*)
      local target=$action  # Assume it's a target like _bin/oil.ovm

      "${TIME_PREFIX[@]}" -- make CC="$compiler_path" "$target"

      cp -v "$target" "$bin_dir"
      ;;
  esac

  popd >/dev/null

  # NOTE(review): $? here is the exit status of the 'popd' just above, not of
  # the build command.
  log "DONE BUILD TASK $action $src_dir __ status=$?"
}
267
# Print the oils-for-unix build tasks: for every line of the provenance file,
# one task for 'oils-for-unix' and one for 'oils-for-unix.stripped'.
#
# Arguments:
#   $1 - path of the compiler provenance file (one record per line)
# Outputs: task lines on stdout
oils-tasks() {
  local provenance=$1

  local ofu_dir="$TAR_DIR/oils-for-unix-$OIL_VERSION"

  # Append 2 fields (source dir, action) to each provenance line.
  # read -r: keep any backslashes in the line literal.
  local line
  while read -r line; do
    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix
    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix.stripped
  done < "$provenance"
}
279
# Print the 'configure' and 'make' tasks for every directory in TAR_SUBDIRS,
# for every provenance line that does not mention clang.
#
# Arguments:
#   $1 - path of the compiler provenance file (one record per line)
# Outputs: task lines on stdout
other-shell-tasks() {
  local provenance=$1

  # Append 2 fields (source dir, action) to each provenance line.
  # read -r: keep any backslashes in the line literal.
  local line dir
  while read -r line; do
    case $line in
      # Skip clang for now.  The pattern needs wildcards on both sides,
      # because $line is a whole record and the compiler path is just one of
      # its fields.
      (*clang*)
        continue
        ;;
    esac

    for dir in "${TAR_SUBDIRS[@]}"; do
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" configure
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" make
    done
  done < "$provenance"
}
298
299	# 5 releases: 0.0.0 to 0.4.0. For now, just do the 0.5.alpha1 release, and
300	# show the drop.
oil-historical-tasks() {
  # Placeholder: emits a single empty line and no tasks.
  printf '\n'
}
304
# Number of whitespace-separated fields in each task line: 5 from provenance,
# then the source dir (tarball) and the action.  The action is 'configure', a
# target name, etc.  'measure' passes this to xargs -n.
readonly NUM_COLUMNS=7
307
# Print all build tasks, one per line.  The two task lists are also saved
# under $BASE_DIR as oils-tasks.txt and other-shell-tasks.txt.
#
# Arguments:
#   $1 - path of the compiler provenance file
# Globals:
#   QUICKLY - if set and non-empty, print only the first 2 tasks of each list
print-tasks() {
  local build_prov=$1

  local t1=$BASE_DIR/oils-tasks.txt
  local t2=$BASE_DIR/other-shell-tasks.txt

  # Quoted: the provenance path is supplied by the caller.
  oils-tasks "$build_prov" > "$t1"
  other-shell-tasks "$build_prov" > "$t2"

  if test -n "${QUICKLY:-}"; then
    head -n 2 "$t1"  # debug and opt binary
    head -n 2 "$t2"  # do dash configure make
  else
    cat "$t1" "$t2"
  fi
}
324
# Run every build task, recording timings in $raw_out_dir/times.tsv, then
# record binary sizes.
#
# Arguments:
#   $1 build_prov   - from benchmarks/id.sh compiler-provenance
#   $2 raw_out_dir  - _tmp/ovm-build/$X or ../../benchmark-data/ovm-build/$X
measure() {
  local build_prov=$1
  local raw_out_dir=$2

  extract-oils

  local times_out="$raw_out_dir/times.tsv"
  # NOTE: Do we need two raw dirs?
  mkdir -p "$BASE_DIR"/{stage1,bin} "$raw_out_dir"

  # TODO: the $times_out calculation is duplicated in build-task()

  # Write header of the TSV file that is appended to.
  tsv-row \
    status elapsed_secs \
    host_name host_hash compiler_path compiler_hash \
    src_dir action > "$times_out"

  # TODO: remove xargs
  # - print-tasks | run-tasks with a loop
  # - exit code is more reliable, and we're not running in parallel anyway

  # errexit is disabled around the pipeline so its status can be captured and
  # reported below instead of aborting immediately.
  # Each task line has $NUM_COLUMNS fields; xargs re-invokes this script as
  # '$0 build-task $raw_out_dir <fields...>'.
  set +o errexit
  time print-tasks "$build_prov" \
    | xargs --verbose -n "$NUM_COLUMNS" -- "$0" build-task "$raw_out_dir"
  local status=$?
  set -o errexit

  if test "$status" -ne 0; then
    die "* Some tasks failed. (xargs status=$status) *"
  fi

  measure-sizes "$raw_out_dir"
}
359
360	#
361	# Data Preparation and Analysis
362	#
363
# Concatenate the most recent raw times.tsv and native-sizes.tsv files into
# $BASE_DIR/stage1/.
#
# Arguments:
#   $1 base_dir        - directory holding raw.<machine>.* dirs; defaults to
#                        $BASE_DIR when unset or empty
#                        (_tmp/ovm-build or ../benchmark-data/ovm-build)
#   $2 single_machine  - if non-empty, use only this machine's latest raw dir;
#                        otherwise use the latest of $MACHINE1 and $MACHINE2
stage1() {
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  # Output always goes under $BASE_DIR, regardless of base_dir.
  local out_dir=$BASE_DIR/stage1
  mkdir -p "$out_dir"

  local -a raw_times=()
  local -a raw_sizes=()
  local -a a b

  if test -n "$single_machine"; then
    # find dir in _tmp/ovm-build
    a=( "$base_dir"/raw."$single_machine".* )

    raw_times+=( "${a[-1]}/times.tsv" )
    raw_sizes+=( "${a[-1]}/native-sizes.tsv" )

  else
    # find last dirs in ../benchmark-data/ovm-build
    # Globs are in lexicographical order, which works for our dates.
    a=( "$base_dir"/raw."$MACHINE1".* )
    b=( "$base_dir"/raw."$MACHINE2".* )

    raw_times+=( "${a[-1]}/times.tsv" "${b[-1]}/times.tsv" )
    raw_sizes+=( "${a[-1]}/native-sizes.tsv" "${b[-1]}/native-sizes.tsv" )
  fi

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"
  tsv-concat "${raw_sizes[@]}" > "$out_dir/native-sizes.tsv"

  # NOTE: code that used to follow an unconditional 'return' here (it built a
  # one-column raw-data.tsv) was unreachable and referenced an undefined
  # variable, so it has been removed.
}
407
# Print the HTML report for the build benchmarks on stdout.
#
# Arguments:
#   $1 in_dir - directory containing times.tsv, native-sizes.tsv, hosts.tsv
#               and compilers.tsv
print-report() {
  local in_dir=$1

  benchmark-html-head 'OVM Build Performance'

  cat <<EOF
<body class="width60">
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
EOF

  # The source link names this script; it previously pointed at osh-parser.sh.
  cmark << 'EOF'
## OVM Build Performance

Source code: [oil/benchmarks/ovm-build.sh](https://github.com/oilshell/oil/tree/master/benchmarks/ovm-build.sh)

### Time in Seconds by Host and Compiler

We measure the build speed of `bash` and `dash` for comparison.
EOF

  # Highlighting one compiler makes this table easier to read.
  # NOTE(review): the pattern names gcc; presumably matching cells get the
  # 'special' CSS class -- confirm against tsv2html.
  tsv2html \
    --css-class-pattern 'special ^gcc' \
    "$in_dir/times.tsv"

  cmark << 'EOF'
### Native Binary Size

EOF
  tsv2html --css-class-pattern 'special ^gcc' "$in_dir/native-sizes.tsv"

  cmark << 'EOF'

### Host and Compiler Details
EOF
  tsv2html "$in_dir/hosts.tsv"
  tsv2html "$in_dir/compilers.tsv"

  cat <<EOF
</body>
</html>
EOF
}
454
# End-to-end run on a single machine: start from a clean $BASE_DIR, download
# and extract the tarballs, build, measure, and run the report stages.
soil-run() {
  rm -r -f "$BASE_DIR"
  mkdir -p "$BASE_DIR"

  download
  extract-other

  # Copied from benchmarks/osh-runtime.sh soil-run

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  # Deliberately unquoted: the variable is split into separate ninja targets.
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  # Host label used in the raw directory name and passed to stage1.
  local single_machine='no-host'

  # Declared separately so a failing print-job-id is not masked by 'local'.
  local job_id
  job_id=$(print-job-id)

  # Like benchmarks/auto.sh
  #local build_prov
  #build_prov=$(benchmarks/id.sh compiler-provenance $job_id)

  compiler-provenance-2 \
    "$single_machine" "$job_id" _tmp

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure _tmp/compiler-provenance.txt "$raw_out_dir"

  # Trivial concatenation for 1 machine.  The empty first argument makes
  # stage1 fall back to its default base dir.
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  benchmarks/report.sh stage3 "$BASE_DIR"
}
495
# Dispatch: run the function named by the first argument, passing it the
# remaining arguments (see Usage at the top of the file).
"${@}"