benchmarks/ovm-build.sh

OILS / benchmarks / ovm-build.sh View on Github | oilshell.org

492 lines, 259 significant

1	#!/usr/bin/env bash
2	#
3	# Measure the time it takes to build a binary with different compilers on
4	# different machines, and measure the binary size.
5	#
6	# Usage:
7	# ./ovm-build.sh <function name>
8	#
9	# Run on its own:
10	# 1. Follow common instructions in benchmarks/osh-parser.sh
11	# 2. benchmarks/auto.sh measure-builds
12	# 3. benchmarks/report.sh ovm-build
13
# Directories used:
#
# oilshell.org/blob/
#   ovm-build/
#
# ~/git/oilshell/
#   oil/
#     _deps/
#       ovm-build  # tarballs and extracted source
#     _tmp/
#       ovm-build/
#         raw/     # output CSV
#         stage1
#   benchmark-data/
#     ovm-build/
#       raw/
#     compiler-id/
#     host-id/
32
# Strict mode: fail on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# NOTE: these paths are relative, so the script must be invoked from the
# directory that contains benchmarks/, build/ and test/.
source benchmarks/common.sh  # for log, etc.
source benchmarks/id.sh  # print-job-id
source build/common.sh  # for $CLANG

# Absolute path of the parent of this script's directory.  Quoted so that a
# checkout path containing spaces works; '&&' so that a failed 'cd' aborts
# instead of silently recording the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
source test/tsv-lib.sh  # uses REPO_ROOT

readonly BASE_DIR=_tmp/ovm-build
readonly TAR_DIR=$PWD/_deps/ovm-build  # Make it absolute
46
47	#
48	# Dependencies
49	#
50
# Source directories under $TAR_DIR for which other-shell-tasks emits
# 'configure' and 'make' tasks.
readonly -a TAR_SUBDIRS=(dash-0.5.9.1 bash-4.4)
55
# Print the file names of the source tarballs, one per line.
# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  printf '%s\n' \
    bash-4.4.tar.gz \
    dash-0.5.9.1.tar.gz \
    mksh-R56c.tgz
}
64
# Download every tarball printed by 'tarballs' into $TAR_DIR.  Files that
# already exist there are not fetched again (wget --no-clobber).
download() {
  mkdir -p "$TAR_DIR"

  # -I {} already runs one wget per input line.  Combining it with -n 1 makes
  # GNU xargs warn that the two options are mutually exclusive, so -n is
  # omitted here.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
      'https://www.oilshell.org/blob/ovm-build/{}'
}
70
# Extract every *gz archive found in $TAR_DIR into $TAR_DIR, timing the whole
# loop.
# Done MANUALLY.
extract-other() {
  local f  # keep the loop variable out of the global scope
  # Paths are quoted so that a $TAR_DIR containing whitespace is not split.
  time for f in "$TAR_DIR"/*gz; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
}
77
78	# Done automatically by 'measure' function.
79
# Copy the oils-for-unix-$OIL_VERSION source tree from ../benchmark-data/src/
# to $TAR_DIR.
# TODO: CI should download this from previous
extract-oils() {
  local name="oils-for-unix-$OIL_VERSION"

  # To run on multiple machines, use the one in the benchmarks-data repo.
  # --no-target-directory: copy onto the destination itself, not into it.
  # Paths are quoted so that whitespace in $TAR_DIR does not split them.
  cp --recursive --no-target-directory \
    "../benchmark-data/src/$name/" \
    "$TAR_DIR/$name/"
}
87
88	#
89	# Measure Size of Binaries.
90	#
91
92	# Other tools:
93	# - bloaty to look inside elf file
94	# - nm? Just a flat list of symbols? Counting them would be nice.
95	# - zipfile.py to look inside bytecode.zip
96
# Print a TSV table of file sizes.
#
# Arguments: the paths to measure; find is run with -maxdepth 0, so
#            directories are not descended into.
# Outputs:   a header row, then one row per path with the host name, the size
#            in bytes, and the path.
sizes-tsv() {
  # host_label matches the times.tsv file output by report.R
  tsv-row host_label num_bytes path

  # Declared separately from the assignment so that a failing 'hostname' is
  # not masked by the exit status of 'local'.
  local host
  host=$(hostname)

  find "$@" -maxdepth 0 -printf "$host\t%s\t%p\n"
}
103
104	# NOTE: This should be the same on all x64 machines. But I want to run it on
105	# x64 machines.
# Record the sizes of the binaries under $BASE_DIR/bin in two TSV files.
#
# Arguments: $1 - output path prefix (default: $BASE_DIR/raw/demo)
# Outputs:   $1.native-sizes.tsv and $1.other-shell-sizes.tsv
measure-sizes() {
  local prefix=${1:-$BASE_DIR/raw/demo}

  # PROBLEM: Do I need provenance for gcc/clang here?  I can just join it later
  # in R.

  # clang/oils-for-unix
  # clang/oils-for-unix.stripped
  # gcc/oils-for-unix
  # gcc/oils-for-unix.stripped
  sizes-tsv "$BASE_DIR"/bin/*/{oils-for-unix,oils-for-unix.stripped} \
    > "${prefix}.native-sizes.tsv"

  # build-task copies the other shells to bin/<compiler>/, e.g. gcc/bash and
  # gcc/dash, so both path components have to be globbed.
  sizes-tsv "$BASE_DIR"/bin/*/*sh \
    > "${prefix}.other-shell-sizes.tsv"

  log "Wrote ${prefix}.*.tsv"
}
124
125	#
126	# Unused Demos
127	#
128
# Show the size of _build/oil/bytecode.zip two ways: the last line of its
# 'unzip -l' listing, and its 'ls -l' entry.
bytecode-size() {
  local -r bytecode_zip='_build/oil/bytecode.zip'

  # 242 files, 1.85 MB
  unzip -l "$bytecode_zip" | tail -n 1

  # 1.88 MB, so there's 30K of header overhead.
  ls -l "$bytecode_zip"
}
138
# Clean, then build the _build/oil/ovm-dbg target with $CLANG as the C
# compiler.
# 6.8 seconds for debug build, instead of 8 seconds.
clang-oil-dbg() {
  local -r dbg_target='_build/oil/ovm-dbg'

  make clean
  CC="$CLANG" make "$dbg_target"
}
144
145	#
146	# Measure Elapsed Time
147	#
148
149	# Add --target-size? Add that functionality to benchmarks/time.py?
150	#
151	# Should we add explicit targets?
152	# - ovm-clang, ovm-clang-dbg
153	# - ovm-gcc, ovm-gcc-dbg
154	#
155	# It would be possible, but it complicates the makefile.
156
# Run one timed build step in a source directory and, for build actions, copy
# the resulting binary to $BASE_DIR/bin/<basename of compiler_path>/.
#
# 'measure' invokes this once per task line, via xargs.
#
# Arguments:
#   $1 out_dir        directory holding the times TSV file that is appended to
#   $2 job_id         together with host, names that TSV file
#   $3 host           recorded as a field
#   $4 host_hash      recorded as a field
#   $5 compiler_path  recorded as a field; passed to make as CC
#   $6 compiler_hash  recorded as a field
#   $7 src_dir        source tree to run the step in
#   $8 action         'configure', 'make', 'oils-for-unix',
#                     'oils-for-unix.stripped', or any other make target
build-task() {
  local out_dir=$1
  local job_id=$2
  local host=$3
  local host_hash=$4
  local compiler_path=$5
  local compiler_hash=$6
  local src_dir=$7
  local action=$8

  local times_out="$PWD/$out_dir/$host.$job_id.times.tsv"

  # Definitions that depend on $PWD.  They must be computed before the pushd
  # below.
  local -a TIME_PREFIX=(
    time-tsv
    --append
    --output "$times_out"
    --field "$host" --field "$host_hash"
    --field "$compiler_path" --field "$compiler_hash"
    --field "$src_dir" --field "$action"
  )
  local bin_base_dir=$PWD/$BASE_DIR/bin

  local bin_dir="$bin_base_dir/$(basename "$compiler_path")"
  mkdir -p "$bin_dir"

  pushd "$src_dir" >/dev/null

  # NOTE: We're not saving the output anywhere.  We save the status, which
  # protects against basic errors.

  case $action in
    (configure)
      "${TIME_PREFIX[@]}" -- ./configure

      # Cleaning here relies on the ORDER of tasks.txt.  configure happens
      # before build.  The Clang build shouldn't reuse GCC objects!
      # It has to be done after configure, because the Makefile must exist!
      make clean
      ;;

    (make)
      "${TIME_PREFIX[@]}" -- make CC="$compiler_path"

      # Pick the built binary from the source directory's name.  src_dir is a
      # path like $TAR_DIR/bash-4.4, so the patterns need wildcards on both
      # sides.
      local target
      case $src_dir in
        (*/bash*)
          target=bash
          ;;
        (*/dash*)
          target=src/dash
          ;;
        (*)
          die "Invalid src_dir $src_dir"
          ;;
      esac

      strip "$target"
      cp -v "$target" "$bin_dir"
      ;;

    (oils-for-unix*)
      local variant
      case $action in
        (oils-for-unix)
          variant='dbg'
          ;;
        (oils-for-unix.stripped)
          variant='opt'
          ;;
        (*)
          die "Invalid target"
          ;;
      esac

      # Change the C compiler into the corresponding C++ compiler
      local compiler
      case $compiler_path in
        (*gcc)
          # note: we take provenance of /usr/bin/gcc, but the shell script
          # runs 'c++'
          compiler='cxx'
          ;;
        (*clang)
          # Note on slight mess: benchmarks/id.sh takes the provenance of
          # $CLANG.  We translate that to 'clang' here, and
          # _build/oils.sh uses $CLANGXX.
          compiler='clang'
          ;;
        (*)
          die "Invalid compiler"
          ;;
      esac

      "${TIME_PREFIX[@]}" -- _build/oils.sh "$compiler" "$variant"

      # e.g. cp _bin/clang-opt-sh/oils-for-unix.stripped _tmp/ovm-build/bin/clang/
      local filename=$action
      cp -v "_bin/$compiler-$variant-sh/$filename" "$bin_dir"
      ;;

    (*)
      local target=$action  # Assume it's a target like _bin/oil.ovm

      "${TIME_PREFIX[@]}" -- make CC="$compiler_path" "$target"

      cp -v "$target" "$bin_dir"
      ;;
  esac

  popd >/dev/null

  # NOTE: $? here is the exit status of the popd above.
  log "DONE BUILD TASK $action $src_dir __ status=$?"
}
266
# Print the oils-for-unix build tasks: two per provenance line, one for each
# binary.
#
# Arguments: $1 - compiler provenance file
# Outputs:   task lines on stdout.  Each is a provenance line with two fields
#            appended: the source directory and the action.
oils-tasks() {
  local provenance=$1

  local src="$TAR_DIR/oils-for-unix-$OIL_VERSION"
  local prov_line action

  cat $provenance | while read prov_line; do
    for action in oils-for-unix oils-for-unix.stripped; do
      echo "$prov_line" $src $action
    done
  done
}
278
# Print the build tasks for the other shells: a 'configure' and a 'make' task
# for every directory in TAR_SUBDIRS, for each provenance line that is not
# skipped.
#
# Arguments: $1 - compiler provenance file
# Outputs:   task lines on stdout.  Each is a provenance line with two fields
#            appended: the source directory and the action.
other-shell-tasks() {
  local provenance=$1
  local line dir

  while read -r line; do
    case $line in
      # Skip clang for now.  The line holds several fields, so the pattern
      # needs wildcards on both sides to match the compiler path.
      (*clang*)
        continue
        ;;
    esac

    for dir in "${TAR_SUBDIRS[@]}"; do
      echo "$line" "$TAR_DIR/$dir" configure
      echo "$line" "$TAR_DIR/$dir" make
    done
  done < "$provenance"
}
297
# 5 releases: 0.0.0 to 0.4.0.  For now, just do the 0.5.alpha1 release, and
# show the drop.
#
# Currently a placeholder that prints a single empty line.
oil-historical-tasks() {
  printf '\n'
}
303
304	# action is 'configure', a target name, etc.
305	readonly NUM_COLUMNS=7 # 5 from provenence, then tarball/target
306
# Print all build tasks: the oils-for-unix tasks followed by the other-shell
# tasks.  Both lists are also written to files under $BASE_DIR.
#
# Arguments: $1 - compiler provenance file
# Globals:   QUICKLY (read) - if non-empty, print only the first two tasks of
#            each list
print-tasks() {
  local build_prov=$1

  local oils_list=$BASE_DIR/oils-tasks.txt
  local shells_list=$BASE_DIR/other-shell-tasks.txt

  oils-tasks $build_prov > $oils_list
  other-shell-tasks $build_prov > $shells_list

  if [[ -z ${QUICKLY:-} ]]; then
    cat $oils_list $shells_list
    return
  fi

  # Cut the work in half
  local list
  for list in $oils_list $shells_list; do
    head -n 2 $list
  done
}
324
# Run every build task and record the timings and binary sizes.
#
# Arguments:
#   $1 build_prov - compiler provenance file, named
#                   <prefix>.compiler-provenance.txt
#   $2 out_dir    - output directory (default: $BASE_DIR/raw)
# Outputs: $out_dir/<prefix>.times.tsv, the size tables written by
#          measure-sizes, and a copy of the provenance file in $out_dir.
measure() {
  local build_prov=$1  # from benchmarks/id.sh compiler-provenance
  local out_dir=${2:-$BASE_DIR/raw}

  extract-oils

  # The output prefix is the provenance file name without its suffix.
  local prov_name=$(basename $build_prov)
  local prefix=${prov_name%.compiler-provenance.txt}

  # NOTE: Do we need two raw dirs?
  mkdir -p $BASE_DIR/{raw,stage1,bin} $out_dir

  # TODO: this path calculation is duplicated in build-task()
  local times_tsv="$out_dir/$prefix.times.tsv"

  # Write header of the TSV file that is appended to.
  tsv-row \
    status elapsed_secs \
    host_name host_hash compiler_path compiler_hash \
    src_dir action > $times_tsv

  # errexit is turned off around the pipeline so that its status can be
  # reported below rather than aborting on the spot.
  set +o errexit
  time print-tasks $build_prov | xargs --verbose -n $NUM_COLUMNS -- $0 build-task $out_dir
  local status=$?
  set -o errexit

  (( status == 0 )) || die "* Some tasks failed. (xargs status=$status) *"

  measure-sizes $out_dir/$prefix

  cp -v $build_prov $out_dir
}
360
361	#
362	# Data Preparation and Analysis
363	#
364
# Combine the raw TSV files from $MACHINE1 and $MACHINE2 into
# $BASE_DIR/stage1, taking the last glob match for each machine.
#
# Arguments:
#   $1 raw_dir        - directory with the raw TSV files
#                       (default: $BASE_DIR/raw)
#   $2 single_machine - accepted, but not used by this function
# Outputs: times.tsv, bin-sizes.tsv, native-sizes.tsv and raw-data.tsv in
#          $BASE_DIR/stage1; a 'head' and 'wc -l' summary on stdout.
stage1() {
  local raw_dir=${1:-$BASE_DIR/raw}
  local single_machine=${2:-}

  local out=$BASE_DIR/stage1
  mkdir -p $out

  local kind
  local -a a b

  # Globs are in lexicographical order, which works for our dates.
  # native-sizes must stay last: a and b are read again after the loop.
  for kind in times bin-sizes native-sizes; do
    a=($raw_dir/$MACHINE1.*.$kind.tsv)
    b=($raw_dir/$MACHINE2.*.$kind.tsv)
    tsv-concat ${a[-1]} ${b[-1]} > $out/$kind.tsv
  done

  # NOTE: unused
  # Construct a one-column TSV file
  {
    echo 'path'
    echo ${a[-1]}
    echo ${b[-1]}
  } > $out/raw-data.tsv

  head $out/*
  wc -l $out/*
}
403
# Print the HTML report for the build benchmark on stdout.
#
# Arguments: $1 - directory containing times.tsv, native-sizes.tsv, hosts.tsv
#            and compilers.tsv
print-report() {
  local in_dir=$1

  benchmark-html-head 'OVM Build Performance'

  cat <<EOF
<body class="width60">
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
EOF

  # The source link names this script, which produces the data shown here.
  cmark << 'EOF'
## OVM Build Performance

Source code: [oil/benchmarks/ovm-build.sh](https://github.com/oilshell/oil/tree/master/benchmarks/ovm-build.sh)

### Time in Seconds by Host and Compiler

We measure the build speed of `bash` and `dash` for comparison.
EOF

  # Highlighting one compiler makes this table easier to read.
  # NOTE(review): the pattern names gcc; presumably 'special' is the CSS
  # class given to matching rows -- confirm against tsv2html.
  tsv2html \
    --css-class-pattern 'special ^gcc' \
    "$in_dir/times.tsv"

  cmark << 'EOF'
### Native Binary Size

EOF
  tsv2html --css-class-pattern 'special ^gcc' "$in_dir/native-sizes.tsv"

  cmark << 'EOF'

### Host and Compiler Details
EOF
  tsv2html "$in_dir/hosts.tsv"
  tsv2html "$in_dir/compilers.tsv"

  cat <<EOF
</body>
</html>
EOF
}
450
# End-to-end run on one machine: start from an empty $BASE_DIR, download and
# extract the sources, build the osh binaries with ninja, record provenance,
# run the measurements, and then run stage1 and report stages 2 and 3.
soil-run() {
  rm -r -f $BASE_DIR
  mkdir -p $BASE_DIR

  download
  extract-other

  # Copied from benchmarks/osh-runtime.sh soil-run

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a ninja_targets=( $OSH_CPP_NINJA_BUILD )
  ninja "${ninja_targets[@]}"

  local host_label='no-host'

  local job_id
  job_id=$(print-job-id)

  # Like benchmarks/auto.sh
  local prov_file
  prov_file=$(benchmarks/id.sh compiler-provenance $job_id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    $host_label $job_id _tmp \
    bash dash "${ninja_targets[@]}"

  local raw_out_dir="$BASE_DIR/raw.$host_label.$job_id"
  mkdir -p $raw_out_dir $BASE_DIR/stage1

  measure $prov_file $raw_out_dir

  # Trivial concatenation for 1 machine
  stage1 '' $host_label

  local stage
  for stage in stage2 stage3; do
    benchmarks/report.sh $stage $BASE_DIR
  done
}
491
492	"$@"