benchmarks/osh-runtime.sh

OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

442 lines, 264 significant

1	#!/usr/bin/env bash
2	#
3	# Test scripts found in the wild for both correctness and performance.
4	#
5	# Usage:
6	# benchmarks/osh-runtime.sh <function name>
7
# Strict mode: fail on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory one level above this script's own directory.
# $0 is quoted so a path containing spaces still resolves, and '&&' keeps pwd
# from reporting an unrelated directory when the cd fails.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
13
14	source benchmarks/common.sh # tsv-concat
15	source benchmarks/id.sh # print-job-id
16	source soil/common.sh # find-dir-html
17	source test/common.sh
18	source test/tsv-lib.sh # tsv-row
19
# Default directory for results, relative to the current directory.
declare -r BASE_DIR='_tmp/osh-runtime'

# Where the benchmark tarballs are downloaded and unpacked.  Anchored at $PWD
# so the path stays valid after a task changes directory.
# TODO: Move to ../oil_DEPS
declare -r TAR_DIR="${PWD}/_deps/osh-runtime"

#
# Dependencies
#

# Source tree whose configure script is run by the configure.cpython workload.
declare -r PY27_DIR="${PWD}/Python-2.7.13"
30
31	# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  ### Print the file name of each benchmark tarball, one per line
  local -a names=(
    tcc-0.9.26.tar.bz2
    yash-2.46.tar.xz
    ocaml-4.06.0.tar.xz
    util-linux-2.40.tar.xz
  )
  printf '%s\n' "${names[@]}"
}
40
download() {
  ### Fetch every tarball named by tarballs() into $TAR_DIR
  mkdir -p "$TAR_DIR"

  # The list must be piped into xargs; an escaped '\|' would instead hand a
  # literal '|' to tarballs as an argument and never start wget.
  #
  # -I {} already runs one command per input line, so the former '-n 1' was
  # redundant (GNU xargs warns that the two options are mutually exclusive).
  # --no-clobber lets a re-run skip files that were already downloaded.
  tarballs \
    | xargs -I {} --verbose -- \
        wget --no-clobber --directory "$TAR_DIR" \
        'https://www.oilshell.org/blob/testdata/{}'
}
46
extract() {
  ### Unpack every .bz2 and .xz archive found in $TAR_DIR, in place
  #
  # Paths are quoted so a checkout under a directory with spaces still works.
  # An archive type with no matching file is not skipped: the unexpanded
  # pattern reaches tar, which fails on it.
  local f

  set -x
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
57	#
58	# Computation
59	#
60
run-tasks() {
  ### Run each task read from stdin, saving timing, stdout and GC stats
  #
  # Args:
  #   $1: output directory, relative to the current directory
  # Stdin:
  #   one task per line, as whitespace-separated fields:
  #     host_name sh_path workload
  # Writes under the output directory:
  #   times.tsv           rows appended through time-tsv
  #   files-<task_id>/    STDOUT.txt of each task
  #   gc-<task_id>.txt    only for tasks whose shell matches _bin/*/osh
  # Calls die on an unknown workload.

  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir" # because we change dirs

  local task_id=0
  local host_name sh_path workload
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*) # Already absolute
        sh_run_path=$sh_path
        ;;
      */*) # Relative, like bin/osh: make it absolute, since we may pushd below
        sh_run_path=$PWD/$sh_path
        ;;
      *) # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Never set to a non-empty value in this function, so the two blocks
    # guarded by it below are currently inactive.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.  The pattern needs the '*' wildcard so
    # that any binary of the form _bin/<variant>/osh is matched; the stats are
    # written to file descriptor 99, which is redirected to the per-task file.
    case $sh_path in
      _bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
183
print-tasks() {
  ### Print one row per (shell, workload) pair via tsv-row
  #
  # Args:
  #   $1: host name, copied into every row
  #   $2: native osh binary, appended to the list of shells
  # Env:
  #   QUICKLY: when non-empty, a reduced workload list is used
  local host_name=$1
  local osh_native=$2

  local -a workloads=(
    hello-world
    abuild-print-help

    configure.cpython
    configure.util-linux
    configure.ocaml
    configure.tcc
    configure.yash
  )

  if test -n "${QUICKLY:-}"; then
    # Quick mode: only the workload(s) left uncommented here
    workloads=(
      configure.util-linux
      #hello-world
      #abuild-print-help
    )
  fi

  # Keep the loop variables out of the caller's scope.
  local sh_path workload

  # $osh_native stays unquoted as before, so an empty value adds no shell.
  # NOTE(review): confirm no caller relies on it being split into several.
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
214
measure() {
  ### Run all tasks and gather raw timing, GC and provenance data
  #
  # Args:
  #   $1 host_name:   'no-host' or 'lenny'
  #   $2 raw_out_dir: directory that receives times.tsv, gc_stats.tsv,
  #                   provenance.tsv and whatever run-tasks writes
  #   $3 osh_native:  $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA
  #   $4 out_dir:     optional, defaults to $BASE_DIR
  #                   (../benchmark-data/osh-runtime or _tmp/osh-runtime)
  local host_name=$1
  local raw_out_dir=$2
  local osh_native=$3
  # Not read anywhere below; kept so the fourth positional argument stays
  # accepted and documented.
  local out_dir=${4:-$BASE_DIR}

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats.  This must be a real pipe: with an escaped '\|' the
  # task list would never reach run-tasks.
  print-tasks "$host_name" "$osh_native" | run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
243
stage1() {
  ### Concatenate the newest raw results of each machine into $BASE_DIR/stage1
  #
  # Args:
  #   $1: directory holding the raw.<machine>.* dirs; defaults to $BASE_DIR
  #       (_tmp/osh-runtime or ../benchmark-data/osh-runtime)
  #   $2: optional machine name; when empty, $MACHINE1 and $MACHINE2 are used
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1 # _tmp/osh-runtime
  mkdir -p $out_dir

  local -a machines
  if test -z "$single_machine"; then
    machines=( "$MACHINE1" "$MACHINE2" )
  else
    machines=( "$single_machine" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local machine newest
  local -a candidates
  for machine in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the most recent run.
    candidates=( $base_dir/raw.$machine.* )
    newest=${candidates[-1]}

    raw_times+=( $newest/times.tsv )
    raw_gc_stats+=( $newest/gc_stats.tsv )
    raw_provenance+=( $newest/provenance.tsv )
  done

  tsv-concat "${raw_times[@]}" > $out_dir/times.tsv
  tsv-concat "${raw_gc_stats[@]}" > $out_dir/gc_stats.tsv
  tsv-concat "${raw_provenance[@]}" > $out_dir/provenance.tsv
}
279
print-report() {
  ### Write the HTML report for the tables in directory $1 to stdout
  #
  # Markdown fragments go through cmark and each .tsv table through tsv2html.
  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  printf '%s\n' \
    '<body class="width60">' \
    '<p id="home-link">' \
    '<a href="/">oilshell.org</a>' \
    '</p>'

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [oil/benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself. Parse time is included.

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html $in_dir/elapsed.tsv

  cmark <<< '### Memory Usage (Max Resident Set Size in MB)'
  tsv2html $in_dir/max_rss.tsv

  cmark <<< '### GC Stats'
  tsv2html $in_dir/gc_stats.tsv

  cmark <<< '### Details of All Tasks'
  tsv2html $in_dir/details.tsv

  # This fragment starts with an empty line, as before.
  cmark <<< $'\n### Shell and Host Details'
  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  # Only show files.html link on a single machine
  if test -f $(dirname $in_dir)/files.html; then
    cmark <<< $'---\n\n[raw files](files.html)'
  fi

  printf '%s\n' '</body>' '</html>'
}
343
soil-run() {
  ### Run it on just this machine, and make a report

  # ${BASE_DIR:?} aborts rather than running 'rm -r -f' on an empty path.
  rm -r -f -- "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  # Declared separately from the assignment so that a failing print-job-id is
  # not masked by the exit status of 'local'.
  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make $BASE_DIR/files.html, so index.html can potentially link to it
  find-dir-html "$BASE_DIR" files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
384
385	#
386	# Debugging
387	#
388
compare-cpython() {
  ### Debugging aid: diff the per-task files of several shells
  #
  # Reads a results directory under ../benchmark-data/osh-runtime and writes
  # the diffs to _tmp/cpython-configure/.
  #
  # The array plus ${a[-1]} below only makes sense with a glob (last match in
  # lexicographical order), so the wildcards are restored here.
  # NOTE(review): confirm the directory naming in ../benchmark-data.

  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task IDs selecting the files-<id> directory of each shell.  They are
  # spaced 6 apart, while print-tasks currently lists 7 workloads.
  # NOTE(review): check them against the times.tsv rows printed above.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  # diff exits non-zero when its inputs differ; don't treat that as fatal.
  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  # Everything below is disabled by this early return; it is kept for manual
  # debugging sessions.
  return

  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
441
442	"$@"