benchmarks/osh-runtime.sh

OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

438 lines, 263 significant

#!/usr/bin/env bash
#
# Test scripts found in the wild for both correctness and performance.
#
# Usage:
#   benchmarks/osh-runtime.sh <function name>
#
# Run this from the repo root: the 'source' lines and several paths below are
# relative to the current directory.

set -o nounset
set -o pipefail
set -o errexit

# "$0" is quoted so a checkout path with spaces doesn't break dirname, and &&
# makes a failed cd an error instead of silently printing the wrong directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)

source benchmarks/common.sh  # tsv-concat
source benchmarks/id.sh  # print-job-id
source soil/common.sh  # find-dir-html
source test/common.sh
source test/tsv-lib.sh  # tsv-row

# Default directory for everything this script writes.
readonly BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
readonly TAR_DIR=$PWD/_deps/osh-runtime  # Make it absolute

#
# Dependencies
#

# Python source tree whose ./configure is run by the configure.cpython workload.
readonly PY27_DIR=$PWD/Python-2.7.13
30
# Print the file names of the testdata tarballs, one per line.
# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  printf '%s\n' \
    tcc-0.9.26.tar.bz2 \
    yash-2.46.tar.xz \
    ocaml-4.06.0.tar.xz \
    openvswitch-3.3.0.tar.gz
}
40
# Download every tarball listed by 'tarballs' into $TAR_DIR.
# Files that already exist there are not fetched again (wget --no-clobber).
download() {
  mkdir -p "$TAR_DIR"

  # -I {} already makes xargs run one wget per input line.  GNU xargs treats
  # -n and -I as mutually exclusive (it warns and ignores -n), so -n 1 is not
  # passed here.
  tarballs \
    | xargs -I {} --verbose -- \
        wget --no-clobber --directory-prefix "$TAR_DIR" \
          'https://www.oilshell.org/blob/testdata/{}'
}
46
# Unpack every .gz, .bz2 and .xz archive found in $TAR_DIR into $TAR_DIR,
# then list the directory.  The extraction loop is traced (set -x) and timed.
extract() {
  local f

  set -x
  # Paths are quoted so that a $TAR_DIR containing spaces still works.
  time for f in "$TAR_DIR"/*.{gz,bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
57	#
58	# Computation
59	#
60
# Run every benchmark task read from stdin and record its timing and output.
#
# Args:
#   $1 - raw_out_dir: output directory, relative to the current directory
#
# Stdin:
#   One task per line: host_name, sh_path and workload, split on whitespace
#   by 'read'.
#
# Writes under raw_out_dir:
#   times.tsv          one row appended per task by time-tsv
#   files-<task_id>/   STDOUT.txt of the task
#   gc-<task_id>.txt   GC stats, only for shells matching _bin/*/osh
run-tasks() {
  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  local task_id=0
  local host_name sh_path workload
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    # Some workloads run from another directory, so a relative path to a shell
    # (e.g. bin/osh) must be made absolute before we pushd.
    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # No workload below sets this; when non-empty, files created by the task
    # are copied into $files_out_dir afterwards.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        # Run configure from the per-task output dir rather than the source
        # tree.
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.openvswitch)
            conf_dir='openvswitch-3.3.0'
            ;;
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats
    case $sh_path in
      _bin/*/osh)
        # NOTE(review): pattern assumed to target native builds such as
        # _bin/cxx-opt/osh -- confirm.  File descriptor 99 is redirected to
        # the stats file and its number is passed in OILS_GC_STATS_FD.
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
179
# Print one task row per (shell, workload) pair using tsv-row.
#
# Args:
#   $1 - host_name: value for the first column of every row
#   $2 - osh_native: path of the native osh binary to add to the shell list
#
# Env:
#   QUICKLY - if non-empty, only a reduced workload list is printed
print-tasks() {
  local host_name=$1
  local osh_native=$2

  local -a workloads=(
    hello-world
    abuild-print-help

    configure.cpython
    configure.openvswitch
    configure.ocaml
    configure.tcc
    configure.yash
  )

  if test -n "${QUICKLY:-}"; then
    # Quick mode: just a single workload
    workloads=(
      configure.openvswitch
      #hello-world
      #abuild-print-help
    )
  fi

  # Keep the loop variables out of the caller's scope.
  local sh_path workload

  # $osh_native is left unquoted to keep the original word splitting: an empty
  # value adds no shell to the list.
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
210
# Run all tasks for one host and collect the raw results in raw_out_dir.
#
# Args:
#   $1 - host_name: 'no-host' or 'lenny'
#   $2 - raw_out_dir: directory that receives times.tsv, per-task files,
#        gc_stats.tsv and a copy of _tmp/provenance.tsv
#   $3 - osh_native: $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA
#   $4 - accepted for backward compatibility; it was never used
measure() {
  local host_name=$1
  local raw_out_dir=$2
  local osh_native=$3

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  print-tasks "$host_name" "$osh_native" | run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
239
# Concatenate the most recent raw results of each machine into
# $BASE_DIR/stage1/{times,gc_stats,provenance}.tsv.
#
# Args:
#   $1 - base_dir: directory holding the raw.<machine>.* result dirs
#        (_tmp/osh-runtime or ../benchmark-data/osh-runtime); defaults to
#        $BASE_DIR when empty or omitted
#   $2 - single_machine: if non-empty, only this machine's results are used;
#        otherwise $MACHINE1 and $MACHINE2 are used
stage1() {
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # always under _tmp/osh-runtime
  mkdir -p "$out_dir"

  local -a machines
  if test -n "$single_machine"; then
    machines=( "$single_machine" )
  else
    machines=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local machine latest
  local -a dirs
  for machine in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the newest run.
    dirs=( "$base_dir"/raw."$machine".* )
    latest=${dirs[-1]}

    raw_times+=( "$latest/times.tsv" )
    raw_gc_stats+=( "$latest/gc_stats.tsv" )
    raw_provenance+=( "$latest/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
275
# Print the HTML report to stdout.
#
# Args:
#   $1 - in_dir: directory containing elapsed.tsv, max_rss.tsv, gc_stats.tsv,
#        details.tsv, shells.tsv and hosts.tsv
#
# A link to files.html is added only when that file exists next to in_dir.
# None of the here-docs contain expansions, so all delimiters are quoted.
print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<'EOF'
<body class="width60">
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [oil/benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself. Parse time is included.

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<'EOF'
### Memory Usage (Max Resident Set Size in MB)
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<'EOF'
### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<'EOF'
### Details of All Tasks
EOF
  tsv2html "$in_dir/details.tsv"


  cmark <<'EOF'

### Shell and Host Details
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  # Only show files.html link on a single machine
  if test -f "$(dirname "$in_dir")/files.html"; then
    cmark <<'EOF'
---

[raw files](files.html)
EOF
  fi

  cat <<'EOF'
</body>
</html>
EOF
}
339
soil-run() {
  ### Run it on just this machine, and make a report

  # Start from a clean output directory.  ${BASE_DIR:?} aborts instead of
  # running rm with an empty path if the variable is ever unset.
  rm -r -f -- "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  # (unquoted on purpose so the value is split into array elements)
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make $BASE_DIR/files.html (_tmp/osh-runtime/files.html), so index.html can
  # potentially link to it
  find-dir-html "$BASE_DIR" files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
380
381	#
382	# Debugging
383	#
384
# Debugging aid: diff the files produced by the configure.cpython workload
# under bash against those produced by dash, osh-py and osh-cpp.
#
# Reads the newest matching results directory under
# ../benchmark-data/osh-runtime and writes the diffs to
# _tmp/cpython-configure/.  Turns errexit off, because diff exits non-zero
# when the inputs differ.
compare-cpython() {
  # NOTE(review): the trailing-glob form is assumed; result directories are
  # named raw.<host>.<job id> elsewhere in this file -- confirm the pattern.
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  # Globs sort lexicographically, so the last match is the newest run.
  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task ids of the configure.cpython row for each shell.  They depend on the
  # data set; check them against the rows printed above.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  # Everything below is disabled; remove this 'return' to see per-file diffs.
  return

  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
437
# Dispatch: run the function named by the first command-line argument and
# pass it the remaining arguments.
"${@}"