benchmarks/osh-runtime.sh

OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

479 lines, 274 significant

1	#!/usr/bin/env bash
2	#
3	# Test scripts found in the wild for both correctness and performance.
4	#
5	# Usage:
6	# benchmarks/osh-runtime.sh <function name>
7
# Strict mode: fail on unset variables, on a failure in any pipeline stage,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory above the one containing this script.
# "$0" is quoted so that a checkout path containing whitespace still resolves.
REPO_ROOT=$(cd "$(dirname "$0")/.."; pwd)

# These paths are relative, so they resolve against the current directory.
source benchmarks/common.sh  # tsv-concat
source benchmarks/id.sh  # print-job-id
source soil/common.sh  # find-dir-html
source test/common.sh
source test/tsv-lib.sh  # tsv-row

# Default directory for benchmark output, relative to the current directory.
readonly BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
readonly TAR_DIR=$PWD/_deps/osh-runtime  # Make it absolute

#
# Dependencies
#

# Source tree whose ./configure script is the 'configure.cpython' workload.
readonly PY27_DIR=$PWD/Python-2.7.13
30
31	# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  ### Print the file name of each benchmark tarball, one per line

  local -a names=(
    tcc-0.9.26.tar.bz2
    yash-2.46.tar.xz
    ocaml-4.06.0.tar.xz
    util-linux-2.40.tar.xz
  )
  printf '%s\n' "${names[@]}"
}
40
download() {
  ### Fetch every tarball listed by 'tarballs' into $TAR_DIR

  mkdir -p "$TAR_DIR"

  # One wget invocation per tarball name: -I substitutes each input line for
  # {} in the URL.  -n 1 is not passed because GNU xargs treats it as mutually
  # exclusive with -I.  --no-clobber skips files that were already downloaded.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
      'https://www.oilshell.org/blob/testdata/{}'
}
46
extract() {
  ### Unpack every .bz2 and .xz archive found in $TAR_DIR, into $TAR_DIR
  #
  # The commands are traced with 'set -x' and the whole loop is timed.  If one
  # of the two patterns matches nothing, the literal pattern is handed to tar,
  # which then fails.

  local f

  set -x
  # Quoted so that a $TAR_DIR containing whitespace stays one word; the brace
  # expansion still yields one glob per extension.
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
57	#
58	# Computation
59	#
60
run-tasks() {
  ### Run each task read from stdin under time-tsv, saving its output
  #
  # Args:
  #   $1: output directory, relative to the current directory
  #
  # Stdin: one task per line, as whitespace-separated fields
  #   host_name sh_path workload
  #
  # Writes under $1:
  #   times.tsv         one row appended per task, by time-tsv
  #   files-$task_id/   STDOUT.txt; also the working dir for some workloads
  #   gc-$task_id.txt   only for shells whose path matches _bin/*/osh
  #
  # Dies on an unknown workload.

  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  local host_name sh_path workload
  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    # Some workloads run in another directory, so a relative shell path must
    # be made absolute first.  A bare name is left for $PATH lookup.
    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Set this to a non-empty string to copy files created by the workload
    # from its working dir into $files_out_dir.  Nothing in this function
    # turns it on.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    # The --field values must stay in the same order as the header columns
    # (task_id, host_name, sh_path, workload).
    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.  File descriptor 99 is opened on the
    # stats file and its number is passed in OILS_GC_STATS_FD.
    case $sh_path in
      _bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP -print0 \
        | xargs -0 -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
184
print-tasks() {
  ### Print one row per benchmark task: host_name, sh_path, workload
  #
  # Args:
  #   $1: host name, copied into every row
  #   $2: path of the native osh binary, benchmarked after bash, dash and
  #       bin/osh
  #
  # Env:
  #   QUICKLY: if set and non-empty, only the two fastest workloads are used
  #
  # Rows are emitted with tsv-row, shell by shell, with the workloads in the
  # order listed below.

  local host_name=$1
  local osh_native=$2

  local -a workloads=(
    hello-world
    abuild-print-help

    configure.cpython
    configure.util-linux
    configure.ocaml
    configure.tcc
    configure.yash
  )

  if test -n "${QUICKLY:-}"; then
    # Just do the first two
    workloads=(
      #configure.util-linux
      hello-world
      abuild-print-help
    )
  fi

  # Loop variables are local so they don't leak into the caller's scope.
  local sh_path workload

  # $osh_native is intentionally left unquoted: an empty value adds no shell.
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
215
measure() {
  ### Run all benchmark tasks for one host and collect the raw results
  #
  # Args:
  #   $1: host name, e.g. 'no-host' or 'lenny'
  #   $2: directory for raw output; created if missing
  #   $3: native osh binary: $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA
  #   $4: optional and unused; still accepted so existing callers keep working
  #
  # Writes under $2: times.tsv, gc_stats.tsv, a copy of _tmp/provenance.tsv,
  # plus whatever run-tasks writes.

  local host_name=$1
  local raw_out_dir=$2
  local osh_native=$3

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  print-tasks "$host_name" "$osh_native" | run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
245
stage1() {
  ### Concatenate the latest raw results of each machine into $BASE_DIR/stage1
  #
  # Args:
  #   $1: directory holding the raw.$machine.* result dirs; defaults to
  #       $BASE_DIR (_tmp/osh-runtime or ../benchmark-data/osh-runtime)
  #   $2: if non-empty, use only this machine; otherwise use $MACHINE1 and
  #       $MACHINE2
  #
  # Writes times.tsv, gc_stats.tsv and provenance.tsv.

  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p $out_dir

  local -a machines
  if test -z "$single_machine"; then
    machines=( "$MACHINE1" "$MACHINE2" )
  else
    machines=( "$single_machine" )
  fi

  local -a times_in=()
  local -a gc_stats_in=()
  local -a provenance_in=()

  local machine
  for machine in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the most recent run.
    local -a runs=( $base_dir/raw.$machine.* )
    local latest=${runs[-1]}

    times_in+=( $latest/times.tsv )
    gc_stats_in+=( $latest/gc_stats.tsv )
    provenance_in+=( $latest/provenance.tsv )
  done

  tsv-concat "${times_in[@]}" > $out_dir/times.tsv
  tsv-concat "${gc_stats_in[@]}" > $out_dir/gc_stats.tsv
  tsv-concat "${provenance_in[@]}" > $out_dir/provenance.tsv
}
281
print-report() {
  ### Print the HTML report to stdout, built from the TSV files in a directory
  #
  # Args:
  #   $1: directory containing elapsed.tsv, page_faults.tsv, max_rss.tsv,
  #       gc_stats.tsv, details.tsv, details_io.tsv, shells.tsv and hosts.tsv
  #
  # Markdown fragments go through cmark and tables through tsv2html.

  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  printf '%s\n' \
    '<body class="width60">' \
    '<p id="home-link">' \
    '<a href="/">oilshell.org</a>' \
    '</p>'

  # Title, table of contents, and the first section's heading
  cmark <<'EOF'
## OSH Runtime Performance

Source code: [benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

- [Elapsed Time](#elapsed-time)
- [Minor Page Faults](#page-faults)
- [Memory Usage](#memory-usage)
- [GC Stats](#gc-stats)
- [rusage Details](#rusage-details)
- [More Details](#more-details)
- [Shell and Host](#shell-and-host)

<a name="elapsed-time" />

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.
EOF
  tsv2html $in_dir/elapsed.tsv

  # The remaining single-table sections all have the same shape:
  #   anchor | heading | TSV file | optional paragraph under the heading
  local -a sections=(
    'page-faults|Minor Page Faults|page_faults.tsv|'
    'memory-usage|Memory Usage (Max Resident Set Size in MB)|max_rss.tsv|Memory usage is measured in MB (powers of 10), not MiB (powers of 2).'
    'gc-stats|GC Stats|gc_stats.tsv|'
    'rusage-details|rusage Details|details.tsv|'
    'more-details|More Details|details_io.tsv|'
  )

  local spec anchor heading tsv_name note
  for spec in "${sections[@]}"; do
    IFS='|' read -r anchor heading tsv_name note <<< "$spec"
    {
      printf '<a name="%s" />\n\n### %s\n' "$anchor" "$heading"
      if test -n "$note"; then
        printf '\n%s\n' "$note"
      fi
    } | cmark
    tsv2html $in_dir/$tsv_name
  done

  printf '%s\n' '<a name="shell-and-host" />' '' '### Shell and Host' | cmark
  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  # Only show files.html link on a single machine
  if [ -f $(dirname $in_dir)/files.html ]; then
    printf '%s\n' '---' '' '[raw files](files.html)' | cmark
  fi

  printf '%s\n' '</body>' '</html>'
}
376
test-oils-run() {
  ### Smoke test: print a fixed greeting that names this script

  printf '%s\n' 'Hello from benchmarks/osh-runtime.sh'
}
380
soil-run() {
  ### Run it on just this machine, and make a report
  #
  # Env:
  #   OSH_CPP_NINJA_BUILD: ninja target for the native osh binary; it is also
  #                        the shell path handed to 'measure'
  #
  # Starts from an empty $BASE_DIR, then runs measure, stage1, and the stage2
  # and stage3 steps of benchmarks/report.sh.

  # ${BASE_DIR:?} aborts rather than running rm on an empty path.
  rm -r -f "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  # Declared separately so a failing print-job-id is not masked by 'local'.
  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make _tmp/osh-runtime/files.html, so index.html can potentially link to it
  find-dir-html "$BASE_DIR" files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
421
422	#
423	# Debugging
424	#
425
compare-cpython() {
  ### Debugging aid: diff the files saved by configure.cpython across shells
  #
  # Picks the last ../benchmark-data/osh-runtime/ directory matching the glob
  # below, prints its configure.cpython timing rows, then writes recursive
  # diffs of bash's files against each other shell's to
  # _tmp/cpython-configure/ and summarizes them with diffstat.
  #
  # Turns errexit off and does not restore it, because diff exits non-zero
  # whenever the inputs differ.

  # Globs are in lexicographical order, so the last match is the newest run.
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task ids of the configure.cpython run under each shell.
  # NOTE(review): these are hard-coded for the recorded data; check them
  # against the rows printed above.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  local pair other other_id report
  for pair in "dash:$dash_id" "osh-py:$osh_py_id" "osh-cpp:$osh_cpp_id"; do
    other=${pair%%:*}
    other_id=${pair##*:}
    report=$out_dir/bash-vs-$other.txt

    echo "bash vs. $other"
    diff -u --recursive "$dir/files-$bash_id" "$dir/files-$other_id" > "$report"
    diffstat "$report"
    echo
  done

  return

  # Not reached: remove the 'return' above to see these per-file diffs.
  diff -u "$dir"/{files-$bash_id,files-$osh_cpp_id}/STDOUT.txt
  echo

  diff -u "$dir"/{files-$bash_id,files-$osh_cpp_id}/pyconfig.h
  echo

  cdiff -u "$dir"/{files-$bash_id,files-$osh_cpp_id}/config.log
  echo
}
478
479	"$@"