benchmarks/osh-runtime.sh

OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

458 lines, 274 significant

#!/usr/bin/env bash
#
# Test scripts found in the wild for both correctness and performance.
#
# Usage:
#   benchmarks/osh-runtime.sh <function name>
#
# The named function is invoked with the remaining arguments.  The 'source'
# lines below use paths relative to the current directory.

set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory that contains this script.
# '&&' rather than ';' so that a failed 'cd' yields a failure instead of
# silently reporting the caller's current directory; "$0" is quoted so a
# checkout path containing spaces works.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
13
14	source benchmarks/common.sh # tsv-concat
15	source benchmarks/id.sh # print-job-id
16	source soil/common.sh # find-dir-html
17	source test/common.sh
18	source test/tsv-lib.sh # tsv-row
19
# Where this benchmark writes its results, relative to the current directory.
BASE_DIR=_tmp/osh-runtime
readonly BASE_DIR

# TODO: Move to ../oil_DEPS
# Anchored at $PWD so the path is absolute.
TAR_DIR="$PWD/_deps/osh-runtime"
readonly TAR_DIR

#
# Dependencies
#

# Python 2.7.13 source tree, also anchored at $PWD.
PY27_DIR="$PWD/Python-2.7.13"
readonly PY27_DIR
30
# NOTE: Same list in oilshell.org/blob/run.sh.
#
# Print the file names of the test-data tarballs, one per line, on stdout.
tarballs() {
  local -a names=(
    tcc-0.9.26.tar.bz2
    yash-2.46.tar.xz
    ocaml-4.06.0.tar.xz
    util-linux-2.40.tar.xz
  )
  printf '%s\n' "${names[@]}"
}
40
# Download every tarball listed by 'tarballs' into $TAR_DIR.
#
# Globals:  TAR_DIR (read)
# Outputs:  files under $TAR_DIR; xargs --verbose echoes each wget command
#
# wget --no-clobber leaves files that are already present untouched.
download() {
  mkdir -p "$TAR_DIR"

  # A real pipe: an escaped '\|' would hand a literal '|' to 'tarballs' and
  # never start xargs.  -I {} already runs one command per input line, so the
  # conflicting '-n 1' (which GNU xargs warns about and ignores) is dropped.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
    'https://www.oilshell.org/blob/testdata/{}'
}
46
# Unpack every *.bz2 and *.xz archive in $TAR_DIR into $TAR_DIR, then list
# the directory.
#
# Globals:  TAR_DIR (read)
# Outputs:  'ls -l' listing on stdout; xtrace and timing on stderr
#
# xtrace is switched on for the loop and off again afterwards.
extract() {
  local f

  set -x
  # The directory part is quoted so that a TAR_DIR containing spaces still
  # globs correctly; the brace/glob part must stay unquoted.
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
56
57	#
58	# Computation
59	#
60
# Read benchmark tasks from stdin and run each one under time-tsv.
#
# Arguments:
#   $1 - output directory, relative to the current directory
# Stdin:
#   one task per line: host_name sh_path workload (split by 'read')
# Globals:
#   PY27_DIR, TAR_DIR (read)
# Outputs under $1:
#   times.tsv        one row appended per task by time-tsv
#   files-<id>/      STDOUT.txt, plus the working directory of the
#                    configure.cpython and configure.util-linux workloads
#   gc-<id>.txt      whatever the shell writes to fd 99, for native osh only
#
# NOTE(review): the workload inherits the loop's stdin (the task list); a
# workload that reads stdin would consume the remaining tasks.
run-tasks() {
  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  local task_id=0
  local host_name sh_path workload
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        # The pattern must be '*/*'; a bare '/' would only match the root
        # directory and leave e.g. bin/osh relative after pushd.
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Debug toggle: set to a non-empty value to copy files created by the
    # workload into $files_out_dir.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats
    case $sh_path in
      *_bin/*/osh)
        # Native osh binaries live under _bin/<variant>/osh.  A literal
        # '_bin//osh' pattern can never match such a path, which would leave
        # the gc-*.txt files unwritten.
        OILS_GC_STATS_FD=99 "${time_argv[@]}" \
          > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
184
# Print one task row per (shell, workload) pair via tsv-row.
#
# Arguments:
#   $1 - host name, emitted as the first column of every row
#   $2 - path(s) of the native osh binary, appended to the shell list
# Environment:
#   QUICKLY - when non-empty, only hello-world and abuild-print-help are run
# Outputs:
#   rows of: host_name sh_path workload
print-tasks() {
  local host_name=$1
  local osh_native=$2

  local -a workloads=(
    hello-world
    abuild-print-help

    configure.cpython
    configure.util-linux
    configure.ocaml
    configure.tcc
    configure.yash
  )

  if test -n "${QUICKLY:-}"; then
    # Just do the first two
    workloads=(
      #configure.util-linux
      hello-world
      abuild-print-help
    )
  fi

  # Loop variables are local so they do not leak into the caller.
  local sh_path workload

  # $osh_native is deliberately unquoted: an empty value adds no shell, and a
  # space-separated value adds several.
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
215
# Run every task on this machine and collect the raw results.
#
# Arguments:
#   $1 - host name, e.g. 'no-host' or 'lenny'
#   $2 - directory that receives the raw output (created if missing)
#   $3 - native osh binary: $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA
#   $4 - optional; accepted for backward compatibility but not used here
# Outputs under $2:
#   times.tsv, files-*/, gc-*.txt (from run-tasks), gc_stats.tsv, and a copy
#   of _tmp/provenance.tsv
measure() {
  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2
  local osh_native=$3  # $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  print-tasks "$host_name" "$osh_native" | run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
245
# Concatenate the newest raw results into $BASE_DIR/stage1.
#
# Arguments:
#   $1 - directory holding raw.<machine>.<job> dirs (default: $BASE_DIR),
#        e.g. _tmp/osh-runtime or ../benchmark-data/osh-runtime
#   $2 - optional machine name; when given only that machine is used,
#        otherwise $MACHINE1 and $MACHINE2 are combined
# Globals:
#   BASE_DIR (read); MACHINE1, MACHINE2 (read when $2 is empty)
# Outputs:
#   $BASE_DIR/stage1/{times,gc_stats,provenance}.tsv
stage1() {
  local base_dir=${1:-$BASE_DIR}  # _tmp/osh-runtime or ../benchmark-data/osh-runtime
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p "$out_dir"

  # Globs are in lexicographical order, which works for our dates.

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  if test -n "$single_machine"; then
    local -a a=( "$base_dir"/raw."$single_machine".* )

    # An unmatched glob stays literal; fail with a clear message instead of
    # letting tsv-concat trip over a non-existent file.
    test -e "${a[-1]}" \
      || die "No raw results match $base_dir/raw.$single_machine.*"

    raw_times+=( "${a[-1]}/times.tsv" )
    raw_gc_stats+=( "${a[-1]}/gc_stats.tsv" )
    raw_provenance+=( "${a[-1]}/provenance.tsv" )

  else
    local -a a=( "$base_dir"/raw."$MACHINE1".* )
    local -a b=( "$base_dir"/raw."$MACHINE2".* )

    test -e "${a[-1]}" \
      || die "No raw results match $base_dir/raw.$MACHINE1.*"
    test -e "${b[-1]}" \
      || die "No raw results match $base_dir/raw.$MACHINE2.*"

    raw_times+=( "${a[-1]}/times.tsv" "${b[-1]}/times.tsv" )
    raw_gc_stats+=( "${a[-1]}/gc_stats.tsv" "${b[-1]}/gc_stats.tsv" )
    raw_provenance+=( "${a[-1]}/provenance.tsv" "${b[-1]}/provenance.tsv" )
  fi

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
281
# Print the HTML report for this benchmark on stdout.
#
# Arguments:
#   $1 - directory holding the TSV files to render (elapsed.tsv,
#        page_faults.tsv, max_rss.tsv, gc_stats.tsv, details.tsv,
#        details_io.tsv, shells.tsv, hosts.tsv)
#
# A link to files.html is added only when that file exists next to $1.
print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<EOF
<body class="width60">
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [oil/benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself. Parse time is included.

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<EOF
### Minor Page Faults
EOF

  tsv2html "$in_dir/page_faults.tsv"

  cmark <<EOF
### Memory Usage (Max Resident Set Size in MB)
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<EOF
### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<EOF
### Details of All Tasks
EOF
  tsv2html "$in_dir/details.tsv"

  cmark <<EOF
### I/O Details
EOF
  tsv2html "$in_dir/details_io.tsv"

  cmark <<'EOF'

### Shell and Host Details
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  # Only show files.html link on a single machine
  # Quoted so that a path with spaces does not break 'test'.
  if test -f "$(dirname "$in_dir")/files.html"; then
    cmark <<'EOF'
---

[raw files](files.html)
EOF
  fi

  cat <<EOF
</body>
</html>
EOF
}
355
# Print a fixed greeting that names this script.
test-oils-run() {
  local greeting='Hello from benchmarks/osh-runtime.sh'
  echo "$greeting"
}
359
# Run the benchmark on just this machine and build the report.
#
# Globals:
#   BASE_DIR (read; the directory is deleted and recreated)
#   OSH_CPP_NINJA_BUILD (read; Ninja target(s) for the native osh binary)
# Steps:
#   download and extract test data, build osh with ninja, record provenance,
#   measure, then run stage1 and report.sh stage2/stage3.
soil-run() {
  ### Run it on just this machine, and make a report

  # ':?' aborts rather than running 'rm -r -f' with an empty path.
  rm -r -f -- "${BASE_DIR:?}"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  # Deliberately unquoted: the variable may list several targets.
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure "$single_machine" "$raw_out_dir" "$OSH_CPP_NINJA_BUILD"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  # Make _tmp/osh-runtime/files.html, so index.html can potentially link to it
  find-dir-html _tmp/osh-runtime files

  benchmarks/report.sh stage3 "$BASE_DIR"
}
400
401	#
402	# Debugging
403	#
404
# Debugging aid: diff the files written by the configure.cpython workload
# under different shells, using the newest matching results directory.
#
# Reads:   ../benchmark-data/osh-runtime/<dir>/{times.tsv,files-<id>/}
# Writes:  _tmp/cpython-configure/bash-vs-{dash,osh-py,osh-cpp}.txt
# Outputs: the chosen directory, the times.tsv header and configure.cpython
#          rows, and a diffstat summary per comparison
#
# errexit is switched off part-way through, because diff exits non-zero when
# the inputs differ.
#
# NOTE(review): the '*' characters in the glob below are restored by
# inference from the raw.<machine>.<job> naming used by stage1 -- confirm.
compare-cpython() {
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  # Globs sort lexicographically, so the last element is the newest run.
  local dir=${a[-1]}

  echo "$dir"

  # An unmatched glob stays literal; report it instead of failing in 'head'.
  if ! test -d "$dir"; then
    echo "compare-cpython: no results directory matches $dir" >&2
    return 1
  fi

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task ids of the configure.cpython rows for each shell.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  return

  # Unreachable while the 'return' above is in place; kept for manual
  # debugging of individual files.
  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
457
458	"$@"