1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Test scripts found in the wild for both correctness and performance.
|
4 | #
|
5 | # Usage:
|
6 | # benchmarks/osh-runtime.sh <function name>
|
7 |
|
# Strict mode: error on unset vars, failed pipeline stages, and failed commands.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path to the repo root, regardless of invocation directory.
# NOTE(review): not referenced in this file; presumably used by the sourced
# libraries below — confirm before removing.
REPO_ROOT=$(cd "$(dirname $0)/.."; pwd)

source benchmarks/common.sh  # tsv-concat
source benchmarks/id.sh  # print-job-id
source soil/common.sh  # find-dir-html
source test/common.sh
source test/tsv-lib.sh  # tsv-row

# All benchmark output goes under this directory.
readonly BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
readonly TAR_DIR=$PWD/_deps/osh-runtime  # Make it absolute

#
# Dependencies
#

# CPython source tree used by the configure.cpython workload.
readonly PY27_DIR=$PWD/Python-2.7.13
|
30 |
|
# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  ### Print the tarball filenames used by these benchmarks, one per line.
  printf '%s\n' \
    tcc-0.9.26.tar.bz2 \
    yash-2.46.tar.xz \
    ocaml-4.06.0.tar.xz \
    util-linux-2.40.tar.xz
}
|
40 |
|
download() {
  ### Fetch the benchmark tarballs into $TAR_DIR (idempotent via --no-clobber).
  mkdir -p "$TAR_DIR"

  # One wget invocation per tarball name; {} is substituted into the URL.
  # Fix: quote "$TAR_DIR" (it contains $PWD), and use the full option name
  # --directory-prefix — the previous '--directory' relied on GNU long-option
  # abbreviation.
  tarballs | xargs -n 1 -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" 'https://www.oilshell.org/blob/testdata/{}'
}
|
46 |
|
extract() {
  ### Unpack every downloaded tarball in place under $TAR_DIR.
  # Fix: quote "$TAR_DIR" and "$f" (TAR_DIR contains $PWD, which may have
  # spaces), and keep the loop variable local.
  local f
  set -x
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
|
56 |
|
57 | #
|
58 | # Computation
|
59 | #
|
60 |
|
run-tasks() {
  ### Run every benchmark task read from stdin, one task per line.
  #
  # stdin: rows of (host_name, sh_path, workload), e.g. from print-tasks
  # $1: directory for raw results
  #
  # Per task N this writes:
  # - a row appended to $raw_out_dir/times.tsv (via time-tsv)
  # - $raw_out_dir/files-N/STDOUT.txt (plus configure output for the
  #   workloads that run inside files-N)
  # - $raw_out_dir/gc-N.txt with GC stats, for _bin/*/osh shells only
  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    # Absolutize the shell path, since some workloads pushd elsewhere.
    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash', and be looked up in $PATH
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p $files_out_dir

    # Currently always empty, so the __TIMESTAMP / copy-back logic below is
    # disabled; flip this to enable it.
    local save_new_files=''

    # Choose the script to run and (optionally) the directory to run it in.
    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      bin-true)
        argv=( testdata/osh-runtime/bin_true.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( $PY27_DIR/configure )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( $TAR_DIR/util-linux-2.40/configure --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    # time-tsv appends one row of timing/rusage fields per task.
    local -a time_argv=(
      time-tsv
      --output "$raw_out_dir/times.tsv" --append
      --rusage
      --rusage-2
      --field "$task_id"
      --field "$host_name" --field "$sh_path"
      --field "$workload"
      -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.  Oils binaries dump GC stats to the fd
    # named by OILS_GC_STATS_FD; we point fd 99 at the per-task stats file.
    case $sh_path in
      *_bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > $stdout_file 99> $gc_stats_file
        ;;
      *)
        "${time_argv[@]}" > $stdout_file
        ;;
    esac

    # Copy back any files the workload created (only when save_new_files set).
    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} $files_out_dir
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
|
188 |
|
# Sorted by priority for test-oils.sh osh-runtime --num-shells 3
# (print-tasks-xshar truncates this list, so cheap smoke tests come first,
# then the configure workloads.)
readonly -a ALL_WORKLOADS=(
  hello-world
  bin-true

  configure.cpython
  configure.util-linux
  configure.ocaml
  configure.tcc
  configure.yash

  abuild-print-help
)
|
203 |
|
print-workloads() {
  ### for help: list each workload on its own indented line

  local workload
  for workload in "${ALL_WORKLOADS[@]}"; do
    printf ' %s\n' "$workload"
  done
}
|
211 |
|
print-tasks() {
  ### Print one TSV task row per (host, shell, workload) combination.
  #
  # $1: host name
  # $2: path to the native osh binary (left unquoted below in case callers
  #     pass multiple words, matching the original expansion)
  # Respects $QUICKLY: when set, only runs the cheap smoke-test workloads.
  local host_name=$1
  local osh_native=$2

  # Fix: 'workloads' was an implicit global; keep it (and the loop vars)
  # local so this function doesn't clobber callers' variables.
  local -a workloads
  if test -n "${QUICKLY:-}"; then
    workloads=(
      hello-world
      bin-true
      #configure.util-linux
      #abuild-print-help
    )
  else
    workloads=( "${ALL_WORKLOADS[@]}" )
  fi

  local sh_path workload
  for sh_path in bash dash bin/osh $osh_native; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
|
233 |
|
print-tasks-xshar() {
  ### Print task rows for test-oils.sh: $num_iters iterations of the first
  ### $num_shells shells, each with the first $num_workloads workloads.
  local host_name=$1
  local osh_native=$2

  local num_iters=${3:-1}
  local num_shells=${4:-1}
  local num_workloads=${5:-1}

  local i s w sh_path workload
  for i in $(seq $num_iters); do

    # Fix: reset the shell counter each iteration.  Previously 's' and 'w'
    # kept growing across loops, so the '-eq' cutoff only fired once and
    # later iterations/shells ran ALL shells/workloads.
    s=0
    for sh_path in $osh_native bash dash; do

      # Fix: reset the workload counter for each shell.
      w=0
      for workload in "${ALL_WORKLOADS[@]}"; do
        tsv-row "$host_name" "$sh_path" "$workload"

        w=$(( w + 1 ))  # cut off at specified workloads
        if test $w -ge $num_workloads; then
          break
        fi
      done

      s=$(( s + 1 ))  # cut off at specified shells
      if test $s -ge $num_shells; then
        break
      fi

    done
  done
}
|
266 |
|
run-tasks-wrapper() {
  ### reads tasks from stdin
  #
  # $1: host name ('no-host' or 'lenny')
  # $2: raw output directory
  #
  # Writes $raw_out_dir/times.tsv, gc_stats.tsv, and a copy of the
  # provenance TSV.  Fix: quote directory/file expansions (SC2086) so paths
  # containing spaces don't word-split.

  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # reads tasks from stdin
  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
|
297 |
|
measure() {
  ### For release and CI
  #
  # $1: host name ('no-host' or 'lenny')
  # $2: raw output dir (_tmp/osh-runtime or ../../benchmark-data/osh-runtime)
  # $3: native osh binary ($OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA)

  local host_name=$1
  local raw_out_dir=$2
  local osh_native=$3

  # Generate the task list, then execute it and collect results.
  print-tasks $host_name $osh_native \
    | run-tasks-wrapper $host_name $raw_out_dir
}
|
306 |
|
stage1() {
  ### Concatenate raw TSVs from the most recent run(s) into $BASE_DIR/stage1.
  #
  # $1: where to look for raw.* run directories
  # $2: if non-empty, only use runs from this single machine; otherwise
  #     combine the latest runs from $MACHINE1 and $MACHINE2 (from common.sh)
  local base_dir=${1:-$BASE_DIR}  # _tmp/osh-runtime or ../benchmark-data/osh-runtime
  local single_machine=${2:-}

  # NOTE(review): output always goes under $BASE_DIR even when $base_dir
  # points at ../benchmark-data — looks intentional (stage1 output is always
  # local), but confirm.
  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p $out_dir

  # Globs are in lexicographical order, which works for our dates.

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  if test -n "$single_machine"; then
    # ${a[-1]} selects the newest run directory (last in glob order).
    local -a a=( $base_dir/raw.$single_machine.* )

    raw_times+=( ${a[-1]}/times.tsv )
    raw_gc_stats+=( ${a[-1]}/gc_stats.tsv )
    raw_provenance+=( ${a[-1]}/provenance.tsv )

  else
    # Two-machine case: take the newest run from each machine.
    local -a a=( $base_dir/raw.$MACHINE1.* )
    local -a b=( $base_dir/raw.$MACHINE2.* )

    raw_times+=( ${a[-1]}/times.tsv ${b[-1]}/times.tsv )
    raw_gc_stats+=( ${a[-1]}/gc_stats.tsv ${b[-1]}/gc_stats.tsv )
    raw_provenance+=( ${a[-1]}/provenance.tsv ${b[-1]}/provenance.tsv )
  fi

  # tsv-concat (benchmarks/common.sh) merges rows, keeping one header.
  tsv-concat "${raw_times[@]}" > $out_dir/times.tsv

  tsv-concat "${raw_gc_stats[@]}" > $out_dir/gc_stats.tsv

  tsv-concat "${raw_provenance[@]}" > $out_dir/provenance.tsv
}
|
342 |
|
print-report() {
  ### Write the benchmark HTML report to stdout.
  #
  # $1: directory containing the stage2 TSVs (elapsed.tsv, max_rss.tsv, ...)
  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<EOF
<body class="width60">
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
EOF

  # Markdown body; quoted 'EOF' so nothing is expanded.
  cmark <<'EOF'
## OSH Runtime Performance

Source code: [benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

- [Elapsed Time](#elapsed-time)
- [Minor Page Faults](#page-faults)
- [Memory Usage](#memory-usage)
- [GC Stats](#gc-stats)
- [rusage Details](#rusage-details)
- [More Details](#more-details)
- [Shell and Host](#shell-and-host)

<a name="elapsed-time" />

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.
EOF
  tsv2html $in_dir/elapsed.tsv

  cmark <<EOF
<a name="page-faults" />

### Minor Page Faults
EOF

  tsv2html $in_dir/page_faults.tsv

  cmark <<EOF
<a name="memory-usage" />

### Memory Usage (Max Resident Set Size in MB)

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html $in_dir/max_rss.tsv

  cmark <<EOF
<a name="gc-stats" />

### GC Stats
EOF
  tsv2html $in_dir/gc_stats.tsv

  cmark <<EOF
<a name="rusage-details" />

### rusage Details
EOF
  tsv2html $in_dir/details.tsv

  cmark <<EOF
<a name="more-details" />

### More Details
EOF
  tsv2html $in_dir/details_io.tsv

  cmark <<'EOF'
<a name="shell-and-host" />

### Shell and Host
EOF
  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  # Only show files.html link on a single machine
  if test -f $(dirname $in_dir)/files.html; then
    cmark <<'EOF'
---

[raw files](files.html)
EOF
  fi

  cat <<EOF
</body>
</html>
EOF
}
|
437 |
|
test-oils-run() {
  ### Benchmark one osh binary on this machine (entry point for test-oils.sh).
  #
  # $1: path to the osh binary under test
  # $2-$4: iteration/shell/workload cutoffs, forwarded to print-tasks-xshar
  local osh=$1

  # flags passed by caller
  local num_iters=${2:-1}
  local num_shells=${3:-1}
  local num_workloads=${4:-1}

  # Smoke test: make sure the binary runs before benchmarking it.
  # NOTE(review): assumes $XSHAR_DIR is set by the xshar environment — confirm.
  local time_py=$XSHAR_DIR/benchmarks/time_.py
  $time_py --tsv --rusage -- \
    $osh -c 'echo "smoke test: hi from benchmarks/osh-runtime.sh"'

  local host_name
  host_name=$(hostname)

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    $host_name $job_id _tmp \
    bash dash $osh

  # e.g. 2024-05-01__10-11-12.ci-vm-name
  local raw_out_dir="$BASE_DIR/$job_id.$host_name"
  mkdir -p $raw_out_dir

  # Similar to 'measure', for soil-run and release
  print-tasks-xshar $host_name $osh $num_iters $num_shells $num_workloads \
    | run-tasks-wrapper $host_name $raw_out_dir

  # Note: 'stage1' in soil-run is a trivial concatenation, so we can create input for
  # benchmarks/report.R. We don't need that here

  # TODO: upload
  # _tmp/
  #   osh-runtime/
  #     shell-id/
  #     host-id/
}
|
478 |
|
soil-run() {
  ### Run it on just this machine, and make a report

  # Start from a clean output tree.
  rm -r -f $BASE_DIR
  mkdir -p $BASE_DIR

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    $single_machine $job_id _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p $raw_out_dir $BASE_DIR/stage1

  # Run all tasks and collect raw results.
  measure $single_machine $raw_out_dir $OSH_CPP_NINJA_BUILD

  # Trivial concatenation for 1 machine
  stage1 '' $single_machine

  benchmarks/report.sh stage2 $BASE_DIR

  # Make _tmp/osh-parser/files.html, so index.html can potentially link to it
  find-dir-html _tmp/osh-runtime files

  benchmarks/report.sh stage3 $BASE_DIR
}
|
519 |
|
520 | #
|
521 | # Debugging
|
522 | #
|
523 |
|
compare-cpython() {
  ### Debugging helper: diff the files produced by configure.cpython runs
  ### under different shells, from a past benchmark-data directory.

  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  # Globs sort lexicographically, so the last element is the newest run.
  local dir=${a[-1]}

  echo $dir

  head -n 1 $dir/times.tsv
  fgrep 'configure.cpython' $dir/times.tsv

  # Task IDs of the configure.cpython rows for each shell in that run; the
  # files-N directory names below are hard-coded to match.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  # diff exits non-zero when files differ; don't abort the function on that.
  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p $out_dir

  echo 'bash vs. dash'
  diff -u --recursive $dir/{files-2,files-8} > $out_dir/bash-vs-dash.txt
  diffstat $out_dir/bash-vs-dash.txt
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive $dir/{files-2,files-14} > $out_dir/bash-vs-osh-py.txt
  diffstat $out_dir/bash-vs-osh-py.txt
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive $dir/{files-2,files-20} > $out_dir/bash-vs-osh-cpp.txt
  diffstat $out_dir/bash-vs-osh-cpp.txt
  echo

  # Early return: the per-file diffs below are kept for manual use; comment
  # out this 'return' to run them.
  return

  diff -u $dir/{files-2,files-20}/STDOUT.txt
  echo

  diff -u $dir/{files-2,files-20}/pyconfig.h
  echo

  cdiff -u $dir/{files-2,files-20}/config.log
  echo
}
|
576 |
|
577 | "$@"
|