test/syscall.sh

OILS / test / syscall.sh View on Github | oilshell.org

438 lines, 194 significant

#!/usr/bin/env bash
#
# Measure the number of syscalls that shells use.
#
# Usage:
#   test/syscall.sh <function name>

# LIB_OSH may be overridden from the environment; it defaults to stdlib/osh.
: "${LIB_OSH=stdlib/osh}"
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"

source build/dev-shell.sh

# Names of the OSH / YSH variants to measure.  by-code-cpp and by-input-cpp
# re-invoke this script with OSH=osh-cpp YSH=ysh-cpp.
OSH=${OSH:-osh}
YSH=${YSH:-ysh}

# Compare bash 4 vs. bash 5
#readonly -a SHELLS=(dash bash-4.4 bash $OSH)
readonly -a SHELLS=(dash bash-4.4 bash-5.2.21 mksh zsh ash yash "$OSH")

readonly BASE_DIR='_tmp/syscall'  # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw'  # Raw data

# Run it against the dev version of OSH
# '&&' rather than ';' so that a failed cd cannot make pwd report the wrong
# directory; "$0" is quoted so paths containing spaces work.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
26
# Run a shell under strace, writing one trace file per process.
#
# Arguments:
#   $1 - prefix passed to 'strace -o' (used together with -ff)
#   $2 - shell name; osh, ysh, osh-cpp and ysh-cpp are mapped to in-repo
#        programs, any other name is executed as given
#   $@ - remaining arguments are handed to the shell
# Globals:
#   REPO_ROOT (read)
count-procs() {
  local out_prefix=$1
  local sh=$2
  shift 2

  case $sh in
    # avoid the extra processes that bin/osh starts!
    # relies on word splitting
    #(X)  # to compare against osh 0.8.pre3 installed
    osh)
      sh="env PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor $REPO_ROOT/bin/oils_for_unix.py osh"
      ;;
    ysh)
      sh="env PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor $REPO_ROOT/bin/oils_for_unix.py ysh"
      ;;
    osh-cpp)
      sh=_bin/cxx-dbg/osh
      ;;
    ysh-cpp)
      sh=_bin/cxx-dbg/ysh
      ;;
  esac

  # $sh is intentionally left unquoted: for osh/ysh it holds a multi-word
  # command line that must be split into separate arguments.
  strace -ff -o "$out_prefix" -- $sh "$@"
}
52
run-case() {
  ### Run a test case with many shells
  #
  # Arguments:
  #   $1 - case number, used in the name of the raw output files
  #   $2 - code string, passed to each shell with -c
  # Globals:
  #   SHELLS, RAW_DIR (read)

  local num=$1
  local code_str=$2
  local sh  # keep the loop variable out of the global scope

  for sh in "${SHELLS[@]}"; do
    local out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" -c "$code_str"
  done
}
65
run-case-file() {
  ### Like run-case, but the shell reads the code from a file
  #
  # Arguments:
  #   $1 - case number; the code is written to _tmp/$1.sh
  #   $2 - code string, written verbatim (no newline is appended)
  # Globals:
  #   SHELLS, RAW_DIR (read)

  local num=$1
  local code_str=$2
  local script=_tmp/$num.sh
  local sh  # keep the loop variable out of the global scope

  # printf '%s' writes the string byte-for-byte; 'echo -n' would swallow a
  # code string that happens to look like an echo option (e.g. '-e').
  printf '%s' "$code_str" > "$script"

  for sh in "${SHELLS[@]}"; do
    local out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" "$script"
  done
}
80
run-case-stdin() {
  ### Like run-case, but the shell reads the code from a pipe on stdin
  #
  # Arguments:
  #   $1 - case number, used in the name of the raw output files
  #   $2 - code string, written verbatim to the shell's stdin
  # Globals:
  #   SHELLS, RAW_DIR (read)

  local num=$1
  local code_str=$2
  local sh  # keep the loop variable out of the global scope

  for sh in "${SHELLS[@]}"; do
    local out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    # A real pipe: the shell under test is started with no script argument
    # and gets the code on stdin.  printf '%s' adds no trailing newline,
    # which matters because the cases differ only in their final bytes.
    printf '%s' "$code_str" | count-procs "$out_prefix" "$sh"
  done
}
93
# Print the by-code test cases, one code string per line.
#
# The here-doc below is the case list; grep removes blank lines and lines
# whose first non-blank character is '#', so those act as comments.
# The here-doc delimiter is unquoted, hence '$' is written as '\$' to keep
# command substitutions literal.
print-cases() {
  # format: number, whitespace, then an arbitrary code string
  grep -E -v '^[[:space:]]*(#|$)' <<EOF

# builtin
echo hi

# external command
date

# Oil sentence
date ;

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

# Sentence in Oil
(date;) > /tmp/out.txt

(date; echo hi)

# command sub
echo \$(date)

# command sub with builtin
echo \$(echo hi)

# command sub with useless subshell (some scripts use this)
echo \$( ( date ) )

# command sub with other subshell
echo \$( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# 3 processes for all?
# osh gives FIVE??? But others give 3. That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here? That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ??? wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here. That is a crazy optimization. I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too. 3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3. So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

# osh does 5 when others do 3.
( echo a; echo b ) | ( wc -l )
EOF

  # Discarded because they're identical
  # pipeline with redirect last
  #date | wc -l > /tmp/out.txt

  # pipeline with redirect first
  #date 2>&1 | wc -l

}
202
# Prefix each line of print-cases with a two-digit, zero-padded case number.
number-cases() {
  # Right justified, leading zeros, with 2
  # Wish this was %02d
  print-cases | nl --number-format rz --number-width 2
}
208
by-input() {
  ### Run cases that vary by input reader
  #
  # The same snippets are fed to every shell via -c (cases 30-35), via a
  # script file (40-45) and via stdin (50-55).  The raw traces go to
  # $RAW_DIR; the case list and the line counts go to $BASE_DIR.
  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local suite='by-input'

  rm -r -f -v -- "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases!  2 processes instead of 3.
  run-case 30 "$zero"
  run-case 31 "$one"
  run-case 32 "$two"
  run-case 33 "$comment"
  run-case 34 "$newline"
  run-case 35 "$newline2"

  # Case 44 is labelled "newline" in the case list written below, so it must
  # run $newline (it previously duplicated case 45 by running $newline2).
  run-case-file 40 "$zero"
  run-case-file 41 "$one"
  run-case-file 42 "$two"
  run-case-file 43 "$comment"
  run-case-file 44 "$newline"
  run-case-file 45 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  # Case 54 likewise runs $newline, matching its "newline" label.
  run-case-stdin 50 "$zero"
  run-case-stdin 51 "$one"
  run-case-stdin 52 "$two"
  run-case-stdin 53 "$comment"
  run-case-stdin 54 "$newline"
  run-case-stdin 55 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
30 -c: zero lines
31 -c: one line
32 -c: one line and comment
33 -c: comment first
34 -c: newline
35 -c: newline2
40 file: zero lines
41 file: one line
42 file: one line and comment
43 file: comment first
44 file: newline
45 file: newline2
50 stdin: zero lines
51 stdin: one line
52 stdin: one line and comment
53 stdin: comment first
54 stdin: newline
55 stdin: newline2
EOF

  count-lines "$suite"
  summarize "$suite" 3 0
}
285
# Quick hack: every shell uses 2 processes for this... doesn't illuminate much.
#
# Runs two cases built around the "\$(< file)" form of command substitution
# and summarizes them as the 'weird-command-sub' suite.
# Side effect: enables nullglob in the calling shell.
weird-command-sub() {
  shopt -s nullglob
  # ${RAW_DIR:?} aborts instead of expanding to '/*' if RAW_DIR is ever empty.
  rm -r -f -v -- "${RAW_DIR:?}"/*

  local tmp=_tmp/cs
  echo FOO > "$tmp"
  # The '$' is escaped so that the command substitution is evaluated by the
  # shell under test.  Unescaped, this script would expand it first and every
  # shell would just be handed 'echo FOO' / 'echo hi'.
  run-case 60 "echo \$(< $tmp)"
  run-case 61 "echo \$(< $tmp; echo hi)"

  local suite=weird-command-sub

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
60 \$(< file)
61 \$(< file; echo hi)
EOF

  count-lines "$suite"
  summarize "$suite" 0 0
}
306
# Default upper bound on the number of cases by-code runs.
readonly MAX_CASES=100
#readonly MAX_CASES=3

by-code() {
  ### Run cases that vary by code snippet
  #
  # Arguments:
  #   $1 - maximum number of cases to run (default: $MAX_CASES)
  # Globals:
  #   RAW_DIR, BASE_DIR, MAX_CASES (read)

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local max_cases=${1:-$MAX_CASES}

  rm -r -f -v -- "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  write-sourced

  local suite='by-code'
  local cases=$BASE_DIR/cases.${suite}.txt

  number-cases > "$cases"
  # Each line is "<number><whitespace><code>"; read splits off the number and
  # leaves the rest of the line, unmodified, in code_str.
  head -n "$max_cases" "$cases" | while read -r num code_str; do
    echo
    echo '==='
    echo "$num $code_str"
    echo

    run-case "$num" "$code_str"
  done

  # omit total line
  count-lines "$suite"
  summarize "$suite" 3 0
}
342
# Build the C++ debug binaries, then re-run this script's by-code suite with
# OSH and YSH set to the osh-cpp / ysh-cpp names.
# Arguments are forwarded to by-code.
by-code-cpp() {
  ninja _bin/cxx-dbg/{osh,ysh}
  # "$0" is quoted so the script is still found when its path contains spaces.
  OSH=osh-cpp YSH=ysh-cpp "$0" by-code "$@"
}
347
# Build the C++ debug binaries, then re-run this script's by-input suite with
# OSH and YSH set to the osh-cpp / ysh-cpp names.
# Arguments are forwarded to by-input.
by-input-cpp() {
  ninja _bin/cxx-dbg/{osh,ysh}
  # "$0" is quoted so the script is still found when its path contains spaces.
  OSH=osh-cpp YSH=ysh-cpp "$0" by-input "$@"
}
352
# Run test/syscall.py with the current directory on PYTHONPATH, forwarding
# all arguments unchanged.
syscall-py() {
  PYTHONPATH=. \
    test/syscall.py "$@"
}
356
# Create _tmp/sourced.sh, the file sourced by one of the by-code cases.
# It holds two commands and no trailing newline.
write-sourced() {
  printf '%s' 'date; date' > _tmp/sourced.sh
}
360
# Count the lines of every raw trace file and save the table for a suite.
#
# Arguments:
#   $1 - suite name (default: by-code); output goes to $BASE_DIR/wc.$1.txt
# Globals:
#   RAW_DIR, BASE_DIR (read)
count-lines() {
  local suite=${1:-by-code}
  # The subshell keeps the cd local, so wc reports bare file names.
  # 'head -n -1' drops the final line, which is wc's "total" when more than
  # one file is counted.
  # NOTE(review): with exactly one raw file wc prints no total line, so the
  # only data line would be dropped -- confirm that can never happen here.
  ( cd "$RAW_DIR" && wc -l -- * ) | head -n -1 > "$BASE_DIR/wc.${suite}.txt"
}
365
# Feed a suite's line-count table to syscall-py and report the outcome.
#
# Arguments:
#   $1 - suite name (default: by-code)
#   $2 - value for --not-minimum (default: 0)
#   $3 - value for --more-than-bash (default: 0)
# Globals:
#   BASE_DIR (read)
# Outputs:
#   Prints 'OK' when syscall-py exits 0, otherwise 'FAIL'.  The failure is
#   reported but not propagated as this function's exit status.
summarize() {
  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  # '|| status=$?' captures a failure without tripping errexit, and unlike a
  # 'set +o errexit' / 'set -o errexit' pair it leaves the caller's errexit
  # setting exactly as it was.
  local status=0
  syscall-py \
    --not-minimum "$not_minimum" \
    --more-than-bash "$more_than_bash" \
    --suite "$suite" \
    "$BASE_DIR/cases.${suite}.txt" \
    "$BASE_DIR" \
    < "$BASE_DIR/wc.${suite}.txt" || status=$?

  if test "$status" -eq 0; then
    echo 'OK'
  else
    echo 'FAIL'
  fi
}
388
# Run the by-code suite and then the by-input suite, printing 'OK' at the end.
soil-run() {
  # Invoked as one of the "other" tests.  Soil runs by-code and by-input
  # separately.

  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz
  local task
  for task in by-code by-input; do
    "$task"
  done

  printf '%s\n' 'OK'
}
399
# Release entry point: runs both syscall suites by delegating to soil-run.
run-for-release() {
  ### Run the two syscall suites
  soil-run
}
405
#
# Real World
#
# $ ls|grep dash|wc -l
# 6098
# $ ls|grep bash|wc -l
# 6102
# $ ls|grep osh|wc -l
# 6098
#
# So Oil is already at dash level for CPython's configure, and bash isn't
# far off.  So autoconf-generated scripts probably already use constructs
# that are already "optimal" in most shells.
419
# CPython source tree whose ./configure is traced; resolved against the
# directory this script is started from.
readonly PY27_DIR=$PWD/Python-2.7.13

# Trace './configure' of the CPython tree under bash, dash and osh.
#
# Globals:
#   RAW_DIR, PY27_DIR (read)
# Outputs:
#   Trace files named $PWD/$RAW_DIR/real/cpython-<shell>*.
cpython-configure() {
  # Absolute path, because the traces are written after changing directory.
  local raw_dir=$PWD/$RAW_DIR/real
  local sh  # keep the loop variable out of the global scope
  mkdir -p "$raw_dir"

  pushd "$PY27_DIR"
  #for sh in "${SHELLS[@]}"; do
  for sh in bash dash osh; do
    local out_prefix=$raw_dir/cpython-$sh
    echo "--- $sh"

    # TODO: Use a different dir
    count-procs "$out_prefix" "$sh" -c './configure'
  done
  popd
}
437
# Entry point: hand the command-line arguments to task-five (presumably
# defined by the sourced $LIB_OSH/task-five.sh), which is expected to run
# the function named by the first argument -- see "Usage" at the top.
task-five "$@"