test/syscall.sh

OILS / test / syscall.sh View on Github | oilshell.org

445 lines, 194 significant

#!/usr/bin/env bash
#
# Measure the number of syscalls that shells use.
#
# Usage:
#   test/syscall.sh <function name>

# LIB_OSH may be overridden from the environment; the default is relative to
# the repo root, so the script is expected to be run from there.
: "${LIB_OSH=stdlib/osh}"
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"

source build/dev-shell.sh

# Names of the Oils shells to measure; count-procs maps osh / ysh / osh-cpp /
# ysh-cpp to concrete command lines.
OSH=${OSH:-osh}
YSH=${YSH:-ysh}

# Compare bash 4 vs. bash 5
#readonly -a SHELLS=(dash bash-4.4 bash $OSH)
readonly -a SHELLS=(dash bash-4.4 bash-5.2.21 mksh zsh ash yash $OSH)

readonly BASE_DIR='_tmp/syscall'  # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw'  # Raw data

# Run it against the dev version of OSH.
# Use && so that a failed cd aborts instead of silently reporting the current
# directory as the repo root.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
26
# Run a shell under strace, writing one trace file per process.
#
# Arguments:
#   $1    - output prefix; strace -ff appends .PID for each traced process
#   $2    - shell name.  osh / ysh / osh-cpp / ysh-cpp are mapped to the dev
#           builds; anything else is run as given (and is word-split, so it
#           may carry leading arguments)
#   $3... - arguments passed to the shell
# Globals: REPO_ROOT (read)
# Returns: always 0; failures of strace or the traced shell are ignored.
count-procs() {
  local out_prefix=$1
  local sh=$2
  shift 2

  # Build the command as an array so that a REPO_ROOT containing whitespace
  # stays intact.
  local -a cmd
  case "$sh" in
    # avoid the extra processes that bin/osh starts!
    #(X)  # to compare against osh 0.8.pre3 installed
    osh)
      cmd=(env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
           "$REPO_ROOT/bin/oils_for_unix.py" osh)
      ;;
    ysh)
      cmd=(env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
           "$REPO_ROOT/bin/oils_for_unix.py" ysh)
      ;;
    osh-cpp)
      cmd=(_bin/cxx-dbg/osh)
      ;;
    ysh-cpp)
      cmd=(_bin/cxx-dbg/ysh)
      ;;
    *)
      # Deliberately relies on word splitting, as the original did.
      # shellcheck disable=SC2206
      cmd=($sh)
      ;;
  esac

  # Ignore failure, because we are just counting
  strace -ff -o "$out_prefix" -- "${cmd[@]}" "$@" || true
}
53
run-case() {
  ### Run a test case with many shells
  #
  # Arguments:
  #   $1 - case number, used in the trace file prefix
  #   $2 - code string, passed to each shell with -c
  # Globals: SHELLS, RAW_DIR (read)

  local num=$1
  local code_str=$2

  # Keep the loop variable local so it doesn't leak into the caller's scope.
  local sh out_prefix
  for sh in "${SHELLS[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" -c "$code_str"
  done
}
66
run-case-file() {
  ### Like run-case, but the shell reads the code from a file
  #
  # Arguments:
  #   $1 - case number; the code is written to _tmp/$1.sh
  #   $2 - code string, written verbatim (no trailing newline is added)
  # Globals: SHELLS, RAW_DIR (read)

  local num=$1
  local code_str=$2
  local script=_tmp/$num.sh

  # printf rather than echo -n: the code must be written byte-for-byte, even
  # if it happens to look like an echo option.
  printf '%s' "$code_str" > "$script"

  local sh out_prefix
  for sh in "${SHELLS[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" "$script"
  done
}
81
run-case-stdin() {
  ### Like run-case, but the shell reads the code from a pipe on stdin
  #
  # Arguments:
  #   $1 - case number, used in the trace file prefix
  #   $2 - code string, piped verbatim (no trailing newline is added)
  # Globals: SHELLS, RAW_DIR (read)

  local num=$1
  local code_str=$2

  local sh out_prefix
  for sh in "${SHELLS[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    # printf rather than echo -n, so the code is piped byte-for-byte.
    printf '%s' "$code_str" | count-procs "$out_prefix" "$sh"
  done
}
94
# Print the code snippets for the by-code suite, one per line.
#
# Comment lines and blank lines in the here-doc are filtered out, so the
# comments below are annotations only.  The here-doc delimiter is unquoted,
# which is why each literal $ is written as \$.
print-cases() {
  # format: number, whitespace, then an arbitrary code string
  # (the number is added later, by number-cases)
  # grep -E instead of egrep, which newer GNU grep reports as obsolescent.
  grep -E -v '^[[:space:]]*(#|$)' <<EOF

# builtin
echo hi

# external command
date

# Oil sentence
date ;

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

# Sentence in Oil
(date;) > /tmp/out.txt

(date; echo hi)

# command sub
echo \$(date)

# command sub with builtin
echo \$(echo hi)

# command sub with useless subshell (some scripts use this)
echo \$( ( date ) )

# command sub with other subshell
echo \$( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# negated
! date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# negated
! command date | wc -l

# 3 processes for all?
# osh gives FIVE??? But others give 3. That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here? That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ??? wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here. That is a crazy optimization. I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too. 3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3. So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

# osh does 5 when others do 3.
( echo a; echo b ) | ( wc -l )
EOF

  # Discarded because they're identical
  # pipeline with redirect last
  #date | wc -l > /tmp/out.txt

  # pipeline with redirect first
  #date 2>&1 | wc -l

}
209
# Prefix each line from print-cases with a 2-digit, zero-padded case number.
number-cases() {
  # Right justified, leading zeros, with 2
  # Wish this was %02d
  # POSIX short options, equivalent to the GNU-only long options
  # --number-format rz --number-width 2.
  print-cases | nl -n rz -w 2
}
215
by-input() {
  ### Run cases that vary by input reader
  #
  # The same snippets are fed to every shell three ways: via -c (cases 30-35),
  # from a file (40-45) and from a pipe on stdin (50-55).
  # Globals: RAW_DIR, BASE_DIR (read)
  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local suite='by-input'

  # :? guards the recursive delete against an empty or unset RAW_DIR.
  rm -r -f -v "${RAW_DIR:?}"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases! 2 processes instead of 3.
  run-case 30 "$zero"
  run-case 31 "$one"
  run-case 32 "$two"
  run-case 33 "$comment"
  run-case 34 "$newline"
  run-case 35 "$newline2"

  # Case x4 is "newline" and x5 is "newline2", matching the labels written to
  # cases.by-input.txt below.
  run-case-file 40 "$zero"
  run-case-file 41 "$one"
  run-case-file 42 "$two"
  run-case-file 43 "$comment"
  run-case-file 44 "$newline"
  run-case-file 45 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  run-case-stdin 50 "$zero"
  run-case-stdin 51 "$one"
  run-case-stdin 52 "$two"
  run-case-stdin 53 "$comment"
  run-case-stdin 54 "$newline"
  run-case-stdin 55 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
30 -c: zero lines
31 -c: one line
32 -c: one line and comment
33 -c: comment first
34 -c: newline
35 -c: newline2
40 file: zero lines
41 file: one line
42 file: one line and comment
43 file: comment first
44 file: newline
45 file: newline2
50 stdin: zero lines
51 stdin: one line
52 stdin: one line and comment
53 stdin: comment first
54 stdin: newline
55 stdin: newline2
EOF

  count-lines "$suite"
  summarize "$suite" 3 0
}
292
293	# Quick hack: every shell uses 2 processes for this... doesn't illuminate much.
294	weird-command-sub() {
295	shopt -s nullglob
296	rm -r -f -v $RAW_DIR/*
297
298	local tmp=_tmp/cs
299	echo FOO > $tmp
300	run-case 60 "echo $(< $tmp)"
301	run-case 61 "echo $(< $tmp; echo hi)"
302
303	local suite=weird-command-sub
304
305	cat >$BASE_DIR/cases.${suite}.txt <<EOF
306	60 \$(< file)
307	61 \$(< file; echo hi)
308	EOF
309
310	count-lines $suite
311	summarize $suite 0 0
312	}
313
# Default upper bound on how many cases by-code runs.
readonly MAX_CASES=100
#readonly MAX_CASES=3

by-code() {
  ### Run cases that vary by code snippet
  #
  # Arguments:
  #   $1 - optional: run only the first N cases (default MAX_CASES)
  # Globals: RAW_DIR, BASE_DIR, MAX_CASES (read)

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local max_cases=${1:-$MAX_CASES}

  # :? guards the recursive delete against an empty or unset RAW_DIR.
  rm -r -f -v "${RAW_DIR:?}"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  write-sourced

  local suite='by-code'
  local cases=$BASE_DIR/cases.${suite}.txt

  number-cases > "$cases"
  # Each line is "<number> <code>"; read puts everything after the first
  # field into code_str.
  head -n "$max_cases" "$cases" | while read -r num code_str; do
    echo
    echo '==='
    echo "$num $code_str"
    echo

    run-case "$num" "$code_str"
  done

  # omit total line
  count-lines "$suite"
  summarize "$suite" 3 0
}
349
# Build the C++ debug binaries, then re-run this script's by-code suite with
# OSH / YSH set to the names that count-procs maps to those binaries.
# Arguments are forwarded to by-code.
by-code-cpp() {
  ninja _bin/cxx-dbg/{osh,ysh}
  # Quote $0 so the script path survives whitespace.
  OSH=osh-cpp YSH=ysh-cpp "$0" by-code "$@"
}
354
# Build the C++ debug binaries, then re-run this script's by-input suite with
# OSH / YSH set to the names that count-procs maps to those binaries.
# Arguments are forwarded to by-input.
by-input-cpp() {
  ninja _bin/cxx-dbg/{osh,ysh}
  # Quote $0 so the script path survives whitespace.
  OSH=osh-cpp YSH=ysh-cpp "$0" by-input "$@"
}
359
# Invoke test/syscall.py with the current directory on PYTHONPATH, forwarding
# all arguments and stdin.
syscall-py() {
  local script=test/syscall.py
  PYTHONPATH=. "$script" "$@"
}
363
# Create _tmp/sourced.sh, the file sourced by the ". _tmp/sourced.sh" case.
# It holds two external commands and no trailing newline.
write-sourced() {
  printf '%s' 'date; date' > _tmp/sourced.sh
}
367
# Count the lines of every raw trace file and save the listing.
#
# Arguments:
#   $1 - suite name (default by-code); output goes to $BASE_DIR/wc.$1.txt
# Globals: RAW_DIR, BASE_DIR (read)
count-lines() {
  local suite=${1:-by-code}

  # wc appends a "total" line only when given more than one file, so drop the
  # last line only when it really is that total.  LC_ALL=C keeps the word
  # "total" untranslated.  The redirect is resolved in the original
  # directory, because the cd happens inside the subshell.
  ( cd "$RAW_DIR" && LC_ALL=C wc -l -- * ) \
    | awk '
        NR > 1 { print prev }
        { prev = $0 }
        END {
          if (NR > 0 && !(NR > 1 && prev ~ /^ *[0-9]+ +total$/)) print prev
        }
      ' > "$BASE_DIR/wc.${suite}.txt"
}
372
# Feed a suite's line counts to syscall-py and report OK or FAIL.
#
# Arguments:
#   $1 - suite name (default by-code)
#   $2 - value for --not-minimum (default 0)
#   $3 - value for --more-than-bash (default 0)
# Globals: BASE_DIR (read)
# Outputs: whatever syscall-py prints, then 'OK' or 'FAIL'
# Returns: 0 in both cases; the result is only reported on stdout.
summarize() {
  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  # Capture the status with || rather than toggling errexit, so the caller's
  # errexit setting is left exactly as it was.
  local status=0
  syscall-py \
    --not-minimum "$not_minimum" \
    --more-than-bash "$more_than_bash" \
    --suite "$suite" \
    "$BASE_DIR/cases.${suite}.txt" \
    "$BASE_DIR" \
    < "$BASE_DIR/wc.${suite}.txt" || status=$?

  if test "$status" -eq 0; then
    echo 'OK'
  else
    echo 'FAIL'
  fi
}
395
# Run both suites, by-code first and then by-input, and print a final OK.
soil-run() {
  # Invoked as one of the "other" tests. Soil runs by-code and by-input
  # separately.

  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz
  by-code
  by-input

  echo 'OK'
}
406
run-for-release() {
  ### Run the two syscall suites
  # Release entry point; simply delegates to soil-run.

  soil-run
}
412
#
# Real World
#
# Number of trace files per shell after running CPython's ./configure:
#
#   $ ls|grep dash|wc -l
#   6098
#   $ ls|grep bash|wc -l
#   6102
#   $ ls|grep osh|wc -l
#   6098
#
# So Oil is already at dash level for CPython's configure, and bash isn't
# far off.  So autoconf-generated scripts probably already use constructs
# that are already "optimal" in most shells.

# Source tree that cpython-configure runs ./configure in; resolved against the
# directory the script is started from.
PY27_DIR=$PWD/Python-2.7.13
readonly PY27_DIR
428
# Trace CPython's ./configure under bash, dash and osh.
#
# Trace files go to $PWD/$RAW_DIR/real/cpython-<shell>.<pid>.
# Globals: RAW_DIR, PY27_DIR (read)
cpython-configure() {
  # Absolute, because count-procs runs after the pushd below.
  local raw_dir=$PWD/$RAW_DIR/real
  mkdir -p "$raw_dir"

  # PY27_DIR is derived from $PWD, so it may contain whitespace.
  pushd "$PY27_DIR"
  #for sh in "${SHELLS[@]}"; do
  local sh out_prefix
  for sh in bash dash osh; do
    out_prefix=$raw_dir/cpython-$sh
    echo "--- $sh"

    # TODO: Use a different dir
    count-procs "$out_prefix" "$sh" -c './configure'
  done
  popd
}
444
445	task-five "$@"