test/syscall.sh

OILS / test / syscall.sh View on Github | oilshell.org

437 lines, 194 significant

#!/usr/bin/env bash
#
# Measure the number of syscalls that shells use.
#
# Usage:
#   test/syscall.sh <function name>

# Library directory; a value already present in the environment wins.
: ${LIB_OSH=stdlib/osh}
source $LIB_OSH/bash-strict.sh
source $LIB_OSH/task-five.sh

source build/dev-shell.sh

# Names of the osh / ysh entries in SHELLS; fall back to the plain names when
# unset or empty.
: "${OSH:=osh}"
: "${YSH:=ysh}"

# Every shell that run-case and friends iterate over.
SHELLS=(dash bash mksh zsh ash yash $OSH $YSH)
readonly -a SHELLS

BASE_DIR='_tmp/syscall'     # What we'll publish
RAW_DIR='_tmp/syscall-raw'  # Raw data
readonly BASE_DIR RAW_DIR

# Run it against the dev version of OSH: absolute path of the parent of the
# directory containing this script.
REPO_ROOT=$(cd "$(dirname $0)/.."; pwd)
24
# Run a shell under 'strace -ff', which writes one trace file per process.
#
# Arguments:
#   $1  - prefix for the trace files (handed to strace -o)
#   $2  - shell name: osh, ysh, osh-cpp and ysh-cpp are mapped to in-tree
#         binaries; any other value is run as given
#   $@  - remaining arguments are passed to the shell unchanged
# Globals:
#   REPO_ROOT (read) - used to locate the Python entry point and its vendor dir
# Returns:
#   the exit status of strace
count-procs() {
  local out_prefix=$1
  local sh=$2
  shift 2

  # Build the command as an array rather than a string, so it does not rely
  # on word splitting and survives a REPO_ROOT that contains whitespace.
  local -a argv
  case $sh in
    # avoid the extra processes that bin/osh starts!
    # (to compare against an installed osh instead, make this pattern
    # something that never matches)
    osh | ysh)
      argv=(
        env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
        "$REPO_ROOT/bin/oils_for_unix.py" "$sh"
      )
      ;;
    osh-cpp)
      argv=(_bin/cxx-dbg/osh)
      ;;
    ysh-cpp)
      argv=(_bin/cxx-dbg/ysh)
      ;;
    *)
      argv=("$sh")
      ;;
  esac

  strace -ff -o "$out_prefix" -- "${argv[@]}" "$@"
}
50
run-case() {
  ### Trace one code string, passed with -c, under every shell in SHELLS

  local num=$1       # case number; becomes part of the trace file prefix
  local code_str=$2  # code handed to each shell

  local out_prefix
  for sh in "${SHELLS[@]}"; do
    echo "--- $sh"
    out_prefix=$RAW_DIR/$num.$sh
    count-procs $out_prefix $sh -c "$code_str"
  done
}
63
run-case-file() {
  ### Same as run-case, except each shell is given the code as a script file

  local num=$1
  local code_str=$2

  # Written without adding a trailing newline, so the file holds exactly
  # $code_str.
  local script=_tmp/$num.sh
  echo -n "$code_str" > $script

  local out_prefix
  for sh in "${SHELLS[@]}"; do
    echo "--- $sh"
    out_prefix=$RAW_DIR/$num.$sh
    count-procs $out_prefix $sh $script
  done
}
78
run-case-stdin() {
  ### Same as run-case, except each shell reads the code from a pipe on stdin

  local num=$1
  local code_str=$2

  local out_prefix
  for sh in "${SHELLS[@]}"; do
    echo "--- $sh"
    out_prefix=$RAW_DIR/$num.$sh
    # The shell gets no arguments; the code arrives on stdin without an added
    # trailing newline.
    echo -n "$code_str" |
      count-procs $out_prefix $sh
  done
}
91
92
# Print the by-code test cases, one shell snippet per line.
#
# Blank lines and '#' comment lines in the list below are filtered out.  The
# output is NOT numbered; number-cases adds the case numbers.
#
# The heredoc delimiter is quoted, so every line is literal: nothing is
# expanded and no backslash escapes are needed.
print-cases() {
  # grep -E instead of egrep: GNU grep warns that egrep is obsolescent.
  grep -E -v '^[[:space:]]*(#|$)' <<'EOF'

# builtin
echo hi

# external command
date

# Oil sentence
date ;

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

# Sentence in Oil
(date;) > /tmp/out.txt

(date; echo hi)

# command sub
echo $(date)

# command sub with builtin
echo $(echo hi)

# command sub with useless subshell (some scripts use this)
echo $( ( date ) )

# command sub with other subshell
echo $( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# 3 processes for all?
# osh gives FIVE??? But others give 3. That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here? That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ??? wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here. That is a crazy optimization. I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too. 3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3. So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

# osh does 5 when others do 3.
( echo a; echo b ) | ( wc -l )
EOF

  # Discarded because they're identical
  # pipeline with redirect last
  #date | wc -l > /tmp/out.txt

  # pipeline with redirect first
  #date 2>&1 | wc -l

}
201
# Prefix each line of print-cases with its case number: right justified,
# zero padded, 2 digits wide (the equivalent of %02d).
number-cases() {
  print-cases |
    nl -n rz -w 2
}
207
by-input() {
  ### Run cases that vary by input reader

  # Six variants of the same snippet are fed to every shell three ways: with
  # -c (30-35), as a script file (40-45) and on stdin (50-55).  Writes
  # $BASE_DIR/cases.by-input.txt, then calls count-lines and summarize.
  # Returns early, with the status of the echo, when strace cannot be run.

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local suite='by-input'

  rm -r -f -v $RAW_DIR
  mkdir -p $RAW_DIR $BASE_DIR

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  # Function-local, so they don't leak into whatever task-five runs next.
  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases! 2 processes instead of 3.
  run-case 30 "$zero"
  run-case 31 "$one"
  run-case 32 "$two"
  run-case 33 "$comment"
  run-case 34 "$newline"
  run-case 35 "$newline2"

  # Case 44 is labelled "newline" in the cases file below, so it must run
  # $newline; it used to run $newline2, duplicating case 45.
  run-case-file 40 "$zero"
  run-case-file 41 "$one"
  run-case-file 42 "$two"
  run-case-file 43 "$comment"
  run-case-file 44 "$newline"
  run-case-file 45 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  # Case 54 had the same duplication as case 44.
  run-case-stdin 50 "$zero"
  run-case-stdin 51 "$one"
  run-case-stdin 52 "$two"
  run-case-stdin 53 "$comment"
  run-case-stdin 54 "$newline"
  run-case-stdin 55 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat >$BASE_DIR/cases.${suite}.txt <<EOF
30 -c: zero lines
31 -c: one line
32 -c: one line and comment
33 -c: comment first
34 -c: newline
35 -c: newline2
40 file: zero lines
41 file: one line
42 file: one line and comment
43 file: comment first
44 file: newline
45 file: newline2
50 stdin: zero lines
51 stdin: one line
52 stdin: one line and comment
53 stdin: comment first
54 stdin: newline
55 stdin: newline2
EOF

  count-lines $suite
  summarize $suite 3 0
}
284
# Quick hack: every shell uses 2 processes for this... doesn't illuminate much.
#
# Traces 'echo $(< file)' and 'echo $(< file; echo hi)' under every shell,
# then writes the cases file and calls count-lines and summarize.
weird-command-sub() {
  shopt -s nullglob

  # This task may run first, so create the output dirs (and with them _tmp)
  # before writing into them.
  mkdir -p $RAW_DIR $BASE_DIR
  rm -r -f -v $RAW_DIR/*

  local tmp=_tmp/cs
  echo FOO > $tmp

  # The \$ keeps the command sub literal, so the shell under test evaluates
  # it; only $tmp is expanded here.  Unescaped, this script would expand the
  # command sub itself and every shell would just run 'echo FOO' / 'echo hi'.
  run-case 60 "echo \$(< $tmp)"
  run-case 61 "echo \$(< $tmp; echo hi)"

  local suite=weird-command-sub

  cat >$BASE_DIR/cases.${suite}.txt <<EOF
60 \$(< file)
61 \$(< file; echo hi)
EOF

  count-lines $suite
  summarize $suite 0 0
}
305
# Default cap on how many cases by-code runs.
readonly MAX_CASES=100
#readonly MAX_CASES=3

by-code() {
  ### Run cases that vary by code snippet

  # $1 optionally caps the number of cases (default: MAX_CASES).

  strace true || {
    echo "Aborting because we couldn't run strace"
    return
  }

  local max_cases=${1:-$MAX_CASES}
  local suite='by-code'
  local cases=$BASE_DIR/cases.${suite}.txt

  rm -r -f -v $RAW_DIR
  mkdir -p $RAW_DIR $BASE_DIR

  # Creates the file that the '. _tmp/sourced.sh' case reads
  write-sourced

  number-cases > $cases

  # Each line is a case number, whitespace, then the code; everything after
  # the number lands in code_str.
  head -n $max_cases $cases |
    while read -r num code_str; do
      echo
      echo '==='
      echo "$num $code_str"
      echo

      run-case $num "$code_str"
    done

  # omit total line
  count-lines $suite
  summarize $suite 3 0
}
341
# Build the debug C++ osh and ysh with ninja, then re-invoke this script's
# by-code task with OSH and YSH set to the osh-cpp / ysh-cpp names.
# Arguments are forwarded to by-code.
by-code-cpp() {
  ninja _bin/cxx-dbg/osh _bin/cxx-dbg/ysh
  OSH=osh-cpp YSH=ysh-cpp $0 by-code "$@"
}
346
# Build the debug C++ osh and ysh with ninja, then re-invoke this script's
# by-input task with OSH and YSH set to the osh-cpp / ysh-cpp names.
# Arguments are forwarded to by-input.
by-input-cpp() {
  ninja _bin/cxx-dbg/osh _bin/cxx-dbg/ysh
  OSH=osh-cpp YSH=ysh-cpp $0 by-input "$@"
}
351
# Run test/syscall.py with PYTHONPATH set to the current directory, passing
# all arguments through.
syscall-py() {
  env PYTHONPATH=. test/syscall.py "$@"
}
355
# Create _tmp/sourced.sh, the file read by the '. _tmp/sourced.sh' case.
# It holds two date commands and no trailing newline.
write-sourced() {
  printf '%s' 'date; date' > _tmp/sourced.sh
}
359
# Count the lines of every file in $RAW_DIR and save the per-file counts to
# $BASE_DIR/wc.<suite>.txt.
#
# Arguments:
#   $1 - suite name (default: by-code)
count-lines() {
  local suite=${1:-by-code}

  # wc appends a "total" row only when given more than one file, so delete
  # the last row only if it really is that row.  (An unconditional
  # 'head -n -1' would drop the only data row for a single file, and negative
  # counts are a GNU extension.)
  ( cd $RAW_DIR && wc -l * ) \
    | sed -e '$ { /^ *[0-9][0-9]* total$/d; }' \
    > $BASE_DIR/wc.${suite}.txt
}
364
# Feed $BASE_DIR/wc.<suite>.txt to syscall-py and print OK or FAIL according
# to its exit status.
#
# Arguments:
#   $1 - suite name (default: by-code)
#   $2 - value for --not-minimum (default: 0)
#   $3 - value for --more-than-bash (default: 0)
summarize() {
  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  local -a flags=(
    --not-minimum $not_minimum
    --more-than-bash $more_than_bash
    --suite $suite
  )

  # errexit is off only around the pipeline, so that a failure can be
  # reported below instead of aborting the script.
  local status
  set +o errexit
  cat $BASE_DIR/wc.${suite}.txt \
    | syscall-py "${flags[@]}" $BASE_DIR/cases.${suite}.txt $BASE_DIR
  status=$?
  set -o errexit

  case $status in
    0) echo 'OK' ;;
    *) echo 'FAIL' ;;
  esac
}
387
soil-run() {
  # Invoked as one of the "other" tests.  Soil runs by-code and by-input
  # separately.

  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz
  local task
  for task in by-code by-input; do
    $task
  done

  echo 'OK'
}
398
run-for-release() {
  ### Run the two syscall suites

  # Everything is delegated to soil-run, which runs by-code and then by-input.
  soil-run
}
404
405	#
406	# Real World
407	#
408	# $ ls\|grep dash\|wc -l
409	# 6098
410	# $ ls\|grep bash\|wc -l
411	# 6102
412	# $ ls\|grep osh\|wc -l
413	# 6098
414	#
415	# So Oil is already at dash level for CPython's configure, and bash isn't
416	# far off. So autoconf-generated scripts probably already use constructs
417	# that are already "optimal" in most shells.
418
# Python 2.7.13 source tree, looked up under the directory the script is
# started from.
readonly PY27_DIR=$PWD/Python-2.7.13

# Trace './configure' inside $PY27_DIR under bash, dash and osh.  Trace files
# are written under $RAW_DIR/real.
cpython-configure() {
  # Made absolute up front, because the loop runs after changing directory.
  local raw_dir=$PWD/$RAW_DIR/real
  mkdir -p $raw_dir

  pushd $PY27_DIR

  local out_prefix
  # To cover every shell instead:
  #for sh in "${SHELLS[@]}"; do
  for sh in bash dash osh; do
    echo "--- $sh"
    out_prefix=$raw_dir/cpython-$sh

    # TODO: Use a different dir
    count-procs $out_prefix $sh -c './configure'
  done

  popd
}

# Hand the command line to task-five, which comes from the task-five.sh
# library sourced at the top of this file.
task-five "$@"