test/syscall.sh

OILS / test / syscall.sh View on Github | oilshell.org

493 lines, 205 significant

#!/usr/bin/env bash
#
# Measure the number of syscalls that shells use.
#
# Usage:
#   test/syscall.sh <function name>

# LIB_OSH may be overridden from the environment; quoted so the default is
# never subject to globbing or word splitting.
: "${LIB_OSH=stdlib/osh}"
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"

source build/dev-shell.sh

# Names of the Oils shells to test; both can be overridden from the
# environment.
OSH=${OSH:-osh}
YSH=${YSH:-ysh}

#readonly -a SHELLS=(dash bash-4.4 bash $OSH)

# Compare bash 4 vs. bash 5
SHELLS=(dash bash-4.4 bash-5.2.21 mksh zsh ash $OSH)

# yash does something fundamentally different in by-code.wrapped - it
# understands functions
#SHELLS+=(yash)

readonly BASE_DIR='_tmp/syscall'  # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw'  # Raw data

# Run it against the dev version of OSH.
# Quote $0 so a checkout path containing spaces works, and use && so a failed
# cd yields an empty/failed result rather than silently reporting the current
# directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
31
count-procs() {
  ### Run a shell under strace, writing one trace file per process
  #
  # $1 out_prefix: passed to 'strace -ff -o'
  # $2 sh:         shell name; osh, ysh, osh-cpp and ysh-cpp are mapped to the
  #                commands below, anything else is used as given
  # $3...:         arguments handed to the shell
  #
  # Always returns 0: a failing shell or strace is ignored because only the
  # trace files matter.
  local out_prefix=$1
  local sh=$2
  shift 2

  case $sh in
    # avoid the extra processes that bin/osh starts!
    # relies on word splitting
    #(X)  # to compare against osh 0.8.pre3 installed
    osh)
      sh="env PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor $REPO_ROOT/bin/oils_for_unix.py osh"
      ;;
    ysh)
      sh="env PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor $REPO_ROOT/bin/oils_for_unix.py ysh"
      ;;
    osh-cpp)
      sh=_bin/cxx-dbg/osh
      ;;
    ysh-cpp)
      sh=_bin/cxx-dbg/ysh
      ;;
  esac

  # Ignore failure, because we are just counting.
  # $sh is deliberately unquoted: the osh/ysh entries above are multi-word
  # command lines that must be split into separate arguments.
  strace -ff -o "$out_prefix" -- $sh "$@" || true
}
58
run-case() {
  ### Trace one numbered snippet, passed with -c, under every shell in SHELLS
  #
  # $1 case number (becomes part of the trace file prefix)
  # $2 code string to run
  # $3 optional; when non-empty the code is run from inside a shell function
  local case_num=$1
  local snippet=$2
  local wrap=${3:-}

  [[ -z "$wrap" ]] || snippet="wrapper() { $snippet; }; wrapper"

  for sh in "${SHELLS[@]}"; do
    local prefix=$RAW_DIR/${sh}__${case_num}
    printf '%s\n' "--- $sh"
    count-procs $prefix $sh -c "$snippet"
  done
}
76
run-case-file() {
  ### Like run-case, but each shell reads the code from a script file
  #
  # $1 case number; the code is written to _tmp/<number>.sh
  # $2 code string, written verbatim with no newline appended
  local case_num=$1
  local snippet=$2
  local script=_tmp/$case_num.sh

  echo -n "$snippet" > $script

  for sh in "${SHELLS[@]}"; do
    local prefix=$RAW_DIR/${sh}__${case_num}
    printf '%s\n' "--- $sh"
    count-procs $prefix $sh $script
  done
}
91
run-case-stdin() {
  ### Like run-case, but each shell reads the code from a pipe on stdin
  #
  # $1 case number (becomes part of the trace file prefix)
  # $2 code string, written to the pipe verbatim with no newline appended
  local num=$1
  local code_str=$2

  local sh out_prefix
  for sh in "${SHELLS[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    # printf rather than 'echo -n' so the code is never parsed as echo options
    printf '%s' "$code_str" | count-procs "$out_prefix" "$sh"
  done
}
104
print-cases() {
  ### Print the code snippets to measure, one per line
  #
  # Blank lines and '#' comment lines of the here-doc are filtered out.  The
  # here-doc delimiter is unquoted, so '$' is written as '\$' to keep command
  # substitutions literal.  number-cases adds the case numbers.
  grep -E -v '^[[:space:]]*(#|$)' <<EOF

# builtin
echo hi

# external command
date

# OSH calls this "sentence"
date ;

# trap - bash has special logic for this
trap 'echo mytrap' EXIT; date

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

echo hi; (date;)

echo hi; (echo hi;)

echo hi; (echo hi; date)

( echo hi ); echo hi

# Sentence in Oil
(date;) > /tmp/out.txt

(date; echo hi)

# command sub
echo \$(date)

# command sub with builtin
echo \$(echo hi)

# command sub with useless subshell (some scripts use this)
echo \$( ( date ) )

# command sub with other subshell
echo \$( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# negated
! date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# negated
! command date | wc -l

# 3 processes for all?
# osh gives FIVE??? But others give 3. That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here? That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ??? wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here. That is a crazy optimization. I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too. 3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3. So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

( echo a; echo b ) | ( wc -l )

{ echo prefix; ( echo a; echo b ); } | ( wc -l )

echo hi & wait

date & wait

echo hi | wc -l & wait

date | wc -l & wait

trap 'echo mytrap' EXIT; date & wait

trap 'echo mytrap' EXIT; date | wc -l & wait

# trap in SubProgramThunk
{ trap 'echo mytrap' EXIT; date; } & wait
EOF

  # Discarded because they're identical
  # pipeline with redirect last
  #date | wc -l > /tmp/out.txt

  # pipeline with redirect first
  #date 2>&1 | wc -l

}
246
number-cases() {
  ### Print the cases from print-cases, each prefixed with a 2-digit number
  # Right justified, leading zeros, with 2
  # Wish this was %02d
  print-cases | nl --number-format rz --number-width 2
}
252
by-input() {
  ### Run cases that vary by input reader
  #
  # The same six snippets are run three ways: with -c (cases 30-35), from a
  # file (40-45) and from stdin (50-55).  The case descriptions go to
  # $BASE_DIR/cases.by-input.txt, then the traces are counted and summarized.
  # Returns early, without running anything, when strace is not usable.
  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local suite='by-input'

  # :? guards against an empty RAW_DIR turning this into a different rm
  rm -r -f -v -- "${RAW_DIR:?}"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases!  2 processes instead of 3.
  run-case 30 "$zero"
  run-case 31 "$one"
  run-case 32 "$two"
  run-case 33 "$comment"
  run-case 34 "$newline"
  run-case 35 "$newline2"

  # x4 and x5 must differ (newline vs. newline2) to match the descriptions
  # written below.
  run-case-file 40 "$zero"
  run-case-file 41 "$one"
  run-case-file 42 "$two"
  run-case-file 43 "$comment"
  run-case-file 44 "$newline"
  run-case-file 45 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  run-case-stdin 50 "$zero"
  run-case-stdin 51 "$one"
  run-case-stdin 52 "$two"
  run-case-stdin 53 "$comment"
  run-case-stdin 54 "$newline"
  run-case-stdin 55 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
30 -c: zero lines
31 -c: one line
32 -c: one line and comment
33 -c: comment first
34 -c: newline
35 -c: newline2
40 file: zero lines
41 file: one line
42 file: one line and comment
43 file: comment first
44 file: newline
45 file: newline2
50 stdin: zero lines
51 stdin: one line
52 stdin: one line and comment
53 stdin: comment first
54 stdin: newline
55 stdin: newline2
EOF

  count-lines $suite
  summarize $suite 3 0
}
329
# Quick hack: every shell uses 2 processes for this... doesn't illuminate much.
weird-command-sub() {
  ### Trace the $(< file) form of command substitution (cases 60 and 61)
  #
  # Writes the case descriptions to $BASE_DIR/cases.weird-command-sub.txt,
  # then counts and summarizes the traces.
  shopt -s nullglob
  # :? guards against an empty RAW_DIR expanding this to /*
  rm -r -f -v -- "${RAW_DIR:?}"/*

  local tmp=_tmp/cs
  echo FOO > "$tmp"
  # The '$' is escaped so the command substitution reaches the shell under
  # test; unescaped, this driver would expand it and every shell would just
  # be handed a plain 'echo ...' command.
  run-case 60 "echo \$(< $tmp)"
  run-case 61 "echo \$(< $tmp; echo hi)"

  local suite=weird-command-sub

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
60 \$(< file)
61 \$(< file; echo hi)
EOF

  count-lines $suite
  summarize $suite 0 0
}
350
readonly MAX_CASES=100
#readonly MAX_CASES=3

by-code() {
  ### Run cases that vary by code snippet
  #
  # $1 func_wrap: optional; when non-empty each snippet is run inside a shell
  #               function and the suite is named by-code-wrapped
  # $2 max_cases: optional; run only the first N cases (default $MAX_CASES)
  #
  # max_cases is read from $2, not $1: reading both from $1 made 'by-code T'
  # run 'head -n T'.
  local func_wrap=${1:-}
  local max_cases=${2:-$MAX_CASES}

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  # :? guards against an empty RAW_DIR turning this into a different rm
  rm -r -f -v -- "${RAW_DIR:?}"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  write-sourced

  local suite
  if test -n "$func_wrap"; then
    suite='by-code-wrapped'
  else
    suite='by-code'
  fi

  local cases=$BASE_DIR/cases.${suite}.txt

  number-cases > "$cases"

  # Each line is "<number> <code>"; read -r keeps backslashes in the code.
  local num code_str
  head -n "$max_cases" "$cases" | while read -r num code_str; do
    echo
    echo '==='
    echo "$num     $code_str"
    echo

    run-case "$num" "$code_str" "$func_wrap"
  done

  # omit total line
  count-lines $suite
  summarize $suite 3 0
}
393
by-code-cpp() {
  ### Build the debug C++ osh and ysh, then re-run this script's by-code
  ### with OSH/YSH set to the osh-cpp/ysh-cpp names; arguments pass through.
  ninja _bin/cxx-dbg/osh _bin/cxx-dbg/ysh
  OSH=osh-cpp YSH=ysh-cpp $0 by-code "$@"
}
398
by-input-cpp() {
  ### Build the debug C++ osh and ysh, then re-run this script's by-input
  ### with OSH/YSH set to the osh-cpp/ysh-cpp names; arguments pass through.
  ninja _bin/cxx-dbg/osh _bin/cxx-dbg/ysh
  OSH=osh-cpp YSH=ysh-cpp $0 by-input "$@"
}
403
syscall-py() {
  ### Run test/syscall.py with the current directory on PYTHONPATH,
  ### forwarding all arguments.
  local -x PYTHONPATH=.
  test/syscall.py "$@"
}
407
write-sourced() {
  ### Create _tmp/sourced.sh, the file read by the '. _tmp/sourced.sh' case.
  ### It holds two date commands and has no trailing newline.
  printf '%s' 'date; date' > _tmp/sourced.sh
}
411
count-lines() {
  ### Write the line count of every file in $RAW_DIR to
  ### $BASE_DIR/wc.<suite>.txt
  #
  # $1 suite: defaults to by-code
  local suite=${1:-by-code}

  # head -n -1 drops the last line of wc's output, i.e. the "total" line that
  # wc prints when it is given more than one file.
  ( cd "$RAW_DIR" && wc -l * ) | head -n -1 > "$BASE_DIR/wc.${suite}.txt"
}
416
summarize() {
  ### Feed a suite's line counts to syscall-py and print OK or FAIL
  #
  # $1 suite:          defaults to by-code
  # $2 not_minimum:    passed as --not-minimum (default 0)
  # $3 more_than_bash: passed as --more-than-bash (default 0)
  #
  # Reads $BASE_DIR/wc.<suite>.txt on stdin and passes
  # $BASE_DIR/cases.<suite>.txt and $BASE_DIR as arguments.
  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  # errexit is switched off so a non-zero exit from syscall-py is reported as
  # FAIL below instead of aborting the script.
  set +o errexit
  syscall-py \
    --not-minimum "$not_minimum" \
    --more-than-bash "$more_than_bash" \
    --suite "$suite" \
    "$BASE_DIR/cases.${suite}.txt" \
    "$BASE_DIR" \
    < "$BASE_DIR/wc.${suite}.txt"
  local status=$?
  set -o errexit

  if test $status -eq 0; then
    echo 'OK'
  else
    echo 'FAIL'
  fi
}
439
soil-run() {
  ### Run every suite in sequence: by-code plain, by-code wrapped, by-input
  #
  # Invoked as one of the "other" tests.  Soil runs by-code and by-input
  # separately.
  #
  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz
  by-code    # plain
  by-code T  # wrapped
  by-input

  printf '%s\n' 'OK'
}
454
run-for-release() {
  ### Release entry point: runs the same suites as soil-run.
  ### Arguments are not forwarded.

  soil-run
}
460
461	#
462	# Real World
463	#
464	# $ ls\|grep dash\|wc -l
465	# 6098
466	# $ ls\|grep bash\|wc -l
467	# 6102
468	# $ ls\|grep osh\|wc -l
469	# 6098
470	#
471	# So Oil is already at dash level for CPython's configure, and bash isn't
472	# far off. So autoconf-generated scripts probably already use constructs
473	# that are already "optimal" in most shells.
474
readonly PY27_DIR=$PWD/Python-2.7.13

cpython-configure() {
  ### Trace './configure' run from $PY27_DIR under bash, dash and osh
  #
  # Trace files go to $PWD/$RAW_DIR/real/cpython-<shell>.*, where $PWD is the
  # directory this function is called from.
  local trace_dir=$PWD/$RAW_DIR/real
  mkdir -p $trace_dir

  pushd $PY27_DIR
  #for sh in "${SHELLS[@]}"; do
  for sh in bash dash osh; do
    local prefix=$trace_dir/cpython-$sh
    printf '%s\n' "--- $sh"

    # TODO: Use a different dir
    count-procs $prefix $sh -c './configure'
  done
  popd
}
492
# Entry point: hand the command-line arguments to task-five, which is sourced
# near the top of this file from $LIB_OSH/task-five.sh.  Presumably it runs the
# function named by the first argument, per the usage note in the file header
# -- confirm in task-five.sh.
493	task-five "$@"