test/syscall.sh

OILS / test / syscall.sh View on Github | oilshell.org

482 lines, 205 significant

#!/usr/bin/env bash
#
# Measure the number of syscalls that shells use.
#
# Usage:
#   test/syscall.sh <function name>

# Directory holding the shared shell library.  The '=' form only assigns when
# LIB_OSH is unset, so the caller's environment can override it.
: "${LIB_OSH=stdlib/osh}"
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"

source build/dev-shell.sh

# Names of the osh / ysh variants to measure.  count-procs rewrites the names
# osh, ysh, osh-cpp and ysh-cpp into concrete command lines.
OSH=${OSH:-osh}
YSH=${YSH:-ysh}

# Compare bash 4 vs. bash 5
#readonly -a SHELLS=(dash bash-4.4 bash $OSH)
#readonly -a SHELLS=(dash bash-4.4 bash-5.2.21 mksh zsh ash yash $OSH)

# Remove yash since functions are over-optimized - by-code.wrapped
readonly -a SHELLS=(dash bash-4.4 bash-5.2.21 mksh zsh ash "$OSH")

readonly BASE_DIR='_tmp/syscall'  # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw'  # Raw data

# Run it against the dev version of OSH.
# '&&' rather than ';' so a failed cd cannot silently yield the wrong root.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
29
count-procs() {
  ### Run a shell under strace, producing one trace file per process
  #
  # Args:
  #   $1    - output prefix given to 'strace -o'; combined with -ff, each
  #           traced process gets its own file named <prefix>.<pid>
  #   $2    - shell to trace.  osh, ysh, osh-cpp and ysh-cpp are rewritten to
  #           the command lines below; any other value is used as given
  #   $3... - arguments passed through to the shell
  #
  # Globals: REPO_ROOT (read)
  # Returns: always 0
  local out_prefix=$1
  local sh=$2
  shift 2

  case $sh in
    # avoid the extra processes that bin/osh starts!
    # relies on word splitting
    #(X)  # to compare against osh 0.8.pre3 installed
    osh)
      sh="env PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor $REPO_ROOT/bin/oils_for_unix.py osh"
      ;;
    ysh)
      sh="env PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor $REPO_ROOT/bin/oils_for_unix.py ysh"
      ;;
    osh-cpp)
      sh=_bin/cxx-dbg/osh
      ;;
    ysh-cpp)
      sh=_bin/cxx-dbg/ysh
      ;;
  esac

  # $sh is intentionally unquoted: the osh/ysh values above are whole command
  # lines that must be split into separate words.
  # Ignore failure, because we are just counting
  strace -ff -o "$out_prefix" -- $sh "$@" || true
}
56
run-case() {
  ### Run a test case with many shells
  #
  # Args:
  #   $1 - case number; becomes part of each trace file prefix
  #   $2 - code string handed to every shell via -c
  #   $3 - optional; when non-empty the code is run inside a shell function
  #        named 'wrapper'
  #
  # Globals: SHELLS, RAW_DIR (read)
  local num=$1
  local code_str=$2
  local func_wrap=${3:-}

  if test -n "$func_wrap"; then
    code_str="wrapper() { $code_str; }; wrapper"
  fi

  local sh out_prefix
  for sh in "${SHELLS[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" -c "$code_str"
  done
}
74
run-case-file() {
  ### Like run-case, but the shell reads the code from a file
  #
  # Args:
  #   $1 - case number; also names the script file _tmp/<num>.sh
  #   $2 - code string, written to the file byte-for-byte (no newline added)
  #
  # Globals: SHELLS, RAW_DIR (read)
  local num=$1
  local code_str=$2
  local script=_tmp/$num.sh

  # printf rather than 'echo -n': echo would swallow a code string that looks
  # like one of its own options (e.g. '-n' or '-e').
  printf '%s' "$code_str" > "$script"

  local sh out_prefix
  for sh in "${SHELLS[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" "$script"
  done
}
89
run-case-stdin() {
  ### Like run-case, but the shell reads the code from a pipe on stdin
  #
  # Args:
  #   $1 - case number; becomes part of each trace file prefix
  #   $2 - code string, piped to the shell byte-for-byte (no newline added)
  #
  # Globals: SHELLS, RAW_DIR (read)
  local num=$1
  local code_str=$2

  local sh out_prefix
  for sh in "${SHELLS[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    # printf rather than 'echo -n' so the code string can never be taken for
    # an echo option.
    printf '%s' "$code_str" | count-procs "$out_prefix" "$sh"
  done
}
102
print-cases() {
  ### Print the code snippets to measure, one per line
  #
  # The here-doc below is the case list.  Comment lines and blank lines are
  # filtered out by grep, so they are free-form annotations.  The here-doc
  # delimiter is unquoted, which is why a literal '$' is written as '\$'.
  grep -E -v '^[[:space:]]*(#|$)' <<EOF

# builtin
echo hi

# external command
date

# OSH calls this "sentence"
date ;

# trap - bash has special logic for this
trap 'echo mytrap' EXIT; date

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

# Sentence in Oil
(date;) > /tmp/out.txt

(date; echo hi)

# command sub
echo \$(date)

# command sub with builtin
echo \$(echo hi)

# command sub with useless subshell (some scripts use this)
echo \$( ( date ) )

# command sub with other subshell
echo \$( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# negated
! date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# negated
! command date | wc -l

# 3 processes for all?
# osh gives FIVE???  But others give 3.  That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here?  That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ???  wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here.  That is a crazy optimization.  I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too.  3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3.  So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

# osh does 5 when others do 3.
( echo a; echo b ) | ( wc -l )

echo hi & wait

date & wait

echo hi | wc -l & wait

date | wc -l & wait

trap 'echo mytrap' EXIT; date & wait

trap 'echo mytrap' EXIT; date | wc -l & wait

# trap in SubProgramThunk
{ trap 'echo mytrap' EXIT; date; } & wait
EOF

  # Discarded because they're identical
  # pipeline with redirect last
  #date | wc -l > /tmp/out.txt

  # pipeline with redirect first
  #date 2>&1 | wc -l

}
235
number-cases() {
  ### Print the cases from print-cases, each prefixed with a 2-digit number
  #
  # Output format: number, whitespace, then an arbitrary code string.
  # Right justified, leading zeros, with 2
  # Wish this was %02d
  print-cases | nl --number-format rz --number-width 2
}
241
by-input() {
  ### Run cases that vary by input reader
  #
  # The same six snippets are fed to every shell three ways: as a -c argument
  # (cases 30-35), as a script file (40-45) and on stdin (50-55).  The case
  # descriptions are written to $BASE_DIR/cases.by-input.txt, then the trace
  # files are counted and summarized.
  #
  # Globals: RAW_DIR, BASE_DIR (read); $RAW_DIR is deleted and recreated
  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local suite='by-input'

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases!  2 processes instead of 3.
  run-case 30 "$zero"
  run-case 31 "$one"
  run-case 32 "$two"
  run-case 33 "$comment"
  run-case 34 "$newline"
  run-case 35 "$newline2"

  # Case 44 must use $newline, matching its label in the case list below.
  run-case-file 40 "$zero"
  run-case-file 41 "$one"
  run-case-file 42 "$two"
  run-case-file 43 "$comment"
  run-case-file 44 "$newline"
  run-case-file 45 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  # Case 54 must use $newline, matching its label in the case list below.
  run-case-stdin 50 "$zero"
  run-case-stdin 51 "$one"
  run-case-stdin 52 "$two"
  run-case-stdin 53 "$comment"
  run-case-stdin 54 "$newline"
  run-case-stdin 55 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat > "$BASE_DIR/cases.${suite}.txt" <<EOF
30 -c: zero lines
31 -c: one line
32 -c: one line and comment
33 -c: comment first
34 -c: newline
35 -c: newline2
40 file: zero lines
41 file: one line
42 file: one line and comment
43 file: comment first
44 file: newline
45 file: newline2
50 stdin: zero lines
51 stdin: one line
52 stdin: one line and comment
53 stdin: comment first
54 stdin: newline
55 stdin: newline2
EOF

  count-lines "$suite"
  summarize "$suite" 3 0
}
318
# Quick hack: every shell uses 2 processes for this... doesn't illuminate much.
# NOTE(review): that observation may date from when the $(< file) below was
# expanded by this script instead of by the shells under test - re-check.
weird-command-sub() {
  ### Measure the $(< file) form of command substitution
  #
  # Globals: RAW_DIR, BASE_DIR (read); the contents of $RAW_DIR are deleted
  shopt -s nullglob
  # ':?' aborts instead of expanding to /* if RAW_DIR is ever empty.
  rm -r -f -v "${RAW_DIR:?}"/*

  local tmp=_tmp/cs
  echo FOO > "$tmp"
  # The '$' is escaped so that the command substitution reaches each shell
  # verbatim.  Unescaped, this script would expand it first and every shell
  # would merely run 'echo FOO'.
  run-case 60 "echo \$(< $tmp)"
  run-case 61 "echo \$(< $tmp; echo hi)"

  local suite=weird-command-sub

  cat > "$BASE_DIR/cases.${suite}.txt" <<EOF
60 \$(< file)
61 \$(< file; echo hi)
EOF

  count-lines "$suite"
  summarize "$suite" 0 0
}
339
readonly MAX_CASES=100
#readonly MAX_CASES=3

by-code() {
  ### Run cases that vary by code snippet
  #
  # Args:
  #   $1 - optional; when non-empty, every snippet is run inside a shell
  #        function and the suite is named by-code-wrapped
  #   $2 - optional; maximum number of cases to run (default: $MAX_CASES)
  #
  # Globals: RAW_DIR, BASE_DIR, MAX_CASES (read); $RAW_DIR is deleted and
  #          recreated
  local func_wrap=${1:-}
  # Read from $2, not $1: $1 is the wrap flag, and 'by-code T' must not end
  # up running 'head -n T'.
  local max_cases=${2:-$MAX_CASES}

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  write-sourced

  local suite
  if test -n "$func_wrap"; then
    suite='by-code-wrapped'
  else
    suite='by-code'
  fi

  local cases=$BASE_DIR/cases.${suite}.txt

  number-cases > "$cases"
  local num code_str
  head -n "$max_cases" "$cases" | while read -r num code_str; do
    echo
    echo '==='
    echo "$num $code_str"
    echo

    run-case "$num" "$code_str" "$func_wrap"
  done

  # omit total line
  count-lines "$suite"
  summarize "$suite" 3 0
}
382
by-code-cpp() {
  ### Build the C++ debug binaries, then run by-code against them
  #
  # Args: passed through to by-code
  ninja _bin/cxx-dbg/{osh,ysh}
  # Re-invoke this script so that OSH / YSH take effect from the top.
  OSH=osh-cpp YSH=ysh-cpp "$0" by-code "$@"
}
387
by-input-cpp() {
  ### Build the C++ debug binaries, then run by-input against them
  #
  # Args: passed through to by-input
  ninja _bin/cxx-dbg/{osh,ysh}
  # Re-invoke this script so that OSH / YSH take effect from the top.
  OSH=osh-cpp YSH=ysh-cpp "$0" by-input "$@"
}
392
syscall-py() {
  ### Run test/syscall.py with the current directory on PYTHONPATH
  #
  # Args: passed through to test/syscall.py
  PYTHONPATH=. test/syscall.py "$@"
}
396
write-sourced() {
  ### Create _tmp/sourced.sh, which the '. _tmp/sourced.sh' case reads
  #
  # The file is written without a trailing newline.
  printf '%s' 'date; date' > _tmp/sourced.sh
}
400
count-lines() {
  ### Write the line count of every raw trace file to wc.<suite>.txt
  #
  # Args:
  #   $1 - suite name (default: by-code)
  #
  # Globals: RAW_DIR, BASE_DIR (read)
  local suite=${1:-by-code}
  # 'head -n -1' drops the last line of wc's output, i.e. the 'total' line
  # that wc prints when given several files.
  # NOTE(review): with exactly one file in $RAW_DIR there is no total line,
  # so the only data line would be dropped instead.
  ( cd "$RAW_DIR" && wc -l -- * ) | head -n -1 > "$BASE_DIR/wc.${suite}.txt"
}
405
summarize() {
  ### Feed a suite's line counts to test/syscall.py and print OK or FAIL
  #
  # Args:
  #   $1 - suite name (default: by-code)
  #   $2 - value for --not-minimum (default: 0)
  #   $3 - value for --more-than-bash (default: 0)
  #
  # Globals: BASE_DIR (read)
  # Outputs: 'OK' when syscall-py exits 0, otherwise 'FAIL'.  A failure is
  #          reported on stdout only; it does not become the return status.
  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  # Running the command as an 'if' condition keeps errexit from aborting on a
  # non-zero status, without switching the caller's errexit setting off and on.
  if syscall-py \
    --not-minimum "$not_minimum" \
    --more-than-bash "$more_than_bash" \
    --suite "$suite" \
    "$BASE_DIR/cases.${suite}.txt" \
    "$BASE_DIR" \
    < "$BASE_DIR/wc.${suite}.txt"; then
    echo 'OK'
  else
    echo 'FAIL'
  fi
}
428
soil-run() {
  ### Run every suite: by-code, by-code wrapped in a function, then by-input
  #
  # Invoked as one of the "other" tests.  Soil runs by-code and by-input
  # separately.

  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz
  by-code

  # wrapped
  by-code T

  by-input

  echo 'OK'
}
443
run-for-release() {
  ### Run the syscall suites for a release; currently the same as soil-run

  soil-run
}
449
#
# Real World
#
# $ ls|grep dash|wc -l
# 6098
# $ ls|grep bash|wc -l
# 6102
# $ ls|grep osh|wc -l
# 6098
#
# So Oil is already at dash level for CPython's configure, and bash isn't
# far off.  So autoconf-generated scripts probably already use constructs
# that are already "optimal" in most shells.
463
# CPython source tree whose ./configure serves as a real-world workload.
readonly PY27_DIR=$PWD/Python-2.7.13

cpython-configure() {
  ### Trace CPython's ./configure under bash, dash and osh
  #
  # Trace files go to $PWD/$RAW_DIR/real/cpython-<shell>.<pid>.
  #
  # Globals: RAW_DIR, PY27_DIR (read)
  # raw_dir is absolute because the loop below runs inside $PY27_DIR.
  local raw_dir=$PWD/$RAW_DIR/real
  mkdir -p "$raw_dir"

  pushd "$PY27_DIR"
  local sh out_prefix
  #for sh in "${SHELLS[@]}"; do
  for sh in bash dash osh; do
    out_prefix=$raw_dir/cpython-$sh
    echo "--- $sh"

    # TODO: Use a different dir
    count-procs "$out_prefix" "$sh" -c './configure'
  done
  popd
}
481
# Entry point: hand the command line to task-five, which is provided by
# $LIB_OSH/task-five.sh.  Per the usage header, the first argument names the
# function to run - NOTE(review): confirm against task-five.sh.
task-five "$@"