OILS / test / syscall.sh View on Github | oilshell.org

432 lines, 190 significant
#!/usr/bin/env bash
#
# Measure the number of syscalls that shells use.
#
# Usage:
#   test/syscall.sh <function name>

# LIB_OSH may be overridden from the environment
: "${LIB_OSH=stdlib/osh}"
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"

source build/dev-shell.sh

# Names of the osh / ysh entries in SHELLS; may be overridden from the
# environment (by-code-cpp and by-input-cpp set them to osh-cpp / ysh-cpp)
OSH=${OSH:-osh}
YSH=${YSH:-ysh}

readonly -a SHELLS=(dash bash mksh zsh ash yash $OSH $YSH)

readonly BASE_DIR='_tmp/syscall'  # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw'  # Raw data

# Run it against the dev version of OSH
# $0 is quoted so a checkout path with spaces works, and && keeps pwd from
# reporting the wrong directory if the cd fails.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
24
count-procs() {
  ### Run a shell under strace, writing one trace file per process
  #
  # Args:
  #   $1: prefix for the strace output files (strace -ff appends .PID)
  #   $2: shell name; osh, ysh, osh-cpp and ysh-cpp are mapped to in-tree
  #       binaries below, anything else is run as given
  #   rest: arguments passed through to the shell

  local out_prefix=$1
  local sh=$2
  shift 2

  # Build the command as an array instead of a string that relies on word
  # splitting, so a REPO_ROOT containing spaces still works.
  local -a argv
  case $sh in
    # avoid the extra processes that bin/osh starts!
    #(X)  # to compare against osh 0.8.pre3 installed
    osh | ysh)
      argv=(
        env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
        "$REPO_ROOT/bin/oils_for_unix.py" "$sh"
      )
      ;;
    osh-cpp)
      argv=(_bin/cxx-dbg/osh)
      ;;
    ysh-cpp)
      argv=(_bin/cxx-dbg/ysh)
      ;;
    *)
      argv=("$sh")
      ;;
  esac

  strace -ff -o "$out_prefix" -- "${argv[@]}" "$@"
}
50
run-case() {
  ### Run a test case with many shells
  #
  # Args:
  #   $1: case number, used to name the strace output files
  #   $2: code string passed to each shell with -c

  local num=$1
  local code_str=$2

  # Declared local so the loop variable doesn't leak into the caller's scope
  local sh
  for sh in "${SHELLS[@]}"; do
    echo "--- $sh"
    count-procs "$RAW_DIR/$num.$sh" "$sh" -c "$code_str"
  done
}
63
run-case-file() {
  ### Like run-case, but the shell reads the code from a file
  #
  # Args:
  #   $1: case number; also names the script file _tmp/$num.sh
  #   $2: code string, written to that file verbatim (no newline is appended)

  local num=$1
  local code_str=$2
  local script=_tmp/$num.sh

  # printf rather than echo -n: echo would interpret a code string that looks
  # like one of its options (e.g. -n or -e) instead of writing it.
  printf '%s' "$code_str" > "$script"

  # Declared local so the loop variable doesn't leak into the caller's scope
  local sh
  for sh in "${SHELLS[@]}"; do
    echo "--- $sh"
    count-procs "$RAW_DIR/$num.$sh" "$sh" "$script"
  done
}
78
run-case-stdin() {
  ### Like run-case, but the shell reads the code from a pipe on stdin
  #
  # Args:
  #   $1: case number, used to name the strace output files
  #   $2: code string, piped to each shell verbatim (no newline is appended)

  local num=$1
  local code_str=$2

  # Declared local so the loop variable doesn't leak into the caller's scope
  local sh
  for sh in "${SHELLS[@]}"; do
    echo "--- $sh"
    # printf rather than echo -n: echo would interpret a code string that
    # looks like one of its options instead of writing it.
    printf '%s' "$code_str" | count-procs "$RAW_DIR/$num.$sh" "$sh"
  done
}
91
92
print-cases() {
  ### Print the code snippets of the by-code suite, one per line
  #
  # Blank lines and '#' comment lines in the here-doc are filtered out.  The
  # output has no case numbers; number-cases adds them, giving the format
  # by-code reads: number, whitespace, then an arbitrary code string.
  #
  # The here-doc delimiter is quoted, so the text is taken literally and $
  # needs no escaping.  grep -E replaces the obsolescent egrep.
  grep -E -v '^[[:space:]]*(#|$)' <<'EOF'

# builtin
echo hi

# external command
date

# Oil sentence
date ;

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

# Sentence in Oil
(date;) > /tmp/out.txt

(date; echo hi)

# command sub
echo $(date)

# command sub with builtin
echo $(echo hi)

# command sub with useless subshell (some scripts use this)
echo $( ( date ) )

# command sub with other subshell
echo $( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# 3 processes for all?
# osh gives FIVE??? But others give 3. That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here? That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ??? wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here. That is a crazy optimization. I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too. 3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3. So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

# osh does 5 when others do 3.
( echo a; echo b ) | ( wc -l )
EOF

# Discarded because they're identical
# pipeline with redirect last
#date | wc -l > /tmp/out.txt

# pipeline with redirect first
#date 2>&1 | wc -l

}
201
number-cases() {
  ### Prefix every case from print-cases with a 2-digit, zero-padded number
  #
  # rz = right justified with leading zeros, i.e. what %02d would give
  print-cases \
    | nl -n rz -w 2
}
207
by-input() {
  ### Run cases that vary by input reader
  #
  # The same snippets are given to every shell three ways: as a -c argument
  # (cases 30-35), as a script file (40-45) and on stdin (50-55).  The case
  # descriptions are written to $BASE_DIR/by-input-cases.txt.

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    # bare return: the status is that of the echo above
    return
  fi

  local suite='by-input'

  rm -r -f -v "$RAW_DIR"
  # BASE_DIR must exist too, because the cases file is written into it below
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases!  2 processes instead of 3.
  run-case 30 "$zero"
  run-case 31 "$one"
  run-case 32 "$two"
  run-case 33 "$comment"
  run-case 34 "$newline"
  run-case 35 "$newline2"

  # Case x4 is "newline" and x5 is "newline2", matching the -c cases above
  # and the descriptions in the cases file below.
  run-case-file 40 "$zero"
  run-case-file 41 "$one"
  run-case-file 42 "$two"
  run-case-file 43 "$comment"
  run-case-file 44 "$newline"
  run-case-file 45 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  run-case-stdin 50 "$zero"
  run-case-stdin 51 "$one"
  run-case-stdin 52 "$two"
  run-case-stdin 53 "$comment"
  run-case-stdin 54 "$newline"
  run-case-stdin 55 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat >"$BASE_DIR/${suite}-cases.txt" <<EOF
30 -c: zero lines
31 -c: one line
32 -c: one line and comment
33 -c: comment first
34 -c: newline
35 -c: newline2
40 file: zero lines
41 file: one line
42 file: one line and comment
43 file: comment first
44 file: newline
45 file: newline2
50 stdin: zero lines
51 stdin: one line
52 stdin: one line and comment
53 stdin: comment first
54 stdin: newline
55 stdin: newline2
EOF

  count-lines "$suite"
  summarize "$suite" 3 0
}
284
285# Quick hack: every shell uses 2 processes for this... doesn't illuminate much.
286weird-command-sub() {
287 shopt -s nullglob
288 rm -r -f -v $RAW_DIR/*
289
290 local tmp=_tmp/cs
291 echo FOO > $tmp
292 run-case 60 "echo $(< $tmp)"
293 run-case 61 "echo $(< $tmp; echo hi)"
294
295 local suite=weird-command-sub
296
297 cat >$BASE_DIR/${suite}-cases.txt <<EOF
29860 \$(< file)
29961 \$(< file; echo hi)
300EOF
301
302 count-lines $suite
303 summarize $suite 0 0
304}
305
readonly MAX_CASES=100
#readonly MAX_CASES=3

by-code() {
  ### Run cases that vary by code snippet
  #
  # Args:
  #   $1: maximum number of cases to run (default: $MAX_CASES)

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    # bare return: the status is that of the echo above
    return
  fi

  local max_cases=${1:-$MAX_CASES}

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  write-sourced

  local suite='by-code'
  local cases=$BASE_DIR/${suite}-cases.txt

  number-cases > "$cases"
  # Each line is a case number, whitespace, then the code; read puts
  # everything after the number into code_str.
  head -n "$max_cases" "$cases" | while read -r num code_str; do
    echo
    echo '==='
    echo "$num $code_str"
    echo

    run-case "$num" "$code_str"
  done

  # omit total line
  count-lines "$suite"
  summarize "$suite" 3 0
}
341
by-code-cpp() {
  ### Build the C++ debug binaries, then re-run this script's by-code with
  ### OSH and YSH set to osh-cpp and ysh-cpp
  ninja _bin/cxx-dbg/osh _bin/cxx-dbg/ysh

  OSH=osh-cpp \
  YSH=ysh-cpp \
    $0 by-code "$@"
}
346
by-input-cpp() {
  ### Build the C++ debug binaries, then re-run this script's by-input with
  ### OSH and YSH set to osh-cpp and ysh-cpp
  ninja _bin/cxx-dbg/osh _bin/cxx-dbg/ysh

  OSH=osh-cpp \
  YSH=ysh-cpp \
    $0 by-input "$@"
}
351
syscall-py() {
  ### Run test/syscall.py with the current directory on PYTHONPATH,
  ### forwarding all arguments
  PYTHONPATH=. \
    test/syscall.py "$@"
}
355
write-sourced() {
  ### Create _tmp/sourced.sh, the file that the '. _tmp/sourced.sh' case reads
  #
  # The file holds two date commands and has no trailing newline.
  printf '%s' 'date; date' > _tmp/sourced.sh
}
359
count-lines() {
  ### Write the line count of every file in $RAW_DIR to
  ### $BASE_DIR/SUITE-counts.txt, in wc -l format
  #
  # Args:
  #   $1: suite name (default: by-code)

  local suite=${1:-by-code}

  # wc appends a 'total' line only when it is given more than one file.  The
  # awk program drops the last line in that case, but keeps the sole line when
  # there was a single file.  (head -n -1 would drop it, and is GNU-only.)
  ( cd "$RAW_DIR" && wc -l -- * ) \
    | awk '
        NR > 1 { print prev }
        { prev = $0 }
        END { if (NR == 1) print prev }
      ' > "$BASE_DIR/${suite}-counts.txt"
}
364
summarize() {
  ### Feed a suite's counts and cases to syscall-py and report OK or FAIL
  #
  # Args:
  #   $1: suite name (default: by-code)
  #   $2: value for --not-minimum (default: 0)
  #   $3: value for --more-than-bash (default: 0)
  #
  # The report is written to $BASE_DIR/SUITE.txt.  A non-zero status from the
  # pipeline is only printed as FAIL; it is not returned to the caller.

  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  local counts=$BASE_DIR/${suite}-counts.txt
  local cases=$BASE_DIR/${suite}-cases.txt
  local out=$BASE_DIR/${suite}.txt
  local status

  # errexit is switched off so that the status can be inspected, and switched
  # back on afterwards
  set +o errexit
  cat $counts \
    | syscall-py --not-minimum $not_minimum --more-than-bash $more_than_bash \
        $cases \
    > $out
  status=$?
  set -o errexit

  echo "Wrote $out"
  case $status in
    0) echo 'OK' ;;
    *) echo 'FAIL' ;;
  esac
}
386
run-for-release() {
  ### Run the two syscall suites
  #
  # Invoked as one of the "other" tests.  Soil runs by-code and by-input
  # separately.
  #
  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz

  local suite_func
  for suite_func in by-code by-input; do
    $suite_func
  done

  echo 'OK'
}
399
400#
401# Real World
402#
403# $ ls|grep dash|wc -l
404# 6098
405# $ ls|grep bash|wc -l
406# 6102
407# $ ls|grep osh|wc -l
408# 6098
409#
410# So Oil is already at dash level for CPython's configure, and bash isn't
411# far off. So autoconf-generated scripts probably already use constructs
412# that are already "optimal" in most shells.
413
414readonly PY27_DIR=$PWD/Python-2.7.13
415
cpython-configure() {
  ### Trace CPython's ./configure under bash, dash and osh
  #
  # Runs inside $PY27_DIR.  The traces are written to $PWD/$RAW_DIR/real,
  # with $PWD taken before changing directory.

  local raw_dir=$PWD/$RAW_DIR/real
  mkdir -p "$raw_dir"

  # Paths are quoted because they are built from $PWD, which may contain
  # spaces
  pushd "$PY27_DIR"
  #for sh in "${SHELLS[@]}"; do
  # Declared local so the loop variable doesn't leak into the caller's scope
  local sh
  for sh in bash dash osh; do
    echo "--- $sh"

    # TODO: Use a different dir
    count-procs "$raw_dir/cpython-$sh" "$sh" -c './configure'
  done
  popd
}
431
432task-five "$@"