OILS / test / syscall.sh View on Github | oilshell.org

499 lines, 210 significant
#!/usr/bin/env bash
#
# Measure the number of syscalls that shells use.
#
# Usage:
#   test/syscall.sh <function name>

: "${LIB_OSH=stdlib/osh}"
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"

source build/dev-shell.sh

# Names of the Oils shells to measure.  count-procs maps the names osh, ysh,
# osh-cpp and ysh-cpp to the command that actually gets traced.
OSH=${OSH:-osh}
YSH=${YSH:-ysh}

#readonly -a SHELLS=(dash bash-4.4 bash $OSH)

# Compare bash 4 vs. bash 5
SHELLS=(dash bash-4.4 bash-5.2.21 mksh zsh ash "$OSH")

# Quoted so that each element of SHELLS stays a single word.
SHELLS_MORE=( "${SHELLS[@]}" yash )

# yash does something fundamentally different in by-code.wrapped - it
# understands functions
#SHELLS+=(yash)

readonly BASE_DIR='_tmp/syscall'  # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw'  # Raw data

# Run it against the dev version of OSH
REPO_ROOT=$(cd "$(dirname "$0")/.."; pwd)
count-procs() {
  ### Run a shell under strace, writing one trace file per process
  #
  # Args:
  #   $1 out_prefix - passed to strace -o; with -ff each process is traced
  #                   into its own file named <out_prefix>.<pid>
  #   $2 sh         - shell name; osh, ysh, osh-cpp and ysh-cpp are mapped to
  #                   the commands below, anything else is run as given
  #   $3...         - arguments passed to the shell
  #
  # Always returns 0: a failing shell or strace is ignored on purpose.

  local out_prefix=$1
  local sh=$2
  shift 2

  # The command is built as an array rather than a string, so it does not
  # depend on word splitting and works when REPO_ROOT contains spaces.
  local -a argv
  case $sh in
    # avoid the extra processes that bin/osh starts!
    #(X)  # to compare against osh 0.8.pre3 installed
    osh|ysh)
      argv=(
        env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
        "$REPO_ROOT/bin/oils_for_unix.py" "$sh"
      )
      ;;
    osh-cpp)
      argv=(_bin/cxx-dbg/osh)
      ;;
    ysh-cpp)
      argv=(_bin/cxx-dbg/ysh)
      ;;
    *)
      argv=("$sh")
      ;;
  esac

  # Ignore failure, because we are just counting
  strace -ff -o "$out_prefix" -- "${argv[@]}" "$@" || true
}
60
run-case() {
  ### Run a test case with many shells
  #
  # Args:
  #   $1 num       - case number; part of each trace file prefix
  #   $2 code_str  - code handed to every shell via -c
  #   $3 func_wrap - optional.  When non-empty, the code is wrapped in a shell
  #                  function and only SHELLS is used; otherwise SHELLS_MORE.

  local num=$1
  local code_str=$2
  local func_wrap=${3:-}

  local -a shells
  if test -n "$func_wrap"; then
    code_str="wrapper() { $code_str; }; wrapper"
    shells=( "${SHELLS[@]}" )
  else
    shells=( "${SHELLS_MORE[@]}" )
  fi

  # Declared local so the loop variable doesn't leak into the caller's scope
  local sh out_prefix
  for sh in "${shells[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" -c "$code_str"
  done
}
82
run-case-file() {
  ### Like run-case, but the shell reads the code from a file
  #
  # Args:
  #   $1 num      - case number; names the script _tmp/<num>.sh and the
  #                 trace file prefix
  #   $2 code_str - written to the script byte-for-byte (no newline is added)

  local num=$1
  local code_str=$2
  local script=_tmp/$num.sh

  # printf rather than echo -n, so code that looks like an echo option
  # (e.g. '-e') is still written literally
  printf '%s' "$code_str" > "$script"

  # Declared local so the loop variable doesn't leak into the caller's scope
  local sh out_prefix
  for sh in "${SHELLS_MORE[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" "$script"
  done
}
97
run-case-stdin() {
  ### Like run-case, but the shell reads the code from a pipe on stdin
  #
  # Args:
  #   $1 num      - case number; part of each trace file prefix
  #   $2 code_str - piped to the shell byte-for-byte (no newline is added)

  local num=$1
  local code_str=$2

  # Declared local so the loop variable doesn't leak into the caller's scope
  local sh out_prefix
  for sh in "${SHELLS_MORE[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    # printf rather than echo -n, so code that looks like an echo option is
    # still sent literally
    printf '%s' "$code_str" | count-procs "$out_prefix" "$sh"
  done
}
110
print-cases() {
  ### Print the code snippets to measure, one per line
  #
  # Blank lines and lines starting with '#' in the here-doc are annotations
  # and are filtered out.  The case numbers are not part of this output;
  # number-cases adds them.
  #
  # The here-doc delimiter is unquoted, so a literal $ has to be written as \$.

  # grep -E instead of egrep, which newer GNU grep reports as obsolescent
  grep -E -v '^[[:space:]]*(#|$)' <<EOF

# builtin
echo hi

# external command
date

# OSH calls this "sentence"
date ;

# trap - bash has special logic for this
trap 'echo mytrap' EXIT; date

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

echo hi; (date;)

echo hi; (echo hi;)

echo hi; (echo hi; date)

( echo hi ); echo hi

# Sentence in Oil
(date;) > /tmp/out.txt

(date; echo hi)

# command sub
echo \$(date)

# command sub with builtin
echo \$(echo hi)

# command sub with useless subshell (some scripts use this)
echo \$( ( date ) )

# command sub with other subshell
echo \$( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# negated
! date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# negated
! command date | wc -l

# 3 processes for all?
# osh gives FIVE??? But others give 3. That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here? That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ??? wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here. That is a crazy optimization. I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too. 3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3. So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

( echo a; echo b ) | ( wc -l )

{ echo prefix; ( echo a; echo b ); } | ( wc -l )

echo hi & wait

date & wait

echo hi | wc -l & wait

date | wc -l & wait

trap 'echo mytrap' EXIT; date & wait

trap 'echo mytrap' EXIT; date | wc -l & wait

# trap in SubProgramThunk
{ trap 'echo mytrap' EXIT; date; } & wait
EOF

# Discarded because they're identical
# pipeline with redirect last
#date | wc -l > /tmp/out.txt

# pipeline with redirect first
#date 2>&1 | wc -l

}
252
number-cases() {
  ### Prefix every line of print-cases with its case number
  #
  # Numbers are right justified with leading zeros in a field of width 2.
  # (Wish nl simply took a %02d format.)
  print-cases | nl -n rz -w 2
}
258
by-input() {
  ### Run cases that vary by input reader
  #
  # The same six snippets are given to the shells three ways: as a -c
  # argument (cases 50-55), as a script file (60-65) and on stdin (70-75).
  # The descriptions written to cases.by-input.txt must stay in step with
  # the run-case* calls below.

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local suite='by-input'

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases!  2 processes instead of 3.
  run-case 50 "$zero"
  run-case 51 "$one"
  run-case 52 "$two"
  run-case 53 "$comment"
  run-case 54 "$newline"
  run-case 55 "$newline2"

  run-case-file 60 "$zero"
  run-case-file 61 "$one"
  run-case-file 62 "$two"
  run-case-file 63 "$comment"
  run-case-file 64 "$newline"  # labelled 'file: newline' below
  run-case-file 65 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  run-case-stdin 70 "$zero"
  run-case-stdin 71 "$one"
  run-case-stdin 72 "$two"
  run-case-stdin 73 "$comment"
  run-case-stdin 74 "$newline"  # labelled 'stdin: newline' below
  run-case-stdin 75 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
50 -c: zero lines
51 -c: one line
52 -c: one line and comment
53 -c: comment first
54 -c: newline
55 -c: newline2
60 file: zero lines
61 file: one line
62 file: one line and comment
63 file: comment first
64 file: newline
65 file: newline2
70 stdin: zero lines
71 stdin: one line
72 stdin: one line and comment
73 stdin: comment first
74 stdin: newline
75 stdin: newline2
EOF

  count-lines "$suite"
  summarize "$suite" 3 0
}
335
# Quick hack: every shell uses 2 processes for this... doesn't illuminate much.
weird-command-sub() {
  ### Measure the $(< file) form of command substitution
  #
  # The snippets are built with \$ so that the command substitution reaches
  # the shells under test literally.  Without the backslash, this script
  # would expand it first and each shell would only be handed the result.

  shopt -s nullglob
  rm -r -f -v "$RAW_DIR"/*

  local tmp=_tmp/cs
  echo FOO > "$tmp"
  run-case 60 "echo \$(< $tmp)"
  run-case 61 "echo \$(< $tmp; echo hi)"

  local suite=weird-command-sub

  cat >"$BASE_DIR/cases.${suite}.txt" <<EOF
60 \$(< file)
61 \$(< file; echo hi)
EOF

  count-lines "$suite"
  summarize "$suite" 0 0
}
356
readonly MAX_CASES=100
#readonly MAX_CASES=3

by-code() {
  ### Run cases that vary by code snippet
  #
  # Args:
  #   $1 func_wrap - optional.  When non-empty, every snippet is wrapped in a
  #                  shell function and the suite is named 'by-code-wrapped';
  #                  otherwise the suite is 'by-code'.
  #   $2 max_cases - optional.  Only the first N cases are run
  #                  (default: $MAX_CASES).
  #
  # max_cases used to be read from $1 as well, so 'by-code T' ended up
  # running 'head -n T'.

  local func_wrap=${1:-}
  local max_cases=${2:-$MAX_CASES}

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  write-sourced

  local suite
  if test -n "$func_wrap"; then
    suite='by-code-wrapped'
  else
    suite='by-code'
  fi

  local cases=$BASE_DIR/cases.${suite}.txt

  number-cases > "$cases"

  # Each line is a case number, whitespace, then the code, which may itself
  # contain spaces
  local num code_str
  head -n "$max_cases" "$cases" | while read -r num code_str; do
    echo
    echo '==='
    echo "$num     $code_str"
    echo

    run-case "$num" "$code_str" "$func_wrap"
  done

  # omit total line
  count-lines "$suite"
  summarize "$suite" 3 0
}
399
by-code-cpp() {
  ### Build the debug C++ osh and ysh, then run by-code against them
  #
  # This script is re-invoked with OSH and YSH set to the -cpp names;
  # any arguments are forwarded to by-code.
  local -a binaries=(_bin/cxx-dbg/osh _bin/cxx-dbg/ysh)
  ninja "${binaries[@]}"

  OSH=osh-cpp YSH=ysh-cpp \
    $0 by-code "$@"
}
404
by-input-cpp() {
  ### Build the debug C++ osh and ysh, then run by-input against them
  #
  # This script is re-invoked with OSH and YSH set to the -cpp names;
  # any arguments are forwarded to by-input.
  local -a binaries=(_bin/cxx-dbg/osh _bin/cxx-dbg/ysh)
  ninja "${binaries[@]}"

  OSH=osh-cpp YSH=ysh-cpp \
    $0 by-input "$@"
}
409
syscall-py() {
  ### Run test/syscall.py with PYTHONPATH set to the current directory
  #
  # All arguments are forwarded unchanged.
  local script=test/syscall.py
  PYTHONPATH=. $script "$@"
}
413
write-sourced() {
  ### Create _tmp/sourced.sh, the file read by the '. _tmp/sourced.sh' case
  #
  # The file holds 'date; date' with no trailing newline.

  # Make sure the directory exists, so this also works as a standalone task
  mkdir -p _tmp
  printf '%s' 'date; date' > _tmp/sourced.sh
}
417
count-lines() {
  ### Write the line count of every file in $RAW_DIR to wc.<suite>.txt
  #
  # Args:
  #   $1 suite - names the output file $BASE_DIR/wc.<suite>.txt
  #              (default: by-code)
  #
  # Each output line is a count followed by a file name, as printed by wc -l.

  local suite=${1:-by-code}
  local out=$BASE_DIR/wc.${suite}.txt

  # The redirect is opened before the cd, so a relative BASE_DIR still works.
  (
    cd "$RAW_DIR" || exit 1

    files=( * )
    if test "${#files[@]}" -gt 1; then
      # wc appends a 'total' line only when given more than one file; drop it
      wc -l -- "${files[@]}" | head -n -1
    else
      wc -l -- "${files[@]}"
    fi
  ) > "$out"
}
422
summarize() {
  ### Pipe a suite's line counts through syscall-py and print OK or FAIL
  #
  # Args:
  #   $1 suite          - selects wc.<suite>.txt and cases.<suite>.txt in
  #                       $BASE_DIR (default: by-code)
  #   $2 not_minimum    - value for --not-minimum (default: 0)
  #   $3 more_than_bash - value for --more-than-bash (default: 0)

  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  local counts=$BASE_DIR/wc.${suite}.txt
  local cases=$BASE_DIR/cases.${suite}.txt

  local -a report_args=(
    --not-minimum $not_minimum
    --more-than-bash $more_than_bash
    --suite $suite
    $cases
    $BASE_DIR
  )

  # errexit is switched off around the pipeline so that its exit status can
  # be inspected rather than aborting the script
  set +o errexit
  cat $counts | syscall-py "${report_args[@]}"
  local status=$?
  set -o errexit

  case $status in
    0) echo 'OK' ;;
    *) echo 'FAIL' ;;
  esac
}
445
soil-run() {
  ### Run by-code (plain, then wrapped) followed by by-input
  #
  # Invoked as one of the "other" tests.  Soil runs by-code and by-input
  # separately.
  #
  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz

  local wrap
  for wrap in '' T; do
    # Left unquoted on purpose: the empty value must expand to no argument,
    # so the first pass is a plain 'by-code' and the second is 'by-code T'
    by-code $wrap
  done

  by-input

  echo 'OK'
}
460
run-for-release() {
  ### Release entry point: runs exactly what soil-run runs

  soil-run
}
466
467#
468# Real World
469#
470# $ ls|grep dash|wc -l
471# 6098
472# $ ls|grep bash|wc -l
473# 6102
474# $ ls|grep osh|wc -l
475# 6098
476#
477# So Oil is already at dash level for CPython's configure, and bash isn't
478# far off. So autoconf-generated scripts probably already use constructs
479# that are already "optimal" in most shells.
480
readonly PY27_DIR=$PWD/Python-2.7.13

cpython-configure() {
  ### Trace CPython's ./configure under bash, dash and osh
  #
  # Trace files go to $PWD/$RAW_DIR/real/cpython-<shell>.*; the path is made
  # absolute up front because ./configure is run from inside $PY27_DIR.

  local raw_dir=$PWD/$RAW_DIR/real
  mkdir -p "$raw_dir"

  pushd "$PY27_DIR"

  # Declared local so the loop variable doesn't leak into the caller's scope
  local sh out_prefix
  #for sh in "${SHELLS[@]}"; do
  for sh in bash dash osh; do
    out_prefix=$raw_dir/cpython-$sh
    echo "--- $sh"

    # TODO: Use a different dir
    count-procs "$out_prefix" "$sh" -c './configure'
  done

  popd
}
498
499task-five "$@"