OILS / benchmarks / autoconf.sh View on Github | oilshell.org

598 lines, 242 significant
1#!/usr/bin/env bash
2#
3# Why is CPython configure slower under OSH?
4#
5# Usage:
6# benchmarks/autoconf.sh <function name>
7#
8# Examples:
9# $0 patch-pyconf # times builtin
10# $0 measure-times # time-tsv with gc stats
11# $0 report-times # times builtin
12#
13# $0 measure-syscalls
14# $0 report-syscalls
15# $0 report-processes
16# $0 report-external
17#
18# Simpler:
19# $0 measure-rusage # time-tsv
20
21set -o nounset
22set -o pipefail
23set -o errexit
24
25REPO_ROOT=$(cd "$(dirname $0)/.."; pwd)
26
27source benchmarks/cachegrind.sh # with-cachegrind
28source benchmarks/callgrind.sh # with-cachegrind
29source test/tsv-lib.sh # $TAB
30
31readonly BASE_DIR_RELATIVE=_tmp/autoconf
32readonly BASE_DIR=$REPO_ROOT/$BASE_DIR_RELATIVE
33readonly PY_CONF=$REPO_ROOT/Python-2.7.13/configure
34
35#
36# Trying to measure allocation/GC overhead
37#
38# This DOES NOT HELP because bumpleak/bumproot are **slower** on bigger heaps.
39# There's less cache locality!
40#
41
42cpython-configure-tasks() {
43 local -a variants=( opt+bumpleak opt+bumproot opt )
44 for v in ${variants[@]}; do
45 echo "${v}${TAB}_bin/cxx-$v/osh"
46 done
47}
48
49cpython-setup() {
50 cpython-configure-tasks | while read -r _ osh; do
51 ninja $osh
52 done
53}
54
55measure-alloc-overhead() {
56 local base_dir=$BASE_DIR/alloc-overhead
57 rm -r -f -v $base_dir
58
59 cpython-configure-tasks | while read -r variant osh; do
60 osh=$REPO_ROOT/$osh
61
62 local task_dir=$base_dir/$variant
63
64 mkdir -p $task_dir
65 pushd $task_dir > /dev/null
66
67 local -a flags=(
68 --output "$base_dir/$variant.tsv"
69 --rusage
70 )
71
72 local -a time_argv
73
74 time_argv=(
75 time-tsv --print-header
76 "${flags[@]}"
77 --field variant
78 )
79 "${time_argv[@]}"
80
81 time_argv=(
82 time-tsv --append
83 "${flags[@]}"
84 --field "$variant"
85 -- $osh $PY_CONF
86 )
87
88 #echo "${time_argv[@]}"
89 "${time_argv[@]}"
90
91 popd > /dev/null
92
93 done
94}
95
96#
97# Compare bash/dash/osh locally
98#
99
100measure-rusage() {
101 local base_dir=$BASE_DIR/rusage
102 rm -r -f -v $base_dir
103
104 shell-tasks | while read -r sh_label sh_path; do
105
106 local task_dir=$base_dir/$sh_label
107
108 mkdir -p $task_dir
109 pushd $task_dir > /dev/null
110
111 local -a flags=(
112 --output "$base_dir/$sh_label.tsv"
113 --rusage
114 )
115
116 local -a time_argv
117
118 time_argv=(
119 time-tsv --print-header
120 "${flags[@]}"
121 --field sh_label
122 )
123 "${time_argv[@]}"
124
125 time_argv=(
126 time-tsv --append
127 "${flags[@]}"
128 --field "$sh_label"
129 -- $sh_path $PY_CONF
130 )
131
132 #echo "${time_argv[@]}"
133 "${time_argv[@]}"
134
135 popd > /dev/null
136
137 done
138}
139
140#
141# Now try strace
142#
143
144shell-tasks() {
145 echo "bash${TAB}bash"
146 echo "dash${TAB}dash"
147 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt/osh"
148}
149
150measure-syscalls() {
151 local osh=_bin/cxx-opt/osh
152 #local osh=_bin/cxx-dbg/osh
153
154 ninja $osh
155
156 local base_dir=$BASE_DIR/syscalls
157
158 rm -r -f -v $base_dir
159
160 shell-tasks | while read -r sh_label sh_path; do
161 local dir=$base_dir/$sh_label
162 mkdir -p $dir
163
164 local counts_dir=$base_dir/$sh_label
165 mkdir -p $counts_dir
166
167 pushd $dir
168 #strace -o $counts -c $sh_path $PY_CONF
169 # See how many external processes are started?
170 #strace -o $counts -ff -e execve $sh_path $PY_CONF
171 strace -o $counts_dir/syscalls -ff $sh_path $PY_CONF
172 popd
173 done
174}
175
176# --- _tmp/autoconf/bash
177# 6047
178# 4621
179# --- _tmp/autoconf/dash
180# 6088
181# 4627
182# --- _tmp/autoconf/osh
183# 5691
184# 4631
185#
186# Woah we start fewer processes! But are not faster?
187
188grep-exec() {
189 egrep --no-filename -o 'execve\("[^"]+' "$@"
190}
191
192# andy@hoover:~/git/oilshell/oil$ benchmarks/autoconf.sh report-syscalls
193# --- _tmp/autoconf/syscalls/bash
194# 2592 _tmp/autoconf/syscalls/bash/syscalls.903220
195# 2608 _tmp/autoconf/syscalls/bash/syscalls.898727
196# 2632 _tmp/autoconf/syscalls/bash/syscalls.898387
197# 2679 _tmp/autoconf/syscalls/bash/syscalls.898292
198# 2853 _tmp/autoconf/syscalls/bash/syscalls.898927
199# 2873 _tmp/autoconf/syscalls/bash/syscalls.898334
200# 2920 _tmp/autoconf/syscalls/bash/syscalls.898895
201# 3204 _tmp/autoconf/syscalls/bash/syscalls.898664
202# 112549 _tmp/autoconf/syscalls/bash/syscalls.897471
203# 1360223 total
204#
205# --- _tmp/autoconf/syscalls/dash
206# 2592 _tmp/autoconf/syscalls/dash/syscalls.909344
207# 2607 _tmp/autoconf/syscalls/dash/syscalls.904921
208# 2630 _tmp/autoconf/syscalls/dash/syscalls.904581
209# 2683 _tmp/autoconf/syscalls/dash/syscalls.904486
210# 2851 _tmp/autoconf/syscalls/dash/syscalls.905109
211# 2873 _tmp/autoconf/syscalls/dash/syscalls.904528
212# 2920 _tmp/autoconf/syscalls/dash/syscalls.905088
213# 3204 _tmp/autoconf/syscalls/dash/syscalls.904858
214# 112922 _tmp/autoconf/syscalls/dash/syscalls.903626
215# 1372118 total
216#
217# --- _tmp/autoconf/syscalls/osh
218# 2592 _tmp/autoconf/syscalls/osh/syscalls.915226
219# 2607 _tmp/autoconf/syscalls/osh/syscalls.910993
220# 2630 _tmp/autoconf/syscalls/osh/syscalls.910647
221# 2679 _tmp/autoconf/syscalls/osh/syscalls.910561
222# 2851 _tmp/autoconf/syscalls/osh/syscalls.911162
223# 2873 _tmp/autoconf/syscalls/osh/syscalls.910599
224# 2920 _tmp/autoconf/syscalls/osh/syscalls.911143
225# 3204 _tmp/autoconf/syscalls/osh/syscalls.910936
226# 72921 _tmp/autoconf/syscalls/osh/syscalls.909769
227# 1211074 total
228
229report-processes() {
230 for sh_dir in $BASE_DIR_RELATIVE/syscalls/*; do
231 echo "--- $sh_dir"
232 ls $sh_dir/* | wc -l
233 grep-exec $sh_dir/syscalls.* | wc -l
234 echo
235
236 done
237}
238
239report-external() {
240 local n=${1:-5}
241
242 for sh_dir in $BASE_DIR_RELATIVE/syscalls/*; do
243 echo "--- $sh_dir"
244
245 grep-exec $sh_dir/syscalls.* | sort | uniq -c | sort -n | tail -n $n
246 echo
247 done
248}
249
250report-syscalls() {
251 # Hm this is instructive, the shell itself makes the most syscalls
252 # And fewer than other shells?
253
254 for sh_dir in $BASE_DIR_RELATIVE/syscalls/*; do
255 echo "--- $sh_dir"
256 wc -l $sh_dir/syscalls.* | sort -n | tail
257 echo
258 done
259}
260
261#
262# Cachegrind
263#
264
265measure-valgrind() {
266 local tool=$1
267
268 # opt seems to give OK results, but I thought dbg was more accurate
269 #local osh=_bin/cxx-opt/osh
270 local osh=_bin/cxx-dbg/osh
271
272 ninja $osh
273
274 local osh=$REPO_ROOT/$osh
275
276 local base_dir=$REPO_ROOT/_tmp/$tool
277
278 local dir=$base_dir/cpython-configure
279 rm -r -f -v $dir
280
281 local out_file=$base_dir/cpython-configure.txt
282
283 mkdir -v -p $dir
284
285 pushd $dir
286 $tool $out_file $osh $PY_CONF
287 popd
288}
289
290measure-cachegrind() {
291 measure-valgrind with-cachegrind
292}
293
294measure-callgrind() {
295 # This takes ~5 minutes with opt binary, ~6:43 with dbg
296 # vs ~15 seconds uninstrumented
297 time measure-valgrind with-callgrind
298}
299
300# Note:
301# benchmarks/osh-runtime.sh compares our release, which does not have #ifdef
302# GC_TIMING, so we don't know total GC time.
303
304# TODO:
305#
306# - Run locally, reproduce GC_TIMING - this is not in the release build
307# - it seems to say only 143 ms total GC time, but we're seeing 1.5+ seconds
308# slowdown on Cpython configure vs. bash
309# - I want a local run that automates it, and returns PERCENTAGES for elapsed
310# time, sys time, user time
311# - We also might not want to amortize free() inside Allocate()
312# - #ifdef LAZY_FREE I think! That might show a big slowdown with free
313
314patch-pyconf() {
315 #sed -i $'s/ac_compile=\'$CC/ac_compile=\'times; $CC/g' $PY_CONF
316
317 # temporary
318 echo 'times > $SH_BENCHMARK_TIMES' >> $PY_CONF
319}
320
321measure-times() {
322 local osh=_bin/cxx-opt/osh
323 ninja $osh
324
325 local base_dir=$BASE_DIR/times
326 rm -r -f -v $base_dir
327
328 local trace_dir=$base_dir/oils-trace
329 mkdir -p $trace_dir
330
331 shell-tasks | while read -r sh_label sh_path; do
332 #case $sh_label in bash|dash) continue ;; esac
333
334 local dir=$base_dir/$sh_label
335 mkdir -p $dir
336
337 pushd $dir
338
339 local -a flags=(
340 --output "$base_dir/$sh_label.tsv"
341 --rusage
342 )
343
344 local -a time_argv
345
346 time_argv=(
347 time-tsv --print-header
348 "${flags[@]}"
349 --field sh_label
350 )
351 "${time_argv[@]}"
352
353 time_argv=(
354 time-tsv --append
355 "${flags[@]}"
356 --field "$sh_label"
357 -- $sh_path $PY_CONF
358 )
359
360 #echo "${time_argv[@]}"
361
362 # 1269 argv0.json files created
363 # we can miss some via NOLASTFORK optimization
364 #OILS_TRACE_DIR=$trace_dir \
365
366 _OILS_GC_VERBOSE=1 OILS_GC_STATS_FD=99 \
367 SH_BENCHMARK_TIMES=$base_dir/$sh_label.times.txt \
368 "${time_argv[@]}" \
369 99>$base_dir/$sh_label.gc-stats.txt
370
371 popd
372 done
373}
374
375inner-long-tsv() {
376 python2 -c '
377import os, re, sys
378
379def PrintRow(row):
380 print("\t".join(row))
381
382PrintRow(["shell", "who", "what", "seconds"])
383
384for path in sys.argv[1:]:
385 filename = os.path.basename(path)
386 shell = filename.split(".")[0]
387
388 f = open(path)
389 s = f.read()
390
391 secs = re.findall("0m([0-9.]+)s", s)
392 assert len(secs) == 4, secs
393
394 PrintRow([shell, "self", "user", secs[0]])
395 PrintRow([shell, "self", "sys", secs[1]])
396 PrintRow([shell, "child", "user", secs[2]])
397 PrintRow([shell, "child", "sys", secs[3]])
398
399 # Non-normalized, but OK
400 total_secs = sum(float(s) for s in secs)
401 PrintRow([shell, "both", "both", str(total_secs)])
402
403 ' $BASE_DIR/times/*.times.txt
404}
405
406compare-dim() {
407 # 8% more child system time
408 local who=${1:-child}
409 local what=${2:-user}
410
411 echo "=== $who $what ==="
412
413 # Annoying
414 # https://www.math.utah.edu/docs/info/gawk_8.html
415 # "If, for some reason, you need to force a number to be converted to a
416 # string, concatenate the empty string, "", with that number. To force a
417 # string to be converted to a number, add zero to that string."
418
419 cat $BASE_DIR/times-long.tsv | awk -v "who=$who" -v "what=$what" '
420 BEGIN {
421 TAB = "\t"
422
423 i = 0
424
425 printf "%s\t%s\t%s\t%s\n", "shell", "secs", "ratio", "diff secs"
426 }
427 $2 == who && $3 == what {
428 if (i == 0) {
429 first_secs = $4 + 0
430 }
431 i++
432
433 secs = $4 + 0
434 ratio = secs / first_secs
435 diff = secs - first_secs
436
437 # Need commas for OFMT to work correctly?
438 printf "%s\t%5.3f\t%5.3f\t%5.3f\n", $1, secs, ratio, diff
439 }
440 '
441
442 echo
443}
444
445compare-times() {
446 log "INNER"
447 log ''
448
449 compare-dim self user
450
451 compare-dim self sys
452
453 compare-dim child user
454
455 compare-dim child sys
456
457 compare-dim both both
458
459 # outer
460 log "OUTER"
461 log ''
462
463 compare-dim both elapsed
464
465 # These kinda match
466 return
467 compare-dim both user
468 compare-dim both sys
469}
470
471outer-long-tsv() {
472 log "=== outer times ==="
473 awk '
474 BEGIN {
475 i = 0
476
477 printf "%s\t%s\t%s\t%s\n", "shell", "who", "what", "seconds"
478 }
479 i == 0 {
480 #print "Skipping header"
481 i++
482 next
483 }
484 i >= 1 {
485 elapsed = $2 + 0
486 user = $3 + 0
487 sys = $4 + 0
488 sh_label = $6
489
490 printf "%s\t%s\t%s\t%5.3f\n", sh_label, "both", "elapsed", elapsed
491 printf "%s\t%s\t%s\t%5.3f\n", sh_label, "both", "user", user
492 printf "%s\t%s\t%s\t%5.3f\n", sh_label, "both", "sys", sys
493
494 i++
495 }
496 ' $BASE_DIR/outer-wide.tsv
497}
498
499report-times() {
500 head $BASE_DIR/times/*.tsv
501 echo
502 head $BASE_DIR/times/*.times.txt
503 echo
504
505 inner-long-tsv | tee $BASE_DIR/inner-long.tsv
506 echo
507
508 tsv-concat $BASE_DIR/times/*.tsv | tee $BASE_DIR/outer-wide.tsv
509 outer-long-tsv | tee $BASE_DIR/outer-long.tsv
510 echo
511
512 tsv-concat $BASE_DIR/{inner,outer}-long.tsv | tee $BASE_DIR/times-long.tsv
513
514 compare-times
515}
516
517### Why is clone() taking longer according to strace?
518
519fork-tasks() {
520 echo "bash${TAB}bash"
521 echo "dash${TAB}dash"
522
523 # Hm this is noisy, but cxx-opt-sh does seem slower
524 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt/osh"
525 echo "osh${TAB}$REPO_ROOT/_bin/cxx-opt-sh/osh"
526}
527
528measure-fork() {
529 fork-tasks | while read -r sh_label sh_path; do
530 #case $sh_label in bash|dash) continue ;; esac
531
532 echo "=== $sh_path ==="
533
534 # Builtin is very fast
535 #time $sh_path -c 'for i in $(seq 100); do true; done'
536
537 # Hm this is very noisy
538 # TODO use hyperfine?
539 time $sh_path -c 'for i in $(seq 100); do /bin/true; done'
540
541 case $sh_label in
542 osh)
543 # Oops, we are not symlinking to the .stripped binary!
544 # This is explicitly done for symbols and benchmarking.
545 # Hm does that make it slower then?
546
547 ls -l -L $sh_path
548 ldd $sh_path
549 ;;
550 esac
551 done
552}
553
554# $ head _tmp/elapsed/*.times.txt
555# ==> _tmp/elapsed/bash.times.txt <==
556# 0m0.213s 0m0.477s
557# 0m8.233s 0m2.931s
558#
559# ==> _tmp/elapsed/dash.times.txt <==
560# 0m0.217s 0m0.463s
561# 0m8.281s 0m2.922s
562#
563# ==> _tmp/elapsed/osh.times.txt <==
564# 0m0.360s 0m0.720s
565# 0m8.790s 0m2.960s
566
567# shell user time - GC and allocs
568# shell system time - ???
569# child user time - ???
570# TODO: count how many processes this is.
571# It's more than 500 ms
572# Is that 500 processes, and 1 ms per process?
573
574fork-time() {
575 local osh=_bin/cxx-opt/osh
576
577 $osh -c 'time for i in {1..1000}; do true; done'
578 echo
579
580 $osh -c 'time for i in {1..1000}; do ( true ); done'
581 echo
582
583 # Does this increase fork time or no?
584 # Hm I can only get the forks up to 306ms for 1000, or 300 us
585 # But the HereDocWriter does dup() and so forth
586 $osh -c '
587echo ysh-parse
588time for i in {1..40}; do
589 . test/ysh-parse-errors.sh
590done
591times
592time for i in {1..1000}; do
593 ( true )
594done'
595 echo
596}
597
598"$@"