#!/usr/bin/env bash
#
# Why is CPython configure slower under OSH?
#
# Usage:
#   benchmarks/autoconf.sh <function name>
#
# Examples:
#   $0 measure-alloc-overhead
#   $0 measure-syscalls
|
| 11 |
|
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory one level above this script's directory.
# "$0" is quoted so a checkout path containing spaces still resolves, and
# `&&` (instead of `;`) makes the assignment fail under errexit when the cd
# fails, rather than silently recording the current directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)

# NOTE: these paths are resolved against the current working directory, not
# $REPO_ROOT, so the script has to be started from the repository root.
source benchmarks/cachegrind.sh  # with-cachegrind
source benchmarks/callgrind.sh  # with-callgrind (used by measure-callgrind)
source test/tsv-lib.sh  # $TAB

# Output directory for measure-alloc-overhead, relative to $REPO_ROOT.
readonly BASE_DIR=_tmp/autoconf
# The configure script that every measurement in this file runs.
readonly PY_CONF=$REPO_ROOT/Python-2.7.13/configure
|
| 24 |
|
#
# Trying to measure allocation/GC overhead.  This was inconclusive, because
# bumproot is **slower** on bigger heaps: there's less cache locality!
#
|
| 29 |
|
# Print the benchmark tasks to stdout, one per line, in the form
#
#   <variant><TAB><relative path of the osh binary for that variant>
#
# Globals:
#   TAB (read) - field separator; must already be set by the caller.
cpython-configure-tasks() {
  local -a variants=( opt+bumpleak opt+bumproot opt )
  local v  # keep the loop variable out of the caller's scope

  # Quoted expansion: each array element stays exactly one word.
  for v in "${variants[@]}"; do
    echo "${v}${TAB}_bin/cxx-$v/osh"
  done
}
|
| 36 |
|
# Build every osh binary listed by cpython-configure-tasks, invoking ninja
# once per target.
#
# Only the second field of each task line (the binary path) is used; the
# variant label is read into `_` and discarded.
cpython-setup() {
  cpython-configure-tasks | while read -r _ osh; do
    # Quoted so the target is always passed to ninja as a single argument.
    ninja "$osh"
  done
}
|
| 42 |
|
# Run $PY_CONF under each osh variant from cpython-configure-tasks, wrapping
# each run in time-tsv with --rusage.
#
# Layout under $REPO_ROOT/$BASE_DIR/cpython-configure/ (deleted and recreated
# on every call):
#   <variant>/      working directory the configure run is started in
#   <variant>.tsv   file passed to time-tsv via --output
#
# Globals:
#   REPO_ROOT, BASE_DIR, PY_CONF (read)
#
# NOTE(review): time-tsv is defined outside this file; the first invocation
# (--print-header) presumably writes the column header row and the second
# (--append) the measurement row -- confirm against its implementation.
measure-alloc-overhead() {
  local base_dir=$REPO_ROOT/$BASE_DIR/cpython-configure
  # All paths below are quoted: they derive from $REPO_ROOT, which may contain
  # spaces, and an unquoted `rm -r` could otherwise hit unrelated paths.
  rm -r -f -v -- "$base_dir"

  cpython-configure-tasks | while read -r variant osh; do
    # Task lines carry a repo-relative binary path; make it absolute because
    # we change directory below.
    osh=$REPO_ROOT/$osh

    local task_dir=$base_dir/$variant

    mkdir -p "$task_dir"
    pushd "$task_dir" > /dev/null

    # Flags shared by the header and the measurement invocation.
    local -a flags=(
      --output "$base_dir/$variant.tsv"
      --rusage
    )

    local -a time_argv

    # First invocation: no command to run; 'variant' names the extra column.
    time_argv=(
      time-tsv --print-header
      "${flags[@]}"
      --field variant
    )
    "${time_argv[@]}"

    # Second invocation: run configure under this osh variant.
    time_argv=(
      time-tsv --append
      "${flags[@]}"
      --field "$variant"
      -- "$osh" "$PY_CONF"
    )

    #echo "${time_argv[@]}"
    "${time_argv[@]}"

    popd > /dev/null

  done
}
|
| 83 |
|
#
# Next approach: count syscalls with strace
#
|
| 87 |
|
# Emit the shells to trace, one record per line:
#
#   <label><TAB><command name or path of the shell>
#
# bash and dash are given as bare command names; osh is the cxx-opt binary
# under $REPO_ROOT.
strace-tasks() {
  printf '%s\n' \
    "bash${TAB}bash" \
    "dash${TAB}dash" \
    "osh${TAB}$REPO_ROOT/_bin/cxx-opt/osh"
}
|
| 93 |
|
# Run $PY_CONF under every shell listed by strace-tasks, with `strace -c`
# writing its report to a per-shell file.
#
# Layout under $REPO_ROOT/_tmp/strace/ :
#   <label>/      working directory the configure run is started in
#   <label>.txt   strace output file (-o)
#
# Globals:
#   REPO_ROOT, PY_CONF (read)
measure-syscalls() {
  local base_dir=$REPO_ROOT/_tmp/strace
  strace-tasks | while read -r sh_label sh_path; do
    local dir=$base_dir/$sh_label
    mkdir -p "$dir"

    local counts=$base_dir/$sh_label.txt

    # Every path is quoted: they derive from $REPO_ROOT, which may contain
    # spaces.  pushd/popd are left unsilenced, as before.
    pushd "$dir"
    strace -o "$counts" -c "$sh_path" "$PY_CONF"
    popd
  done
}
|
| 107 |
|
#
# Cachegrind and Callgrind
#
|
| 111 |
|
# Build the dbg osh binary, then run $PY_CONF under it through a valgrind
# wrapper function.
#
# Arguments:
#   $1 - name of the wrapper to call (this file passes with-cachegrind or
#        with-callgrind); it is invoked as: <wrapper> <out_file> <osh> <script>
#
# Layout under $REPO_ROOT/_tmp/<wrapper>/ :
#   cpython-configure/      working directory (deleted and recreated)
#   cpython-configure.txt   output file handed to the wrapper
#
# Globals:
#   REPO_ROOT, PY_CONF (read)
measure-valgrind() {
  local tool=$1

  # opt seems to give OK results, but I thought dbg was more accurate
  #local osh_target=_bin/cxx-opt/osh
  local osh_target=_bin/cxx-dbg/osh

  ninja "$osh_target"

  # Absolute path, because we change directory before running the tool.
  local osh=$REPO_ROOT/$osh_target

  local base_dir=$REPO_ROOT/_tmp/$tool

  local dir=$base_dir/cpython-configure
  # Quoted: an unquoted path with spaces would make rm -r act on other paths.
  rm -r -f -v -- "$dir"

  local out_file=$base_dir/cpython-configure.txt

  mkdir -v -p "$dir"

  pushd "$dir"
  "$tool" "$out_file" "$osh" "$PY_CONF"
  popd
}
|
| 136 |
|
# Entry point: run the configure measurement with the with-cachegrind wrapper.
measure-cachegrind() {
  local tool=with-cachegrind
  measure-valgrind "$tool"
}
|
| 140 |
|
# Entry point: run the configure measurement with the with-callgrind wrapper
# and report how long the whole run took.
measure-callgrind() {
  local tool=with-callgrind

  # This takes ~5 minutes with opt binary, ~6:43 with dbg
  # vs ~15 seconds uninstrumented
  time measure-valgrind "$tool"
}
|
| 146 |
|
# Dispatch: execute the script's arguments as a command, so that
# `benchmarks/autoconf.sh <function name> [args...]` calls that function.
"$@"
|