1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Usage:
|
4 | # benchmarks/uftrace.sh <function name>
|
5 | #
|
6 | # Examples:
|
7 | # benchmarks/uftrace.sh record-oils-cpp
|
8 | # benchmarks/uftrace.sh replay-alloc
|
9 | # benchmarks/uftrace.sh plugin-allocs
|
10 | #
|
11 | # TODO:
|
12 | # - uftrace dump --chrome # time-based trace
|
13 | # - uftrace dump --flame-graph # common stack traces, e.g. for allocation
|
14 |
|
15 | set -o nounset
|
16 | set -o pipefail
|
17 | set -o errexit
|
18 |
|
19 | source benchmarks/common.sh # cmark function. TODO: could use executable
|
20 | source build/dev-shell.sh # put uftrace in $PATH, R_LIBS_USER
|
21 | source devtools/common.sh # banner
|
22 | source test/common.sh # escape-html
|
23 |
|
24 | readonly BASE_DIR=_tmp/uftrace
|
25 |
|
26 | download() {
|
27 | wget --no-clobber --directory _cache \
|
28 | https://github.com/namhyung/uftrace/archive/refs/tags/v0.13.tar.gz
|
29 | #https://github.com/namhyung/uftrace/archive/v0.9.3.tar.gz
|
30 |
|
31 | }
|
32 |
|
33 | extract() {
|
34 | pushd _cache
|
35 | tar --extract -z < v0.13.tar.gz
|
36 | popd
|
37 | }
|
38 |
|
39 | build() {
|
40 | cd _cache/uftrace-0.13
|
41 | ./configure
|
42 | make
|
43 |
|
44 | # It can't find some files unless we do this
|
45 | echo 'Run sudo make install'
|
46 | }
|
47 |
|
48 | ubuntu-hack() {
|
49 | # Annoying: the plugin engine tries to look for the wrong file?
|
50 | # What's 3.6m.so vs 3.6.so ???
|
51 |
|
52 | cd /usr/lib/x86_64-linux-gnu
|
53 | ln -s libpython3.6m.so.1.0 libpython3.6.so
|
54 | }
|
55 |
|
56 | # https://github.com/namhyung/uftrace/wiki/Tutorial
|
57 | hello-demo() {
|
58 | cat >_tmp/hello.c <<EOF
|
59 | #include <stdio.h>
|
60 |
|
61 | int main(void) {
|
62 | printf("Hello world\n");
|
63 | return 0;
|
64 | }
|
65 | EOF
|
66 |
|
67 | gcc -o _tmp/hello -pg _tmp/hello.c
|
68 |
|
69 | uftrace _tmp/hello
|
70 | }
|
71 |
|
72 | record-oils-cpp() {
|
73 | ### Record a trace, but limit to allocations functions, for size
|
74 |
|
75 | local out_dir=$1
|
76 | local unfiltered=${2:-}
|
77 | shift 2
|
78 |
|
79 | #local flags=(-F process::Process::RunWait -F process::Process::Process)
|
80 |
|
81 | local -a flags
|
82 |
|
83 | if test -n "$unfiltered"; then
|
84 | out_dir=$out_dir.unfiltered
|
85 |
|
86 | # Look for the pattern:
|
87 | # Alloc() {
|
88 | # MarkSweepHeap::Allocate(24)
|
89 | # syntax_asdl::line_span::line_span()
|
90 | # }
|
91 | flags=(
|
92 | -F 'Alloc'
|
93 | -F 'MarkSweepHeap::Allocate' -A 'MarkSweepHeap::Allocate@arg2'
|
94 | -D 2
|
95 | )
|
96 | # If we don't filter at all, then it's huge
|
97 | # flags=()
|
98 |
|
99 | else
|
100 | # It's faster to filter just these function calls
|
101 | # Need .* for --demangle full
|
102 |
|
103 | flags=(
|
104 | # low level allocation
|
105 | -F 'MarkSweepHeap::Allocate.*' -A 'MarkSweepHeap::Allocate.*@arg2'
|
106 |
|
107 | # typed allocation
|
108 | -F 'Alloc<.*' # missing type info
|
109 |
|
110 | # Flexible array allocation
|
111 | # arg 1 is str_len
|
112 | -F 'NewStr.*' -A 'NewStr.*@arg1'
|
113 | -F 'OverAllocatedStr.*' -A 'OverAllocatedStr.*@arg1'
|
114 |
|
115 | # This constructor doesn't matter. We care about the interface in in
|
116 | # mycpp/gc_alloc.h
|
117 | # -F 'Str::Str.*'
|
118 |
|
119 | # arg1 is number of elements of type T
|
120 | -F 'NewSlab<.*' -A 'NewSlab<.*@arg1'
|
121 | # -F 'Slab<.*>::Slab.*'
|
122 |
|
123 | # Fixed size header allocation
|
124 | # arg2 is the number of items to reserve
|
125 | # -F 'List<.*>::List.*'
|
126 | -F 'List<.*>::reserve.*' -A 'List<.*>::reserve.*@arg2'
|
127 | # -F 'Dict<.*>::Dict.*' # does not allocate
|
128 | -F 'Dict<.*>::reserve.*' -A 'Dict<.*>::reserve.*@arg2'
|
129 |
|
130 | # Common object
|
131 | # -F 'syntax_asdl::Token::Token'
|
132 |
|
133 | -D 1
|
134 | )
|
135 |
|
136 | # Problem: some of these aren't allocations
|
137 | # -F 'Tuple2::Tuple2'
|
138 | # -F 'Tuple3::Tuple3'
|
139 | # -F 'Tuple4::Tuple4'
|
140 |
|
141 | # StrFromC calls NewStr, so we don't need it
|
142 | # -F 'StrFromC' -A 'StrFromC@arg1' -A 'StrFromC@arg2'
|
143 | fi
|
144 |
|
145 | local bin=_bin/cxx-uftrace/osh
|
146 | ninja $bin
|
147 |
|
148 | mkdir -p $out_dir
|
149 | time uftrace record --demangle full -d $out_dir "${flags[@]}" $bin "$@"
|
150 |
|
151 | ls -d $out_dir/
|
152 | ls -l --si $out_dir/
|
153 | }
|
154 |
|
155 | run-tasks() {
|
156 | while read task; do
|
157 | banner "$task: utrace record"
|
158 |
|
159 | # TODO: Could share with benchmarks/gc
|
160 | case $task in
|
161 | parse.configure-cpython)
|
162 | data_file='Python-2.7.13/configure'
|
163 | ;;
|
164 | parse.abuild)
|
165 | data_file='benchmarks/testdata/abuild'
|
166 | ;;
|
167 | esac
|
168 |
|
169 | # Construct argv for each task
|
170 | local -a argv
|
171 | case $task in
|
172 | parse.*)
|
173 | argv=( --ast-format none -n $data_file )
|
174 | ;;
|
175 |
|
176 | ex.compute-fib)
|
177 | argv=( benchmarks/compute/fib.sh 10 44 )
|
178 | ;;
|
179 |
|
180 | ex.bashcomp-excerpt)
|
181 | # NOTE: benchmarks/gc.sh uses the larger clang.txt file
|
182 | argv=( benchmarks/parse-help/pure-excerpt.sh parse_help_file
|
183 | benchmarks/parse-help/mypy.txt )
|
184 | ;;
|
185 |
|
186 | esac
|
187 |
|
188 | local out_dir=$BASE_DIR/raw/$task
|
189 |
|
190 | record-oils-cpp $out_dir '' "${argv[@]}"
|
191 | done
|
192 | }
|
193 |
|
194 | print-tasks() {
|
195 | # Same as benchmarks/gc
|
196 | local -a tasks=(
|
197 | # This one is a bit big
|
198 | # parse.configure-cpython
|
199 |
|
200 | parse.abuild
|
201 | ex.bashcomp-excerpt
|
202 | ex.compute-fib
|
203 | )
|
204 |
|
205 | for task in "${tasks[@]}"; do
|
206 | echo $task
|
207 | done
|
208 | }
|
209 |
|
210 | measure-all() {
|
211 | print-tasks | run-tasks
|
212 | }
|
213 |
|
214 | frequent-calls() {
|
215 | ### Histogram
|
216 |
|
217 | local out_dir=$1
|
218 | uftrace report -d $out_dir -s call --demangle full
|
219 | }
|
220 |
|
221 | call-graph() {
|
222 | ### Time-based trace
|
223 |
|
224 | local out_dir=$1
|
225 | uftrace graph -d $out_dir
|
226 | }
|
227 |
|
228 | tsv-plugin() {
|
229 | local task=${1:-ex.compute-fib}
|
230 |
|
231 | local dir=$BASE_DIR/raw/$task
|
232 |
|
233 | # On the big configure-coreutils script, this takes 10 seconds. That's
|
234 | # acceptable. Gives 2,402,003 allocations.
|
235 |
|
236 | local out_dir=_tmp/uftrace/stage1/$task
|
237 | mkdir -p $out_dir
|
238 | time uftrace script --demangle full -d $dir -S benchmarks/uftrace_allocs.py $out_dir
|
239 |
|
240 | wc -l $out_dir/*.tsv
|
241 | }
|
242 |
|
243 | report-all() {
|
244 | print-tasks | while read task; do
|
245 | banner "$task: report"
|
246 |
|
247 | frequent-calls $BASE_DIR/raw/$task
|
248 |
|
249 | echo
|
250 | done
|
251 | }
|
252 |
|
253 | export-all() {
|
254 | if uftrace --version | grep python3; then
|
255 | echo 'uftrace has Python 3 plugin support'
|
256 | else
|
257 | die 'uftrace is MISSING Python 3 plugin support'
|
258 | fi
|
259 |
|
260 | # TODO: Join into a single TSV file
|
261 | print-tasks | while read task; do
|
262 | banner "$task: export to TSV with Python3 plugin"
|
263 | time tsv-plugin $task
|
264 | done
|
265 | }
|
266 |
|
267 | html-index() {
|
268 | echo '<body style="margin: 0 auto; width: 40em; font-size: large">'
|
269 |
|
270 | cmark << 'EOF'
|
271 | # uftrace reports
|
272 |
|
273 | Workloads:
|
274 | EOF
|
275 |
|
276 | # Link to text files
|
277 | print-tasks | while read task; do
|
278 | echo "<a href="stage2/$task.txt">$task</a> <br/>"
|
279 | done
|
280 |
|
281 | cmark <<< '## Summary'
|
282 |
|
283 | echo '<pre>'
|
284 |
|
285 | cat $BASE_DIR/stage2/summary.txt | escape-html
|
286 |
|
287 | echo '</pre>'
|
288 | echo '</body>'
|
289 | }
|
290 |
|
291 | analyze-all() {
|
292 | local in_dir=$BASE_DIR/stage1/
|
293 | local out_dir=$BASE_DIR/stage2/
|
294 |
|
295 | # prepare dirs for R to write to
|
296 | print-tasks | while read task; do
|
297 | mkdir -v -p $out_dir/$task
|
298 | done
|
299 |
|
300 | # Writes stage2/summary.txt
|
301 | benchmarks/report.R uftrace $in_dir $out_dir
|
302 |
|
303 | html-index > $BASE_DIR/index.html
|
304 | echo "Wrote $BASE_DIR/index.html"
|
305 | }
|
306 |
|
307 |
|
308 | # Hm this shows EVERY call stack that produces a list!
|
309 |
|
310 | # uftrace graph usage shown here
|
311 | # https://github.com/namhyung/uftrace/wiki/Tutorial
|
312 |
|
313 | replay-alloc() {
|
314 | local out_dir=$1
|
315 |
|
316 | # call graph
|
317 | #uftrace graph -C 'MarkSweepHeap::Allocate'
|
318 |
|
319 | # shows what calls this function
|
320 | #uftrace replay -C 'MarkSweepHeap::Allocate'
|
321 |
|
322 | # shows what this function calls
|
323 | #uftrace replay -F 'MarkSweepHeap::Allocate'
|
324 |
|
325 | # filters may happen at record or replay time
|
326 |
|
327 | # depth of 1
|
328 | #uftrace replay -D 1 -F 'MarkSweepHeap::Allocate'
|
329 |
|
330 | uftrace replay -D 1 -F 'MarkSweepHeap::Allocate'
|
331 | }
|
332 |
|
333 | plugin() {
|
334 | # Note this one likes UNFILTERED data
|
335 | uftrace script -S benchmarks/uftrace_plugin.py
|
336 | }
|
337 |
|
338 | soil-run() {
|
339 | measure-all
|
340 | export-all
|
341 | analyze-all
|
342 |
|
343 | }
|
344 |
|
345 | "$@"
|