1 | #!/usr/bin/env Rscript
|
2 | #
|
3 | # benchmarks/report.R -- Analyze data collected by shell scripts.
|
4 | #
|
5 | # Usage:
|
6 | # benchmarks/report.R OUT_DIR [TIMES_CSV...]
|
7 |
|
8 | # Suppress warnings about functions masked from 'package:stats' and 'package:base'
|
9 | # filter, lag
|
10 | # intersect, setdiff, setequal, union
|
11 | library(dplyr, warn.conflicts = FALSE)
|
12 | library(tidyr) # spread()
|
13 | library(stringr)
|
14 |
|
15 | source('benchmarks/common.R')
|
16 |
|
# Keep strings as character vectors when data frames are built/read.
# Spelled FALSE (not F) because F is an ordinary variable that can be
# reassigned.
options(stringsAsFactors = FALSE)
|
18 |
|
19 | # For pretty printing
|
# Pretty-printing helper: format x with ',' as the thousands separator.
commas <- function(x) {
  thousands_sep <- ','
  format(x, big.mark = thousands_sep)
}
|
23 |
|
# Append 'path' to the oilshell/oil master-branch blob URL on GitHub.
sourceUrl <- function(path) {
  url_template <- 'https://github.com/oilshell/oil/blob/master/%s'
  sprintf(url_template, path)
}
|
27 |
|
# Like sourceUrl(), but takes a bare filename (not a path) and links into
# benchmarks/testdata/ on the master branch.
sourceUrl2 <- function(filename) {
  url_template <-
    'https://github.com/oilshell/oil/blob/master/benchmarks/testdata/%s'
  sprintf(url_template, filename)
}
|
34 |
|
# Link to mycpp/examples/<path>.py on the master branch; the '.py' extension
# is appended here, so callers pass the name without it.
mycppUrl <- function(path) {
  url_template <-
    'https://github.com/oilshell/oil/blob/master/mycpp/examples/%s.py'
  sprintf(url_template, path)
}
|
38 |
|
39 |
|
# Link into the oilshell/benchmark-data repo on GitHub.  The URL is
# <repo>/blob/master/<subdir>/<name><suffix>.
#
# TODO: Set up cgit because Github links are slow.
benchmarkDataLink <- function(subdir, name, suffix) {
  # Older relative form, kept for reference:
  #sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
  url_template <-
    'https://github.com/oilshell/benchmark-data/blob/master/%s/%s%s'
  sprintf(url_template, subdir, name, suffix)
}
|
46 |
|
# Relative link of the form ../<subdir>/<name><suffix>.  Same signature as
# benchmarkDataLink() so the two are interchangeable.
provenanceLink <- function(subdir, name, suffix) {
  relative_template <- '../%s/%s%s'
  sprintf(relative_template, subdir, name, suffix)
}
|
50 |
|
51 |
|
GetOshLabel <- function(shell_hash, prov_dir) {
  ### Given an osh shell hash, return a label string.
  ###
  ### Reads <prov_dir>/shell-id/osh-<shell_hash>/sh-path.txt and returns
  ### 'osh-ovm', 'osh-cpython' or 'osh-native' depending on which pattern
  ### matches a line of that file.  Stops if the file doesn't exist or if no
  ### pattern matches.

  sh_path_file <- sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir,
                          shell_hash)

  if (!file.exists(sh_path_file)) {
    stop(sprintf("%s doesn't exist", sh_path_file))
  }

  Log('Reading %s', sh_path_file)
  contents <- readLines(sh_path_file)

  any_line_matches <- function(pattern) {
    length(grep(pattern, contents)) > 0
  }

  # Order matters: '_bin/osh' is tested before the looser 'bin/osh'.
  if (any_line_matches('_bin/osh')) {
    return('osh-ovm')
  }
  if (any_line_matches('bin/osh')) {
    return('osh-cpython')
  }
  if (any_line_matches('_bin/.*/osh')) {
    return('osh-native')
  }

  stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
}
|
74 |
|
# Path suffixes that ShellLabels() and ShellLabelFromPath() test with
# endsWith() to recognize the 'opt' osh binaries.
opt_suffix1 <- '_bin/cxx-opt/osh'
opt_suffix2 <- '_bin/cxx-opt-sh/osh'
|
77 |
|
ShellLabels <- function(shell_name, shell_hash, num_hosts) {
  ### Given 2 parallel vectors (names and hashes), return a character vector
  ### of readable labels, one per element of shell_name.
  ###
  ### - 'osh' is resolved with GetOshLabel(), using '_tmp' as the provenance
  ###   dir when num_hosts == 1 and '../benchmark-data/' otherwise.
  ### - Names ending in one of the opt suffixes become 'opt/osh'.
  ### - Names ending in '_bin/cxx-opt+bumpleak/osh' become 'bumpleak/osh'.
  ### - Anything else is used as its own label.
  ###
  ### Empty input yields character(0).

  # TODO: Clean up callers.  Some metrics call this function with a
  # shell/runtime BASENAME, and others a PATH
  # - e.g. ComputeReport calls this with runtime_name which is actually a PATH

  if (num_hosts == 1) {
    prov_dir <- '_tmp'
  } else {
    prov_dir <- '../benchmark-data/'
  }

  # Preallocate instead of growing with c().  seq_along() instead of
  # 1:length(), which would iterate over c(1, 0) for empty input and fail on
  # the NA comparison below.
  labels <- character(length(shell_name))
  for (i in seq_along(shell_name)) {
    sh <- shell_name[i]
    if (sh == 'osh') {
      # shell_hash is only touched in this branch.
      label <- GetOshLabel(shell_hash[i], prov_dir)

    } else if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      label <- 'opt/osh'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label <- 'bumpleak/osh'

    } else {
      label <- sh
    }

    Log('[%s] [%s]', shell_name[i], label)
    labels[i] <- label
  }

  return(labels)
}
|
116 |
|
# Simple version of ShellLabels(), used by benchmarks/gc.
#
# Maps each shell path to a label by suffix; paths that match nothing are
# returned unchanged.  Returns an unnamed character vector the same length as
# sh_path (character(0) for empty input).
ShellLabelFromPath <- function(sh_path) {
  label_one <- function(sh) {
    if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      # the opt binary is osh-native
      'osh-native'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      'bumpleak/osh'

    } else if (endsWith(sh, '_bin/osh')) {  # the app bundle
      'osh-ovm'

    } else if (endsWith(sh, 'bin/osh')) {
      # Must come after the '_bin/osh' test, which it would also match.
      'osh-cpython'

    } else {
      sh
    }
  }

  # vapply() rather than 'for (i in 1:length(sh_path))': the latter iterates
  # over c(1, 0) when sh_path is empty and fails on the NA element.
  vapply(sh_path, label_one, character(1), USE.NAMES = FALSE)
}
|
143 |
|
DistinctHosts <- function(t) {
  ### Return the distinct (host_name, host_hash) rows of t, with an added
  ### host_label column.

  hosts <- distinct(t, host_name, host_hash)
  # The label is just the name
  hosts$host_label <- hosts$host_name
  hosts
}
|
150 |
|
DistinctShells <- function(t, num_hosts = -1) {
  ### Return the distinct (shell_name, shell_hash) rows of t, with an added
  ### shell_label column computed by ShellLabels().
  ###
  ### num_hosts = -1 means "not passed": it is then derived from t with
  ### DistinctHosts().

  shells <- distinct(t, shell_name, shell_hash)

  Log('')
  Log('Labeling shells')

  # Calculate it if not passed
  if (num_hosts == -1) {
    num_hosts <- nrow(DistinctHosts(t))
  }

  shells$shell_label <- ShellLabels(shells$shell_name, shells$shell_hash,
                                    num_hosts)
  shells
}
|
167 |
|
ParserReport <- function(in_dir, out_dir) {
  ### Build the parser benchmark report.
  ###
  ### Reads times.csv, lines.csv, raw-data.csv and cachegrind.tsv from in_dir,
  ### and writes summary tables, per-file tables and provenance to out_dir.
  ###
  ### When one of the hosts is labelled 'no-host', only the "flat" tables
  ### (times_flat, cachegrind_flat) are written; otherwise the per-shell
  ### tables (elapsed, rate, max_rss, instructions) are written.

  times <- read.csv(file.path(in_dir, 'times.csv'))
  lines <- read.csv(file.path(in_dir, 'lines.csv'))
  raw_data <- read.csv(file.path(in_dir, 'raw-data.csv'))

  cachegrind <- readTsv(file.path(in_dir, 'cachegrind.tsv'))

  # Remove failures
  times <- times %>% filter(status == 0) %>% select(-c(status))
  cachegrind <- cachegrind %>% filter(status == 0) %>% select(-c(status))

  # Add the number of lines, joining on path, and compute lines/ms
  joined_times <- times %>%
    left_join(lines, by = c('path')) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path),
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           lines_per_ms = num_lines / elapsed_ms) %>%
    select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs))

  #print(head(times))
  #print(head(lines))
  #print(head(joined_times))

  print(summary(joined_times))

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  distinct_hosts <- DistinctHosts(joined_times)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells <- DistinctShells(joined_times)
  Log('')
  Log('Distinct shells')
  print(distinct_shells)

  # Replace name/hash combinations with labels.
  joined_times <- joined_times %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
    select(-c(host_name, host_hash, shell_name, shell_hash))

  # Like 'times', but do shell_label as one step
  # Hack: we know benchmarks/auto.sh runs this on one machine
  distinct_shells_2 <- DistinctShells(cachegrind,
                                      num_hosts = nrow(distinct_hosts))
  joined_cachegrind <- cachegrind %>%
    left_join(lines, by = c('path')) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
    select(-c(shell_name, shell_hash)) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
    select(-c(path))

  Log('summary(joined_times):')
  print(summary(joined_times))
  Log('head(joined_times):')
  print(head(joined_times))

  # Summarize rates by platform/shell
  times_summary <- joined_times %>%
    mutate(host_label = paste("host", host_label)) %>%
    group_by(host_label, shell_label) %>%
    summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
    mutate(lines_per_ms = total_lines / total_ms) %>%
    select(-c(total_ms)) %>%
    spread(key = host_label, value = lines_per_ms)

  # Sort by parsing rate on the fast machine
  if ("host lenny" %in% colnames(times_summary)) {
    times_summary <- times_summary %>% arrange(desc(`host lenny`))
  } else {
    times_summary <- times_summary %>% arrange(desc(`host no-host`))
  }

  Log('times_summary:')
  print(times_summary)

  # Summarize cachegrind by platform/shell
  # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
  cachegrind_summary <- joined_cachegrind %>%
    group_by(shell_label) %>%
    summarize(total_lines = sum(num_lines),
              total_irefs = sum(as.numeric(irefs))) %>%
    mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
    select(-c(total_irefs))

  if ("no-host" %in% distinct_hosts$host_label) {

    # We don't have all the shells
    elapsed <- NULL
    rate <- NULL
    max_rss <- NULL
    instructions <- NULL

    times_flat <- joined_times %>%
      select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
               num_lines, filename, filename_HREF)) %>%
      arrange(filename, elapsed_ms)

    cachegrind_flat <- joined_cachegrind %>%
      select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
      arrange(filename, irefs)

  } else {

    times_flat <- NULL
    cachegrind_flat <- NULL

    # Elapsed seconds for each shell by platform and file
    elapsed <- joined_times %>%
      select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = elapsed_ms) %>%
      arrange(host_label, num_lines) %>%
      mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               osh_to_bash_ratio, num_lines, filename, filename_HREF))

    Log('\n')
    Log('ELAPSED')
    print(elapsed)

    # Rates by file and shell
    rate <- joined_times %>%
      select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = lines_per_ms) %>%
      arrange(host_label, num_lines) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               num_lines, filename, filename_HREF))

    Log('\n')
    Log('RATE')
    print(rate)

    # Memory usage by file
    max_rss <- joined_times %>%
      select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
      spread(key = shell_label, value = max_rss_MB) %>%
      arrange(host_label, num_lines) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               num_lines, filename, filename_HREF))

    Log('\n')
    Log('MAX RSS')
    print(max_rss)

    Log('\n')
    Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
    print(joined_cachegrind)
    #print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh'))

    # Cachegrind instructions by file
    instructions <- joined_cachegrind %>%
      mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
      select(-c(irefs)) %>%
      spread(key = shell_label, value = thousand_irefs_per_line) %>%
      arrange(num_lines) %>%
      select(c(bash, dash, mksh, `osh-native`,
               num_lines, filename, filename_HREF))

    Log('\n')
    Log('instructions has %d rows', nrow(instructions))
    print(instructions)
  }

  WriteProvenance(distinct_hosts, distinct_shells, out_dir)

  raw_data_table <- tibble(
    filename = basename(as.character(raw_data$path)),
    filename_HREF = benchmarkDataLink('osh-parser', filename, '')
  )
  #print(raw_data_table)

  writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))

  precision <- SamePrecision(0)  # lines per ms
  writeCsv(times_summary, file.path(out_dir, 'summary'), precision)

  precision <- ColumnPrecision(list(), default = 1)
  writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'),
           precision)

  # The *_flat tables only exist in the no-host case.
  if (!is.null(times_flat)) {
    precision <- SamePrecision(0)
    writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
  }

  if (!is.null(cachegrind_flat)) {
    precision <- SamePrecision(0)
    writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
  }

  # elapsed is NULL in the no-host case, so this runs only when we have the
  # per-shell tables.
  if (!is.null(elapsed)) {
    # Round to nearest millisecond, but the ratio has a decimal point.
    precision <- ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
    writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)

    precision <- SamePrecision(0)
    writeCsv(rate, file.path(out_dir, 'rate'), precision)

    writeCsv(max_rss, file.path(out_dir, 'max_rss'))

    precision <- SamePrecision(1)
    writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
  }

  Log('Wrote %s', out_dir)
}
|
400 |
|
WriteProvenance <- function(distinct_hosts, distinct_shells, out_dir,
                            tsv = FALSE) {
  ### Write the 'hosts' and 'shells' provenance tables to out_dir.
  ###
  ### distinct_hosts is read for host_label, host_name, host_hash;
  ### distinct_shells for shell_label, shell_name, shell_hash.
  ### tsv: if TRUE the tables are written with writeTsv(), else writeCsv().

  # With a single host, links are relative (provenanceLink); with several,
  # they point at the benchmark-data repo.
  num_hosts <- nrow(distinct_hosts)
  if (num_hosts == 1) {
    linkify <- provenanceLink
  } else {
    linkify <- benchmarkDataLink
  }

  Log('distinct_hosts')
  print(distinct_hosts)
  Log('')

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  # Should be:
  # host_id_url
  # And then csv_to_html will be smart enough?  It should take --url flag?
  host_table <- tibble(
    host_label = distinct_hosts$host_label,
    host_id = paste(distinct_hosts$host_name,
                    distinct_hosts$host_hash, sep='-'),
    host_id_HREF = linkify('host-id', host_id, '/')
  )
  Log('host_table')
  print(host_table)
  Log('')

  shell_table <- tibble(
    shell_label = distinct_shells$shell_label,
    shell_id = paste(distinct_shells$shell_name,
                     distinct_shells$shell_hash, sep='-'),
    shell_id_HREF = linkify('shell-id', shell_id, '/')
  )

  Log('shell_table')
  print(shell_table)
  Log('')

  if (tsv) {
    writeTsv(host_table, file.path(out_dir, 'hosts'))
    writeTsv(shell_table, file.path(out_dir, 'shells'))
  } else {
    writeCsv(host_table, file.path(out_dir, 'hosts'))
    writeCsv(shell_table, file.path(out_dir, 'shells'))
  }
}
|
450 |
|
WriteSimpleProvenance <- function(provenance, out_dir) {
  ### Derive the distinct hosts and shells from a provenance table and write
  ### them as TSV via WriteProvenance().
  ###
  ### provenance is read for sh_path, shell_label, shell_hash, host_label,
  ### host_name and host_hash.

  Log('provenance')
  print(provenance)
  Log('')

  # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
  # benchmarks/id.sh publish-shell-id uses
  distinct_shells <- provenance %>%
    mutate(shell_name = basename(sh_path)) %>%
    distinct(shell_label, shell_name, shell_hash)

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  distinct_hosts <- provenance %>% distinct(host_label, host_name, host_hash)

  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = TRUE)
}
|
471 |
|
RuntimeReport <- function(in_dir, out_dir) {
  ### Build the osh-runtime report.
  ###
  ### Reads times.tsv, gc_stats.tsv and provenance.tsv from in_dir; stops if
  ### any task has a non-zero status; writes provenance plus the elapsed,
  ### max_rss, gc_stats and details tables to out_dir.

  times <- readTsv(file.path(in_dir, 'times.tsv'))

  gc_stats <- readTsv(file.path(in_dir, 'gc_stats.tsv'))
  provenance <- readTsv(file.path(in_dir, 'provenance.tsv'))

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some osh-runtime tasks failed')
  }

  # Joins:
  # times <= sh_path => provenance
  # times <= join_id, host_name => gc_stats

  # TODO: provenance may have rows from 2 machines.  Could validate them and
  # deduplicate.

  # It should have (host_label, host_name, host_hash)
  #                (shell_label, sh_path, shell_hash)
  provenance <- provenance %>%
    mutate(host_label = host_name,
           shell_label = ShellLabelFromPath(sh_path))

  label_lookup <- provenance %>% distinct(sh_path, shell_label)

  Log('label_lookup')
  print(label_lookup)

  # Join with provenance for host label and shell label
  details <- times %>%
    select(-c(status)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path))

  Log('details')
  print(details)

  # One column of elapsed ms per shell, plus ratios against bash.
  elapsed <- details %>%
    select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
    spread(key = shell_label, value = elapsed_ms) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio))

  Log('elapsed')
  print(elapsed)

  # Same shape as 'elapsed', but with max RSS in MB as the value.
  max_rss <- details %>%
    select(-c(task_id, elapsed_ms, user_ms, sys_ms)) %>%
    spread(key = shell_label, value = max_rss_MB) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio))

  Log('max rss')
  print(max_rss)

  # join_id is "gc-<task_id>", which is the key used to join with gc_stats.
  gc_details <- details %>%
    select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
    mutate(join_id = sprintf("gc-%d", task_id)) %>%
    select(-c(task_id))

  Log('GC stats')
  print(gc_stats)

  gc_stats <- gc_stats %>%
    left_join(gc_details, by = c('join_id', 'host_name')) %>%
    select(-c(join_id, roots_capacity, objs_capacity)) %>%
    # Do same transformations as GcReport()
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    select(-c(bytes_allocated)) %>%
    rename(num_gc_done = num_collections) %>%
    # Put these columns first
    relocate(workload, host_name,
             elapsed_ms, max_gc_millis, total_gc_millis,
             allocated_MB, max_rss_MB, num_allocated)

  Log('After GC stats')
  print(gc_stats)

  WriteSimpleProvenance(provenance, out_dir)

  precision <- ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
                                    `osh-native` = 0, py_bash_ratio = 2,
                                    native_bash_ratio = 2))
  writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision)

  precision2 <- ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                                default = 0)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision2)
  writeTsv(details, file.path(out_dir, 'details'), precision2)

  Log('Wrote %s', out_dir)
}
|
587 |
|
VmBaselineReport <- function(in_dir, out_dir) {
  ### Read vm-baseline.csv from in_dir and write a table with one row per
  ### (shell_label, shell_hash, host) and VmRSS_MB / VmPeak_MB columns to
  ### out_dir as 'vm-baseline'.

  vm_raw <- read.csv(file.path(in_dir, 'vm-baseline.csv'))
  #print(vm_raw)

  # Not using DistinctHosts() because field host_hash isn't collected
  num_hosts <- nrow(distinct(vm_raw, host))

  vm <- vm_raw %>%
    rename(kib = metric_value) %>%
    mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
           megabytes = kib * 1024 / 1e6) %>%
    select(-c(shell_name, kib)) %>%
    spread(key = c(metric_name), value = megabytes) %>%
    rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
    select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
    arrange(shell_label, shell_hash, host, VmPeak_MB)

  print(vm)

  writeCsv(vm, file.path(out_dir, 'vm-baseline'))
}
|
610 |
|
WriteOvmBuildDetails <- function(distinct_hosts, distinct_compilers, out_dir) {
  ### Write the 'hosts' and 'compilers' TSV tables for the ovm-build report.
  ### Each row gets an id of the form <name>-<hash> and a link into the
  ### benchmark-data repo.

  host_ids <- paste(distinct_hosts$host_name, distinct_hosts$host_hash,
                    sep='-')
  host_table <- tibble(
    host_label = distinct_hosts$host_label,
    host_id = host_ids,
    host_id_HREF = benchmarkDataLink('host-id', host_ids, '/')
  )
  print(host_table)

  compiler_ids <- paste(distinct_compilers$compiler_label,
                        distinct_compilers$compiler_hash, sep='-')
  compiler_table <- tibble(
    compiler_label = distinct_compilers$compiler_label,
    compiler_id = compiler_ids,
    compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_ids, '/')
  )
  print(compiler_table)

  writeTsv(host_table, file.path(out_dir, 'hosts'))
  writeTsv(compiler_table, file.path(out_dir, 'compilers'))
}
|
631 |
|
OvmBuildReport <- function(in_dir, out_dir) {
  ### Build the ovm-build report.
  ###
  ### Reads times.tsv, bytecode-size.tsv, bin-sizes.tsv, native-sizes.tsv and
  ### raw-data.tsv from in_dir; stops if any task has a non-zero status;
  ### writes host/compiler details plus the times, bytecode-size, sizes and
  ### native-sizes tables to out_dir.

  times <- readTsv(file.path(in_dir, 'times.tsv'))
  bytecode_size <- readTsv(file.path(in_dir, 'bytecode-size.tsv'))
  bin_sizes <- readTsv(file.path(in_dir, 'bin-sizes.tsv'))
  native_sizes <- readTsv(file.path(in_dir, 'native-sizes.tsv'))
  # NOTE(review): raw_data is read but not referenced again in this function.
  raw_data <- readTsv(file.path(in_dir, 'raw-data.tsv'))

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some ovm-build tasks failed')
  }

  distinct_hosts <- times %>% distinct(host_name, host_hash)
  distinct_hosts$host_label <- distinct_hosts$host_name

  distinct_compilers <- times %>% distinct(compiler_path, compiler_hash)
  distinct_compilers$compiler_label <-
    basename(distinct_compilers$compiler_path)

  #print(distinct_hosts)
  #print(distinct_compilers)

  WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)

  # One elapsed_secs column per host.  The is_* flags exist only to order the
  # rows and are dropped afterwards.
  times <- times %>%
    select(-c(status)) %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_compilers,
              by = c('compiler_path', 'compiler_hash')) %>%
    select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
    mutate(src_dir = basename(src_dir),
           host_label = paste("host ", host_label),
           is_conf = str_detect(action, 'configure'),
           is_ovm = str_detect(action, 'oil.ovm'),
           is_dbg = str_detect(action, 'dbg')) %>%
    select(host_label, src_dir, compiler_label, action, is_conf, is_ovm,
           is_dbg, elapsed_secs) %>%
    spread(key = c(host_label), value = elapsed_secs) %>%
    arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
    select(-c(is_conf, is_ovm, is_dbg))

  #print(times)

  bytecode_size <- bytecode_size %>%
    rename(bytecode_size = num_bytes) %>%
    select(-c(path))

  sizes <- bin_sizes %>%
    # reorder
    select(c(host_label, path, num_bytes)) %>%
    left_join(bytecode_size, by = c('host_label')) %>%
    mutate(native_code_size = num_bytes - bytecode_size)

  # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
  native_sizes <- native_sizes %>%
    select(c(host_label, path, num_bytes)) %>%
    mutate(host_label = paste("host ", host_label),
           binary = basename(path),
           compiler = basename(dirname(path))) %>%
    select(-c(path)) %>%
    spread(key = c(host_label), value = num_bytes) %>%
    arrange(compiler, binary)

  # NOTE: These don't have the host and compiler.
  writeTsv(times, file.path(out_dir, 'times'))
  writeTsv(bytecode_size, file.path(out_dir, 'bytecode-size'))
  writeTsv(sizes, file.path(out_dir, 'sizes'))
  writeTsv(native_sizes, file.path(out_dir, 'native-sizes'))

  # TODO: I want a size report too
  #writeCsv(sizes, file.path(out_dir, 'sizes'))
}
|
709 |
|
# Check that t$stdout_md5sum has exactly num_expected distinct values.
# On mismatch, print the identifying columns of t and stop.
unique_stdout_md5sum <- function(t, num_expected) {
  num_unique <- n_distinct(t$stdout_md5sum)
  if (num_unique == num_expected) {
    return(invisible(NULL))
  }

  offending <- select(t, c(host_name, task_name, arg1, arg2, runtime_name,
                           stdout_md5sum))
  print(offending)
  stop(sprintf('Expected %d unique md5sums, got %d', num_expected, num_unique))
}
|
717 |
|
ComputeReport <- function(in_dir, out_dir) {
  ### Build the compute benchmark report.
  ###
  ### Reads times.tsv from in_dir; stops if any task has a non-zero status or
  ### if a task's stdout md5sums don't have the expected number of distinct
  ### values; writes the details, stdout_files and per-task tables plus
  ### provenance to out_dir.

  # TSV file, not CSV
  times <- read.table(file.path(in_dir, 'times.tsv'), header = TRUE)
  print(times)

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some compute tasks failed')
  }

  #
  # Check correctness
  #

  times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
  # 3 different inputs
  times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)

  times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)

  # TODO:
  # - oils_cpp doesn't implement unicode LANG=C
  # - bash behaves differently on your desktop vs. in the container
  #   - might need layer-locales in the image?

  #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
  # Ditto here
  #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  # Runtimes are called shells, as a hack for code reuse
  tmp <- times %>%
    mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
    select(c(host_name, host_hash, shell_name, shell_hash))

  distinct_hosts <- DistinctHosts(tmp)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells <- DistinctShells(tmp)
  Log('')
  Log('Distinct runtimes')
  print(distinct_shells)

  num_hosts <- nrow(distinct_hosts)

  # runtime_hash must still be a column when ShellLabels() runs: it is read
  # whenever a runtime is named 'osh'.  It is dropped together with
  # runtime_name afterwards, so the resulting columns are unchanged.
  details <- times %>%
    select(-c(status, stdout_md5sum, stdout_filename, host_hash)) %>%
    mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(runtime_name, runtime_hash, elapsed_secs, user_secs, sys_secs,
              max_rss_KiB)) %>%
    arrange(host_name, task_name, arg1, arg2, user_ms)

  stdout_files <- times %>%
    mutate(
      runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
      stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
    select(c(host_name, task_name, arg1, arg2, runtime_label,
             stdout_md5sum, stdout_md5sum_HREF))

  hello <- details %>% filter(task_name == 'hello') %>% select(-c(task_name))
  fib <- details %>% filter(task_name == 'fib') %>% select(-c(task_name))
  word_freq <- details %>%
    filter(task_name == 'word_freq') %>%
    select(-c(task_name))
  # There's no arg2
  parse_help <- details %>%
    filter(task_name == 'parse_help') %>%
    select(-c(task_name, arg2))

  bubble_sort <- details %>%
    filter(task_name == 'bubble_sort') %>%
    select(-c(task_name))
  palindrome <- details %>%
    filter(task_name == 'palindrome' & arg1 == 'unicode') %>%
    select(-c(task_name))

  precision <- ColumnPrecision(list(max_rss_MB = 1), default = 0)
  writeTsv(details, file.path(out_dir, 'details'), precision)

  writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)

  writeTsv(hello, file.path(out_dir, 'hello'), precision)
  writeTsv(fib, file.path(out_dir, 'fib'), precision)
  writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
  writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)

  writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
  writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = TRUE)
}
|
815 |
|
# Write the rows of 'times' whose task column equals task_name (with the task
# column dropped) to <out_dir>/<task_name> as TSV.
WriteOneTask <- function(times, out_dir, task_name, precision) {
  task_rows <- select(filter(times, task == task_name), -c(task))

  writeTsv(task_rows, file.path(out_dir, task_name), precision)
}
|
823 |
|
# Factor levels used to order rows by sh_path in the GC reports.
SHELL_ORDER <- c(
  'dash',
  'bash',
  'zsh',
  '_bin/cxx-opt+bumpleak/osh',
  '_bin/cxx-opt+bumproot/osh',
  '_bin/cxx-opt+bumpsmall/osh',
  '_bin/cxx-opt/osh',
  '_bin/cxx-opt+nopool/osh'
)
|
832 |
|
GcReport <- function(in_dir, out_dir) {
  ### Build the GC benchmark report.
  ###
  ### Reads raw/times.tsv and stage1/gc_stats.tsv from in_dir; stops if any
  ### task has a non-zero status; writes the times and gc_stats tables, plus
  ### one table per task, to out_dir.

  # header = TRUE rather than T: T is an ordinary variable and can be
  # reassigned.
  times <- read.table(file.path(in_dir, 'raw/times.tsv'), header = TRUE)
  gc_stats <- read.table(file.path(in_dir, 'stage1/gc_stats.tsv'),
                         header = TRUE)

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  # Change units and order columns
  times <- times %>%
    arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           shell_label = ShellLabelFromPath(sh_path)) %>%
    select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB,
             shell_label, shell_runtime_opts))

  # Join and order columns
  gc_stats <- gc_stats %>%
    left_join(times, by = c('join_id')) %>%
    arrange(desc(task)) %>%
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    # try to make the table skinnier
    rename(num_gc_done = num_collections) %>%
    select(task, elapsed_ms, max_gc_millis, total_gc_millis,
           allocated_MB, max_rss_MB, num_allocated,
           num_gc_points, num_gc_done, gc_threshold, num_growths,
           max_survived, shell_label)

  # join_id was only needed for the join above.
  times <- times %>% select(-c(join_id))

  precision <- ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                               default = 0)

  writeTsv(times, file.path(out_dir, 'times'), precision)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)

  tasks <- c('parse.configure-coreutils',
             'parse.configure-cpython',
             'parse.abuild',
             'ex.compute-fib',
             'ex.bashcomp-parse-help',
             'ex.abuild-print-help')
  # Write out separate rows
  for (task in tasks) {
    WriteOneTask(times, out_dir, task, precision)
  }
}
|
888 |
|
GcCachegrindReport <- function(in_dir, out_dir) {
  ### Build the GC cachegrind report.
  ###
  ### Reads raw/times.tsv and stage1/cachegrind.tsv from in_dir; stops if any
  ### task has a non-zero status; writes one table of instruction counts (in
  ### millions of irefs) per task to out_dir.

  times <- readTsv(file.path(in_dir, 'raw/times.tsv'))
  counts <- readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  print(times)
  print(counts)

  counts <- counts %>%
    left_join(times, by = c('join_id')) %>%
    mutate(million_irefs = irefs / 1e6) %>%
    select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
    arrange(factor(sh_path, levels = SHELL_ORDER))

  # No precision spec is passed for these tables.
  precision <- NULL
  for (task in c('parse.abuild', 'ex.compute-fib')) {
    WriteOneTask(counts, out_dir, task, precision)
  }
}
|
914 |
|
MyCppReport = function(in_dir, out_dir) {
  ### Build the mycpp example tables: user time, sys time and max RSS, each
  ### with one column per implementation plus a C++ : Python ratio, and a
  ### long-format details table.
  ###
  ### in_dir:  directory containing benchmark-table.tsv
  ### out_dir: directory the tables are written under

  times = readTsv(file.path(in_dir, 'benchmark-table.tsv'))
  print(times)

  failed = times %>% filter(status != 0)
  if (nrow(failed) > 0) {
    print(failed)
    stop('Some mycpp tasks failed')
  }

  # Drop columns that aren't reported (elapsed time among them), convert
  # seconds to milliseconds and KiB to MB, and add a link to each example.
  details = times %>%
    select(-c(status, elapsed_secs, bin, task_out)) %>%
    mutate(example_name_HREF = mycppUrl(example_name),
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(user_secs, sys_secs, max_rss_KiB))

  # Given a table with one column per implementation, append the ratio column
  # and sort by it.
  WithRatio = function(wide) {
    wide %>%
      mutate(`C++ : Python` = `C++` / Python) %>%
      arrange(`C++ : Python`)
  }

  user_time = details %>%
    select(-c(sys_ms, max_rss_MB)) %>%
    spread(key = impl, value = user_ms) %>%
    WithRatio()

  sys_time = details %>%
    select(-c(user_ms, max_rss_MB)) %>%
    spread(key = impl, value = sys_ms) %>%
    WithRatio()

  max_rss = details %>%
    select(-c(user_ms, sys_ms)) %>%
    spread(key = impl, value = max_rss_MB) %>%
    WithRatio()

  # Sometimes it speeds up by more than 10x
  time_precision = ColumnPrecision(list(`C++ : Python` = 3), default = 0)
  writeTsv(user_time, file.path(out_dir, 'user_time'), time_precision)
  writeTsv(sys_time, file.path(out_dir, 'sys_time'), time_precision)

  rss_precision = ColumnPrecision(list(`C++ : Python` = 2), default = 1)
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), rss_precision)

  writeTsv(details, file.path(out_dir, 'details'))
}
|
962 |
|
UftraceTaskReport = function(env, task_name, summaries) {
  ### Print the allocation report for one task and record its summary.
  ###
  ### env:       environment mapping task name -> environment holding the data
  ###            frames untyped, typed, strings, slabs and reserve
  ### task_name: key into env
  ### summaries: environment; this function adds summaries$stats[[task_name]]
  ###            (a 1-row tibble) and summaries$most_common_types[[task_name]]

  # Need this again after redirect
  MaybeDisableColor(stdout())

  task_env = env[[task_name]]

  untyped = task_env$untyped
  typed = task_env$typed
  strings = task_env$strings
  slabs = task_env$slabs
  reserve = task_env$reserve

  string_overhead = 17  # GC header (8) + len (4) + hash value (4) + NUL (1)
  strings %>% mutate(obj_len = str_len + string_overhead) -> strings

  # Percentage of 'values' that are <= limit, out of 'total' observations.
  # Computed from the raw values so it is defined even when no observation is
  # exactly equal to 'limit'.
  PercentAtMost = function(values, limit, total) {
    sum(values <= limit, na.rm = TRUE) * 100.0 / total
  }

  # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
  # big/small
  #
  # And then zoom in on distributions as well

  num_allocs = nrow(untyped)
  total_bytes = sum(untyped$obj_len)

  untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
  #print(untyped_hist)

  # Cumulative distribution of allocation sizes
  untyped_hist %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_allocs) ->
    alloc_sizes

  allocs_24_bytes_or_less = PercentAtMost(untyped$obj_len, 24, num_allocs)
  allocs_48_bytes_or_less = PercentAtMost(untyped$obj_len, 48, num_allocs)
  allocs_96_bytes_or_less = PercentAtMost(untyped$obj_len, 96, num_allocs)

  Log('Percentage of allocs less than 48 bytes: %.1f', allocs_48_bytes_or_less)

  options(tibble.print_min=25)

  Log('')
  Log('All allocations')
  print(alloc_sizes %>% head(22))
  print(alloc_sizes %>% tail(5))

  Log('')
  Log('Common Sizes')
  print(untyped_hist %>% arrange(desc(n)) %>% head(8))

  Log('')
  Log('    %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
  Log('')

  Log('Typed allocations')

  num_typed = nrow(typed)

  typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
    mutate(percent = n * 100.0 / num_typed) %>%
    arrange(desc(n)) -> most_common_types

  print(most_common_types %>% head(20))
  print(most_common_types %>% tail(5))

  lists = typed %>% filter(str_starts(func_name, ('List<')))
  #print(lists)

  num_lists = nrow(lists)
  total_list_bytes = num_lists * 24  # sizeof List<T> head is hard-coded

  Log('')
  Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
  Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
  Log('')

  #
  # Strings
  #

  num_strings = nrow(strings)
  total_string_bytes = sum(strings$obj_len)

  strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_strings) ->
    string_lengths

  # Plain numbers (not 1x1 tibbles), and well-defined even if the workload has
  # no string of length exactly 6 or 14.
  strs_6_bytes_or_less = PercentAtMost(strings$str_len, 6, num_strings)
  strs_14_bytes_or_less = PercentAtMost(strings$str_len, 14, num_strings)

  # Parse workload
  # 62% of strings <= 6 bytes
  # 84% of strings <= 14 bytes

  Log('Str - NewStr() and OverAllocatedStr()')
  print(string_lengths %>% head(16))
  print(string_lengths %>% tail(5))
  Log('')

  Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
      commas(sum(strings$str_len)), commas(total_string_bytes))
  Log('')
  Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
  Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
  Log('')

  #
  # Slabs
  #

  Log('NewSlab()')

  num_slabs = nrow(slabs)
  slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_slabs) ->
    slab_lengths

  slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
    arrange(desc(n)) -> slab_types

  Log('  Lengths')
  print(slab_lengths %>% head())
  print(slab_lengths %>% tail(5))
  Log('')

  Log('  Slab Types')
  print(slab_types %>% head())
  print(slab_types %>% tail(5))
  Log('')

  total_slab_items = sum(slabs$slab_len)

  Log('%s slabs, total items = %s', commas(num_slabs),
      commas(total_slab_items))
  Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
  Log('')

  #
  # reserve() calls
  #

  # There should be strictly more List::reserve() calls than NewSlab

  Log('::reserve(int n)')
  Log('')

  num_reserve = nrow(reserve)
  reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_reserve) ->
    reserve_args

  Log('  Num Items')
  print(reserve_args %>% head(15))
  print(reserve_args %>% tail(5))
  Log('')

  Log('%s reserve() calls, total items = %s', commas(num_reserve),
      commas(sum(reserve$num_items)))
  Log('')

  # Accounting for all allocations!
  Log('Untyped: %s', commas(num_allocs))
  Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
  Log('')

  num_other_typed = num_typed - num_lists

  # Summary table: one row per task, every value pre-formatted as a string
  stats = tibble(task = task_name,
                 total_bytes_ = commas(total_bytes),
                 num_allocs_ = commas(num_allocs),
                 sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
                 num_reserve_calls = commas(num_reserve),

                 percent_list_allocs = Percent(num_lists, num_allocs),
                 percent_slab_allocs = Percent(num_slabs, num_allocs),
                 percent_string_allocs = Percent(num_strings, num_allocs),
                 percent_other_typed_allocs = Percent(num_other_typed, num_allocs),

                 percent_list_bytes = Percent(total_list_bytes, total_bytes),
                 percent_string_bytes = Percent(total_string_bytes, total_bytes),

                 allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
                 allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
                 allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),

                 strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
                 strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less)
  )
  summaries$stats[[task_name]] = stats

  summaries$most_common_types[[task_name]] = most_common_types
}
|
1161 |
|
LoadUftraceTsv = function(in_dir, env) {
  ### Load the per-task uftrace tables into 'env' and print a summary of each.
  ###
  ### in_dir: directory with one entry per task, each holding the TSV files
  ###         named below
  ### env:    environment that receives env[[task]], itself an environment
  ###         with fields untyped, typed, strings, slabs and reserve

  # field name -> file name (TSV file, not CSV)
  tsv_files = c(untyped = 'all-untyped.tsv',
                typed = 'typed.tsv',
                strings = 'strings.tsv',
                slabs = 'slabs.tsv',
                reserve = 'reserve.tsv')

  # field name -> heading logged above its summary
  headings = c(untyped = 'UNTYPED',
               typed = 'TYPED',
               strings = 'STRINGS',
               slabs = 'SLABS',
               reserve = 'RESERVE')

  for (task_name in list.files(in_dir)) {
    Log('Loading data for task %s', task_name)
    base_dir = file.path(in_dir, task_name)

    task_env = new.env()
    env[[task_name]] = task_env

    # Read every table first, then summarize them in the same order.
    for (field in names(tsv_files)) {
      task_env[[field]] = readTsv(file.path(base_dir, tsv_files[[field]]))
    }

    # median string length is 4, mean is 9.5!
    for (field in names(headings)) {
      Log(headings[[field]])
      print(summary(task_env[[field]]))
      Log('')
    }
  }
}
|
1199 |
|
Percent = function(n, total) {
  ### Format n as a percentage of total with one decimal place, e.g. '25.0%'.
  ratio = n * 100.0 / total
  sprintf('%.1f%%', ratio)
}
|
1203 |
|
PrettyPrintLong = function(d) {
  ### Print a data frame transposed: one output line per column of 'd', with
  ### the column name first and then that column's value for each row.
  ###
  ### A blank line is printed after selected column names to group the output.
  ### Prints nothing when 'd' has no columns.

  tr = t(d)  # transpose
  row_names = rownames(tr)

  # Column names that are followed by an empty line
  group_ends = c('num_reserve_calls',
                 'percent_string_bytes',
                 'percent_other_typed_allocs',
                 'allocs_96_bytes_or_less')

  # seq_len() rather than 1:nrow(tr), which would iterate over c(1, 0) and
  # fail on an empty input
  for (i in seq_len(nrow(tr))) {
    row_name = row_names[i]
    cat(sprintf('%26s', row_name))  # calculated min width manually
    cat(sprintf('%20s', tr[i,]))
    cat('\n')

    # Extra spacing
    if (row_name %in% group_ends) {
      cat('\n')
    }
  }
}
|
1224 |
|
1225 |
|
UftraceReport = function(env, out_dir) {
  ### Write one text report per task plus a combined summary.txt.
  ###
  ### env:     environment mapping task name -> per-task data
  ### out_dir: directory that receives <task>.txt and summary.txt
  ###
  ### Returns list(stats = <data frame with one row per task>).

  # summaries$stats is a list of 1-row data frames, keyed by task name
  # summaries$most_common_types is a list of per-task type tables
  summaries = new.env()

  for (task_name in names(env)) {
    report_out = file.path(out_dir, paste0(task_name, '.txt'))

    Log('Making report for task %s -> %s', task_name, report_out)

    # Redirect printed output into the report file.  Always undo the
    # redirect, even if the report fails, so stdout isn't left diverted.
    sink(file = report_out)
    tryCatch(
      UftraceTaskReport(env, task_name, summaries),
      finally = sink())  # reset
  }
  Log('')

  # Concatenate all the data frames added to summaries
  stats = bind_rows(as.list(summaries$stats))

  sink(file = file.path(out_dir, 'summary.txt'))
  tryCatch({
    #print(stats)
    #Log('')

    PrettyPrintLong(stats)
    Log('')

    mct = summaries$most_common_types
    for (task_name in names(mct)) {
      Log('Common types in workload %s', task_name)
      Log('')

      print(mct[[task_name]] %>% head(5))
      Log('')
    }
  }, finally = sink())

  # For the REPL
  return(list(stats = stats))
}
|
1265 |
|
main = function(argv) {
  ### Entry point: dispatch to the report named by the first argument.
  ###
  ### argv: character vector of (action, in_dir, out_dir)

  action = argv[[1]]
  in_dir = argv[[2]]
  out_dir = argv[[3]]

  # Only the selected branch is evaluated; the unnamed last branch handles
  # unknown actions.
  switch(action,
    'osh-parser' = ParserReport(in_dir, out_dir),
    'osh-runtime' = RuntimeReport(in_dir, out_dir),
    'vm-baseline' = VmBaselineReport(in_dir, out_dir),
    'ovm-build' = OvmBuildReport(in_dir, out_dir),
    'compute' = ComputeReport(in_dir, out_dir),
    'gc' = GcReport(in_dir, out_dir),
    'gc-cachegrind' = GcCachegrindReport(in_dir, out_dir),
    'mycpp' = MyCppReport(in_dir, out_dir),
    'uftrace' = {
      d = new.env()
      LoadUftraceTsv(in_dir, d)
      UftraceReport(d, out_dir)
    },
    {
      Log("Invalid action '%s'", action)
      quit(status = 1)
    }
  )

  Log('PID %d done', Sys.getpid())
}
|
1306 |
|
# Run main() only when executed as a script, i.e. when there is no enclosing
# function frame.
if (sys.nframe() == 0) {
  # increase ggplot font size globally
  #theme_set(theme_grey(base_size = 20))

  main(commandArgs(TRUE))
}
|