OILS / benchmarks / report.R View on Github | oilshell.org

1357 lines, 941 significant
1#!/usr/bin/env Rscript
2#
3# benchmarks/report.R -- Analyze data collected by shell scripts.
4#
5# Usage:
6# benchmarks/report.R OUT_DIR [TIMES_CSV...]
7
8# Suppress warnings about functions masked from 'package:stats' and 'package:base'
9# filter, lag
10# intersect, setdiff, setequal, union
11library(dplyr, warn.conflicts = FALSE)
12library(tidyr) # spread()
13library(stringr)
14
15source('benchmarks/common.R')
16
# Script-wide setting: don't convert strings to factors
options(stringsAsFactors = FALSE)
18
# For pretty printing: format x with ',' as the thousands separator.
commas <- function(x) {
  formatted <- format(x, big.mark = ',')
  formatted
}
23
# Github URL for a file in the oil repo, given its repo-relative path.
sourceUrl <- function(path) {
  url_format <- 'https://github.com/oilshell/oil/blob/master/%s'
  sprintf(url_format, path)
}
27
# Github URL for a file under benchmarks/testdata/.
# Takes a filename, not a path.
sourceUrl2 <- function(filename) {
  url_format <-
    'https://github.com/oilshell/oil/blob/master/benchmarks/testdata/%s'
  sprintf(url_format, filename)
}
34
# Github URL for the Python source of a mycpp example, given the example name.
mycppUrl <- function(name) {
  url_format <-
    'https://github.com/oilshell/oil/blob/master/mycpp/examples/%s.py'
  sprintf(url_format, name)
}
38
# Relative link to the generated C++ file for a mycpp example.
genUrl <- function(name) {
  link_format <- '../../_gen/mycpp/examples/%s.mycpp.cc'
  sprintf(link_format, name)
}
42
43
# TODO: Set up cgit because Github links are slow.
#
# Github URL into the benchmark-data repo, of the form
#   .../blob/master/<subdir>/<name><suffix>
benchmarkDataLink <- function(subdir, name, suffix) {
  #sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
  url_format <- 'https://github.com/oilshell/benchmark-data/blob/master/%s/%s%s'
  sprintf(url_format, subdir, name, suffix)
}
50
# Relative link of the form ../<subdir>/<name><suffix>.  Same signature as
# benchmarkDataLink(), so the two can be used interchangeably.
provenanceLink <- function(subdir, name, suffix) {
  link_format <- '../%s/%s%s'
  sprintf(link_format, subdir, name, suffix)
}
54
55
GetOshLabel <- function(shell_hash, prov_dir) {
  ### Given an osh shell hash, return a label for the kind of osh binary.
  ###
  ### Reads <prov_dir>/shell-id/osh-<shell_hash>/sh-path.txt and classifies it
  ### as 'osh-ovm', 'osh-cpython' or 'osh-native'.  Stops if the file doesn't
  ### exist or matches none of the expected patterns.

  sh_path_file <- sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir, shell_hash)

  if (!file.exists(sh_path_file)) {
    stop(sprintf("%s doesn't exist", sh_path_file))
  }

  Log('Reading %s', sh_path_file)
  contents <- readLines(sh_path_file)

  # TRUE if any line of the file matches the regex
  any_line_matches <- function(pattern) {
    any(grepl(pattern, contents))
  }

  # Checked in this order; the first match wins
  if (any_line_matches('_bin/osh')) {
    return('osh-ovm')
  }
  if (any_line_matches('bin/osh')) {
    return('osh-cpython')
  }
  if (any_line_matches('_bin/.*/osh')) {
    return('osh-native')
  }

  stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
}
78
# Path suffixes of the optimized C++ osh binaries
opt_suffix1 <- '_bin/cxx-opt/osh'
opt_suffix2 <- '_bin/cxx-opt-sh/osh'
81
ShellLabels <- function(shell_name, shell_hash, num_hosts) {
  ### Given 2 parallel vectors, return a character vector of readable labels,
  ### one per element of shell_name.
  ###
  ### - 'osh' is labeled by GetOshLabel(), using the matching shell_hash
  ### - paths ending in an opt suffix become 'opt/osh'
  ### - paths ending in '_bin/cxx-opt+bumpleak/osh' become 'bumpleak/osh'
  ### - anything else is its own label
  ###
  ### Empty input returns character(0).

  # TODO: Clean up callers.  Some metrics call this function with a
  # shell/runtime BASENAME, and others a PATH
  # - e.g. ComputeReport calls this with runtime_name which is actually a PATH

  if (num_hosts == 1) {
    prov_dir <- '_tmp'
  } else {
    prov_dir <- '../benchmark-data/'
  }

  # Preallocate instead of growing the vector.  seq_along() iterates zero
  # times on empty input, whereas 1:length(x) would visit indices 1 and 0.
  labels <- character(length(shell_name))
  for (i in seq_along(shell_name)) {
    sh <- shell_name[i]
    if (sh == 'osh') {
      # shell_hash is only evaluated in this branch, which the original code
      # also did.  Don't touch it elsewhere.
      label <- GetOshLabel(shell_hash[i], prov_dir)

    } else if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      label <- 'opt/osh'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label <- 'bumpleak/osh'

    } else {
      label <- sh
    }

    Log('[%s] [%s]', shell_name[i], label)
    labels[i] <- label
  }

  return(labels)
}
120
# Simple version of the above, used by benchmarks/gc
ShellLabelFromPath <- function(sh_path) {
  ### Given a vector of shell paths, return a character vector of labels of
  ### the same length.  Paths that match no known suffix are their own label.
  ### Empty input returns character(0).

  LabelOne <- function(sh) {
    if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      # the opt binary is osh-native
      'osh-native'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      'bumpleak/osh'

    } else if (endsWith(sh, '_bin/osh')) {  # the app bundle
      'osh-ovm'

    } else if (endsWith(sh, 'bin/osh')) {
      'osh-cpython'

    } else {
      sh
    }
  }

  # vapply() calls LabelOne zero times on empty input, whereas a loop over
  # 1:length(sh_path) would visit indices 1 and 0 and fail on the NA.
  vapply(sh_path, LabelOne, character(1), USE.NAMES = FALSE)
}
147
DistinctHosts <- function(t) {
  ### Return the distinct (host_name, host_hash) rows of t, with a host_label
  ### column added.

  hosts <- distinct(t, host_name, host_hash)
  # The label is just the name
  hosts$host_label <- hosts$host_name
  hosts
}
154
DistinctShells <- function(t, num_hosts = -1) {
  ### Return the distinct (shell_name, shell_hash) rows of t, with a
  ### shell_label column computed by ShellLabels().
  ###
  ### num_hosts: passed to ShellLabels(); the default -1 means "count the
  ### distinct hosts in t".

  shells <- distinct(t, shell_name, shell_hash)

  Log('')
  Log('Labeling shells')

  # Calculate it if not passed
  if (num_hosts == -1) {
    num_hosts <- nrow(DistinctHosts(t))
  }

  shells$shell_label <- ShellLabels(shells$shell_name, shells$shell_hash,
                                    num_hosts)
  shells
}
171
ParserReport <- function(in_dir, out_dir) {
  ### Turn the parser benchmark data in in_dir into report tables in out_dir.
  ###
  ### Reads times.csv, lines.csv, raw-data.csv and cachegrind.tsv from in_dir.
  ### Rows with a non-zero status are dropped from times and cachegrind.
  ###
  ### Always writes: hosts, shells, raw-data, summary, cachegrind_summary.
  ### If some host is labeled 'no-host', also writes the flat tables
  ### (times_flat, cachegrind_flat).  Otherwise also writes the per-shell
  ### comparison tables (elapsed, rate, max_rss, instructions).

  times <- read.csv(file.path(in_dir, 'times.csv'))
  lines <- read.csv(file.path(in_dir, 'lines.csv'))
  raw_data <- read.csv(file.path(in_dir, 'raw-data.csv'))

  cachegrind <- readTsv(file.path(in_dir, 'cachegrind.tsv'))

  # Remove failures
  times <- times %>% filter(status == 0) %>% select(-c(status))
  cachegrind <- cachegrind %>% filter(status == 0) %>% select(-c(status))

  # Add the number of lines, joining on path, and compute lines/ms
  joined_times <- times %>%
    left_join(lines, by = c('path')) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path),
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           lines_per_ms = num_lines / elapsed_ms) %>%
    select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs))

  print(summary(joined_times))

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  distinct_hosts <- DistinctHosts(joined_times)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells <- DistinctShells(joined_times)
  Log('')
  Log('Distinct shells')
  print(distinct_shells)

  # Replace name/hash combinations with labels.
  joined_times <- joined_times %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
    select(-c(host_name, host_hash, shell_name, shell_hash))

  # Like 'times', but do shell_label as one step
  # Hack: we know benchmarks/auto.sh runs this on one machine
  distinct_shells_2 <- DistinctShells(cachegrind,
                                      num_hosts = nrow(distinct_hosts))
  joined_cachegrind <- cachegrind %>%
    left_join(lines, by = c('path')) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
    select(-c(shell_name, shell_hash)) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
    select(-c(path))

  Log('summary(joined_times):')
  print(summary(joined_times))
  Log('head(joined_times):')
  print(head(joined_times))

  # Summarize rates by platform/shell
  times_summary <- joined_times %>%
    mutate(host_label = paste("host", host_label)) %>%
    group_by(host_label, shell_label) %>%
    summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
    mutate(lines_per_ms = total_lines / total_ms) %>%
    select(-c(total_ms)) %>%
    spread(key = host_label, value = lines_per_ms)

  # Sort by parsing rate on the fast machine
  if ("host lenny" %in% colnames(times_summary)) {
    times_summary <- times_summary %>% arrange(desc(`host lenny`))
  } else {
    times_summary <- times_summary %>% arrange(desc(`host no-host`))
  }

  Log('times_summary:')
  print(times_summary)

  # Summarize cachegrind by platform/shell
  # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
  cachegrind_summary <- joined_cachegrind %>%
    group_by(shell_label) %>%
    summarize(total_lines = sum(num_lines),
              total_irefs = sum(as.numeric(irefs))) %>%
    mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
    select(-c(total_irefs))

  if ("no-host" %in% distinct_hosts$host_label) {

    # We don't have all the shells, so skip the per-shell comparison tables
    elapsed <- NULL
    rate <- NULL
    max_rss <- NULL
    instructions <- NULL

    times_flat <- joined_times %>%
      select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
               num_lines, filename, filename_HREF)) %>%
      arrange(filename, elapsed_ms)

    cachegrind_flat <- joined_cachegrind %>%
      select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
      arrange(filename, irefs)

  } else {

    times_flat <- NULL
    cachegrind_flat <- NULL

    # Elapsed seconds for each shell by platform and file
    elapsed <- joined_times %>%
      select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = elapsed_ms) %>%
      arrange(host_label, num_lines) %>%
      mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               osh_to_bash_ratio, num_lines, filename, filename_HREF))

    Log('\n')
    Log('ELAPSED')
    print(elapsed)

    # Rates by file and shell
    rate <- joined_times %>%
      select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = lines_per_ms) %>%
      arrange(host_label, num_lines) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               num_lines, filename, filename_HREF))

    Log('\n')
    Log('RATE')
    print(rate)

    # Memory usage by file
    max_rss <- joined_times %>%
      select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
      spread(key = shell_label, value = max_rss_MB) %>%
      arrange(host_label, num_lines) %>%
      select(c(host_label, bash, dash, mksh, zsh,
               `osh-ovm`, `osh-cpython`, `osh-native`,
               num_lines, filename, filename_HREF))

    Log('\n')
    Log('MAX RSS')
    print(max_rss)

    Log('\n')
    Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
    print(joined_cachegrind)
    #print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh'))

    # Cachegrind instructions by file
    instructions <- joined_cachegrind %>%
      mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
      select(-c(irefs)) %>%
      spread(key = shell_label, value = thousand_irefs_per_line) %>%
      arrange(num_lines) %>%
      select(c(bash, dash, mksh, `osh-native`,
               num_lines, filename, filename_HREF))

    Log('\n')
    Log('instructions has %d rows', nrow(instructions))
    print(instructions)
  }

  WriteProvenance(distinct_hosts, distinct_shells, out_dir)

  raw_data_table <- tibble(
    filename = basename(as.character(raw_data$path)),
    filename_HREF = benchmarkDataLink('osh-parser', filename, '')
  )
  #print(raw_data_table)

  writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))

  precision <- SamePrecision(0)  # lines per ms
  writeCsv(times_summary, file.path(out_dir, 'summary'), precision)

  precision <- ColumnPrecision(list(), default = 1)
  writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'),
           precision)

  if (!is.null(times_flat)) {
    precision <- SamePrecision(0)
    writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
  }

  if (!is.null(cachegrind_flat)) {
    precision <- SamePrecision(0)
    writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
  }

  # elapsed, rate, max_rss and instructions are all set together, in the
  # branch taken when there is NO 'no-host' host.
  if (!is.null(elapsed)) {
    # Round to nearest millisecond, but the ratio has a decimal point.
    precision <- ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
    writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)

    precision <- SamePrecision(0)
    writeCsv(rate, file.path(out_dir, 'rate'), precision)

    writeCsv(max_rss, file.path(out_dir, 'max_rss'))

    precision <- SamePrecision(1)
    writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
  }

  Log('Wrote %s', out_dir)
}
404
WriteProvenance = function(distinct_hosts, distinct_shells, out_dir, tsv = F) {
  ### Write the 'hosts' and 'shells' tables to out_dir.
  ###
  ### distinct_hosts: needs columns host_label, host_name, host_hash
  ### distinct_shells: needs columns shell_label, shell_name, shell_hash
  ### tsv: write with writeTsv() rather than writeCsv()
  ###
  ### With exactly one host, IDs link to relative paths (provenanceLink);
  ### otherwise they link to the benchmark-data repo (benchmarkDataLink).

  linkify <- if (nrow(distinct_hosts) == 1) provenanceLink else benchmarkDataLink

  Log('distinct_hosts')
  print(distinct_hosts)
  Log('')

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  # Should be:
  # host_id_url
  # And then csv_to_html will be smart enough?  It should take --url flag?
  host_id <- paste(distinct_hosts$host_name, distinct_hosts$host_hash,
                   sep='-')
  host_table <- tibble(
    host_label = distinct_hosts$host_label,
    host_id = host_id,
    host_id_HREF = linkify('host-id', host_id, '/')
  )
  Log('host_table')
  print(host_table)
  Log('')

  shell_id <- paste(distinct_shells$shell_name, distinct_shells$shell_hash,
                    sep='-')
  shell_table <- tibble(
    shell_label = distinct_shells$shell_label,
    shell_id = shell_id,
    shell_id_HREF = linkify('shell-id', shell_id, '/')
  )

  Log('shell_table')
  print(shell_table)
  Log('')

  write_table <- if (tsv) writeTsv else writeCsv
  write_table(host_table, file.path(out_dir, 'hosts'))
  write_table(shell_table, file.path(out_dir, 'shells'))
}
454
WriteSimpleProvenance <- function(provenance, out_dir) {
  ### Derive the distinct hosts and shells from a provenance table, and write
  ### them to out_dir as TSV via WriteProvenance().

  Log('provenance')
  print(provenance)
  Log('')

  # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
  # benchmarks/id.sh publish-shell-id uses
  distinct_shells <- provenance %>%
    mutate(shell_name = basename(sh_path)) %>%
    distinct(shell_label, shell_name, shell_hash)

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  distinct_hosts <- distinct(provenance, host_label, host_name, host_hash)

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = TRUE)
}
475
RuntimeReport <- function(in_dir, out_dir) {
  ### Turn the osh-runtime data in in_dir into report tables in out_dir.
  ###
  ### Reads times.tsv, gc_stats.tsv and provenance.tsv.  Stops if any task has
  ### a non-zero status.  Writes elapsed, page_faults, max_rss, gc_stats,
  ### details and details_io, plus the provenance tables.

  times <- readTsv(file.path(in_dir, 'times.tsv'))

  gc_stats <- readTsv(file.path(in_dir, 'gc_stats.tsv'))
  provenance <- readTsv(file.path(in_dir, 'provenance.tsv'))

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some osh-runtime tasks failed')
  }

  # Joins:
  # times <= sh_path => provenance
  # times <= join_id, host_name => gc_stats

  # TODO: provenance may have rows from 2 machines.  Could validate them and
  # deduplicate.

  # It should have (host_label, host_name, host_hash)
  #                (shell_label, sh_path, shell_hash)
  provenance <- provenance %>%
    mutate(host_label = host_name,
           shell_label = ShellLabelFromPath(sh_path))

  label_lookup <- provenance %>% distinct(sh_path, shell_label)

  Log('label_lookup')
  print(label_lookup)

  # Join with provenance for host label and shell label
  details <- times %>%
    select(c(elapsed_secs, user_secs, sys_secs, max_rss_KiB, task_id,
             host_name, sh_path, workload)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload)

  # Same join, but for the I/O and scheduling counters
  details_io <- times %>%
    select(c(task_id, host_name, sh_path, workload,
             minor_faults, major_faults, swaps, in_block, out_block,
             signals, voluntary_ctx, involuntary_ctx)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload)

  Log('details')
  print(details)

  # Elapsed time comparison
  elapsed <- details %>%
    select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
    spread(key = shell_label, value = elapsed_ms) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio))

  Log('elapsed')
  print(elapsed)

  # Minor Page Faults Comparison
  page_faults <- details_io %>%
    select(c(host_name, shell_label, workload, minor_faults)) %>%
    spread(key = shell_label, value = minor_faults) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio))

  Log('page_faults')
  print(page_faults)

  # Max RSS comparison
  max_rss <- details %>%
    select(c(host_name, shell_label, workload, max_rss_MB)) %>%
    spread(key = shell_label, value = max_rss_MB) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(c(workload, host_name,
             bash, dash, `osh-cpython`, `osh-native`,
             py_bash_ratio, native_bash_ratio))

  Log('max rss')
  print(max_rss)

  # Per-task timing and memory, keyed the same way as gc_stats
  gc_details <- details %>%
    select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
    mutate(join_id = sprintf("gc-%d", task_id)) %>%
    select(-c(task_id))

  Log('GC details')
  print(gc_details)
  Log('')

  Log('GC stats')
  print(gc_stats)
  Log('')

  gc_table <- gc_stats %>%
    left_join(gc_details, by = c('join_id', 'host_name')) %>%
    select(-c(join_id, roots_capacity, objs_capacity)) %>%
    # Do same transformations as GcReport()
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    select(-c(bytes_allocated)) %>%
    rename(num_gc_done = num_collections) %>%
    # Put these columns first
    relocate(workload, host_name,
             elapsed_ms, max_gc_millis, total_gc_millis,
             allocated_MB, max_rss_MB, num_allocated)

  Log('After GC stats')
  print(gc_table)
  Log('')

  WriteSimpleProvenance(provenance, out_dir)

  # milliseconds don't need decimal digit
  precision <- ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
                                    `osh-native` = 0, py_bash_ratio = 2,
                                    native_bash_ratio = 2))
  writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
  writeTsv(page_faults, file.path(out_dir, 'page_faults'), precision)

  precision2 <- ColumnPrecision(list(py_bash_ratio = 2, native_bash_ratio = 2))
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  precision3 <- ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                                default = 0)
  writeTsv(gc_table, file.path(out_dir, 'gc_stats'), precision3)

  writeTsv(details, file.path(out_dir, 'details'), precision3)
  writeTsv(details_io, file.path(out_dir, 'details_io'))

  Log('Wrote %s', out_dir)
}
630
VmBaselineReport <- function(in_dir, out_dir) {
  ### Read vm-baseline.tsv from in_dir and write a 'vm-baseline' table with
  ### one row per shell and host, and VmRSS / VmPeak in MB as columns.

  vm_raw <- readTsv(file.path(in_dir, 'vm-baseline.tsv'))
  #print(vm_raw)

  # Not using DistinctHosts() because field host_hash isn't collected
  num_hosts <- vm_raw %>% distinct(host) %>% nrow()

  vm_table <- vm_raw %>%
    rename(kib = metric_value) %>%
    mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
           megabytes = kib * 1024 / 1e6) %>%
    select(-c(shell_name, kib)) %>%
    # One column per metric name
    spread(key = c(metric_name), value = megabytes) %>%
    rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
    select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
    arrange(shell_label, shell_hash, host, VmPeak_MB)

  print(vm_table)

  writeTsv(vm_table, file.path(out_dir, 'vm-baseline'))
}
653
WriteOvmBuildDetails <- function(distinct_hosts, distinct_compilers, out_dir) {
  ### Write the 'hosts' and 'compilers' TSV tables to out_dir, with links into
  ### the benchmark-data repo.
  ###
  ### distinct_hosts: needs columns host_label, host_name, host_hash
  ### distinct_compilers: needs columns compiler_label, compiler_hash

  host_id <- paste(distinct_hosts$host_name, distinct_hosts$host_hash,
                   sep='-')
  host_table <- tibble(
    host_label = distinct_hosts$host_label,
    host_id = host_id,
    host_id_HREF = benchmarkDataLink('host-id', host_id, '/')
  )
  print(host_table)

  compiler_id <- paste(distinct_compilers$compiler_label,
                       distinct_compilers$compiler_hash, sep='-')
  compiler_table <- tibble(
    compiler_label = distinct_compilers$compiler_label,
    compiler_id = compiler_id,
    compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_id, '/')
  )
  print(compiler_table)

  writeTsv(host_table, file.path(out_dir, 'hosts'))
  writeTsv(compiler_table, file.path(out_dir, 'compilers'))
}
674
OvmBuildReport <- function(in_dir, out_dir) {
  ### Turn the ovm-build data in in_dir into report tables in out_dir.
  ###
  ### Reads times.tsv, bytecode-size.tsv, bin-sizes.tsv, native-sizes.tsv and
  ### raw-data.tsv.  Stops if any task has a non-zero status.  Writes hosts,
  ### compilers, times, bytecode-size, sizes and native-sizes.

  times <- readTsv(file.path(in_dir, 'times.tsv'))
  bytecode_size <- readTsv(file.path(in_dir, 'bytecode-size.tsv'))
  bin_sizes <- readTsv(file.path(in_dir, 'bin-sizes.tsv'))
  native_sizes <- readTsv(file.path(in_dir, 'native-sizes.tsv'))
  # NOTE(review): raw_data is read but not used below
  raw_data <- readTsv(file.path(in_dir, 'raw-data.tsv'))

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some ovm-build tasks failed')
  }

  # The host label is just the name
  distinct_hosts <- distinct(times, host_name, host_hash)
  distinct_hosts$host_label <- distinct_hosts$host_name

  # The compiler label is the last path component
  distinct_compilers <- distinct(times, compiler_path, compiler_hash)
  distinct_compilers$compiler_label <-
    basename(distinct_compilers$compiler_path)

  #print(distinct_hosts)
  #print(distinct_compilers)

  WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)

  # One elapsed_secs column per host.  The is_* flags exist only to order the
  # rows, and are dropped afterward.
  # NOTE(review): paste("host ", ...) yields two spaces after "host", unlike
  # paste("host", ...) in ParserReport -- confirm whether that's intended.
  times_table <- times %>%
    select(-c(status)) %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_compilers,
              by = c('compiler_path', 'compiler_hash')) %>%
    select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
    mutate(src_dir = basename(src_dir),
           host_label = paste("host ", host_label),
           is_conf = str_detect(action, 'configure'),
           is_ovm = str_detect(action, 'oil.ovm'),
           is_dbg = str_detect(action, 'dbg')) %>%
    select(host_label, src_dir, compiler_label, action,
           is_conf, is_ovm, is_dbg, elapsed_secs) %>%
    spread(key = c(host_label), value = elapsed_secs) %>%
    arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
    select(-c(is_conf, is_ovm, is_dbg))

  #print(times_table)

  bytecode_table <- bytecode_size %>%
    rename(bytecode_size = num_bytes) %>%
    select(-c(path))

  sizes <- bin_sizes %>%
    # reorder
    select(c(host_label, path, num_bytes)) %>%
    left_join(bytecode_table, by = c('host_label')) %>%
    mutate(native_code_size = num_bytes - bytecode_size)

  # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
  native_table <- native_sizes %>%
    select(c(host_label, path, num_bytes)) %>%
    mutate(host_label = paste("host ", host_label),
           binary = basename(path),
           compiler = basename(dirname(path))) %>%
    select(-c(path)) %>%
    spread(key = c(host_label), value = num_bytes) %>%
    arrange(compiler, binary)

  # NOTE: These don't have the host and compiler.
  writeTsv(times_table, file.path(out_dir, 'times'))
  writeTsv(bytecode_table, file.path(out_dir, 'bytecode-size'))
  writeTsv(sizes, file.path(out_dir, 'sizes'))
  writeTsv(native_table, file.path(out_dir, 'native-sizes'))

  # TODO: I want a size report too
  #writeCsv(sizes, file.path(out_dir, 'sizes'))
}
752
# Check that t has exactly num_expected distinct values of stdout_md5sum.
# If not, print the identifying columns of t and stop.
unique_stdout_md5sum <- function(t, num_expected) {
  num_unique <- n_distinct(t$stdout_md5sum)
  if (num_unique == num_expected) {
    return(invisible(NULL))
  }

  t %>%
    select(c(host_name, task_name, arg1, arg2, runtime_name,
             stdout_md5sum)) %>%
    print()
  stop(sprintf('Expected %d unique md5sums, got %d', num_expected, num_unique))
}
760
ComputeReport <- function(in_dir, out_dir) {
  ### Turn the compute benchmark data in in_dir into report tables in out_dir.
  ###
  ### Reads times.tsv.  Stops if any task has a non-zero status, or if a task
  ### doesn't have the expected number of distinct stdout md5sums.  Writes
  ### details, stdout_files, one table per task, and the provenance tables.

  # TSV file, not CSV
  times <- read.table(file.path(in_dir, 'times.tsv'), header = TRUE)
  print(times)

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some compute tasks failed')
  }

  #
  # Check correctness
  #

  times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
  # 3 different inputs
  times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)

  times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)

  # TODO:
  # - oils_cpp doesn't implement unicode LANG=C
  # - bash behaves differently on your desktop vs. in the container
  #   - might need layer-locales in the image?

  #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
  # Ditto here
  #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  # Runtimes are called shells, as a hack for code reuse
  tmp <- times %>%
    mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
    select(c(host_name, host_hash, shell_name, shell_hash))

  distinct_hosts <- DistinctHosts(tmp)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells <- DistinctShells(tmp)
  Log('')
  Log('Distinct runtimes')
  print(distinct_shells)

  num_hosts <- nrow(distinct_hosts)

  # runtime_hash must still be a column when ShellLabels() runs, because it's
  # needed for any runtime named 'osh'.  So drop it after labeling, not
  # before.
  details <- times %>%
    select(-c(status, stdout_md5sum, stdout_filename, host_hash)) %>%
    mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(runtime_name, runtime_hash,
              elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    arrange(host_name, task_name, arg1, arg2, user_ms)

  stdout_files <- times %>%
    mutate(
      runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
      stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
    select(c(host_name, task_name, arg1, arg2, runtime_label,
             stdout_md5sum, stdout_md5sum_HREF))

  hello <- details %>% filter(task_name == 'hello') %>% select(-c(task_name))
  fib <- details %>% filter(task_name == 'fib') %>% select(-c(task_name))
  word_freq <- details %>%
    filter(task_name == 'word_freq') %>%
    select(-c(task_name))
  # There's no arg2
  parse_help <- details %>%
    filter(task_name == 'parse_help') %>%
    select(-c(task_name, arg2))

  bubble_sort <- details %>%
    filter(task_name == 'bubble_sort') %>%
    select(-c(task_name))
  palindrome <- details %>%
    filter(task_name == 'palindrome' & arg1 == 'unicode') %>%
    select(-c(task_name))

  precision <- ColumnPrecision(list(max_rss_MB = 1), default = 0)
  writeTsv(details, file.path(out_dir, 'details'), precision)

  writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)

  writeTsv(hello, file.path(out_dir, 'hello'), precision)
  writeTsv(fib, file.path(out_dir, 'fib'), precision)
  writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
  writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)

  writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
  writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = TRUE)
}
858
# Write the rows of `times` whose task column equals task_name to
# <out_dir>/<task_name>, without the task column.
WriteOneTask <- function(times, out_dir, task_name, precision) {
  rows_for_task <- times %>%
    filter(task == task_name) %>%
    select(-c(task))

  writeTsv(rows_for_task, file.path(out_dir, task_name), precision)
}
866
# Order in which shells appear in the GC reports; used as factor levels when
# sorting by sh_path.
SHELL_ORDER <- c(
  'dash',
  'bash',
  'zsh',
  '_bin/cxx-opt+bumpleak/osh',
  '_bin/cxx-opt+bumproot/osh',
  '_bin/cxx-opt+bumpsmall/osh',
  '_bin/cxx-opt/osh',
  '_bin/cxx-opt+nopool/osh'
)
875
GcReport <- function(in_dir, out_dir) {
  ### Turn the GC benchmark data in in_dir into report tables in out_dir.
  ###
  ### Reads raw/times.tsv and stage1/gc_stats.tsv.  Stops if any task has a
  ### non-zero status.  Writes times, gc_stats, and one table per task.

  raw_times <- read.table(file.path(in_dir, 'raw/times.tsv'), header = TRUE)
  raw_gc_stats <- read.table(file.path(in_dir, 'stage1/gc_stats.tsv'),
                             header = TRUE)

  failed <- raw_times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  # Change units and order columns
  times_with_id <- raw_times %>%
    arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           shell_label = ShellLabelFromPath(sh_path)) %>%
    select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB,
             shell_label, shell_runtime_opts))

  # Join and order columns
  gc_stats <- raw_gc_stats %>%
    left_join(times_with_id, by = c('join_id')) %>%
    arrange(desc(task)) %>%
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    # try to make the table skinnier
    rename(num_gc_done = num_collections) %>%
    select(task, elapsed_ms, max_gc_millis, total_gc_millis,
           allocated_MB, max_rss_MB, num_allocated,
           num_gc_points, num_gc_done, gc_threshold, num_growths,
           max_survived, shell_label)

  # join_id was only needed for the join above
  times <- times_with_id %>% select(-c(join_id))

  precision <- ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                               default = 0)

  writeTsv(times, file.path(out_dir, 'times'), precision)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)

  task_names <- c('parse.configure-coreutils',
                  'parse.configure-cpython',
                  'parse.abuild',
                  'ex.compute-fib',
                  'ex.bashcomp-parse-help',
                  'ex.abuild-print-help')
  # Write out separate rows
  for (one_task in task_names) {
    WriteOneTask(times, out_dir, one_task, precision)
  }
}
931
GcCachegrindReport <- function(in_dir, out_dir) {
  ### Read raw/times.tsv and stage1/cachegrind.tsv from in_dir, and write one
  ### table of instruction counts (in millions) per task to out_dir.
  ### Stops if any task has a non-zero status.

  times <- readTsv(file.path(in_dir, 'raw/times.tsv'))
  raw_counts <- readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))

  failed <- times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  print(times)
  print(raw_counts)

  counts <- raw_counts %>%
    left_join(times, by = c('join_id')) %>%
    mutate(million_irefs = irefs / 1e6) %>%
    select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
    arrange(factor(sh_path, levels = SHELL_ORDER))

  precision <- NULL
  task_names <- c('parse.abuild', 'ex.compute-fib')
  for (one_task in task_names) {
    WriteOneTask(counts, out_dir, one_task, precision)
  }
}
957
MyCppReport <- function(in_dir, out_dir) {
  ### Read benchmark-table.tsv from in_dir and write the user_time, sys_time,
  ### max_rss and details tables to out_dir.  The first three compare the
  ### 'C++' and 'Python' implementations of each example.
  ### Stops if any task has a non-zero status.

  bench <- readTsv(file.path(in_dir, 'benchmark-table.tsv'))
  print(bench)

  failed <- bench %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some mycpp tasks failed')
  }

  # Don't care about elapsed and system
  details <- bench %>%
    select(-c(status, elapsed_secs, bin, task_out)) %>%
    mutate(example_name_HREF = mycppUrl(example_name),
           gen = c('gen'),
           gen_HREF = genUrl(example_name),
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(user_secs, sys_secs, max_rss_KiB))

  # Each table below has one column per impl, plus the ratio, sorted by ratio

  user_time <- details %>%
    select(-c(sys_ms, max_rss_MB)) %>%
    spread(key = impl, value = user_ms) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`)

  sys_time <- details %>%
    select(-c(user_ms, max_rss_MB)) %>%
    spread(key = impl, value = sys_ms) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`)

  max_rss <- details %>%
    select(-c(user_ms, sys_ms)) %>%
    spread(key = impl, value = max_rss_MB) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`)

  # Sometimes it speeds up by more than 10x
  precision1 <- ColumnPrecision(list(`C++ : Python` = 3), default = 0)
  writeTsv(user_time, file.path(out_dir, 'user_time'), precision1)
  writeTsv(sys_time, file.path(out_dir, 'sys_time'), precision1)

  precision2 <- ColumnPrecision(list(`C++ : Python` = 2), default = 1)
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  writeTsv(details, file.path(out_dir, 'details'))
}
1007
# Print an allocation report for one uftrace task to stdout, and record a
# summary of it.
#
# Args:
#   env: environment mapping task name -> per-task environment holding the
#     data frames untyped, typed, strings, slabs and reserve
#   task_name: name of the task to report on
#   summaries: environment used as an out-parameter.  A 1-row tibble of
#     formatted totals and percentages is stored in
#     summaries$stats[[task_name]], and the per-type allocation counts in
#     summaries$most_common_types[[task_name]].
UftraceTaskReport = function(env, task_name, summaries) {
  # Need this again after redirect
  MaybeDisableColor(stdout())

  task_env = env[[task_name]]

  untyped = task_env$untyped
  typed = task_env$typed
  strings = task_env$strings
  slabs = task_env$slabs
  reserve = task_env$reserve

  string_overhead = 17  # GC header (8) + len (4) + hash value (4) + NUL (1)
  strings %>% mutate(obj_len = str_len + string_overhead) -> strings

  # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
  # big/small
  #
  # And then zoom in on distributions as well

  num_allocs = nrow(untyped)
  total_bytes = sum(untyped$obj_len)

  untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
  #print(untyped_hist)

  # Cumulative distribution of allocation sizes
  untyped_hist %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_allocs) ->
    alloc_sizes

  a24 = untyped_hist %>% filter(obj_len <= 24)
  a48 = untyped_hist %>% filter(obj_len <= 48)
  a96 = untyped_hist %>% filter(obj_len <= 96)

  allocs_24_bytes_or_less = sum(a24$n) * 100.0 / num_allocs
  allocs_48_bytes_or_less = sum(a48$n) * 100.0 / num_allocs
  allocs_96_bytes_or_less = sum(a96$n) * 100.0 / num_allocs

  Log('Percentage of allocs less than 48 bytes: %.1f', allocs_48_bytes_or_less)

  # Raise the row limit for the tables printed below, and put the previous
  # setting back when this function returns so the change doesn't leak.
  old_options = options(tibble.print_min = 25)
  on.exit(options(old_options), add = TRUE)

  Log('')
  Log('All allocations')
  print(alloc_sizes %>% head(22))
  print(alloc_sizes %>% tail(5))

  Log('')
  Log('Common Sizes')
  print(untyped_hist %>% arrange(desc(n)) %>% head(8))

  Log('')
  Log('    %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
  Log('')

  Log('Typed allocations')

  num_typed = nrow(typed)

  typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
    mutate(percent = n * 100.0 / num_typed) %>%
    arrange(desc(n)) -> most_common_types

  print(most_common_types %>% head(20))
  print(most_common_types %>% tail(5))

  lists = typed %>% filter(str_starts(func_name, ('List<')))
  #print(lists)

  num_lists = nrow(lists)
  total_list_bytes = num_lists * 24  # sizeof List<T> head is hard-coded

  Log('')
  Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
  Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
  Log('')

  #
  # Strings
  #

  num_strings = nrow(strings)
  total_string_bytes = sum(strings$obj_len)

  # Cumulative distribution of string lengths
  strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_strings) ->
    string_lengths

  # Computed from the raw lengths with <=, like the allocs_*_bytes_or_less
  # values above.  Looking up the histogram row with str_len == 6 (or 14)
  # yields nothing when no string has exactly that length.
  strs_6_bytes_or_less = sum(strings$str_len <= 6) * 100.0 / num_strings
  strs_14_bytes_or_less = sum(strings$str_len <= 14) * 100.0 / num_strings

  # Parse workload
  # 62% of strings <= 6 bytes
  # 84% of strings <= 14 bytes

  Log('Str - NewStr() and OverAllocatedStr()')
  print(string_lengths %>% head(16))
  print(string_lengths %>% tail(5))
  Log('')

  Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
      commas(sum(strings$str_len)), commas(total_string_bytes))
  Log('')
  Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
  Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
  Log('')

  #
  # Slabs
  #

  Log('NewSlab()')

  num_slabs = nrow(slabs)
  slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_slabs) ->
    slab_lengths

  slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
    arrange(desc(n)) -> slab_types

  Log('  Lengths')
  print(slab_lengths %>% head())
  print(slab_lengths %>% tail(5))
  Log('')

  Log('  Slab Types')
  print(slab_types %>% head())
  print(slab_types %>% tail(5))
  Log('')

  total_slab_items = sum(slabs$slab_len)

  Log('%s slabs, total items = %s', commas(num_slabs),
      commas(total_slab_items))
  Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
  Log('')

  #
  # reserve() calls
  #

  # There should be strictly more List::reserve() calls than NewSlab

  Log('::reserve(int n)')
  Log('')

  num_reserve = nrow(reserve)
  reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_reserve) ->
    reserve_args

  Log('  Num Items')
  print(reserve_args %>% head(15))
  print(reserve_args %>% tail(5))
  Log('')

  Log('%s reserve() calls, total items = %s', commas(num_reserve),
      commas(sum(reserve$num_items)))
  Log('')

  # Accounting for all allocations!
  Log('Untyped: %s', commas(num_allocs))
  Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
  Log('')

  num_other_typed = num_typed - num_lists

  # Summary table: a single row of pre-formatted strings for this task
  stats = tibble(task = task_name,
                 total_bytes_ = commas(total_bytes),
                 num_allocs_ = commas(num_allocs),
                 sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
                 num_reserve_calls = commas(num_reserve),

                 percent_list_allocs = Percent(num_lists, num_allocs),
                 percent_slab_allocs = Percent(num_slabs, num_allocs),
                 percent_string_allocs = Percent(num_strings, num_allocs),
                 percent_other_typed_allocs = Percent(num_other_typed, num_allocs),

                 percent_list_bytes = Percent(total_list_bytes, total_bytes),
                 percent_string_bytes = Percent(total_string_bytes, total_bytes),

                 allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
                 allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
                 allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),

                 strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
                 strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less)
                 )
  summaries$stats[[task_name]] = stats

  summaries$most_common_types[[task_name]] = most_common_types
}
1206
# Load the uftrace TSV files for every task under in_dir.
#
# Args:
#   in_dir: directory whose entries are each treated as a task directory
#   env: environment to fill in.  env[[task]] becomes a new environment with
#     the fields untyped, typed, strings, slabs and reserve.
#
# A summary() of each table is printed after a task's files are loaded.
LoadUftraceTsv = function(in_dir, env) {
  # Field in the per-task environment, its TSV file, and its heading
  tables = list(
    list(field = 'untyped', file = 'all-untyped.tsv', label = 'UNTYPED'),
    list(field = 'typed', file = 'typed.tsv', label = 'TYPED'),
    list(field = 'strings', file = 'strings.tsv', label = 'STRINGS'),
    list(field = 'slabs', file = 'slabs.tsv', label = 'SLABS'),
    list(field = 'reserve', file = 'reserve.tsv', label = 'RESERVE')
  )

  for (task in list.files(in_dir)) {
    Log('Loading data for task %s', task)
    base_dir = file.path(in_dir, task)

    task_env = new.env()
    env[[task]] = task_env

    # TSV files, not CSV.  Load everything first, then print the summaries.
    for (tbl in tables) {
      task_env[[tbl$field]] = readTsv(file.path(base_dir, tbl$file))
    }

    # median string length is 4, mean is 9.5!
    for (tbl in tables) {
      Log(tbl$label)
      print(summary(task_env[[tbl$field]]))
      Log('')
    }
  }
}
1244
# Format n as a percentage of total, with one decimal place and a trailing
# '%' sign.  Vectorized over its arguments, like sprintf().
Percent = function(n, total) {
  pct = n * 100.0 / total
  sprintf('%.1f%%', pct)
}
1248
# Print a data frame in "long" form: one output line per column of d, with
# the column name right-aligned in 26 characters followed by each row's value
# right-aligned in 20 characters.
#
# A blank line is printed after certain column names to group the output.
# Prints nothing when d has no columns.
PrettyPrintLong = function(d) {
  tr = t(d)  # transpose, so each column of d becomes a row

  row_names = rownames(tr)

  # Rows after which extra spacing is emitted
  group_ends = c('num_reserve_calls',
                 'percent_string_bytes',
                 'percent_other_typed_allocs',
                 'allocs_96_bytes_or_less')

  # seq_len() instead of 1:nrow(tr): with zero rows, 1:0 would iterate over
  # 1 and 0 and index past the end of the matrix.
  for (i in seq_len(nrow(tr))) {
    row_name = row_names[i]
    cat(sprintf('%26s', row_name))  # calculated min width manually
    cat(sprintf('%20s', tr[i, ]))
    cat('\n')

    # Extra spacing
    if (row_name %in% group_ends) {
      cat('\n')
    }
  }
}
1269
1270
# Write one text report per task, plus a cross-task summary.txt, under
# out_dir.
#
# Args:
#   env: environment mapping task name -> per-task environment of data frames
#   out_dir: directory in which <task_name>.txt and summary.txt are written
#
# Returns:
#   list(stats = ...), where stats is the row-wise concatenation of the
#   per-task summary rows.  For the REPL.
UftraceReport = function(env, out_dir) {
  # summaries$stats is a list of 1-row data frames, keyed by task name
  # summaries$most_common_types is a list of data frames, keyed by task name
  summaries = new.env()

  for (task_name in names(env)) {
    report_out = file.path(out_dir, paste0(task_name, '.txt'))

    Log('Making report for task %s -> %s', task_name, report_out)

    sink(file = report_out)
    # Reset the diversion even if the report fails, so that later output
    # (including the error itself, in the REPL) isn't swallowed by the file.
    tryCatch(
      UftraceTaskReport(env, task_name, summaries),
      finally = sink())
  }
  Log('')

  # Concatenate all the data frames added to summaries$stats
  stats = bind_rows(as.list(summaries$stats))

  sink(file = file.path(out_dir, 'summary.txt'))
  tryCatch({
    #print(stats)
    #Log('')

    PrettyPrintLong(stats)
    Log('')

    mct = summaries$most_common_types
    for (task_name in names(mct)) {
      Log('Common types in workload %s', task_name)
      Log('')

      print(mct[[task_name]] %>% head(5))
      Log('')
    }
  }, finally = sink())

  # For the REPL
  return(list(stats = stats))
}
1310
# Command line entry point.
#
# Args:
#   argv: character vector of at least 3 elements: the action name, the input
#     directory and the output directory
#
# Dispatches to the report function for the action.  An unrecognized action
# is logged and the process exits with status 1.
main = function(argv) {
  action = argv[[1]]
  in_dir = argv[[2]]
  out_dir = argv[[3]]

  switch(action,
    'osh-parser' = ParserReport(in_dir, out_dir),
    'osh-runtime' = RuntimeReport(in_dir, out_dir),
    'vm-baseline' = VmBaselineReport(in_dir, out_dir),
    'ovm-build' = OvmBuildReport(in_dir, out_dir),
    'compute' = ComputeReport(in_dir, out_dir),
    'gc' = GcReport(in_dir, out_dir),
    'gc-cachegrind' = GcCachegrindReport(in_dir, out_dir),
    'mycpp' = MyCppReport(in_dir, out_dir),
    'uftrace' = {
      # Load everything into a fresh environment, then report on it
      d = new.env()
      LoadUftraceTsv(in_dir, d)
      UftraceReport(d, out_dir)
    },
    {
      # Unnamed arm: taken when no action name above matches
      Log("Invalid action '%s'", action)
      quit(status = 1)
    })

  Log('PID %d done', Sys.getpid())
}
1351
# Run main() only when this code is evaluated at the top level (no enclosing
# call frames), passing the arguments that follow the script name.
if (length(sys.frames()) == 0) {
  # increase ggplot font size globally
  #theme_set(theme_grey(base_size = 20))

  main(commandArgs(trailingOnly = TRUE))
}