1 | #!/usr/bin/env Rscript
|
2 | #
|
3 | # benchmarks/report.R -- Analyze data collected by shell scripts.
|
4 | #
|
5 | # Usage:
|
6 | # benchmarks/report.R OUT_DIR [TIMES_CSV...]
|
7 |
|
8 | # Suppress warnings about functions masked from 'package:stats' and 'package:base'
|
9 | # filter, lag
|
10 | # intersect, setdiff, setequal, union
|
11 | library(dplyr, warn.conflicts = FALSE)
|
12 | library(tidyr) # spread()
|
13 | library(stringr)
|
14 |
|
15 | source('benchmarks/common.R')
|
16 |
|
17 | options(stringsAsFactors = F)
|
18 |
|
19 | # For pretty printing
|
20 | commas = function(x) {
|
21 | format(x, big.mark=',')
|
22 | }
|
23 |
|
24 | sourceUrl = function(path) {
|
25 | sprintf('https://github.com/oilshell/oil/blob/master/%s', path)
|
26 | }
|
27 |
|
28 | # Takes a filename, not a path.
|
29 | sourceUrl2 = function(filename) {
|
30 | sprintf(
|
31 | 'https://github.com/oilshell/oil/blob/master/benchmarks/testdata/%s',
|
32 | filename)
|
33 | }
|
34 |
|
35 | mycppUrl = function(name) {
|
36 | sprintf('https://github.com/oilshell/oil/blob/master/mycpp/examples/%s.py', name)
|
37 | }
|
38 |
|
39 | genUrl = function(name) {
|
40 | sprintf('../../_gen/mycpp/examples/%s.mycpp.cc', name)
|
41 | }
|
42 |
|
43 |
|
44 | # TODO: Set up cgit because GitHub links are slow.
|
45 | benchmarkDataLink = function(subdir, name, suffix) {
|
46 | #sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
|
47 | sprintf('https://github.com/oilshell/benchmark-data/blob/master/%s/%s%s',
|
48 | subdir, name, suffix)
|
49 | }
|
50 |
|
51 | provenanceLink = function(subdir, name, suffix) {
|
52 | sprintf('../%s/%s%s', subdir, name, suffix)
|
53 | }
|
54 |
|
55 |
|
56 | GetOshLabel = function(shell_hash, prov_dir) {
|
57 | ### Given a shell hash (string), return a readable label (string).
|
58 |
|
59 | path = sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir, shell_hash)
|
60 |
|
61 | if (file.exists(path)) {
|
62 | Log('Reading %s', path)
|
63 | lines = readLines(path)
|
64 | if (length(grep('_bin/osh', lines)) > 0) {
|
65 | label = 'osh-ovm'
|
66 | } else if (length(grep('bin/osh', lines)) > 0) {
|
67 | label = 'osh-cpython'
|
68 | } else if (length(grep('_bin/.*/osh', lines)) > 0) {
|
69 | label = 'osh-native'
|
70 | } else {
|
71 | stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
|
72 | }
|
73 | } else {
|
74 | stop(sprintf("%s doesn't exist", path))
|
75 | }
|
76 | return(label)
|
77 | }
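
# Illustrative sketch of GetOshLabel() (hypothetical hash, not executed): it
# reads <prov_dir>/shell-id/osh-<hash>/sh-path.txt and greps the recorded path.
if (FALSE) {
  # A sh-path.txt containing '_bin/cxx-opt/osh' matches '_bin/.*/osh', so the
  # label is 'osh-native'; a plain 'bin/osh' would yield 'osh-cpython'.
  GetOshLabel('0a1b2c3', '_tmp')  # hypothetical shell hash
}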
|
78 |
|
79 | opt_suffix1 = '_bin/cxx-opt/osh'
|
80 | opt_suffix2 = '_bin/cxx-opt-sh/osh'
|
81 |
|
82 | ShellLabels = function(shell_name, shell_hash, num_hosts) {
|
83 | ### Given parallel vectors of shell names and hashes, return a vector of readable labels.
|
84 |
|
85 | # TODO: Clean up callers. Some metrics call this function with a
|
86 | # shell/runtime BASENAME, and others with a PATH
|
87 | # - e.g. ComputeReport calls this with runtime_name which is actually a PATH
|
88 |
|
89 | #Log('name %s', shell_name)
|
90 | #Log('hash %s', shell_hash)
|
91 |
|
92 | if (num_hosts == 1) {
|
93 | prov_dir = '_tmp'
|
94 | } else {
|
95 | prov_dir = '../benchmark-data/'
|
96 | }
|
97 |
|
98 | labels = c()
|
99 | for (i in 1:length(shell_name)) {
|
100 | sh = shell_name[i]
|
101 | if (sh == 'osh') {
|
102 | label = GetOshLabel(shell_hash[i], prov_dir)
|
103 |
|
104 | } else if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
|
105 | label = 'opt/osh'
|
106 |
|
107 | } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
|
108 | label = 'bumpleak/osh'
|
109 |
|
110 | } else {
|
111 | label = sh
|
112 | }
|
113 |
|
114 | Log('[%s] [%s]', shell_name[i], label)
|
115 | labels = c(labels, label)
|
116 | }
|
117 |
|
118 | return(labels)
|
119 | }
|
120 |
|
121 | # Simple version of the above, used by benchmarks/gc
|
122 | ShellLabelFromPath = function(sh_path) {
|
123 | labels = c()
|
124 | for (i in 1:length(sh_path)) {
|
125 | sh = sh_path[i]
|
126 |
|
127 | if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
|
128 | # the opt binary is osh-native
|
129 | label = 'osh-native'
|
130 |
|
131 | } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
|
132 | label = 'bumpleak/osh'
|
133 |
|
134 | } else if (endsWith(sh, '_bin/osh')) { # the app bundle
|
135 | label = 'osh-ovm'
|
136 |
|
137 | } else if (endsWith(sh, 'bin/osh')) {
|
138 | label = 'osh-cpython'
|
139 |
|
140 | } else {
|
141 | label = sh
|
142 | }
|
143 | labels = c(labels, label)
|
144 | }
|
145 | return(labels)
|
146 | }
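
# Small sketch of the suffix-to-label mapping above (example paths, not
# executed):
if (FALSE) {
  ShellLabelFromPath(c('_bin/cxx-opt/osh',  # -> 'osh-native'
                       '_bin/osh',          # -> 'osh-ovm' (app bundle)
                       'bin/osh',           # -> 'osh-cpython'
                       '/bin/bash'))        # -> '/bin/bash' (unchanged)
}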
|
147 |
|
148 | DistinctHosts = function(t) {
|
149 | t %>% distinct(host_name, host_hash) -> distinct_hosts
|
150 | # The label is just the name
|
151 | distinct_hosts$host_label = distinct_hosts$host_name
|
152 | return(distinct_hosts)
|
153 | }
|
154 |
|
155 | DistinctShells = function(t, num_hosts = -1) {
|
156 | t %>% distinct(shell_name, shell_hash) -> distinct_shells
|
157 |
|
158 | Log('')
|
159 | Log('Labeling shells')
|
160 |
|
161 | # Calculate it if not passed
|
162 | if (num_hosts == -1) {
|
163 | num_hosts = nrow(DistinctHosts(t))
|
164 | }
|
165 |
|
166 | distinct_shells$shell_label = ShellLabels(distinct_shells$shell_name,
|
167 | distinct_shells$shell_hash,
|
168 | num_hosts)
|
169 | return(distinct_shells)
|
170 | }
|
171 |
|
172 | ParserReport = function(in_dir, out_dir) {
|
173 | times = read.csv(file.path(in_dir, 'times.csv'))
|
174 | lines = read.csv(file.path(in_dir, 'lines.csv'))
|
175 | raw_data = read.csv(file.path(in_dir, 'raw-data.csv'))
|
176 |
|
177 | cachegrind = readTsv(file.path(in_dir, 'cachegrind.tsv'))
|
178 |
|
179 | # For joining by filename
|
180 | lines_by_filename = tibble(
|
181 | num_lines = lines$num_lines,
|
182 | filename = basename(lines$path)
|
183 | )
|
184 |
|
185 | # Remove failures
|
186 | times %>% filter(status == 0) %>% select(-c(status)) -> times
|
187 | cachegrind %>% filter(status == 0) %>% select(-c(status)) -> cachegrind
|
188 |
|
189 | # Add the number of lines, joining on path, and compute lines/ms
|
190 | times %>%
|
191 | left_join(lines, by = c('path')) %>%
|
192 | mutate(filename = basename(path), filename_HREF = sourceUrl(path),
|
193 | max_rss_MB = max_rss_KiB * 1024 / 1e6,
|
194 | elapsed_ms = elapsed_secs * 1000,
|
195 | user_ms = user_secs * 1000,
|
196 | sys_ms = sys_secs * 1000,
|
197 | lines_per_ms = num_lines / elapsed_ms) %>%
|
198 | select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs)) ->
|
199 | joined_times
|
200 |
|
201 | #print(head(times))
|
202 | #print(head(lines))
|
203 | #print(head(vm))
|
204 | #print(head(joined_times))
|
205 |
|
206 | print(summary(joined_times))
|
207 |
|
208 | #
|
209 | # Find distinct shells and hosts, and label them for readability.
|
210 | #
|
211 |
|
212 | distinct_hosts = DistinctHosts(joined_times)
|
213 | Log('')
|
214 | Log('Distinct hosts')
|
215 | print(distinct_hosts)
|
216 |
|
217 | distinct_shells = DistinctShells(joined_times)
|
218 | Log('')
|
219 | Log('Distinct shells')
|
220 | print(distinct_shells)
|
221 |
|
222 | # Replace name/hash combinations with labels.
|
223 | joined_times %>%
|
224 | left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
|
225 | left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
|
226 | select(-c(host_name, host_hash, shell_name, shell_hash)) ->
|
227 | joined_times
|
228 |
|
229 | # Like 'times', but do shell_label as one step
|
230 | # Hack: we know benchmarks/auto.sh runs this on one machine
|
231 | distinct_shells_2 = DistinctShells(cachegrind, num_hosts = nrow(distinct_hosts))
|
232 | cachegrind %>%
|
233 | left_join(lines, by = c('path')) %>%
|
234 | select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
|
235 | left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
|
236 | select(-c(shell_name, shell_hash)) %>%
|
237 | mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
|
238 | select(-c(path)) ->
|
239 | joined_cachegrind
|
240 |
|
241 | Log('summary(joined_times):')
|
242 | print(summary(joined_times))
|
243 | Log('head(joined_times):')
|
244 | print(head(joined_times))
|
245 |
|
246 | # Summarize rates by platform/shell
|
247 | joined_times %>%
|
248 | mutate(host_label = paste("host", host_label)) %>%
|
249 | group_by(host_label, shell_label) %>%
|
250 | summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
|
251 | mutate(lines_per_ms = total_lines / total_ms) %>%
|
252 | select(-c(total_ms)) %>%
|
253 | spread(key = host_label, value = lines_per_ms) ->
|
254 | times_summary
|
255 |
|
256 | # Sort by parsing rate on the fast machine
|
257 | if ("host lenny" %in% colnames(times_summary)) {
|
258 | times_summary %>% arrange(desc(`host lenny`)) -> times_summary
|
259 | } else {
|
260 | times_summary %>% arrange(desc(`host no-host`)) -> times_summary
|
261 | }
|
262 |
|
263 | Log('times_summary:')
|
264 | print(times_summary)
|
265 |
|
266 | # Summarize cachegrind by platform/shell
|
267 | # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
|
268 | joined_cachegrind %>%
|
269 | group_by(shell_label) %>%
|
270 | summarize(total_lines = sum(num_lines), total_irefs = sum(as.numeric(irefs))) %>%
|
271 | mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
|
272 | select(-c(total_irefs)) ->
|
273 | cachegrind_summary
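
# Why as.numeric() above: R integers are 32-bit, so summing raw cachegrind
# instruction counts can overflow to NA.  Minimal illustration (not executed):
if (FALSE) {
  sum(c(2100000000L, 2100000000L))              # NA, with an overflow warning
  sum(as.numeric(c(2100000000L, 2100000000L)))  # 4.2e+09
}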
|
274 |
|
275 | if ("no-host" %in% distinct_hosts$host_label) {
|
276 |
|
277 | # We don't have all the shells
|
278 | elapsed = NULL
|
279 | rate = NULL
|
280 | max_rss = NULL
|
281 | instructions = NULL
|
282 |
|
283 | joined_times %>%
|
284 | select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
|
285 | num_lines, filename, filename_HREF)) %>%
|
286 | arrange(filename, elapsed_ms) ->
|
287 | times_flat
|
288 |
|
289 | joined_cachegrind %>%
|
290 | select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
|
291 | arrange(filename, irefs) ->
|
292 | cachegrind_flat
|
293 |
|
294 | } else {
|
295 |
|
296 | times_flat = NULL
|
297 | cachegrind_flat = NULL
|
298 |
|
299 | # Elapsed milliseconds for each shell by platform and file
|
300 | joined_times %>%
|
301 | select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
|
302 | spread(key = shell_label, value = elapsed_ms) %>%
|
303 | arrange(host_label, num_lines) %>%
|
304 | mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
|
305 | select(c(host_label, bash, dash, mksh, zsh,
|
306 | `osh-ovm`, `osh-cpython`, `osh-native`,
|
307 | osh_to_bash_ratio, num_lines, filename, filename_HREF)) ->
|
308 | elapsed
|
309 |
|
310 | Log('\n')
|
311 | Log('ELAPSED')
|
312 | print(elapsed)
|
313 |
|
314 | # Rates by file and shell
|
315 | joined_times %>%
|
316 | select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
|
317 | spread(key = shell_label, value = lines_per_ms) %>%
|
318 | arrange(host_label, num_lines) %>%
|
319 | select(c(host_label, bash, dash, mksh, zsh,
|
320 | `osh-ovm`, `osh-cpython`, `osh-native`,
|
321 | num_lines, filename, filename_HREF)) ->
|
322 | rate
|
323 |
|
324 | Log('\n')
|
325 | Log('RATE')
|
326 | print(rate)
|
327 |
|
328 | # Memory usage by file
|
329 | joined_times %>%
|
330 | select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
|
331 | spread(key = shell_label, value = max_rss_MB) %>%
|
332 | arrange(host_label, num_lines) %>%
|
333 | select(c(host_label, bash, dash, mksh, zsh,
|
334 | `osh-ovm`, `osh-cpython`, `osh-native`,
|
335 | num_lines, filename, filename_HREF)) ->
|
336 | max_rss
|
337 |
|
338 | Log('\n')
|
339 | Log('MAX RSS')
|
340 | print(max_rss)
|
341 |
|
342 | Log('\n')
|
343 | Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
|
344 | print(joined_cachegrind)
|
345 | #print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh'))
|
346 |
|
347 | # Cachegrind instructions by file
|
348 | joined_cachegrind %>%
|
349 | mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
|
350 | select(-c(irefs)) %>%
|
351 | spread(key = shell_label, value = thousand_irefs_per_line) %>%
|
352 | arrange(num_lines) %>%
|
353 | select(c(bash, dash, mksh, `osh-native`,
|
354 | num_lines, filename, filename_HREF)) ->
|
355 | instructions
|
356 |
|
357 | Log('\n')
|
358 | Log('instructions has %d rows', nrow(instructions))
|
359 | print(instructions)
|
360 | }
|
361 |
|
362 | WriteProvenance(distinct_hosts, distinct_shells, out_dir)
|
363 |
|
364 | raw_data_table = tibble(
|
365 | filename = basename(as.character(raw_data$path)),
|
366 | filename_HREF = benchmarkDataLink('osh-parser', filename, '')
|
367 | )
|
368 | #print(raw_data_table)
|
369 |
|
370 | writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))
|
371 |
|
372 | precision = SamePrecision(0) # lines per ms
|
373 | writeCsv(times_summary, file.path(out_dir, 'summary'), precision)
|
374 |
|
375 | precision = ColumnPrecision(list(), default = 1)
|
376 | writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'), precision)
|
377 |
|
378 | if (!is.null(times_flat)) {
|
379 | precision = SamePrecision(0)
|
380 | writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
|
381 | }
|
382 |
|
383 | if (!is.null(cachegrind_flat)) {
|
384 | precision = SamePrecision(0)
|
385 | writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
|
386 | }
|
387 |
|
388 | if (!is.null(elapsed)) { # i.e. not the 'no-host' case
|
389 | # Round to nearest millisecond, but the ratio has a decimal point.
|
390 | precision = ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
|
391 | writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)
|
392 |
|
393 | precision = SamePrecision(0)
|
394 | writeCsv(rate, file.path(out_dir, 'rate'), precision)
|
395 |
|
396 | writeCsv(max_rss, file.path(out_dir, 'max_rss'))
|
397 |
|
398 | precision = SamePrecision(1)
|
399 | writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
|
400 | }
|
401 |
|
402 | Log('Wrote %s', out_dir)
|
403 | }
|
404 |
|
405 | WriteProvenance = function(distinct_hosts, distinct_shells, out_dir, tsv = F) {
|
406 |
|
407 | num_hosts = nrow(distinct_hosts)
|
408 | if (num_hosts == 1) {
|
409 | linkify = provenanceLink
|
410 | } else {
|
411 | linkify = benchmarkDataLink
|
412 | }
|
413 |
|
414 | Log('distinct_hosts')
|
415 | print(distinct_hosts)
|
416 | Log('')
|
417 |
|
418 | Log('distinct_shells')
|
419 | print(distinct_shells)
|
420 | Log('')
|
421 |
|
422 | # Should be:
|
423 | # host_id_url
|
424 | # And then csv_to_html will be smart enough? It should take a --url flag?
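
# Note: the FOO / FOO_HREF column pairing (as with filename / filename_HREF
# above) appears to be the convention the HTML table generator uses to turn
# FOO into a link; host_id_HREF and shell_id_HREF below follow it.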
|
425 | host_table = tibble(
|
426 | host_label = distinct_hosts$host_label,
|
427 | host_id = paste(distinct_hosts$host_name,
|
428 | distinct_hosts$host_hash, sep='-'),
|
429 | host_id_HREF = linkify('host-id', host_id, '/')
|
430 | )
|
431 | Log('host_table')
|
432 | print(host_table)
|
433 | Log('')
|
434 |
|
435 | shell_table = tibble(
|
436 | shell_label = distinct_shells$shell_label,
|
437 | shell_id = paste(distinct_shells$shell_name,
|
438 | distinct_shells$shell_hash, sep='-'),
|
439 | shell_id_HREF = linkify('shell-id', shell_id, '/')
|
440 | )
|
441 |
|
442 | Log('shell_table')
|
443 | print(shell_table)
|
444 | Log('')
|
445 |
|
446 | if (tsv) {
|
447 | writeTsv(host_table, file.path(out_dir, 'hosts'))
|
448 | writeTsv(shell_table, file.path(out_dir, 'shells'))
|
449 | } else {
|
450 | writeCsv(host_table, file.path(out_dir, 'hosts'))
|
451 | writeCsv(shell_table, file.path(out_dir, 'shells'))
|
452 | }
|
453 | }
|
454 |
|
455 | WriteSimpleProvenance = function(provenance, out_dir) {
|
456 | Log('provenance')
|
457 | print(provenance)
|
458 | Log('')
|
459 |
|
460 | # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
|
461 | # benchmarks/id.sh publish-shell-id uses
|
462 | provenance %>%
|
463 | mutate(shell_name = basename(sh_path)) %>%
|
464 | distinct(shell_label, shell_name, shell_hash) ->
|
465 | distinct_shells
|
466 |
|
467 | Log('distinct_shells')
|
468 | print(distinct_shells)
|
469 | Log('')
|
470 |
|
471 | provenance %>% distinct(host_label, host_name, host_hash) -> distinct_hosts
|
472 |
|
473 | WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
|
474 | }
|
475 |
|
476 | RuntimeReport = function(in_dir, out_dir) {
|
477 | times = readTsv(file.path(in_dir, 'times.tsv'))
|
478 |
|
479 | gc_stats = readTsv(file.path(in_dir, 'gc_stats.tsv'))
|
480 | provenance = readTsv(file.path(in_dir, 'provenance.tsv'))
|
481 |
|
482 | times %>% filter(status != 0) -> failed
|
483 | if (nrow(failed) != 0) {
|
484 | print(failed)
|
485 | stop('Some osh-runtime tasks failed')
|
486 | }
|
487 |
|
488 | # Joins:
|
489 | # times <= sh_path => provenance
|
490 | # times <= join_id, host_name => gc_stats
|
491 |
|
492 | # TODO: provenance may have rows from 2 machines. Could validate them and
|
493 | # deduplicate.
|
494 |
|
495 | # It should have (host_label, host_name, host_hash)
|
496 | # (shell_label, sh_path, shell_hash)
|
497 | provenance %>%
|
498 | mutate(host_label = host_name, shell_label = ShellLabelFromPath(sh_path)) ->
|
499 | provenance
|
500 |
|
501 | provenance %>% distinct(sh_path, shell_label) -> label_lookup
|
502 |
|
503 | Log('label_lookup')
|
504 | print(label_lookup)
|
505 |
|
506 | # Join with provenance for host label and shell label
|
507 | times %>%
|
508 | select(c(elapsed_secs, user_secs, sys_secs, max_rss_KiB, task_id,
|
509 | host_name, sh_path, workload)) %>%
|
510 | mutate(elapsed_ms = elapsed_secs * 1000,
|
511 | user_ms = user_secs * 1000,
|
512 | sys_ms = sys_secs * 1000,
|
513 | max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
|
514 | select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
|
515 | left_join(label_lookup, by = c('sh_path')) %>%
|
516 | select(-c(sh_path)) %>%
|
517 | # we want to compare workloads on adjacent rows
|
518 | arrange(workload) ->
|
519 | details
|
520 |
|
521 | times %>%
|
522 | select(c(task_id, host_name, sh_path, workload, minor_faults, major_faults, swaps, in_block, out_block, signals, voluntary_ctx, involuntary_ctx)) %>%
|
523 | left_join(label_lookup, by = c('sh_path')) %>%
|
524 | select(-c(sh_path)) %>%
|
525 | # we want to compare workloads on adjacent rows
|
526 | arrange(workload) ->
|
527 | details_io
|
528 |
|
529 | Log('details')
|
530 | print(details)
|
531 |
|
532 | # Elapsed time comparison
|
533 | details %>%
|
534 | select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
|
535 | spread(key = shell_label, value = elapsed_ms) %>%
|
536 | mutate(py_bash_ratio = `osh-cpython` / bash) %>%
|
537 | mutate(native_bash_ratio = `osh-native` / bash) %>%
|
538 | arrange(workload, host_name) %>%
|
539 | select(c(workload, host_name,
|
540 | bash, dash, `osh-cpython`, `osh-native`,
|
541 | py_bash_ratio, native_bash_ratio)) ->
|
542 | elapsed
|
543 |
|
544 |
|
545 | Log('elapsed')
|
546 | print(elapsed)
|
547 |
|
548 | # Minor Page Faults Comparison
|
549 | details_io %>%
|
550 | select(c(host_name, shell_label, workload, minor_faults)) %>%
|
551 | spread(key = shell_label, value = minor_faults) %>%
|
552 | mutate(py_bash_ratio = `osh-cpython` / bash) %>%
|
553 | mutate(native_bash_ratio = `osh-native` / bash) %>%
|
554 | arrange(workload, host_name) %>%
|
555 | select(c(workload, host_name,
|
556 | bash, dash, `osh-cpython`, `osh-native`,
|
557 | py_bash_ratio, native_bash_ratio)) ->
|
558 | page_faults
|
559 |
|
560 | Log('page_faults')
|
561 | print(page_faults)
|
562 |
|
563 | # Max RSS comparison
|
564 | details %>%
|
565 | select(c(host_name, shell_label, workload, max_rss_MB)) %>%
|
566 | spread(key = shell_label, value = max_rss_MB) %>%
|
567 | mutate(py_bash_ratio = `osh-cpython` / bash) %>%
|
568 | mutate(native_bash_ratio = `osh-native` / bash) %>%
|
569 | arrange(workload, host_name) %>%
|
570 | select(c(workload, host_name,
|
571 | bash, dash, `osh-cpython`, `osh-native`,
|
572 | py_bash_ratio, native_bash_ratio)) ->
|
573 | max_rss
|
574 |
|
575 | Log('max rss')
|
576 | print(max_rss)
|
577 |
|
578 | details %>%
|
579 | select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
|
580 | mutate(join_id = sprintf("gc-%d", task_id)) %>%
|
581 | select(-c(task_id)) ->
|
582 | gc_details
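
# join_id has the form "gc-<task_id>", which appears to be how rows in
# gc_stats.tsv point back to the timing row they were collected under.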
|
583 |
|
584 | Log('GC details')
|
585 | print(gc_details)
|
586 | Log('')
|
587 |
|
588 | Log('GC stats')
|
589 | print(gc_stats)
|
590 | Log('')
|
591 |
|
592 | gc_stats %>%
|
593 | left_join(gc_details, by = c('join_id', 'host_name')) %>%
|
594 | select(-c(join_id, roots_capacity, objs_capacity)) %>%
|
595 | # Do the same transformations as GcReport()
|
596 | mutate(allocated_MB = bytes_allocated / 1e6) %>%
|
597 | select(-c(bytes_allocated)) %>%
|
598 | rename(num_gc_done = num_collections) %>%
|
599 | # Put these columns first
|
600 | relocate(workload, host_name,
|
601 | elapsed_ms, max_gc_millis, total_gc_millis,
|
602 | allocated_MB, max_rss_MB, num_allocated) ->
|
603 | gc_stats
|
604 |
|
605 | Log('After GC stats')
|
606 | print(gc_stats)
|
607 | Log('')
|
608 |
|
609 | WriteSimpleProvenance(provenance, out_dir)
|
610 |
|
611 | # milliseconds don't need a decimal digit
|
612 | precision = ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
|
613 | `osh-native` = 0, py_bash_ratio = 2,
|
614 | native_bash_ratio = 2))
|
615 | writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
|
616 | writeTsv(page_faults, file.path(out_dir, 'page_faults'), precision)
|
617 |
|
618 | precision2 = ColumnPrecision(list(py_bash_ratio = 2, native_bash_ratio = 2))
|
619 | writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)
|
620 |
|
621 | precision3 = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
|
622 | default = 0)
|
623 | writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision3)
|
624 |
|
625 | writeTsv(details, file.path(out_dir, 'details'), precision3)
|
626 | writeTsv(details_io, file.path(out_dir, 'details_io'))
|
627 |
|
628 | Log('Wrote %s', out_dir)
|
629 | }
|
630 |
|
631 | VmBaselineReport = function(in_dir, out_dir) {
|
632 | vm = readTsv(file.path(in_dir, 'vm-baseline.tsv'))
|
633 | #print(vm)
|
634 |
|
635 | # Not using DistinctHosts() because the host_hash field isn't collected
|
636 | num_hosts = nrow(vm %>% distinct(host))
|
637 |
|
638 | vm %>%
|
639 | rename(kib = metric_value) %>%
|
640 | mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
|
641 | megabytes = kib * 1024 / 1e6) %>%
|
642 | select(-c(shell_name, kib)) %>%
|
643 | spread(key = c(metric_name), value = megabytes) %>%
|
644 | rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
|
645 | select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
|
646 | arrange(shell_label, shell_hash, host, VmPeak_MB) ->
|
647 | vm
|
648 |
|
649 | print(vm)
|
650 |
|
651 | writeTsv(vm, file.path(out_dir, 'vm-baseline'))
|
652 | }
|
653 |
|
654 | WriteOvmBuildDetails = function(distinct_hosts, distinct_compilers, out_dir) {
|
655 | host_table = tibble(
|
656 | host_label = distinct_hosts$host_label,
|
657 | host_id = paste(distinct_hosts$host_name,
|
658 | distinct_hosts$host_hash, sep='-'),
|
659 | host_id_HREF = benchmarkDataLink('host-id', host_id, '/')
|
660 | )
|
661 | print(host_table)
|
662 |
|
663 | dc = distinct_compilers
|
664 | compiler_table = tibble(
|
665 | compiler_label = dc$compiler_label,
|
666 | compiler_id = paste(dc$compiler_label, dc$compiler_hash, sep='-'),
|
667 | compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_id, '/')
|
668 | )
|
669 | print(compiler_table)
|
670 |
|
671 | writeTsv(host_table, file.path(out_dir, 'hosts'))
|
672 | writeTsv(compiler_table, file.path(out_dir, 'compilers'))
|
673 | }
|
674 |
|
675 | OvmBuildReport = function(in_dir, out_dir) {
|
676 | times = readTsv(file.path(in_dir, 'times.tsv'))
|
677 | bytecode_size = readTsv(file.path(in_dir, 'bytecode-size.tsv'))
|
678 | bin_sizes = readTsv(file.path(in_dir, 'bin-sizes.tsv'))
|
679 | native_sizes = readTsv(file.path(in_dir, 'native-sizes.tsv'))
|
680 | raw_data = readTsv(file.path(in_dir, 'raw-data.tsv'))
|
681 |
|
682 | times %>% filter(status != 0) -> failed
|
683 | if (nrow(failed) != 0) {
|
684 | print(failed)
|
685 | stop('Some ovm-build tasks failed')
|
686 | }
|
687 |
|
688 | times %>% distinct(host_name, host_hash) -> distinct_hosts
|
689 | distinct_hosts$host_label = distinct_hosts$host_name
|
690 |
|
691 | times %>% distinct(compiler_path, compiler_hash) -> distinct_compilers
|
692 | distinct_compilers$compiler_label = basename(distinct_compilers$compiler_path)
|
693 |
|
694 | #print(distinct_hosts)
|
695 | #print(distinct_compilers)
|
696 |
|
697 | WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)
|
698 |
|
699 | times %>%
|
700 | select(-c(status)) %>%
|
701 | left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
|
702 | left_join(distinct_compilers, by = c('compiler_path', 'compiler_hash')) %>%
|
703 | select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
|
704 | mutate(src_dir = basename(src_dir),
|
705 | host_label = paste("host ", host_label),
|
706 | is_conf = str_detect(action, 'configure'),
|
707 | is_ovm = str_detect(action, 'oil.ovm'),
|
708 | is_dbg = str_detect(action, 'dbg'),
|
709 | ) %>%
|
710 | select(host_label, src_dir, compiler_label, action, is_conf, is_ovm, is_dbg,
|
711 | elapsed_secs) %>%
|
712 | spread(key = c(host_label), value = elapsed_secs) %>%
|
713 | arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
|
714 | select(-c(is_conf, is_ovm, is_dbg)) ->
|
715 | times
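
# The is_conf / is_ovm / is_dbg flags above exist only to control row order
# (configure rows first, dbg before opt within a group); they are dropped
# right after arrange().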
|
716 |
|
717 | #print(times)
|
718 |
|
719 | bytecode_size %>%
|
720 | rename(bytecode_size = num_bytes) %>%
|
721 | select(-c(path)) ->
|
722 | bytecode_size
|
723 |
|
724 | bin_sizes %>%
|
725 | # reorder
|
726 | select(c(host_label, path, num_bytes)) %>%
|
727 | left_join(bytecode_size, by = c('host_label')) %>%
|
728 | mutate(native_code_size = num_bytes - bytecode_size) ->
|
729 | sizes
|
730 |
|
731 | # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
|
732 | native_sizes %>%
|
733 | select(c(host_label, path, num_bytes)) %>%
|
734 | mutate(host_label = paste("host ", host_label),
|
735 | binary = basename(path),
|
736 | compiler = basename(dirname(path)),
|
737 | ) %>%
|
738 | select(-c(path)) %>%
|
739 | spread(key = c(host_label), value = num_bytes) %>%
|
740 | arrange(compiler, binary) ->
|
741 | native_sizes
|
742 |
|
743 | # NOTE: These don't have the host and compiler.
|
744 | writeTsv(times, file.path(out_dir, 'times'))
|
745 | writeTsv(bytecode_size, file.path(out_dir, 'bytecode-size'))
|
746 | writeTsv(sizes, file.path(out_dir, 'sizes'))
|
747 | writeTsv(native_sizes, file.path(out_dir, 'native-sizes'))
|
748 |
|
749 | # TODO: I want a size report too
|
750 | #writeCsv(sizes, file.path(out_dir, 'sizes'))
|
751 | }
|
752 |
|
753 | unique_stdout_md5sum = function(t, num_expected) {
|
754 | u = n_distinct(t$stdout_md5sum)
|
755 | if (u != num_expected) {
|
756 | t %>% select(c(host_name, task_name, arg1, arg2, runtime_name, stdout_md5sum)) %>% print()
|
757 | stop(sprintf('Expected %d unique md5sums, got %d', num_expected, u))
|
758 | }
|
759 | }
|
760 |
|
761 | ComputeReport = function(in_dir, out_dir) {
|
762 | # TSV file, not CSV
|
763 | times = read.table(file.path(in_dir, 'times.tsv'), header=T)
|
764 | print(times)
|
765 |
|
766 | times %>% filter(status != 0) -> failed
|
767 | if (nrow(failed) != 0) {
|
768 | print(failed)
|
769 | stop('Some compute tasks failed')
|
770 | }
|
771 |
|
772 | #
|
773 | # Check correctness
|
774 | #
|
775 |
|
776 | times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
|
777 | times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
|
778 | times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
|
779 | # 3 different inputs
|
780 | times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)
|
781 |
|
782 | times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)
|
783 |
|
784 | # TODO:
|
785 | # - oils_cpp doesn't implement unicode LANG=C
|
786 | # - bash behaves differently on your desktop vs. in the container
|
787 | # - might need layer-locales in the image?
|
788 |
|
789 | #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
|
790 | # Ditto here
|
791 | #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)
|
792 |
|
793 | #
|
794 | # Find distinct shells and hosts, and label them for readability.
|
795 | #
|
796 |
|
797 | # Runtimes are called shells, as a hack for code reuse
|
798 | times %>%
|
799 | mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
|
800 | select(c(host_name, host_hash, shell_name, shell_hash)) ->
|
801 | tmp
|
802 |
|
803 | distinct_hosts = DistinctHosts(tmp)
|
804 | Log('')
|
805 | Log('Distinct hosts')
|
806 | print(distinct_hosts)
|
807 |
|
808 | distinct_shells = DistinctShells(tmp)
|
809 | Log('')
|
810 | Log('Distinct runtimes')
|
811 | print(distinct_shells)
|
812 |
|
813 | num_hosts = nrow(distinct_hosts)
|
814 |
|
815 | times %>%
|
816 | select(-c(status, stdout_md5sum, stdout_filename, host_hash, runtime_hash)) %>%
|
817 | mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
|
818 | elapsed_ms = elapsed_secs * 1000,
|
819 | user_ms = user_secs * 1000,
|
820 | sys_ms = sys_secs * 1000,
|
821 | max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
|
822 | select(-c(runtime_name, elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
|
823 | arrange(host_name, task_name, arg1, arg2, user_ms) ->
|
824 | details
|
825 |
|
826 | times %>%
|
827 | mutate(
|
828 | runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
|
829 | stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
|
830 | select(c(host_name, task_name, arg1, arg2, runtime_label,
|
831 | stdout_md5sum, stdout_md5sum_HREF)) ->
|
832 | stdout_files
|
833 |
|
834 | details %>% filter(task_name == 'hello') %>% select(-c(task_name)) -> hello
|
835 | details %>% filter(task_name == 'fib') %>% select(-c(task_name)) -> fib
|
836 | details %>% filter(task_name == 'word_freq') %>% select(-c(task_name)) -> word_freq
|
837 | # There's no arg2
|
838 | details %>% filter(task_name == 'parse_help') %>% select(-c(task_name, arg2)) -> parse_help
|
839 |
|
840 | details %>% filter(task_name == 'bubble_sort') %>% select(-c(task_name)) -> bubble_sort
|
841 | details %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% select(-c(task_name)) -> palindrome
|
842 |
|
843 | precision = ColumnPrecision(list(max_rss_MB = 1), default = 0)
|
844 | writeTsv(details, file.path(out_dir, 'details'), precision)
|
845 |
|
846 | writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)
|
847 |
|
848 | writeTsv(hello, file.path(out_dir, 'hello'), precision)
|
849 | writeTsv(fib, file.path(out_dir, 'fib'), precision)
|
850 | writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
|
851 | writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)
|
852 |
|
853 | writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
|
854 | writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)
|
855 |
|
856 | WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
|
857 | }
|
858 |
|
859 | WriteOneTask = function(times, out_dir, task_name, precision) {
|
860 | times %>%
|
861 | filter(task == task_name) %>%
|
862 | select(-c(task)) -> subset
|
863 |
|
864 | writeTsv(subset, file.path(out_dir, task_name), precision)
|
865 | }
|
866 |
|
867 | SHELL_ORDER = c('dash',
|
868 | 'bash',
|
869 | 'zsh',
|
870 | '_bin/cxx-opt+bumpleak/osh',
|
871 | '_bin/cxx-opt+bumproot/osh',
|
872 | '_bin/cxx-opt+bumpsmall/osh',
|
873 | '_bin/cxx-opt/osh',
|
874 | '_bin/cxx-opt+nopool/osh')
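
# SHELL_ORDER is used below as factor levels, so arrange() sorts rows in this
# fixed order rather than alphabetically.  Small sketch (not executed):
if (FALSE) {
  tibble(sh_path = c('_bin/cxx-opt/osh', 'bash', 'dash')) %>%
    arrange(factor(sh_path, levels = SHELL_ORDER))  # dash, bash, then the osh binary
}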
|
875 |
|
876 | GcReport = function(in_dir, out_dir) {
|
877 | times = read.table(file.path(in_dir, 'raw/times.tsv'), header=T)
|
878 | gc_stats = read.table(file.path(in_dir, 'stage1/gc_stats.tsv'), header=T)
|
879 |
|
880 | times %>% filter(status != 0) -> failed
|
881 | if (nrow(failed) != 0) {
|
882 | print(failed)
|
883 | stop('Some gc tasks failed')
|
884 | }
|
885 |
|
886 | # Change units and order columns
|
887 | times %>%
|
888 | arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
|
889 | mutate(elapsed_ms = elapsed_secs * 1000,
|
890 | user_ms = user_secs * 1000,
|
891 | sys_ms = sys_secs * 1000,
|
892 | max_rss_MB = max_rss_KiB * 1024 / 1e6,
|
893 | shell_label = ShellLabelFromPath(sh_path)
|
894 | ) %>%
|
895 | select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB, shell_label,
|
896 | shell_runtime_opts)) ->
|
897 | times
|
898 |
|
899 | # Join and order columns
|
900 | gc_stats %>% left_join(times, by = c('join_id')) %>%
|
901 | arrange(desc(task)) %>%
|
902 | mutate(allocated_MB = bytes_allocated / 1e6) %>%
|
903 | # try to make the table skinnier
|
904 | rename(num_gc_done = num_collections) %>%
|
905 | select(task, elapsed_ms, max_gc_millis, total_gc_millis,
|
906 | allocated_MB, max_rss_MB, num_allocated,
|
907 | num_gc_points, num_gc_done, gc_threshold, num_growths, max_survived,
|
908 | shell_label) ->
|
909 | gc_stats
|
910 |
|
911 | times %>% select(-c(join_id)) -> times
|
912 |
|
913 |
|
914 | precision = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
|
915 | default = 0)
|
916 |
|
917 | writeTsv(times, file.path(out_dir, 'times'), precision)
|
918 | writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)
|
919 |
|
920 | tasks = c('parse.configure-coreutils',
|
921 | 'parse.configure-cpython',
|
922 | 'parse.abuild',
|
923 | 'ex.compute-fib',
|
924 | 'ex.bashcomp-parse-help',
|
925 | 'ex.abuild-print-help')
|
926 | # Write out a separate file for each task
|
927 | for (task in tasks) {
|
928 | WriteOneTask(times, out_dir, task, precision)
|
929 | }
|
930 | }
|
931 |
|
932 | GcCachegrindReport = function(in_dir, out_dir) {
|
933 | times = readTsv(file.path(in_dir, 'raw/times.tsv'))
|
934 | counts = readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))
|
935 |
|
936 | times %>% filter(status != 0) -> failed
|
937 | if (nrow(failed) != 0) {
|
938 | print(failed)
|
939 | stop('Some gc tasks failed')
|
940 | }
|
941 |
|
942 | print(times)
|
943 | print(counts)
|
944 |
|
945 | counts %>% left_join(times, by = c('join_id')) %>%
|
946 | mutate(million_irefs = irefs / 1e6) %>%
|
947 | select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
|
948 | arrange(factor(sh_path, levels = SHELL_ORDER)) ->
|
949 | counts
|
950 |
|
951 | precision = NULL
|
952 | tasks = c('parse.abuild', 'ex.compute-fib')
|
953 | for (task in tasks) {
|
954 | WriteOneTask(counts, out_dir, task, precision)
|
955 | }
|
956 | }
|
957 |
|
958 | MyCppReport = function(in_dir, out_dir) {
|
959 | times = readTsv(file.path(in_dir, 'benchmark-table.tsv'))
|
960 | print(times)
|
961 |
|
962 | times %>% filter(status != 0) -> failed
|
963 | if (nrow(failed) != 0) {
|
964 | print(failed)
|
965 | stop('Some mycpp tasks failed')
|
966 | }
|
967 |
|
968 | # Drop columns we don't need: status, elapsed_secs, bin, task_out
|
969 | times %>% select(-c(status, elapsed_secs, bin, task_out)) %>%
|
970 | mutate(example_name_HREF = mycppUrl(example_name),
|
971 | gen = c('gen'),
|
972 | gen_HREF = genUrl(example_name),
|
973 | user_ms = user_secs * 1000,
|
974 | sys_ms = sys_secs * 1000,
|
975 | max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
|
976 | select(-c(user_secs, sys_secs, max_rss_KiB)) ->
|
977 | details
|
978 |
|
979 | details %>% select(-c(sys_ms, max_rss_MB)) %>%
|
980 | spread(key = impl, value = user_ms) %>%
|
981 | mutate(`C++ : Python` = `C++` / Python) %>%
|
982 | arrange(`C++ : Python`) ->
|
983 | user_time
|
984 |
|
985 | details %>% select(-c(user_ms, max_rss_MB)) %>%
|
986 | spread(key = impl, value = sys_ms) %>%
|
987 | mutate(`C++ : Python` = `C++` / Python) %>%
|
988 | arrange(`C++ : Python`) ->
|
989 | sys_time
|
990 |
|
991 | details %>% select(-c(user_ms, sys_ms)) %>%
|
992 | spread(key = impl, value = max_rss_MB) %>%
|
993 | mutate(`C++ : Python` = `C++` / Python) %>%
|
994 | arrange(`C++ : Python`) ->
|
995 | max_rss
|
996 |
|
997 | # Sometimes it speeds up by more than 10x
|
998 | precision1 = ColumnPrecision(list(`C++ : Python` = 3), default = 0)
|
999 | writeTsv(user_time, file.path(out_dir, 'user_time'), precision1)
|
1000 | writeTsv(sys_time, file.path(out_dir, 'sys_time'), precision1)
|
1001 |
|
1002 | precision2 = ColumnPrecision(list(`C++ : Python` = 2), default = 1)
|
1003 | writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)
|
1004 |
|
1005 | writeTsv(details, file.path(out_dir, 'details'))
|
1006 | }
|
1007 |
|
1008 | UftraceTaskReport = function(env, task_name, summaries) {
|
1009 | # Need this again after redirect
|
1010 | MaybeDisableColor(stdout())
|
1011 |
|
1012 | task_env = env[[task_name]]
|
1013 |
|
1014 | untyped = task_env$untyped
|
1015 | typed = task_env$typed
|
1016 | strings = task_env$strings
|
1017 | slabs = task_env$slabs
|
1018 | reserve = task_env$reserve
|
1019 |
|
1020 | string_overhead = 17 # GC header (8) + len (4) + hash value (4) + NUL (1)
|
1021 | strings %>% mutate(obj_len = str_len + string_overhead) -> strings
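# e.g. a 5-character string is counted as obj_len = 5 + 17 = 22 bytes.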
|
1022 |
|
1023 | # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
|
1024 | # big/small
|
1025 | #
|
1026 | # And then zoom in on distributions as well
|
1027 |
|
1028 | num_allocs = nrow(untyped)
|
1029 | total_bytes = sum(untyped$obj_len)
|
1030 |
|
1031 | untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
|
1032 | #print(untyped_hist)
|
1033 |
|
1034 | untyped_hist %>%
|
1035 | mutate(n_less_than = cumsum(n),
|
1036 | percent = n_less_than * 100.0 / num_allocs) ->
|
1037 | alloc_sizes
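
# n_less_than is a running total over ascending obj_len, so 'percent' reads as
# "% of all allocations of this size or smaller" (a cumulative distribution).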
|
1038 |
|
1039 | a24 = untyped_hist %>% filter(obj_len <= 24)
|
1040 | a48 = untyped_hist %>% filter(obj_len <= 48)
|
1041 | a96 = untyped_hist %>% filter(obj_len <= 96)
|
1042 |
|
1043 | allocs_24_bytes_or_less = sum(a24$n) * 100.0 / num_allocs
|
1044 | allocs_48_bytes_or_less = sum(a48$n) * 100.0 / num_allocs
|
1045 | allocs_96_bytes_or_less = sum(a96$n) * 100.0 / num_allocs
|
1046 |
|
1047 | Log('Percentage of allocs less than 48 bytes: %.1f', allocs_48_bytes_or_less)
|
1048 |
|
1049 | options(tibble.print_min=25)
|
1050 |
|
1051 | Log('')
|
1052 | Log('All allocations')
|
1053 | print(alloc_sizes %>% head(22))
|
1054 | print(alloc_sizes %>% tail(5))
|
1055 |
|
1056 | Log('')
|
1057 | Log('Common Sizes')
|
1058 | print(untyped_hist %>% arrange(desc(n)) %>% head(8))
|
1059 |
|
1060 | Log('')
|
1061 | Log(' %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
|
1062 | Log('')
|
1063 |
|
1064 | Log('Typed allocations')
|
1065 |
|
1066 | num_typed = nrow(typed)
|
1067 |
|
1068 | typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
|
1069 | mutate(percent = n * 100.0 / num_typed) %>%
|
1070 | arrange(desc(n)) -> most_common_types
|
1071 |
|
1072 | print(most_common_types %>% head(20))
|
1073 | print(most_common_types %>% tail(5))
|
1074 |
|
1075 | lists = typed %>% filter(str_starts(func_name, ('List<')))
|
1076 | #print(lists)
|
1077 |
|
1078 | num_lists = nrow(lists)
|
1079 | total_list_bytes = num_lists * 24 # sizeof List<T> head is hard-coded
|
1080 |
|
1081 | Log('')
|
1082 | Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
|
1083 | Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
|
1084 | Log('')
|
1085 |
|
1086 | #
|
1087 | # Strings
|
1088 | #
|
1089 |
|
1090 | num_strings = nrow(strings)
|
1091 | total_string_bytes = sum(strings$obj_len)
|
1092 |
|
1093 | strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
|
1094 | mutate(n_less_than = cumsum(n),
|
1095 | percent = n_less_than * 100.0 / num_strings) ->
|
1096 | string_lengths
|
1097 |
|
1098 | strs_6_bytes_or_less = string_lengths %>% filter(str_len == 6) %>% select(percent)
|
1099 | strs_14_bytes_or_less = string_lengths %>% filter(str_len == 14) %>% select(percent)
|
1100 |
|
1101 | # Parse workload
|
1102 | # 62% of strings <= 6 bytes
|
1103 | # 84% of strings <= 14 bytes
|
1104 |
|
1105 | Log('Str - NewStr() and OverAllocatedStr()')
|
1106 | print(string_lengths %>% head(16))
|
1107 | print(string_lengths %>% tail(5))
|
1108 | Log('')
|
1109 |
|
1110 | Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
|
1111 | commas(sum(strings$str_len)), commas(total_string_bytes))
|
1112 | Log('')
|
1113 | Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
|
1114 | Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
|
1115 | Log('')
|
1116 |
|
1117 | #
|
1118 | # Slabs
|
1119 | #
|
1120 |
|
1121 | Log('NewSlab()')
|
1122 |
|
1123 | num_slabs = nrow(slabs)
|
1124 | slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
|
1125 | mutate(n_less_than = cumsum(n),
|
1126 | percent = n_less_than * 100.0 / num_slabs) ->
|
1127 | slab_lengths
|
1128 |
|
1129 | slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
|
1130 | arrange(desc(n)) -> slab_types
|
1131 |
|
1132 | Log(' Lengths')
|
1133 | print(slab_lengths %>% head())
|
1134 | print(slab_lengths %>% tail(5))
|
1135 | Log('')
|
1136 |
|
1137 | Log(' Slab Types')
|
1138 | print(slab_types %>% head())
|
1139 | print(slab_types %>% tail(5))
|
1140 | Log('')
|
1141 |
|
1142 | total_slab_items = sum(slabs$slab_len)
|
1143 |
|
1144 | Log('%s slabs, total items = %s', commas(num_slabs),
|
1145 | commas(sum(slabs$slab_len)))
|
1146 | Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
|
1147 | Log('')
|
1148 |
|
1149 | #
|
1150 | # reserve() calls
|
1151 | #
|
1152 |
|
1153 | # There should be strictly more List::reserve() calls than NewSlab
|
1154 |
|
1155 | Log('::reserve(int n)')
|
1156 | Log('')
|
1157 |
|
1158 | num_reserve = nrow(reserve)
|
1159 | reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
|
1160 | mutate(n_less_than = cumsum(n),
|
1161 | percent = n_less_than * 100.0 / num_reserve) ->
|
1162 | reserve_args
|
1163 |
|
1164 | Log(' Num Items')
|
1165 | print(reserve_args %>% head(15))
|
1166 | print(reserve_args %>% tail(5))
|
1167 | Log('')
|
1168 |
|
1169 | Log('%s reserve() calls, total items = %s', commas(num_reserve),
|
1170 | commas(sum(reserve$num_items)))
|
1171 | Log('')
|
1172 |
|
1173 | # Accounting for all allocations!
|
1174 | Log('Untyped: %s', commas(num_allocs))
|
1175 | Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
|
1176 | Log('')
|
1177 |
|
1178 | num_other_typed = num_typed - num_lists
|
1179 |
|
1180 | # Summary table
|
1181 | stats = tibble(task = task_name,
|
1182 | total_bytes_ = commas(total_bytes),
|
1183 | num_allocs_ = commas(num_allocs),
|
1184 | sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
|
1185 | num_reserve_calls = commas(num_reserve),
|
1186 |
|
1187 | percent_list_allocs = Percent(num_lists, num_allocs),
|
1188 | percent_slab_allocs = Percent(num_slabs, num_allocs),
|
1189 | percent_string_allocs = Percent(num_strings, num_allocs),
|
1190 | percent_other_typed_allocs = Percent(num_other_typed, num_allocs),
|
1191 |
|
1192 | percent_list_bytes = Percent(total_list_bytes, total_bytes),
|
1193 | percent_string_bytes = Percent(total_string_bytes, total_bytes),
|
1194 |
|
1195 | allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
|
1196 | allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
|
1197 | allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),
|
1198 |
|
1199 | strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
|
1200 | strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less),
|
1201 | )
|
1202 | summaries$stats[[task_name]] = stats
|
1203 |
|
1204 | summaries$most_common_types[[task_name]] = most_common_types
|
1205 | }
|
1206 |
|
1207 | LoadUftraceTsv = function(in_dir, env) {
|
1208 | for (task in list.files(in_dir)) {
|
1209 | Log('Loading data for task %s', task)
|
1210 | base_dir = file.path(in_dir, task)
|
1211 |
|
1212 | task_env = new.env()
|
1213 | env[[task]] = task_env
|
1214 |
|
1215 | # TSV file, not CSV
|
1216 | task_env$untyped = readTsv(file.path(base_dir, 'all-untyped.tsv'))
|
1217 | task_env$typed = readTsv(file.path(base_dir, 'typed.tsv'))
|
1218 | task_env$strings = readTsv(file.path(base_dir, 'strings.tsv'))
|
1219 | task_env$slabs = readTsv(file.path(base_dir, 'slabs.tsv'))
|
1220 | task_env$reserve = readTsv(file.path(base_dir, 'reserve.tsv'))
|
1221 |
|
1222 | # median string length is 4, mean is 9.5!
|
1223 | Log('UNTYPED')
|
1224 | print(summary(task_env$untyped))
|
1225 | Log('')
|
1226 |
|
1227 | Log('TYPED')
|
1228 | print(summary(task_env$typed))
|
1229 | Log('')
|
1230 |
|
1231 | Log('STRINGS')
|
1232 | print(summary(task_env$strings))
|
1233 | Log('')
|
1234 |
|
1235 | Log('SLABS')
|
1236 | print(summary(task_env$slabs))
|
1237 | Log('')
|
1238 |
|
1239 | Log('RESERVE')
|
1240 | print(summary(task_env$reserve))
|
1241 | Log('')
|
1242 | }
|
1243 | }
|
1244 |
|
1245 | Percent = function(n, total) {
|
1246 | sprintf('%.1f%%', n * 100.0 / total)
|
1247 | }
|
1248 |
|
1249 | PrettyPrintLong = function(d) {
|
1250 | tr = t(d) # transpose
|
1251 |
|
1252 | row_names = rownames(tr)
|
1253 |
|
1254 | for (i in 1:nrow(tr)) {
|
1255 | row_name = row_names[i]
|
1256 | cat(sprintf('%26s', row_name)) # calculated min width manually
|
1257 | cat(sprintf('%20s', tr[i,]))
|
1258 | cat('\n')
|
1259 |
|
1260 | # Extra spacing
|
1261 | if (row_name %in% c('num_reserve_calls',
|
1262 | 'percent_string_bytes',
|
1263 | 'percent_other_typed_allocs',
|
1264 | 'allocs_96_bytes_or_less')) {
|
1265 | cat('\n')
|
1266 | }
|
1267 | }
|
1268 | }
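
# Sketch of PrettyPrintLong() input/output shape (made-up values, not
# executed): a one-row-per-task tibble prints as one line per metric, with
# tasks as columns.
if (FALSE) {
  PrettyPrintLong(tibble(task = c('parse.abuild', 'ex.compute-fib'),
                         num_allocs_ = c('1,000', '2,000')))
}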
|
1269 |
|
1270 |
|
1271 | UftraceReport = function(env, out_dir) {
|
1272 | # summaries$stats should be a list of 1-row data frames
|
1273 | # summaries$most_common_types should be a list of data frames
|
1274 | summaries = new.env()
|
1275 |
|
1276 | for (task_name in names(env)) {
|
1277 | report_out = file.path(out_dir, paste0(task_name, '.txt'))
|
1278 |
|
1279 | Log('Making report for task %s -> %s', task_name, report_out)
|
1280 |
|
1281 | sink(file = report_out)
|
1282 | UftraceTaskReport(env, task_name, summaries)
|
1283 | sink() # reset
|
1284 | }
|
1285 | Log('')
|
1286 |
|
1287 | # Concatenate all the data frames added to summaries
|
1288 | stats = bind_rows(as.list(summaries$stats))
|
1289 |
|
1290 | sink(file = file.path(out_dir, 'summary.txt'))
|
1291 | #print(stats)
|
1292 | #Log('')
|
1293 |
|
1294 | PrettyPrintLong(stats)
|
1295 | Log('')
|
1296 |
|
1297 | mct = summaries$most_common_types
|
1298 | for (task_name in names(mct)) {
|
1299 | Log('Common types in workload %s', task_name)
|
1300 | Log('')
|
1301 |
|
1302 | print(mct[[task_name]] %>% head(5))
|
1303 | Log('')
|
1304 | }
|
1305 | sink()
|
1306 |
|
1307 | # For the REPL
|
1308 | return(list(stats = stats))
|
1309 | }
|
1310 |
|
1311 | main = function(argv) {
|
1312 | action = argv[[1]]
|
1313 | in_dir = argv[[2]]
|
1314 | out_dir = argv[[3]]
|
1315 |
|
1316 | if (action == 'osh-parser') {
|
1317 | ParserReport(in_dir, out_dir)
|
1318 |
|
1319 | } else if (action == 'osh-runtime') {
|
1320 | RuntimeReport(in_dir, out_dir)
|
1321 |
|
1322 | } else if (action == 'vm-baseline') {
|
1323 | VmBaselineReport(in_dir, out_dir)
|
1324 |
|
1325 | } else if (action == 'ovm-build') {
|
1326 | OvmBuildReport(in_dir, out_dir)
|
1327 |
|
1328 | } else if (action == 'compute') {
|
1329 | ComputeReport(in_dir, out_dir)
|
1330 |
|
1331 | } else if (action == 'gc') {
|
1332 | GcReport(in_dir, out_dir)
|
1333 |
|
1334 | } else if (action == 'gc-cachegrind') {
|
1335 | GcCachegrindReport(in_dir, out_dir)
|
1336 |
|
1337 | } else if (action == 'mycpp') {
|
1338 | MyCppReport(in_dir, out_dir)
|
1339 |
|
1340 | } else if (action == 'uftrace') {
|
1341 | d = new.env()
|
1342 | LoadUftraceTsv(in_dir, d)
|
1343 | UftraceReport(d, out_dir)
|
1344 |
|
1345 | } else {
|
1346 | Log("Invalid action '%s'", action)
|
1347 | quit(status = 1)
|
1348 | }
|
1349 | Log('PID %d done', Sys.getpid())
|
1350 | }
|
1351 |
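# Run main() only when executed as a script (sys.frames() is empty), not when
# this file is source()'d from another frame, e.g. an interactive session.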
|
1352 | if (length(sys.frames()) == 0) {
|
1353 | # increase ggplot font size globally
|
1354 | #theme_set(theme_grey(base_size = 20))
|
1355 |
|
1356 | main(commandArgs(TRUE))
|
1357 | }
|