OILS / metrics / native-code.sh View on Github | oilshell.org

193 lines, 88 significant
1#!/usr/bin/env bash
2#
3# Usage:
4# metrics/native-code.sh <function name>
5
# Strict mode: error on unset variables, fail a pipeline when any stage fails,
# and exit on any unhandled non-zero status.
set -o nounset -o pipefail -o errexit

# Environment setup: puts bloaty in $PATH and sets R_LIBS_USER.
source build/dev-shell.sh

# Directories that receive the reports written by collect-and-report.
readonly OVM_BASE_DIR=_tmp/metrics/ovm
readonly OIL_BASE_DIR=_tmp/metrics/oils-for-unix
14
pylibc-symbols() {
  ### Run the 'symbols' report on the libc.so extension in _devbuild/py-ext

  local ext=_devbuild/py-ext/x86_64/libc.so
  symbols "$ext"
}
18
fastlex-symbols() {
  ### Run the 'symbols' report on the fastlex.so extension in _devbuild/py-ext

  local ext=_devbuild/py-ext/x86_64/fastlex.so
  symbols "$ext"
}
22
print-symbols() {
  ### Print a human-readable size report for one object file or binary
  #
  # Arguments: $1 - path of the file to inspect
  # Outputs:   the 'ls -l' listing, the bloaty summary, the bloaty per-symbol
  #            breakdown and the 'nm' symbol list, each followed by a blank
  #            line.

  local obj=$1

  # "$obj" is quoted in every call so that a path containing spaces or glob
  # characters reaches each tool as a single argument.
  ls -l "$obj"
  echo

  # Summary
  bloaty "$obj"
  echo

  # Top symbols
  # fastlex_MatchToken is 21.2 KiB.  That doesn't seem too large compared to
  # the 14K line output?
  bloaty -d symbols "$obj"
  echo

  nm "$obj"
  echo
}
41
42# Big functions:
43# - PyEval_EvalFrameEx (38 KiB)
44# - fastlex_MatchOSHToken (22.5 KiB)
45# - convertitem() in args.py (9.04 KiB)
46# - PyString_Format() in args.py (6.84 KiB)
47#
48# Easy removals:
49# - marshal_dumps and marshal_dump! We never use those.
50# - Remove all docstrings!!! Like sys_doc.
51
compileunits() {
  ### Print bloaty's per-compile-unit size breakdown as TSV
  #
  # Arguments: $1 - binary to analyze (default: _build/oil/ovm-dbg)
  # Outputs:   bloaty's TSV report on stdout
  #
  # NOTE(review): the comment that used to open this function trailed off
  # ("there doesn't seem to be a way to do this without").  Presumably it
  # meant that this breakdown needs a binary with debug info, which would
  # explain the -dbg default -- confirm.
  local file=${1:-_build/oil/ovm-dbg}

  #local file=_build/oil/ovm-opt
  #local sym=_build/oil/ovm-opt.symbols

  # -n 0: no row limit.  "$file" is quoted so a path with spaces stays one
  # argument.
  bloaty --tsv -n 0 -d compileunits "$file"
}
61
symbols() {
  ### Print bloaty's per-symbol size breakdown as TSV
  #
  # Arguments: $1 - binary to analyze (default: _build/oil/ovm-opt)
  # Outputs:   bloaty's TSV report on stdout
  #
  # NOTE: This is different than the release binary!
  # ovm-opt.stripped doesn't show a report.
  local file=${1:-_build/oil/ovm-opt}

  # Full output
  # 3,588 lines!
  # "$file" is quoted so a path with spaces stays one argument.
  bloaty --tsv -n 0 -d symbols "$file"
}
71
R-report() {
  ### Run the R analysis script, forwarding every argument unchanged

  local script=metrics/native-code.R
  "$script" "$@"
}
75
build-ovm() {
  ### Run ./configure, then build the ovm-dbg and ovm-opt binaries with make

  # 2022-12: hack for ./configure, because line_input failed to compile without
  # HAVE_READLINE  See _build/oil/module_init.c
  # TODO: This metric should either be DELETED, or automated in the CI, so it
  # doesn't break
  ./configure

  local -a binaries=(_build/oil/ovm-dbg _build/oil/ovm-opt)
  make "${binaries[@]}"
}
86
collect-and-report() {
  ### Write the size reports for a dbg/opt binary pair into one directory
  #
  # Arguments: $1 - output directory (created if missing)
  #            $2 - debug binary, used for the compile-unit breakdown
  #            $3 - optimized binary, used for the symbol reports
  # Outputs:   symbols.txt, symbols.tsv, compileunits.tsv and overview.txt
  #            under $1; the heads of the TSV files and the overview are also
  #            echoed to stdout.
  #
  # Every expansion is quoted so that directories or binaries whose paths
  # contain spaces are handled as single words.
  local base_dir=$1
  local dbg=$2
  local opt=$3

  mkdir -p "$base_dir"

  print-symbols "$opt" > "$base_dir/symbols.txt"

  symbols "$opt" > "$base_dir/symbols.tsv"

  # Really 'translation units', but bloaty gives it that name.
  compileunits "$dbg" > "$base_dir/compileunits.tsv"

  head "$base_dir/symbols.tsv" "$base_dir/compileunits.tsv"

  # Hack for now: probe for a working Rscript before running the R report
  if Rscript -e 'print("hi from R")'; then
    R-report metrics "$base_dir" "$dbg" "$opt" | tee "$base_dir/overview.txt"
  else
    echo 'R not detected' | tee "$base_dir/overview.txt"
  fi
}
110
oils-for-unix() {
  ### Report on the ones we just built
  #
  # Builds the cxx-dbg and cxx-opt oils-for-unix binaries with ninja, writes
  # their reports under $OIL_BASE_DIR, then lists that directory.

  # TODO: could compare GCC and Clang once we have R on the CI images
  local -a targets=(_bin/cxx-{dbg,opt}/oils-for-unix)
  ninja "${targets[@]}"

  # $OIL_BASE_DIR is quoted, like the array expansion, so it is always passed
  # as exactly one argument.
  collect-and-report "$OIL_BASE_DIR" "${targets[@]}"

  ls -l "$OIL_BASE_DIR"
}
122
compare-gcc-clang() {
  ### Run by Soil 'cpp-coverage' task, because it has clang
  #
  # Builds the binaries listed below with ninja and saves their 'ls -l'
  # listing to _tmp/metrics/compare-gcc-clang.txt (also echoed to stdout).

  local out_dir=_tmp/metrics
  local -a targets=()

  # oils-for-unix: debug builds, then stripped optimized builds
  targets+=( _bin/{clang,cxx}-dbg/oils-for-unix )
  targets+=( _bin/{clang,cxx}-opt/oils-for-unix.stripped )
  targets+=( _bin/cxx-{opt+bumpleak,opt+bumproot}/oils-for-unix.stripped )

  # yaks: stripped optimized builds
  targets+=( _bin/{clang,cxx}-opt/yaks/yaks_main.mycpp.stripped )
  targets+=( _bin/cxx-{opt+bumpleak,opt+bumproot}/yaks/yaks_main.mycpp.stripped )

  ninja "${targets[@]}"

  mkdir -p "$out_dir"
  ls -l --sort=none "${targets[@]}" | tee "$out_dir/compare-gcc-clang.txt"
}
138
139readonly OIL_VERSION=$(head -n 1 oil-version.txt)
140
run-for-release() {
  ### Build the OVM binaries and write both sets of size reports
  #
  # Reports for _build/oil/ovm-{dbg,opt} go under $OVM_BASE_DIR.  Reports for
  # the oils-for-unix binaries found in the benchmark-data tree for
  # $OIL_VERSION go under $OIL_BASE_DIR.
  build-ovm

  local dbg=_build/oil/ovm-dbg
  local opt=_build/oil/ovm-opt

  collect-and-report "$OVM_BASE_DIR" "$dbg" "$opt"

  # TODO: consolidate with benchmarks/common.sh, OSH_CPP_BENCHMARK_DATA
  # For some reason _bin/cxx-opt/ and _bin/cxx-opt-sh can differ by a few bytes
  local bin_dir="../benchmark-data/src/oils-for-unix-$OIL_VERSION"

  # $bin_dir is derived from file content, so it is quoted.  Brace expansion
  # still applies to the unquoted suffix and yields the dbg and opt paths.
  collect-and-report "$OIL_BASE_DIR" \
    "$bin_dir"/_bin/cxx-{dbg,opt}-sh/oils-for-unix
}
154
dupe-strings() {
  ### Report the most frequently repeated NUL-terminated strings in a file
  #
  # Arguments: $1 - path of the file to scan.  All arguments are forwarded to
  #            the inline Python script, which only reads the first one.
  # Outputs:   one line per repeated string: the count, then the repr() of
  #            the string, most frequent first.
  #
  # How it works: the file contents are split on NUL bytes.  Pieces longer
  # than one character that consist of printable ASCII (space through '~')
  # are counted.  Only the 50 most common entries are considered, and output
  # stops at the first entry that occurs just once.

  python2 -c '
import collections
import re
import sys

with open(sys.argv[1]) as f:
 contents = f.read()
strs = re.split("\\0", contents)

printable = re.compile("[ -~]+$")

d = collections.Counter()
for s in strs:
 if len(s) > 1 and printable.match(s):
 d[s] += 1

for s, count in d.most_common()[:50]:
 if count == 1:
 break
 print("%5d %r" % (count, s))

' "$@"
}
181
182# Results:
183# Found StrFromC() and len() duplication
184
oil-dupe-strings() {
  ### Build the stripped cxx-opt binary, then run dupe-strings on it

  local stripped=_bin/cxx-opt/oils-for-unix.stripped
  #local stripped=_bin/clang-opt/oils-for-unix.stripped

  ninja "$stripped"
  dupe-strings "$stripped"
}
192
193"$@"