OILS / metrics / source-code.sh View on Github | oilshell.org

585 lines, 315 significant
1#!/usr/bin/env bash
2#
3# Count lines of code in various ways.
4#
5# Usage:
6# metrics/source-code.sh <function name>
7
# Strict mode: fail on unset variables, on any failing pipeline stage, and on
# any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the directory above this script's directory.
# tsv-lib.sh uses this.
# Both expansions are quoted so a checkout path containing whitespace or glob
# characters is not word-split before it reaches dirname / cd.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
readonly REPO_ROOT
14
15source test/common.sh
16source test/tsv-lib.sh
17
# Filter for a list of paths on stdin: drops package __init__ files, code
# generators, unit tests and Ninja subgraph definitions, and passes everything
# else through to stdout.
filter-py() {
  local excluded='__init__.py$|_gen.py|_test.py|_tests.py|NINJA_subgraph.py$'

  grep -E -v "$excluded"
}
21
# ASDL schema files under frontend/ and core/.  The globs are expanded once,
# relative to the current directory, at the moment this line runs.
OSH_ASDL=( {frontend,core}/*.asdl )
readonly -a OSH_ASDL
23
# OSH and common
#
# Prints, one path per line, the sources counted as "OSH": Python, C
# extension, .pyi and ASDL files.
osh-files() {
  # Exclude:
  # - line_input.c because I didn't write it.  It still should be minimized.
  # - code generators
  # - test library

  local -a candidates=(
    bin/oils_for_unix.py {osh,core,frontend}/*.py builtin/*_osh.py
    pyext/*.c */*.pyi
    "${OSH_ASDL[@]}"
  )

  ls "${candidates[@]}" \
    | filter-py \
    | grep -E -v 'posixmodule.c$|line_input.c$|_gen.py$|test_lib.py$|os.pyi$'
}
36
# cloc doesn't understand ASDL files.
# Use a wc-like format, filtering out blank lines and comments.
#
# Args:
#   "$@": paths of the files to count
# Output:
#   One "<count> <path>" line per file, followed by a "<count> total" line.
#   A line counts when it is non-empty after stripping whitespace and does not
#   start with "#".
asdl-cloc() {
  # print(...) with a single argument behaves the same under Python 2 and
  # Python 3; the print *statement* is a SyntaxError whenever `python` resolves
  # to Python 3.
  python -c '
import sys

total = 0
for path in sys.argv[1:]:
  num_lines = 0
  with open(path) as f:
    for line in f:
      line = line.strip()
      if not line or line.startswith("#"):
        continue
      num_lines += 1

  print("%5d %s" % (num_lines, path))
  total += num_lines

print("%5d %s" % (total, "total"))
' "$@"
}
59
# Print cloc reports for each group of sources, plus the ASDL schema counts
# produced by asdl-cloc.
#
# Args:
#   "$@": extra arguments passed through to every cloc invocation
cloc-report() {
  echo '(non-blank non-comment lines)'
  echo

  # Each entry is "<file-listing function>:<heading>".
  local entry
  for entry in \
    'osh-files:OSH' \
    'ysh-files:YSH' \
    'data-lang-files:Data Languages' \
    'tools-files:Tools'
  do
    echo "${entry#*:}"
    echo
    "${entry%%:*}" | xargs cloc --quiet "$@"
    echo
    echo
  done

  echo 'ASDL SCHEMAS (non-blank non-comment lines)'
  asdl-cloc "${OSH_ASDL[@]}" data_lang/*.asdl
  echo
  echo

  # NOTE: --csv option could be parsed into HTML.
  # Or just sum with asdl-cloc!

  echo 'Hand-Written C++ code (non-blank non-comment lines)'
  echo
  {
    cpp-binding-files
    mycpp-runtime-files
  } | xargs cloc --quiet "$@"
}
100
# Build the preprocessed C++ text files with Ninja, copy them under
# _tmp/metrics/preprocessed together with a small index.html, and print the
# first 100 lines of each.
preprocessed() {
  ./NINJA-config.sh

  # Clang has slightly fewer lines, but it's not on the CI machine
  #local -a targets=(_build/preprocessed/{cxx,clang}-{dbg,opt}.txt)

  local -a targets=(_build/preprocessed/cxx-{dbg,opt}.txt)
  ninja "${targets[@]}"

  # Publish with release and show and CI

  local out_dir=_tmp/metrics/preprocessed
  mkdir -p "$out_dir"
  cp -v "${targets[@]}" "$out_dir"

  # One link per published file.
  local base
  for base in cxx-dbg cxx-opt; do
    printf '<a href="%s.txt">%s.txt</a> <br/>\n' "$base" "$base"
  done > "$out_dir/index.html"

  head -n 100 "$out_dir"/*.txt
}
124
125#
126# Two variants of the $count function: text and html
127#
128
# Text variant of the $count function.
#
# Args:
#   $1: heading printed before the counts
#   $2: comment; required because callers pass the same arguments to every
#       $count variant, but never printed here
# Stdin:  whitespace-separated file names
# Output: the heading, `wc -l` results sorted numerically, then a blank line
category-text() {
  local heading=$1
  local unused_comment=$2

  echo "$heading"
  # omit comment

  # stdin is the files
  xargs wc -l | sort -n
  echo
}
140
# This is overly clever ...
#
# With lastpipe the final stage of a pipeline runs in the current shell (when
# job control is off), so the SECTION_ID increment made inside category-html,
# which callers always place at the end of a pipeline, survives the call.
shopt -s lastpipe
SECTION_ID=0  # mutable global

# HTML variant of the $count function.
#
# Stdin: whitespace-separated file names
# Args:  "$@" is forwarded to metrics/line_counts.py after the new section id
category-html() {
  # TODO: Don't use wc -l, and just count and sum the lines yourself

  SECTION_ID=$(( SECTION_ID + 1 ))
  xargs wc -l | metrics/line_counts.py "$SECTION_ID" "$@"
}
150
151#
152# Functions That Count
153#
154
155# Note this style is OVERLY ABSTRACT, but it's hard to do better in shell. We
156# want to parameterize over text and HTML. In Oils I think we would use this:
157#
158# proc p1 {
159# category 'OSH (and common libraries)' {
160# comment = 'This is the input'
161# osh-files | read --lines :files
162# }
163# }
164#
165# This produces a series of dicts that looks like
166# { name: 'OSH ...', comment: "This ...", files: %(one two three) }
167#
168# Then we iterate over the categories and produce text or HTML.
169
# Feed the OSH file list to a $count function.
#
# Args:
#   $1: name of the $count function to run
#   remaining: extra arguments appended after the heading and description
osh-counts() {
  local emit=$1
  shift

  local heading='OSH (and common libraries)'
  local blurb='This is the input to the translators, written in statically-typed Python. Note that bash is at least 140K lines of code, and OSH implements a large part of bash and more.'

  osh-files | "$emit" "$heading" "$blurb" "$@"
}
179
# Prints the YSH sources (Python and pgen2 grammar files), one per line.
ysh-files() {
  local -a candidates=(
    ysh/*.{py,pgen2}
    builtin/{func,method}*.py
    builtin/*_ysh.py
  )

  ls "${candidates[@]}" | filter-py
}
183
# Feed the YSH file list to a $count function.
#
# Args:
#   $1: name of the $count function to run
#   remaining: extra arguments appended after the heading and description
ysh-counts() {
  local emit=$1
  shift

  ysh-files \
    | "$emit" 'YSH' 'Expression grammar, parser, evaluator, etc.' "$@"
}
191
# Prints the data-language sources under data_lang/, one per line: ASDL
# schemas, then Python files (after filter-py), then C sources and headers
# that are not tests.
data-lang-files() {
  ls data_lang/*.asdl
  ls data_lang/*.py | filter-py
  # grep -E rather than the obsolescent egrep, matching the rest of this file.
  ls data_lang/*.{c,h} | grep -E -v '_test'  # exclude j8_test_lib as well
}
197
# Feed the data-language file list to a $count function.
#
# Args:
#   $1: name of the $count function to run
#   remaining: extra arguments appended after the heading and description
data-lang-counts() {
  local emit=$1
  shift

  data-lang-files \
    | "$emit" 'Data Languages' 'JSON, J8 Notation, ...' "$@"
}
205
# Prints the Python sources under tools/ that survive filter-py, one per line.
tools-files() {
  local -a candidates=( tools/*.py )

  ls "${candidates[@]}" | filter-py
}
209
# Feed the tools file list to a $count function, with an empty description.
#
# Args:
#   $1: name of the $count function to run
#   remaining: extra arguments appended after the heading and description
tools-counts() {
  local emit=$1
  shift

  tools-files | "$emit" 'Tools' '' "$@"
}
217
# Prints the C++ sources and headers under cpp/, one per line, leaving out
# unit tests.
cpp-binding-files() {
  # grep -E rather than the obsolescent egrep, matching the rest of this file.
  ls cpp/*.{cc,h} | grep -E -v '_test.cc'
}
221
# Prints the C++ sources and headers under mycpp/, one per line, leaving out
# unit tests and anything whose path contains bump_leak_heap.
mycpp-runtime-files() {
  # grep -E rather than the obsolescent egrep, matching the rest of this file.
  ls mycpp/*.{cc,h} | grep -E -v '_test.cc|bump_leak_heap'
}
225
# Run a $count function over four groups: hand-written C++, the runtime,
# C++ unit tests, and the Ninja build scripts.
#
# Args:
#   $1: name of the $count function to run
#   remaining: extra arguments appended after each heading and description
cpp-counts() {
  local emit=$1
  shift

  local bindings_blurb='Includes OS bindings. Small C++ files like cpp/osh_arith_parse.{cc,h} correspond to larger Python files like osh/arith_parse.py.'
  cpp-binding-files \
    | "$emit" 'Hand-written C++ Code' "$bindings_blurb" "$@"

  # Remove code that isn't "in production"
  mycpp-runtime-files \
    | "$emit" 'Garbage-Collected Runtime' \
        'Uses a fork-friendly Mark-Sweep collector.' "$@"

  local tests_blurb='The goal is to make the spec tests pass, but unit tests are helpful too.'
  ls mycpp/*_test.cc cpp/*_test.cc \
    | "$emit" 'Unit tests in C++' "$tests_blurb" "$@"

  ls NINJA*.sh */NINJA*.py build/ninja*.{sh,py} \
    | "$emit" 'Incremental C++ Build' '' "$@"
}
249
# Run a $count function over the generated C++ files under _gen/.
#
# Args:
#   $1: name of the $count function to run
#   remaining: extra arguments appended after the heading and description
gen-cpp-counts() {
  local emit=$1
  shift

  local blurb='mycpp generates the big file _gen/bin/oils-for-unix.mycpp.cc. Other programs like Zephyr ASDL and re2c generate other files.'

  # NOTE: this excludes .re2c.h file
  ls _gen/*/*.{cc,h} | "$emit" 'Generated C++ Code' "$blurb" "$@"
}
260
# Run a $count function over the mycpp translator sources and over its
# example programs.
#
# Args:
#   $1: name of the $count function to run
#   remaining: extra arguments appended after each heading and description
mycpp-counts() {
  local emit=$1
  shift

  local translator_blurb="This prototype uses the MyPy frontend to translate statically-typed Python to C++. The generated code calls a small runtime which implements things like List[T], Dict[K, V], and Python's len()."
  ls mycpp/*.py \
    | grep -v 'NINJA_subgraph.py' \
    | filter-py \
    | "$emit" 'mycpp Translator' "$translator_blurb" "$@"

  ls mycpp/examples/*.py \
    | "$emit" 'mycpp Test Data' \
        'Small Python examples that translate to C++, compile, and run.' "$@"
}
275
# Run a $count function over the code generators: ASDL, pgen2, the *_gen.py
# scripts, and Yaks.
#
# Args:
#   $1: name of the $count function to run
#   remaining: extra arguments appended after each heading and description
code-generator-counts() {
  local emit=$1
  shift

  local asdl_blurb='A DSL for algebraic data types, borrowed from Python. Oils is the most strongly typed Bourne shell implementation!'
  ls asdl/*.py \
    | filter-py \
    | grep -v -E 'arith_|tdop|_demo' \
    | "$emit" 'Zephyr ASDL' "$asdl_blurb" "$@"

  local pgen2_blurb='An LL(1) parser generator used to parse YSH expressions. Also borrowed from CPython.'
  ls pgen2/*.py \
    | filter-py \
    | "$emit" 'pgen2 Parser Generator' "$pgen2_blurb" "$@"

  local other_blurb='In order to make Oils statically typed, we had to abandon Python reflection and use C++ source code generation instead. The lexer, flag definitions, and constants can be easily compiled to C++.'
  ls */*_gen.py \
    | "$emit" 'Other Code Generators' "$other_blurb" "$@"

  ls yaks/*.py \
    | filter-py \
    | "$emit" 'Yaks' 'Experimental replacement for mycpp' "$@"
}
300
# Run a $count function over the spec tests and the gold tests.
#
# Args:
#   $1: name of the $count function to run
#   remaining: extra arguments appended after each heading and description
spec-gold-counts() {
  local emit=$1
  shift

  local spec_blurb='A comprehensive test suite that compares OSH against other shells. If OSH passes these tests in BOTH Python and C++, it means that the translation works.'
  ls spec/*.test.sh | "$emit" 'Spec Tests' "$spec_blurb" "$@"

  local gold_blurb='Another suite that tests shells "from the outside". Instead of making explicit assertions, we verify that OSH behaves like bash.'
  ls test/gold/*.sh | "$emit" 'Gold Tests' "$gold_blurb" "$@"
}
315
316#
317# Top Level Summaries
318#
319
# Run every section of the "for translation" report, in order, with the same
# $count function.
#
# Args:
#   $1: name of the $count function to run
#   remaining: extra arguments forwarded to every section
_for-translation() {
  local count=$1
  shift

  local section
  for section in \
    mycpp-counts \
    code-generator-counts \
    cpp-counts \
    osh-counts \
    ysh-counts \
    data-lang-counts \
    tools-counts \
    spec-gold-counts \
    gen-cpp-counts
  do
    "$section" $count "$@"
  done
}
342
# Run every section of the "overview" report, in order, with the same $count
# function.
#
# Args:
#   $1: name of the $count function to run; it receives file names on stdin
#       and a heading plus description as its first two arguments
#   remaining: extra arguments forwarded to every section
_overview() {
  local count=$1
  shift

  osh-counts "$count" "$@"

  ysh-counts "$count" "$@"

  data-lang-counts "$count" "$@"

  tools-counts "$count" "$@"

  ls stdlib/*.ysh | "$count" \
    "YSH stdlib" '' "$@"

  ls pylib/*.py | filter-py | "$count" \
    "Code Borrowed from Python's stdlib" '' "$@"

  spec-gold-counts "$count" "$@"

  test/unit.sh py2-tests | "$count" \
    'Python Unit Tests' '' "$@"

  ls test/*.{sh,py,R} | filter-py | grep -v jsontemplate.py | "$count" \
    'Other Shell Tests' '' "$@"

  ls */TEST.sh | "$count" \
    'Test Automation' '' "$@"

  mycpp-counts "$count" "$@"

  code-generator-counts "$count" "$@"

  cpp-counts "$count" "$@"

  # Leaving off gen-cpp-counts since that requires a C++ build

  # grep -E rather than the obsolescent egrep, matching the rest of this file.
  ls build/*.{mk,sh,py,c} Makefile configure install \
    | filter-py | grep -E -v 'NINJA|TEST' | "$count" \
    'Build Automation' '' "$@"

  ls devtools/release*.sh | "$count" \
    'Release Automation' '' "$@"

  ls soil/*.{sh,py} | "$count" \
    'Soil: Multi-cloud CI with containers' '' "$@"

  ls benchmarks/*.{sh,py,R} | "$count" \
    'Benchmarks' '' "$@"

  ls metrics/*.{sh,R} | "$count" \
    'Metrics' '' "$@"

  ls _devbuild/gen/*.py | "$count" \
    'Generated Python Code' \
    'For the Python App Bundle.' \
    "$@"

  ls {doctools,lazylex}/*.py doctools/*.{h,cc} | filter-py | "$count" \
    'Doc Tools' '' "$@"

  ls web/*.js web/*/*.{js,py} | "$count" \
    'Web' '' "$@"
}
407
# Plain-text version of the "for translation" report.
for-translation() {
  local renderer=category-text

  _for-translation "$renderer"
}
411
# Plain-text version of the "overview" report.
overview() {
  local renderer=category-text

  _overview "$renderer"
}
415
# A $count function that only lists its input: each whitespace-separated name
# read from stdin is echoed on its own line.  Arguments are ignored.
print-files() {
  xargs -n 1 -- echo
}
419
# List every file that the "overview" report covers, one per line.
overview-list() {
  local renderer=print-files

  _overview "$renderer"
}
423
424#
425# HTML Versions
426#
427
# Run doctools/html_head.py with PYTHONPATH set to the current directory,
# forwarding all arguments.
html-head() {
  env PYTHONPATH=. doctools/html_head.py "$@"
}
431
# Call html-head for a metrics page.
#
# Args:
#   $1: page title
metrics-html-head() {
  local title="$1"

  local web='../../../web'
  local -a stylesheets=(
    "$web/base.css"
    "$web/table/table-sort.css"
    "$web/line-counts.css"
  )

  html-head --title "$title" "${stylesheets[@]}"
}
439
# Write one complete HTML report to stdout.
#
# Args:
#   $1: report name; the function "_<name>" is run with category-html
#   $2: page title
#
# Intermediate files live in _tmp/metrics/line-counts/<name>, which is
# recreated on every run.
counts-html() {
  local name=$1
  local title=$2

  local work_dir=_tmp/metrics/line-counts/$name

  # Progress chatter goes to stderr, keeping stdout for the page itself.
  rm -r -f -v "$work_dir" >&2
  mkdir -v -p "$work_dir" >&2

  tsv-row category category_HREF total_lines num_files > "$work_dir/INDEX.tsv"

  # Schema for INDEX.tsv: one "<column>\t<type>" row per line.
  printf '%s\t%s\n' \
    column_name type \
    category string \
    category_HREF string \
    total_lines integer \
    num_files integer \
    > "$work_dir/INDEX.schema.tsv"

  # Generate the HTML
  # NOTE(review): presumably category-html leaves the per-section *.html files
  # and INDEX.tsv rows in $work_dir -- confirm in metrics/line_counts.py.
  "_$name" category-html "$work_dir"

  metrics-html-head "$title"
  echo ' <body class="width40">'

  echo "<h1>$title</h1>"

  tsv2html "$work_dir/INDEX.tsv"

  echo '<hr/>'

  echo '<h2>Related Documents</h2>
 <p>The <a href="https://www.oilshell.org/release/latest/doc/README.html">README for oilshell/oil</a>
 has another overview of the repository.
 </p>'

  # All the parts
  cat "$work_dir"/*.html

  echo ' </body>'
  echo '</html>'
}
480
# HTML version of the "for translation" report, written to stdout.
for-translation-html() {
  counts-html for-translation 'Overview: Translating Oils to C++'
}
485
# HTML version of the "overview" report, written to stdout.
overview-html() {
  counts-html overview 'Overview of Oils Code'
}
490
# Write both HTML reports plus an index.html linking to them, then list the
# output directory.
#
# Args:
#   $1: output directory (default: _tmp/metrics/line-counts)
write-reports() {
  local out_dir=${1:-_tmp/metrics/line-counts}
  local -a reports=( for-translation overview )
  local report

  mkdir -v -p "$out_dir"

  # Each report <name> is produced by the function <name>-html.
  for report in "${reports[@]}"; do
    "$report-html" > "$out_dir/$report.html"
  done

  for report in "${reports[@]}"; do
    printf '<a href="%s.html">%s</a> <br/>\n' "$report" "$report"
  done > "$out_dir/index.html"

  ls -l "$out_dir"
}
507
508#
509# Misc
510#
511
# count instructions, for fun
#
# Disassembles _build/oil/ovm-opt.stripped and prints a histogram of the
# lowercase word at the start of the third tab-separated objdump field.
instructions() {
  # http://pepijndevos.nl/2016/08/24/x86-instruction-distribution.html

  local binary=_build/oil/ovm-opt.stripped

  objdump -d "$binary" \
    | cut -f3 \
    | grep -oE "^[a-z]+" \
    | hist
}
519
# Histogram of the lines on stdin: "<count> <line>" rows, least frequent
# first.
hist() {
  sort \
    | uniq -c \
    | sort -n
}
523
# Histogram of lines that start with "import" across the listed files.
#
# NOTE(review): oil-osh-files is not defined in this file; confirm that a
# sourced library provides it, or whether osh-files was intended.
stdlib-imports() {
  oil-osh-files \
    | xargs grep --no-filename '^import' \
    | hist
}
527
# Histogram of lines containing the whole word "import" across the listed
# files.
#
# NOTE(review): oil-osh-files is not defined in this file; confirm that a
# sourced library provides it, or whether osh-files was intended.
imports() {
  oil-osh-files \
    | xargs grep --no-filename -w import \
    | hist
}
531
# Print "path:line:text" for every line containing the whole word "import"
# whose line number is greater than 100.
#
# NOTE(review): oil-osh-files is not defined in this file; confirm that a
# sourced library provides it, or whether osh-files was intended.
imports-not-at-top() {
  oil-osh-files \
    | xargs grep -n -w import \
    | awk -F : '$2 > 100'
}
535
# For the compiler, see what's at the top level.
#
# Prints "path:line" for every unindented line in {core,osh}/*.py that starts
# with a letter, skipping unit tests and lines that begin with the import,
# from, class or def keywords.
top-level() {
  # The keywords are grouped and anchored right after grep's "path:" prefix.
  # The earlier ungrouped ':import|from|class|def' matched "from", "class" or
  # "def" anywhere in the line, so statements such as `undefined_x = 1` or
  # `classes = []` were dropped too.
  grep '^[a-zA-Z]' {core,osh}/*.py \
    | grep -v '_test.py' \
    | grep -E -v '^[^:]+:(import|from|class|def)[[:space:]]'
}
542
# Run a program with CALLGRAPH=1 and save its stdout to
# <out_dir>/<name>-symbols.txt, echoing it as it goes.
#
# Args:
#   $1: command to run
#   $2: name used as the output file prefix
#   $3: output directory (created if missing)
_python-symbols() {
  local main=$1
  local name=$2
  local out_dir=$3

  mkdir -p "$out_dir"
  local report=$out_dir/$name-symbols.txt

  # To debug what version we're running
  /usr/bin/env python2 -V
  echo

  # Run this from the repository root.
  # $main stays unquoted, exactly as before, so it is still word-split.
  PYTHONPATH='.:vendor/' CALLGRAPH=1 $main | tee "$report"

  wc -l "$report"
  echo
  echo "Wrote $report"
}
562
# Collect symbols for bin/oil.py.
#
# Args:
#   $1: output directory (default: _tmp/opy-test)
oil-python-symbols() {
  local dest=${1:-_tmp/opy-test}

  _python-symbols bin/oil.py oil "$dest"
}
567
# Print the lines of oil-python-symbols output that contain no "<".
old-style-classes() {
  oil-python-symbols \
    | grep -v '<'
}
571
# Some of these are "abstract classes" like ChildStateChange
#
# Prints every line mentioning NotImplementedError in Python files one
# directory below the current one.
NotImplementedError() {
  local needle=NotImplementedError

  grep "$needle" */*.py
}
576
# Print the lines in the OSH sources that import one of the native extension
# modules: fanos, libc, line_input, posix_ or yajl.
py-ext() {
  # for the py-source build
  # 35 imports
  # grep -E rather than the obsolescent egrep, matching the rest of this file.
  osh-files | xargs -- grep -E 'import (fanos|libc|line_input|posix_|yajl)'
}
582
# Run the function named by the arguments, but only when this file is executed
# as source-code.sh rather than sourced by another script.
# ${0##*/} is a quoted, in-shell basename: the previous unquoted
# $(basename $0) inside `test` broke when $0 contained whitespace.
if [[ "${0##*/}" == 'source-code.sh' ]]; then
  "$@"
fi