OILS / doctools / ref_check.py View on Github | oilshell.org

184 lines, 120 significant
#!/usr/bin/env python2
"""
ref_check.py: Check links between the doc/ref TOC pages (toc-*.md) and the
reference chapters (chap-*.html).
"""
5from __future__ import print_function
6
7import collections
8import json
9from pprint import pprint
10import sys
11
12from doctools.util import log
13
14
def PrintTree(node, f, indent=0):
  """Print a DocNode tree (as built in make_help.py), one node per line.

  Args:
    node: Tree node exposing .name, .attrs (a possibly empty sequence of
      (key, value) pairs) and .children (a sequence of nodes).
    f: Writable file object that receives the output.
    indent: Nesting depth of `node`; each level adds one leading space.
  """
  if node.attrs:
    # Unpack each pair explicitly so that list pairs format as well as tuples
    # ('%s=%s' % pair only works when pair is a tuple).
    a_str = '(%s)' % ', '.join('%s=%s' % (k, v) for k, v in node.attrs)
  else:
    a_str = ''

  print('%s%s %s' % (indent * ' ', node.name, a_str), file=f)
  for ch in node.children:
    PrintTree(ch, f, indent + 1)
28
29
def Check(all_toc_nodes, chap_tree):
  """Cross-check the reference TOC against the chapter tree and log a report.

  Args:
    all_toc_nodes: Structure of doc/ref/toc-*.md.  A list of dicts, each with
      a 'toc' name and a list of 'boxes'.  Every box has a 'to_chap' name and
      a list of 'lines'; every line has an 'impl' flag and 'topics', a list of
      (topic, topic_impl) pairs.
    chap_tree: Structure of chap-*.html.  A node whose .children are chapters;
      chapters contain sections and sections contain topics.  Nodes expose
      .name and .children; topics also expose .attrs (a list of (key, value)
      pairs) and .text.

  Returns:
    None.  Everything is reported through log().

  Raises:
    AssertionError: if the 'j8-escape' topic is missing from the TOC topics
      or from the chapter topics (sanity check on the inputs).
  """
  all_topics = []

  link_from = set()  # (chapter filename, topic) pairs the TOC points at
  # (chapter name, topic name) pairs found in the chapters.
  # NOTE(review): comparing against link_from presumes chap.name has the
  # 'chap-*.html' form -- confirm against the code that builds chap_tree.
  link_to = set()

  toc_topic_check = collections.defaultdict(list)  # topic -> list of TOCs

  #
  # Walk the TOC metadata
  #

  topics_not_impl = 0
  sections_not_impl = 0

  log('TOC:')
  log('')
  for toc_node in all_toc_nodes:
    toc = toc_node['toc']
    log(' %s', toc)
    for box_node in toc_node['boxes']:
      to_chap = box_node['to_chap']
      log(' %s', to_chap)
      # Same for every topic in this box, so compute it once.
      chap_filename = 'chap-%s.html' % to_chap

      for line_info in box_node['lines']:
        section_impl = line_info['impl']
        if not section_impl:
          sections_not_impl += 1

        topics = line_info['topics']
        for topic, topic_impl in topics:
          toc_topic_check[topic].append(toc)
          link_from.add((chap_filename, topic))

          # A topic counts as unimplemented if either it or its whole
          # section is marked that way.
          if not topic_impl or not section_impl:
            topics_not_impl += 1

        all_topics.extend(topics)

  log('')

  log('TOC stats:')
  log(' Topics: %d', len(all_topics))
  log(' Unique topics: %d', len(set(all_topics)))
  log(' Topic Sections not implemented (X): %d', sections_not_impl)
  log(' All topics not implemented: %d', topics_not_impl)
  log('')

  # Debugging aid: flip to 1 to dump the chapter tree.
  if 0:
    PrintTree(chap_tree, sys.stdout)

  num_sections = 0
  num_topics = 0
  num_topics_written = 0

  #
  # Walk the Chapter Tree
  #

  chap_topics = collections.defaultdict(list)  # topic_id -> list of chapters
  short_topics = []

  min_words = 5  # arbitrary

  for chap in chap_tree.children:

    for section in chap.children:
      num_sections += 1

      for topic in section.children:
        num_topics += 1

        # Prefer an explicit, unambiguous id attribute; otherwise fall back
        # to the node name.
        values = [v for k, v in topic.attrs if k == 'id']
        if len(values) == 1:
          topic_id = values[0]
        else:
          topic_id = topic.name

        chap_topics[topic_id].append(chap.name)
        link_to.add((chap.name, topic.name))

        # split by whitespace
        num_words = len(topic.text.split())
        if num_words > min_words:
          num_topics_written += 1
        elif num_words > 1:
          short_topics.append((topic_id, topic.text))

  num_chapters = len(chap_tree.children)

  log('Chapter stats:')
  log(' num chapters = %d', num_chapters)
  log(' num_sections = %d', num_sections)
  log(' num_topics = %d', num_topics)

  chap_topic_set = set(chap_topics)
  log(' num unique topics = %d', len(chap_topic_set))
  log(' topics with first draft (more than %d words) = %d', min_words,
      num_topics_written)
  log('')

  log('%d in link_to set: %s', len(link_to), sorted(link_to)[:10])
  log('')
  log('%d in link_from set: %s', len(link_from), sorted(link_from)[:10])
  log('')

  index_topic_set = set(toc_topic_check)

  # Sanity checks: a known topic must show up on both sides.
  assert 'j8-escape' in index_topic_set, "'j8-escape' missing from TOC topics"
  assert 'j8-escape' in chap_topic_set, (
      "'j8-escape' missing from chapter topics")

  # Report on topic namespace integrity, e.g. 'help append' should go to one
  # thing
  log('Topics in multiple chapters:')
  # items() works on Python 2 and 3; sorting makes the report deterministic.
  for topic_id, chaps in sorted(chap_topics.items()):
    if len(chaps) > 1:
      log(' %s: %s', topic_id, ' '.join(chaps))
  log('')

  log('Duplicate topics in TOC:')
  log('')
  for topic in sorted(toc_topic_check):
    toc_list = toc_topic_check[topic]
    if len(toc_list) > 1:
      log('%20s: %s', topic, ' '.join(toc_list))
  log('')

  # Report on link integrity
  broken = link_from - link_to
  log('%d Broken Links:', len(broken))
  for pair in sorted(broken):
    log(' %s', pair)
  log('')

  orphaned = link_to - link_from
  log('%d Orphaned Topics:', len(orphaned))
  for pair in sorted(orphaned):
    log(' %s', pair)
  log('')

  log('Short topics:')
  for topic, text in short_topics:
    log('%15s %r', topic, text)
  log('')

# vim: sw=2