1 | #!/usr/bin/env python2
2 | """
3 | ref_check.py: Check Links
4 | """
5 | from __future__ import print_function
6 |
7 | import collections
8 | import json
9 | from pprint import pprint
10 | import sys
11 |
12 | from doctools.util import log
13 |
14 |
def PrintTree(node, f, indent=0):
  """
  Write an indented outline of a DocNode tree (see make_help.py) to f.

  Each node produces one line: the indentation, the node name, a space, and
  then the node's attributes as "(k=v, k=v)" when it has any.  Children
  follow on their own lines, one level deeper.

  Args:
    node: object with .name, .attrs (sequence of 2-tuples) and .children
    f: file-like object to print to
    indent: nesting depth of `node`; the padding string is repeated this
      many times
  """
  attr_text = ''
  if node.attrs:
    rendered = ['%s=%s' % pair for pair in node.attrs]
    attr_text = '(%s)' % ', '.join(rendered)

  padding = ' ' * indent
  print('%s%s %s' % (padding, node.name, attr_text), file=f)

  # Recurse depth-first so each subtree is printed right under its parent.
  child_indent = indent + 1
  for child in node.children:
    PrintTree(child, f, child_indent)
28 |
29 |
def Check(all_toc_nodes, chap_tree):
  """
  Cross-check the reference TOC against the chapter tree and log a report.

  The report covers TOC statistics, chapter statistics, topics that appear
  in more than one chapter, topics listed more than once in the TOC, broken
  links (in the TOC but not in the chapters), orphaned topics (in the
  chapters but not in the TOC), and topics with very little text.
  Everything is written with log(); nothing is returned.

  Args:
    all_toc_nodes: Structure of doc/ref/toc-*.md.  A sequence of dicts, each
      with 'toc' and 'boxes'.  A box has 'to_chap' and 'lines'; a line has
      'section', 'impl' and 'topics', where 'topics' is a sequence of
      (topic, topic_impl) pairs.
    chap_tree: Structure of chap-*.html.  Root node whose children are
      chapters, whose children are sections, whose children are topics.
      Nodes expose .name and .children; topic nodes also expose .attrs (a
      sequence of (key, value) pairs) and .text.

  Raises:
    AssertionError: if the 'j8-escape' topic is missing from either the TOC
      or the chapters.
    KeyError: if a TOC dict lacks one of the keys listed above.
  """
  all_topics = []

  # Both sets hold (chapter filename, topic) pairs.  link_from is what the
  # TOC points at; link_to is what the chapters actually contain.
  link_from = set()
  link_to = set()

  toc_topic_check = collections.defaultdict(list)  # topic -> list of TOCs

  #
  # Walk the TOC metadata
  #

  topics_not_impl = 0
  sections_not_impl = 0

  log('TOC:')
  log('')
  for toc_node in all_toc_nodes:
    toc = toc_node['toc']
    log(' %s', toc)
    for box_node in toc_node['boxes']:
      to_chap = box_node['to_chap']
      log(' %s' % to_chap)

      # Same for every topic in this box, so compute it once.
      chap_filename = 'chap-%s.html' % to_chap

      for line_info in box_node['lines']:
        # Not used below; the lookup still raises KeyError if a TOC line
        # has no 'section' key.
        section = line_info['section']
        section_impl = line_info['impl']
        if not section_impl:
          sections_not_impl += 1

        topics = line_info['topics']
        for topic, topic_impl in topics:
          toc_topic_check[topic].append(toc)
          link_from.add((chap_filename, topic))

          # A topic in an unimplemented section counts as unimplemented too.
          if not topic_impl or not section_impl:
            topics_not_impl += 1

        all_topics.extend(topics)

  log('')

  log('TOC stats:')
  log(' Topics: %d', len(all_topics))
  log(' Unique topics: %d', len(set(all_topics)))
  log(' Topic Sections not implemented (X): %d', sections_not_impl)
  log(' All topics not implemented: %d', topics_not_impl)
  log('')

  # Debugging aid: flip to 1 to dump the chapter tree.
  if 0:
    PrintTree(chap_tree, sys.stdout)

  num_sections = 0
  num_topics = 0
  num_topics_written = 0

  #
  # Walk the Chapter Tree
  #

  chap_topics = collections.defaultdict(list)  # topic_id -> list of chapters
  short_topics = []

  min_words = 5  # arbitrary

  for chap in chap_tree.children:
    for section in chap.children:
      num_sections += 1

      for topic in section.children:
        num_topics += 1

        # An 'id' attribute overrides the node name, but only when there is
        # exactly one such attribute.
        values = [v for k, v in topic.attrs if k == 'id']
        if len(values) == 1:
          topic_id = values[0]
        else:
          topic_id = topic.name

        chap_topics[topic_id].append(chap.name)
        # NOTE(review): this records topic.name, not topic_id -- confirm
        # that is intended for topics that carry an explicit id.
        link_to.add((chap.name, topic.name))

        # split by whitespace
        num_words = len(topic.text.split())
        if num_words > min_words:
          num_topics_written += 1
        elif num_words > 1:
          # Topics with 0 or 1 words are not reported as short.
          short_topics.append((topic_id, topic.text))

  num_chapters = len(chap_tree.children)

  log('Chapter stats:')
  log(' num chapters = %d', num_chapters)
  log(' num_sections = %d', num_sections)
  log(' num_topics = %d', num_topics)

  chap_topic_set = set(chap_topics)
  log(' num unique topics = %d', len(chap_topic_set))
  log(' topics with first draft (more than %d words) = %d', min_words,
      num_topics_written)
  log('')

  log('%d in link_to set: %s', len(link_to), sorted(link_to)[:10])
  log('')
  log('%d in link_from set: %s', len(link_from), sorted(link_from)[:10])
  log('')

  index_topic_set = set(toc_topic_check)

  # Sanity check: a topic expected on both sides must have been picked up by
  # both walks.
  assert 'j8-escape' in index_topic_set
  assert 'j8-escape' in chap_topic_set

  # Report on topic namespace integrity, e.g. 'help append' should go to one
  # thing
  log('Topics in multiple chapters:')
  # items() works on Python 2 and 3 (iteritems() is Python 2 only); sorted
  # to match the other reports below.
  for topic_id, chaps in sorted(chap_topics.items()):
    if len(chaps) > 1:
      log(' %s: %s', topic_id, ' '.join(chaps))
  log('')

  log('Duplicate topics in TOC:')
  log('')
  for topic in sorted(toc_topic_check):
    toc_list = toc_topic_check[topic]
    if len(toc_list) > 1:
      log('%20s: %s', topic, ' '.join(toc_list))
  log('')

  # Report on link integrity
  if 1:
    # In the TOC, but no matching topic in any chapter.
    broken = link_from - link_to
    log('%d Broken Links:', len(broken))
    for pair in sorted(broken):
      log(' %s', pair)
    log('')

    # In a chapter, but never referenced from the TOC.
    orphaned = link_to - link_from
    log('%d Orphaned Topics:', len(orphaned))
    for pair in sorted(orphaned):
      log(' %s', pair)
    log('')

  log('Short topics:')
  for topic, text in short_topics:
    log('%15s %r', topic, text)
  log('')
183 |
184 | # vim: sw=2