1 | #!/usr/bin/env python2
|
2 | """
|
3 | ref_check.py: Check Links
|
4 | """
|
5 | from __future__ import print_function
|
6 |
|
7 | import collections
|
8 | import json
|
9 | from pprint import pprint
|
10 | import sys
|
11 |
|
12 | from doctools.util import log
|
13 |
|
14 |
|
def PrintTree(node, f, indent=0):
  """
  Write an outline of a DocNode tree to f, one node per line.

  Each line is the node's name followed by its attributes as
  '(key=value, ...)' when it has any.  Children are printed below their
  parent, one indent level deeper.

  Args:
    node: tree node with .name, .attrs (sequence of pairs) and .children
    f: file-like object that receives the output
    indent: nesting depth of this node; the root is 0
  """
  # Nodes without attributes still get the separating space after the name.
  attr_text = (
      '(%s)' % ', '.join('%s=%s' % item for item in node.attrs)
      if node.attrs else '')

  line = '%s%s %s' % (indent * ' ', node.name, attr_text)
  print(line, file=f)

  deeper = indent + 1
  for child in node.children:
    PrintTree(child, f, deeper)
|
28 |
|
29 |
|
def Check(all_toc_nodes, chap_tree):
  """
  Cross-check the reference TOC against the chapter tree and log a report.

  The report covers TOC and chapter statistics, topics that appear in more
  than one chapter or more than one TOC, broken links (TOC entries with no
  matching chapter topic), orphaned topics (chapter topics that no TOC entry
  links to), and topics whose text is only a few words long.

  Args:
    all_toc_nodes: Structure of doc/ref/toc-*.md.  An iterable of dicts with
      keys 'toc' and 'boxes'; each box has 'to_chap' and 'lines'; each line
      has 'impl' and 'topics', a sequence of (topic, topic_impl) pairs.
    chap_tree: Structure of chap-*.html.  A tree with levels chapter ->
      section -> topic.  Nodes have .children; chapters and topics have
      .name; topics also have .attrs (sequence of (key, value) pairs) and
      .text.

  Raises:
    AssertionError: if the 'j8-escape' topic is missing from either the TOC
      or the chapters (a hard-coded sanity check).
  """
  all_topics = []  # every (topic, topic_impl) pair seen in the TOC

  link_from = set()  # (chapter filename, topic) pairs the TOC links to
  link_to = set()  # (chapter name, topic id) pairs found in the chapters

  toc_topic_check = collections.defaultdict(list)  # topic -> list of TOCs

  #
  # Walk the TOC metadata
  #

  topics_not_impl = 0
  sections_not_impl = 0

  log('TOC:')
  log('')
  for toc_node in all_toc_nodes:
    toc = toc_node['toc']
    log(' %s', toc)
    for box_node in toc_node['boxes']:
      to_chap = box_node['to_chap']
      log(' %s', to_chap)

      # Every topic in this box links into the same chapter file.
      chap_filename = 'chap-%s.html' % to_chap

      for line_info in box_node['lines']:
        section_impl = line_info['impl']
        if not section_impl:
          sections_not_impl += 1

        topics = line_info['topics']
        for topic, topic_impl in topics:
          toc_topic_check[topic].append(toc)
          link_from.add((chap_filename, topic))

          # A topic counts as unimplemented if either it or its whole
          # section is marked that way.
          if not topic_impl or not section_impl:
            topics_not_impl += 1

        all_topics.extend(topics)

  log('')

  log('TOC stats:')
  log(' Topics: %d', len(all_topics))
  # NOTE: uniqueness is over (topic, topic_impl) pairs, not topic names.
  log(' Unique topics: %d', len(set(all_topics)))
  log(' Topic Sections not implemented (X): %d', sections_not_impl)
  log(' All topics not implemented: %d', topics_not_impl)
  log('')

  if 0:  # debugging aid: dump the chapter tree
    PrintTree(chap_tree, sys.stdout)

  num_sections = 0
  num_topics = 0
  num_topics_written = 0

  #
  # Walk the Chapter Tree
  #

  chap_topics = collections.defaultdict(list)  # topic_id -> list of chapters
  short_topics = []

  min_words = 5  # arbitrary

  for chap in chap_tree.children:

    for section in chap.children:
      num_sections += 1

      for topic in section.children:
        num_topics += 1

        # An explicit id attribute overrides the topic's name.
        values = [v for k, v in topic.attrs if k == 'id']
        if len(values) == 1:
          topic_id = values[0]
        else:
          topic_id = topic.name

        chap_topics[topic_id].append(chap.name)
        # Record the link target under topic_id, the same key used for
        # chap_topics.  Using topic.name here would misreport any topic with
        # an explicit id as both a broken link and an orphan.
        # NOTE(review): assumes chap.name is the chapter filename, since it
        # is compared against the 'chap-%s.html' entries in link_from.
        link_to.add((chap.name, topic_id))

        # split by whitespace
        num_words = len(topic.text.split())
        if num_words > min_words:
          num_topics_written += 1
        elif num_words > 1:
          short_topics.append((topic_id, topic.text))

  num_chapters = len(chap_tree.children)

  log('Chapter stats:')
  log(' num chapters = %d', num_chapters)
  log(' num_sections = %d', num_sections)
  log(' num_topics = %d', num_topics)

  chap_topic_set = set(chap_topics)
  log(' num unique topics = %d', len(chap_topic_set))
  log(' topics with first draft (more than %d words) = %d', min_words,
      num_topics_written)
  log('')

  log('%d in link_to set: %s', len(link_to), sorted(link_to)[:10])
  log('')
  log('%d in link_from set: %s', len(link_from), sorted(link_from)[:10])
  log('')

  index_topic_set = set(toc_topic_check)

  # Sanity check: a known topic must show up on both sides.
  assert 'j8-escape' in index_topic_set
  assert 'j8-escape' in chap_topic_set

  # Report on topic namespace integrity, e.g. 'help append' should go to one
  # thing
  log('Topics in multiple chapters:')
  # Sorted for stable output; also avoids the Python-2-only iteritems().
  for topic_id in sorted(chap_topics):
    chaps = chap_topics[topic_id]
    if len(chaps) > 1:
      log(' %s: %s', topic_id, ' '.join(chaps))
  log('')

  log('Duplicate topics in TOC:')
  log('')
  for topic in sorted(toc_topic_check):
    toc_list = toc_topic_check[topic]
    if len(toc_list) > 1:
      log('%20s: %s', topic, ' '.join(toc_list))
  log('')

  # Report on link integrity
  if 1:
    # Linked from the TOC, but no such topic in the chapters
    broken = link_from - link_to
    log('%d Broken Links:', len(broken))
    for pair in sorted(broken):
      log(' %s', pair)
    log('')

    # Present in the chapters, but nothing in the TOC links to it
    orphaned = link_to - link_from
    log('%d Orphaned Topics:', len(orphaned))
    for pair in sorted(orphaned):
      log(' %s', pair)
    log('')

  log('Short topics:')
  for topic, text in short_topics:
    log('%15s %r', topic, text)
  log('')
|
183 |
|
184 | # vim: sw=2
|