doctools/split

OILS / doctools / split_doc.py View on Github | oilshell.org

157 lines, 109 significant

1	#!/usr/bin/env python2
2	"""split_doc.py."""
3	from __future__ import print_function
4
5	import json
6	import optparse
7	import re
8	import sys
9
# Matches a date such as '2016/11/01' and captures (year, month, day) as
# strings.  Because of re.VERBOSE, the spaces around the slashes in the
# pattern are ignored rather than matched literally.
DATE_RE = re.compile(r'(\d\d\d\d) / (\d\d) / (\d\d)', re.VERBOSE)

# Matches a 'name: value' metadata line and captures (name, value).
# re.VERBOSE makes the unbracketed spaces insignificant; only the [ ]*
# character class consumes the spaces that follow the colon.
META_RE = re.compile(r'(\S+): [ ]* (.*)', re.VERBOSE)
13
14
def _ParseDate(value):
    """Parse a metadata date value into a (year, month, day) tuple of ints.

    Raises:
      RuntimeError: if DATE_RE does not match the start of value.
    """
    m = DATE_RE.match(value)
    if not m:
        raise RuntimeError('Invalid date %r' % value)
    return tuple(int(part) for part in m.groups())


def _ReadFirstNonEmptyLine(f):
    """Return the next line of f that isn't blank, or '' at end of file."""
    while True:
        line = f.readline()
        # readline() returns '' only at EOF.  Stop there instead of spinning
        # forever on a file that has no more content.
        if not line or line.strip():
            return line


def SplitDocument(default_vals, entry_f, meta_f, content_f, strict=False):
    """Split a document into metadata JSON and content Markdown.

    Used for blog posts and index.md / cross-ref.md.

    The document may start with a front matter block delimited by '---'
    lines, containing 'name: value' pairs.  'date' and 'updated_date' values
    are expanded into integer year / month / day entries; every other value is
    stored as a string.  If the line after the first content line starts with
    '=', the first content line is recorded as the 'title'.

    Args:
      default_vals: dict of metadata to use for names the document itself
        doesn't define.
      entry_f: readable file object for the input document.
      meta_f: writable file object that receives the metadata as JSON.
      content_f: writable file object that receives the content.  When the
        metadata has a non-empty 'comments_url', a [comments-url] link
        definition is appended.
      strict: if True, the document is required to start with '---'.

    Raises:
      RuntimeError: if strict is set and the front matter is missing, or if a
        metadata line or date value is malformed.
    """
    first_line = entry_f.readline()
    has_front_matter = first_line.strip() == '---'
    if strict and not has_front_matter:
        raise RuntimeError("Document should start with --- (got %r)" %
                           first_line)

    meta = {}

    if has_front_matter:
        # Read 'name: value' lines until the closing '---'.
        while True:
            line = entry_f.readline().strip()
            if line == '---':
                break
            m = META_RE.match(line)
            if not m:
                # Also reached at EOF (line == ''), so an unterminated block
                # is reported instead of being read forever.
                raise RuntimeError('Invalid metadata line %r' % line)
            name, value = m.groups()

            if name == 'date':
                meta['year'], meta['month'], meta['day'] = _ParseDate(value)
            elif name == 'updated_date':
                (meta['updated_year'], meta['updated_month'],
                 meta['updated_day']) = _ParseDate(value)
            else:
                meta[name] = value

        # Blank lines between the front matter and the content are dropped.
        first_nonempty = _ReadFirstNonEmptyLine(entry_f)

    else:
        # No front matter: the first line is already content.  It is used
        # as-is even when it is blank, and it is '' when the input is empty.
        first_nonempty = first_line

    # Now we need to see if the line we kept is the title, i.e. whether the
    # line after it is an underline of '=' characters.
    line_two = entry_f.readline()
    if re.match('=+', line_two):
        meta['title'] = first_nonempty.strip()

    # Fill in defaults after parsing all values, so the document's own
    # metadata takes precedence.
    for name, value in default_vals.items():
        meta.setdefault(name, value)

    json.dump(meta, meta_f, indent=2)

    # Write back the two lines consumed above, then the rest of the file.
    content_f.write(first_nonempty)
    content_f.write(line_two)
    content_f.write(entry_f.read())

    comments_url = meta.get('comments_url', '')
    if comments_url:
        content_f.write("""
[comments-url]: %s

""" % comments_url)
104
105
def Options():
    """Build the optparse parser for the split_doc.py command line.

    Flags:
      -v            May be repeated; each value is appended to the
                    'default_vals' list as a raw string.
      -s, --strict  Boolean flag stored as 'strict', False by default.

    Returns:
      The configured optparse.OptionParser.
    """
    parser = optparse.OptionParser(
        'split_doc.py [options] input_file out_prefix')

    # Like awk -v
    default_vals_help = (
        "If the doc's own metadata doesn't define 'name', set it to this value"
    )
    parser.add_option('-v',
                      dest='default_vals',
                      action='append',
                      default=[],
                      help=default_vals_help)

    parser.add_option('-s',
                      '--strict',
                      dest='strict',
                      action='store_true',
                      default=False,
                      help="Require metadata")

    return parser
124
125
def main(argv):
    """Split the document named on the command line into two output files.

    Writes <out_prefix>_meta.json and <out_prefix>_content.md.

    Args:
      argv: the full argument list, including the program name at index 0.

    Raises:
      RuntimeError: if a positional argument is missing, if a -v value isn't
        of the form name=value, or if the document itself is invalid.
    """
    o = Options()
    opts, argv = o.parse_args(argv)

    # argv[0] is the program name, so the two positional args are argv[1:3].
    # Fail with a message instead of an IndexError traceback.
    if len(argv) < 3:
        raise RuntimeError(
            'Expected 2 arguments: input_file out_prefix (got %r)' % argv[1:])

    entry_path = argv[1]  # e.g. blog/2016/11/01.md
    out_prefix = argv[2]  # e.g _site/blog/2016/11/01

    meta_path = out_prefix + '_meta.json'
    content_path = out_prefix + '_content.md'

    default_vals = {}
    for pair in opts.default_vals:
        # Split on the first '=' only, so values may contain '='.
        name, sep, value = pair.partition('=')
        if not sep:
            raise RuntimeError('Expected -v name=value (got %r)' % pair)
        default_vals[name] = value

    with \
        open(entry_path) as entry_f, \
        open(meta_path, 'w') as meta_f, \
        open(content_path, 'w') as content_f:
        SplitDocument(default_vals,
                      entry_f,
                      meta_f,
                      content_f,
                      strict=opts.strict)
150
151
if __name__ == '__main__':
    # Script entry point.  A RuntimeError is reported as a one-line message
    # on stderr with exit status 1; any other exception propagates.
    try:
        main(sys.argv)
    except RuntimeError as err:
        print('FATAL: %s' % err, file=sys.stderr)
        raise SystemExit(1)