doctools/split

OILS / doctools / split_doc.py View on Github | oilshell.org

151 lines, 99 significant

1	#!/usr/bin/env python2
2	"""
3	split_doc.py
4	"""
5	from __future__ import print_function
6
7	import json
8	import optparse
9	import re
10	import sys
11
12
# A date written as YYYY/MM/DD.  re.VERBOSE makes the spaces in the pattern
# insignificant, so this matches '2016/11/01' (not '2016 / 11 / 01').
DATE_RE = re.compile(
    r'(\d\d\d\d) / (\d\d) / (\d\d)', re.VERBOSE)

# A 'name: value' metadata line.  Under re.VERBOSE only the bracketed [ ] is a
# literal space, so any run of spaces after the colon is skipped.
META_RE = re.compile(
    r'(\S+): [ ]* (.*)', re.VERBOSE)


def _ParseDate(value):
    """Return (year, month, day) as ints for a value starting with YYYY/MM/DD.

    Raises:
      RuntimeError: if value does not start with a date in that form.
    """
    m = DATE_RE.match(value)
    if not m:
        raise RuntimeError('Invalid date %r' % value)
    return tuple(int(g) for g in m.groups())


def _ReadFirstNonEmpty(f):
    """Return the next line of f that isn't blank, or '' at end of file."""
    while True:
        line = f.readline()
        # readline() returns '' only at EOF; stop there instead of looping
        # forever on a document that has no content.
        if not line or line.strip():
            return line


def SplitDocument(default_vals, entry_f, meta_f, content_f, strict=False):
    """Split a document into metadata JSON and content Markdown.

    Used for blog posts and index.md / cross-ref.md.

    The document may start with a front-matter block: a '---' line, then
    'name: value' lines, then a closing '---' line.  'date' and 'updated_date'
    values (YYYY/MM/DD) are stored as integer year/month/day fields, the latter
    with an 'updated_' prefix.  If the line after the first non-empty content
    line starts with '=', that first line is recorded as meta['title'].

    Blank lines before the first content line are not copied to content_f.

    Args:
      default_vals: dict of metadata name -> value, applied only for names the
        document's own metadata doesn't define.
      entry_f: readable file-like object for the input document.
      meta_f: writable file-like object that receives the metadata as JSON.
      content_f: writable file-like object that receives the content.
      strict: if true, the document must start with '---'.

    Raises:
      RuntimeError: if strict is set and there is no front matter, if the front
        matter is never closed, or if a metadata line or date is malformed.
    """
    first_line = entry_f.readline()
    has_front_matter = first_line.strip() == '---'
    if strict and not has_front_matter:
        raise RuntimeError(
            "Document should start with --- (got %r)" % first_line)

    meta = {}

    if has_front_matter:
        while True:
            raw = entry_f.readline()
            if not raw:
                # EOF before the closing '---'.
                raise RuntimeError('Metadata block is missing its closing ---')
            line = raw.strip()
            if line == '---':
                break
            m = META_RE.match(line)
            if not m:
                raise RuntimeError('Invalid metadata line %r' % line)
            name, value = m.groups()

            if name == 'date' or name == 'updated_date':
                prefix = 'updated_' if name == 'updated_date' else ''
                year, month, day = _ParseDate(value)
                meta[prefix + 'year'] = year
                meta[prefix + 'month'] = month
                meta[prefix + 'day'] = day
            else:
                meta[name] = value

        first_nonempty = _ReadFirstNonEmpty(entry_f)

    elif first_line.strip():
        first_nonempty = first_line

    else:
        # Blank first line, or an empty file.
        first_nonempty = _ReadFirstNonEmpty(entry_f)

    # Invariant: first_nonempty is the first non-empty line, or '' if the
    # document has no content.  Now we need to see if it's the title.
    line_two = entry_f.readline()
    if re.match('=+', line_two):
        meta['title'] = first_nonempty.strip()

    # Fill in defaults after parsing all values.  items() rather than
    # iteritems() so this runs on both Python 2 and 3.
    for name, value in default_vals.items():
        if name not in meta:
            meta[name] = value

    json.dump(meta, meta_f, indent=2)

    # Write the two lines already consumed, then the rest of the file.
    content_f.write(first_nonempty)
    content_f.write(line_two)
    content_f.write(entry_f.read())

    comments_url = meta.get('comments_url', '')
    if comments_url:
        # Markdown link reference definition appended after the content.
        content_f.write('\n[comments-url]: %s\n\n' % comments_url)
108
109
110
def Options():
    """Build and return the optparse parser for the split_doc.py command line."""
    parser = optparse.OptionParser(
        'split_doc.py [options] input_file out_prefix')

    # -v may be given more than once; each occurrence is appended to
    # opts.default_vals.  Like awk -v.
    parser.add_option(
        '-v',
        action='append',
        dest='default_vals',
        default=[],
        help="If the doc's own metadata doesn't define 'name', set it to this value")

    parser.add_option(
        '-s', '--strict',
        action='store_true',
        dest='strict',
        default=False,
        help="Require metadata")

    return parser
122
123
def main(argv):
    """Command-line entry point.

    Reads the document at argv[1] and writes <argv[2]>_meta.json and
    <argv[2]>_content.md.

    Args:
      argv: the full argument vector, with the program name at index 0.

    Raises:
      RuntimeError: if the two positional arguments are missing, if a -v
        argument is not of the form name=value, or if SplitDocument rejects
        the document.
    """
    o = Options()
    opts, argv = o.parse_args(argv)

    # argv[0] is the program name, so two positional args means length 3.
    # Raise RuntimeError (not IndexError) so the script's top-level handler
    # reports it as a FATAL message instead of a traceback.
    if len(argv) < 3:
        raise RuntimeError(
            'Expected input_file and out_prefix arguments (usage: %s)' % o.usage)

    entry_path = argv[1]  # e.g. blog/2016/11/01.md
    out_prefix = argv[2]  # e.g _site/blog/2016/11/01

    meta_path = out_prefix + '_meta.json'
    content_path = out_prefix + '_content.md'

    default_vals = {}
    for pair in opts.default_vals:
        # Split on the first '=' only, so values may themselves contain '='.
        name, sep, value = pair.partition('=')
        if not sep:
            raise RuntimeError('Expected -v name=value (got %r)' % pair)
        default_vals[name] = value

    with \
            open(entry_path) as entry_f, \
            open(meta_path, 'w') as meta_f, \
            open(content_path, 'w') as content_f:
        SplitDocument(default_vals, entry_f, meta_f, content_f,
                      strict=opts.strict)
144
145
if __name__ == '__main__':
    # A RuntimeError becomes a one-line message on stderr and exit status 1
    # rather than a traceback.
    try:
        main(sys.argv)
    except RuntimeError as err:
        print('FATAL: %s' % err, file=sys.stderr)
        sys.exit(1)