1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
import markdown
from markdown.preprocessors import Preprocessor
from markdown.postprocessors import Postprocessor
from markdown.extensions import Extension
import sys
import re
import os
import codecs
import jinja2
'''
Process Github Wiki Tutorial Pages
Simon Wilper
2017-02-24
'''
class BrokenLinkPreprocessor(Preprocessor):
    '''
    Repair links as they appear in GitHub Wiki markdown sources.

    GitHub Wiki inserts a space between a link's title and its URL
    ("[title] (url)"), which breaks Python's markdown processor. Every
    "] (" in the source is collapsed to "](" before parsing starts.
    '''

    def run(self, lines):
        '''Return a new list of lines with each "] (" replaced by "](".'''
        return [re.sub(r"\] \(", '](', text) for text in lines]
class MkLocalLinkPostprocessor(Postprocessor):
    '''
    In order to make interlinks work, add a .html suffix to local hrefs.

    An href is left untouched when it is empty, when it points outside the
    wiki (any "scheme://" URL such as http:// or https://, mailto:/tel:
    links, protocol-relative "//host" URLs) or when it is a pure in-page
    anchor ("#section").  For local targets the suffix is inserted after
    the path and before any "?query" / "#fragment" part, so "Page#intro"
    becomes "Page.html#intro" rather than "Page#intro.html".
    '''

    # Any double-quoted href attribute in the rendered HTML.
    _HREF_RE = re.compile(r'href="([^"]*)"')

    # Prefixes of targets that must not be rewritten.
    _EXTERNAL_RE = re.compile(
        r'(?:[a-z][a-z0-9+.-]*://|mailto:|tel:|//|#)', re.IGNORECASE
    )

    # Splits a target into its path and the optional ?query/#fragment tail.
    _SPLIT_RE = re.compile(r'([^?#]*)(.*)', re.DOTALL)

    def _add_suffix(self, match):
        '''Return the replacement text for one matched href attribute.'''
        target = match.group(1)
        if not target or self._EXTERNAL_RE.match(target):
            return match.group(0)
        path, tail = self._SPLIT_RE.match(target).groups()
        if not path:
            # Only a query string: there is no page name to extend.
            return match.group(0)
        return 'href="{0}.html{1}"'.format(path, tail)

    def run(self, text):
        '''Return text with ".html" added to every local href.'''
        return self._HREF_RE.sub(self._add_suffix, text)
class FixGHLinksExtension(Extension):
    '''
    Markdown extension that installs the GitHub Wiki link fixers:
    BrokenLinkPreprocessor (as 'blpp') and MkLocalLinkPostprocessor
    (as 'llpp').
    '''

    # Priority used with the Registry.register() API.  It is meant to be
    # higher than the priorities of the processors shipped with
    # Python-Markdown so that both fixers run first, like '_begin' did.
    # NOTE(review): confirm against the installed Python-Markdown version.
    _PRIORITY = 100

    def extendMarkdown(self, md, md_globals=None):
        '''
        Register both processors on the given Markdown instance.

        md_globals is accepted only for compatibility with the legacy
        two-argument call of older Python-Markdown versions; it is unused.
        '''
        preprocessor = BrokenLinkPreprocessor(md)
        postprocessor = MkLocalLinkPostprocessor(md)
        if hasattr(md.preprocessors, 'register'):
            # Current API: register(item, name, priority).
            md.preprocessors.register(preprocessor, 'blpp', self._PRIORITY)
            md.postprocessors.register(postprocessor, 'llpp', self._PRIORITY)
        else:
            # Legacy API: add(name, item, location).
            md.preprocessors.add('blpp', preprocessor, '_begin')
            md.postprocessors.add('llpp', postprocessor, '_begin')
# Command-line driver: convert one wiki markdown file (argv[1]) into an
# HTML page (argv[2]) using the 'default.html' template that is looked up
# in the directory containing this script.
if len(sys.argv) < 3:
    print('Give markdown source file and html output file')
    sys.exit(-1)

file_name = sys.argv[1]
output_file_name = sys.argv[2]

# Context manager guarantees the source file is closed after reading.
with codecs.open(file_name, mode="r", encoding="utf-8") as input_file:
    md_text = input_file.read()

html = markdown.markdown(md_text, extensions=[
    FixGHLinksExtension(),
    'markdown.extensions.codehilite',
    'markdown.extensions.fenced_code'
])

t = jinja2.Environment(
    loader=jinja2.FileSystemLoader(
        os.path.dirname(os.path.realpath(__file__))
    )
).get_template('default.html')

# Render before opening the output file so that a template error does not
# leave a truncated output file behind.
page = t.render(title=file_name, contents=html)

# Context manager guarantees the output is flushed and closed.
with codecs.open(
    output_file_name, "w", encoding="utf-8",
    errors="xmlcharrefreplace"
) as output_file:
    output_file.write(page)
|