# summary | log | tree | commit | diff | stats
# path: root/proc_tut.py
# blob: 3469220843c7b24fe8632bce6e9898001e7edfd7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import markdown
from markdown.preprocessors import Preprocessor
from markdown.postprocessors import Postprocessor
from markdown.extensions import Extension
import sys
import re
import os
import codecs
import jinja2

'''
Process Github Wiki Tutorial Pages
Simon Wilper
2017-02-24
'''

class BrokenLinkPreprocessor(Preprocessor):
    '''
    Rejoin Markdown links that Github Wiki splits with a space.

    Github Wiki writes links as "[title] (url)", which python's markdown
    processor does not recognise as a link. Every "] (" sequence in the
    source lines is collapsed to "](" before parsing.
    '''
    def run(self, lines):
        '''Return a new list of lines with each "] (" rewritten to "](".'''
        return [re.sub(r"\] \(", '](', text) for text in lines]

class MkLocalLinkPostprocessor(Postprocessor):
    '''
    In order to make interlinks work, add a .html suffix to local hrefs.

    Left untouched:
      * targets that carry a URI scheme (http:, https:, mailto:, ftp:, ...)
      * protocol-relative targets (//host/path)
      * targets without a page part (empty, "#fragment", "?query")

    For a target such as "Page#section" the suffix is inserted after the
    page name ("Page.html#section"), not after the fragment.
    '''

    # One double-quoted href attribute. The value is matched with [^"]* so
    # that an empty href="" cannot run on into the following attribute.
    _HREF = re.compile(r'href="([^"]*)"')

    # Start of a target that must not be rewritten: an RFC 3986 scheme
    # followed by ":" or a protocol-relative "//".
    _EXTERNAL = re.compile(r'(?:[A-Za-z][A-Za-z0-9+.-]*:|//)')

    def run(self, text):
        '''Return text with ".html" added to every local href target.'''
        return self._HREF.sub(self._localize, text)

    @classmethod
    def _localize(cls, match):
        '''Build the replacement for a single href="..." match.'''
        target = match.group(1)
        if cls._EXTERNAL.match(target):
            return match.group(0)

        # Split the page name from a trailing "?query" and/or "#fragment"
        # so the suffix lands on the page name itself.
        page_len = len(target)
        for marker in '?#':
            pos = target.find(marker)
            if pos != -1:
                page_len = min(page_len, pos)
        page, tail = target[:page_len], target[page_len:]

        if not page:
            # Same-page anchor or empty href: there is no page to suffix.
            return match.group(0)
        return 'href="%s.html%s"' % (page, tail)

class FixGHLinksExtension(Extension):
    '''
    Markdown extension that installs BrokenLinkPreprocessor ('blpp') and
    MkLocalLinkPostprocessor ('llpp') at the front of their processor
    chains.
    '''

    # Priority passed to Registry.register(); higher priorities run first.
    # NOTE(review): chosen to sit above the stock processors' priorities so
    # that it mirrors the old '_begin' placement - confirm against the
    # installed Markdown version.
    _FRONT_PRIORITY = 100

    def extendMarkdown(self, md, md_globals=None):
        '''
        Register both link fixers on the Markdown instance md.

        md_globals is accepted only for backward compatibility: older
        Markdown releases pass it, newer ones call extendMarkdown(md)
        with a single argument. It is not used.
        '''
        self._add_first(md.preprocessors, 'blpp', BrokenLinkPreprocessor())
        self._add_first(md.postprocessors, 'llpp', MkLocalLinkPostprocessor())

    @staticmethod
    def _add_first(processors, name, processor):
        '''Insert processor under name at the front of processors.'''
        if hasattr(processors, 'register'):
            # Registry API (Markdown 3.x); the deprecated add() was removed
            # in later 3.x releases.
            processors.register(
                processor, name, FixGHLinksExtension._FRONT_PRIORITY)
        else:
            # Markdown 2.x OrderedDict API.
            processors.add(name, processor, '_begin')

# Command line: <markdown source file> <html output file>
if len(sys.argv) < 3:
    print('Give markdown source file and html output file')
    sys.exit(-1)

file_name = sys.argv[1]
output_file_name = sys.argv[2]

# Read the whole markdown source as UTF-8; the with-block closes the file
# as soon as the text is in memory.
with codecs.open(file_name, mode="r", encoding="utf-8") as input_file:
    md_text = input_file.read()

html = markdown.markdown(md_text, extensions=[
    FixGHLinksExtension(),
    'markdown.extensions.codehilite',
    'markdown.extensions.fenced_code'
    ])

# The page template 'default.html' is looked up in the directory that
# contains this script, independent of the current working directory.
t = jinja2.Environment(
        loader=jinja2.FileSystemLoader(
                os.path.dirname(os.path.realpath(__file__))
            )
    ).get_template('default.html')

# Render before opening the output so that a template error does not
# leave a truncated output file behind.
page = t.render(title=file_name,contents=html)

# Characters that cannot be encoded are written as XML character
# references; the with-block flushes and closes the file.
with codecs.open(
        output_file_name, "w", encoding="utf-8",
        errors="xmlcharrefreplace"
    ) as output_file:
    output_file.write(page)