-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathkb_html_cleanup_hook.py
88 lines (72 loc) · 3.25 KB
/
kb_html_cleanup_hook.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import bs4
from bs4 import Comment
import re
import pathlib
# Admonition class attributes found in the rendered page, paired with the
# PHPKB "notice" class attributes that replace them.
_ADMONITION_CLASSES = (
    ('class="admonition note"', 'class="notice notice-info"'),
    ('class="admonition warning"', 'class="notice notice-warning"'),
    ('class="admonition question"', 'class="notice notice-success"'),
    ('class="admonition example"', 'class="notice notice-example"'),
    ('class="admonition danger"', 'class="notice notice-error"'),
    ('class="admonition tip"', 'class="notice notice-tip"'),
)

# U+200C ZERO WIDTH NON-JOINER, written as an escape so the otherwise
# invisible character cannot be silently lost when the file is edited.
_ZWNJ = '\u200c'

# Image sources with these prefixes are left exactly as they are.  "data:"
# URIs must never go through path joining: it would collapse "//" sequences
# inside the encoded payload.
_UNTOUCHED_SRC_PREFIXES = ('https://', 'http://', 'data:')

# Patterns are compiled once at import time instead of once per page/element.
_FONTAWESOME_CLASS = re.compile("fa.+")
_PRE_LINE = re.compile(r'^(.*)\n', flags=re.MULTILINE)
_NEWLINE_RUN = re.compile(r'\n+', flags=re.MULTILINE)


def _rebase_image_sources(soup, page, config):
    """Prefix non-absolute ``<img src>`` values with the page's directory."""
    relative_images = []
    for img in soup.find_all('img'):
        src = img.get('src')  # an <img> without src must not raise KeyError
        if src is None or src.startswith(_UNTOUCHED_SRC_PREFIXES):
            continue
        relative_images.append(img)

    # Without a page URL there is nothing to rebase against; leave the
    # sources untouched instead of failing inside PurePosixPath(None).
    if not relative_images or page.abs_url is None:
        return

    # ``.parent`` equals ``parents[0]`` for every non-root path, but also
    # works for "/" where ``parents`` is empty and indexing would raise.
    page_dir = pathlib.PurePosixPath(page.abs_url).parent
    for img in relative_images:
        # NOTE(review): pathlib restarts a joined path at the last absolute
        # segment, so when page.abs_url is absolute (starts with "/")
        # config.site_name does not appear in the result.  Kept as-is to
        # preserve the current output -- confirm that this is intended.
        img_path = pathlib.PurePosixPath(
            config.site_name, str(page_dir), str(img['src']))
        # Store a plain string rather than a PurePosixPath object.
        img['src'] = str(img_path)


def _rewrite_code_blocks(soup):
    """Wrap every newline-terminated line of each ``<pre>`` in ``<code>``."""
    for pre in soup.find_all('pre'):
        rewritten = _PRE_LINE.sub(r'<code>\1</code> <br />', str(pre))
        pre.replace_with(bs4.BeautifulSoup(rewritten, 'html.parser'))


def _body_as_div(soup):
    """Return the page body as a ``<div class="md-body">`` element."""
    body = soup.body
    if body is None:
        # No <body> in the output: wrap whatever was parsed in a new
        # container instead of failing on ``None.name``.
        body = soup.new_tag('div')
        for node in list(soup.contents):
            body.append(node.extract())
    else:
        body.name = 'div'
    body['class'] = 'md-body'
    return body


def on_post_page(output, page, config, **kwargs):
    """Convert a rendered page into the HTML fragment handed to PHPKB.

    The steps, in order: map admonition classes to PHPKB notice classes,
    drop the first ``<h1>``, drop empty ``<p>`` elements and all comments,
    fill Fontawesome ``<i>`` icons with a zero-width non-joiner, tag lists,
    tables, figures and links with PHPKB-specific classes/styles, rebase
    relative image sources, rewrite ``<pre>`` blocks line by line, and
    finally return the ``<body>`` renamed to ``<div class="md-body">``.

    Args:
        output: Rendered HTML of the page, as a string.
        page: Page object; ``title`` and ``abs_url`` are read.
        config: Site configuration; ``site_name`` is read.
        **kwargs: Accepted for hook-signature compatibility and ignored.

    Returns:
        The cleaned-up HTML as a string, with runs of newlines collapsed.
    """
    kb_html = output
    for mkdocs_attr, phpkb_attr in _ADMONITION_CLASSES:
        kb_html = kb_html.replace(mkdocs_attr, phpkb_attr)

    soup = bs4.BeautifulSoup(kb_html, 'html.parser')

    # Delete the first H1 tag, it is redundant for PHPKB
    heading = soup.find('h1')
    if heading is not None:
        heading.decompose()

    # Cleanup empty Ps
    for paragraph in soup.find_all('p'):
        if not paragraph.contents:
            # f-string so a missing (None) title cannot raise TypeError
            print(f'deleting empty P from {page.title}')
            paragraph.decompose()

    # Cleanup comments.  This must run before the icon step below, which
    # adds comments of its own that have to survive.
    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
        comment.extract()

    # Put a zero-width non-joiner inside Fontawesome icons, otherwise PHPKB
    # will delete them as empty elements
    for icon in soup.find_all('i', class_=_FONTAWESOME_CLASS):
        icon.string = _ZWNJ
        icon.append(Comment('icon'))

    # Add class="colored_numbers_list" to all ordered lists
    for ordered_list in soup.find_all('ol'):
        ordered_list['class'] = 'colored_numbers_list'

    # Add style="width: 100%;" to all <table>s
    for table in soup.find_all('table'):
        table['style'] = 'width: 100%;'

    # Add class="screenshot_with_caption" to figures
    for figure in soup.find_all('figure'):
        figure['class'] = 'screenshot_with_caption'
        caption = figure.find('figcaption')
        if caption is not None:
            caption['class'] = 'caption'

    # Rebase relative img sources, leave absolute ones as they are
    _rebase_image_sources(soup, page, config)

    # Classify all links as imported from MkDocs
    for link in soup.find_all('a'):
        if link.get('class'):
            link['class'].append("mkdocs_imported_link")
        else:
            link['class'] = "mkdocs_imported_link"

    # Fix code blocks for PHPKB
    _rewrite_code_blocks(soup)

    # Turn <body> into <div> for PHPKB compatibility, as PHPKB provides <body>
    body = _body_as_div(soup)

    # Do not use prettify(), it adds redundant spaces in PHPKB.
    # Any literal "&zwnj;" entity left in the serialized markup is turned
    # into the raw zero-width non-joiner character.
    kb_html = str(body).replace('&zwnj;', _ZWNJ)

    # Cleanup redundant new lines
    return _NEWLINE_RUN.sub('\n', kb_html)