-
Notifications
You must be signed in to change notification settings - Fork 315
Expand file tree
/
Copy pathhacktricks-preprocessor.py
More file actions
184 lines (154 loc) · 7.34 KB
/
hacktricks-preprocessor.py
File metadata and controls
184 lines (154 loc) · 7.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import json
import os
import sys
import re
import logging
from os import path
from urllib.request import urlopen, Request
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
handler = logging.FileHandler(filename='hacktricks-preprocessor.log', mode='w', encoding='utf-8')
handler.setLevel(logging.DEBUG)
logger.addHandler(handler)
handler2 = logging.FileHandler(filename='hacktricks-preprocessor-error.log', mode='w', encoding='utf-8')
handler2.setLevel(logging.ERROR)
logger.addHandler(handler2)
def findtitle(search ,obj, key, path=(),):
# logger.debug(f"Looking for {search} in {path}")
if isinstance(obj, dict) and key in obj and obj[key] == search:
return obj, path
if isinstance(obj, list):
for k, v in enumerate(obj):
item = findtitle(search, v, key, (*path, k))
if item is not None:
return item
if isinstance(obj, dict):
for k, v in obj.items():
item = findtitle(search, v, key, (*path, k))
if item is not None:
return item
def ref(matchobj):
logger.debug(f'Ref match: {matchobj.groups(0)[0].strip()}')
href = matchobj.groups(0)[0].strip()
title = href
if href.startswith("http://") or href.startswith("https://"):
if context['config']['preprocessor']['hacktricks']['env'] == 'dev':
pass
else:
try:
raw_html = str(urlopen(Request(href, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0'})).read())
match = re.search('<title>(.*?)</title>', raw_html)
title = match.group(1) if match else href
except Exception as e:
logger.error(f'Error opening URL {href}: {e}')
pass #Dont stop on broken link
else:
try:
if href.endswith("/"):
href = href+"README.md" # Fix if ref points to a folder
if "#" in href:
result = findtitle(href.split("#")[0], book, "source_path")
if result is None or result[0] is None:
raise Exception(f"Chapter not found")
chapter, _path = result
title = " ".join(href.split("#")[1].split("-")).title()
logger.debug(f'Ref has # using title: {title}')
else:
result = findtitle(href, book, "source_path")
if result is None or result[0] is None:
raise Exception(f"Chapter not found")
chapter, _path = result
logger.debug(f'Recursive title search result: {chapter["name"]}')
title = chapter['name']
except Exception as e:
try:
dir = path.dirname(current_chapter['source_path'])
logger.debug(f'Error getting chapter title: {href} trying with relative path {path.normpath(path.join(dir,href))}')
if "#" in href:
result = findtitle(path.normpath(path.join(dir,href.split('#')[0])), book, "source_path")
if result is None or result[0] is None:
raise Exception(f"Chapter not found")
chapter, _path = result
title = " ".join(href.split("#")[1].split("-")).title()
logger.debug(f'Ref has # using title: {title}')
else:
result = findtitle(path.normpath(path.join(dir,href.split('#')[0])), book, "source_path")
if result is None or result[0] is None:
raise Exception(f"Chapter not found")
chapter, _path = result
title = chapter["name"]
logger.debug(f'Recursive title search result: {chapter["name"]}')
except Exception as e:
logger.error(f"Error: {e}")
logger.error(f'Error getting chapter title: {path.normpath(path.join(dir,href))}')
sys.exit(1)
if href.endswith("/README.md"):
href = href.replace("/README.md", "/index.html")
template = f"""<a class="content_ref" href="{href}"><span class="content_ref_label">{title}</span></a>"""
# translate_table = str.maketrans({"\"":"\\\"","\n":"\\n"})
# translated_text = template.translate(translate_table)
result = template
return result
def files(matchobj):
logger.debug(f'Files match: {matchobj.groups(0)[0].strip()}')
href = matchobj.groups(0)[0].strip()
title = ""
try:
for root, dirs, files in os.walk(os.getcwd()+'/src/files'):
logger.debug(root)
logger.debug(files)
if href in files:
title = href
logger.debug(f'File search result: {os.path.join(root, href)}')
except Exception as e:
logger.error(f"Error: {e}")
logger.error(f'Error searching file: {href}')
sys.exit(1)
if title=="":
logger.error(f'Error searching file: {href}')
sys.exit(1)
template = f"""<a class="content_ref" href="/files/{href}"><span class="content_ref_label">{title}</span></a>"""
result = template
return result
def add_read_time(content):
regex = r'(<\/style>\n# .*(?=\n))'
new_content = re.sub(regex, lambda x: x.group(0) + "\n\nReading time: {{ #reading_time }}", content)
return new_content
def iterate_chapters(sections):
if isinstance(sections, dict) and "PartTitle" in sections: # Not a chapter section
return
elif isinstance(sections, dict) and "Chapter" in sections: # Is a chapter return it and look into sub items
# logger.debug(f"Chapter {sections['Chapter']}")
yield sections['Chapter']
yield from iterate_chapters(sections['Chapter']["sub_items"])
elif isinstance(sections, list): # Iterate through list when in sections and in sub_items
for k, v in enumerate(sections):
yield from iterate_chapters(v)
if __name__ == '__main__':
global context, book, current_chapter
if len(sys.argv) > 1: # we check if we received any argument
if sys.argv[1] == "supports":
# then we are good to return an exit status code of 0, since the other argument will just be the renderer's name
sys.exit(0)
logger.debug('Started hacktricks preprocessor')
# load both the context and the book representations from stdin
context, book = json.load(sys.stdin)
logger.debug(f"Context: {context}")
logger.debug(f"Book keys: {book.keys()}")
# Handle both old (sections) and new (items) mdbook API
book_items = book.get('sections') or book.get('items', [])
for chapter in iterate_chapters(book_items):
if chapter is None:
continue
logger.debug(f"Chapter: {chapter['path']}")
current_chapter = chapter
# regex = r'{{[\s]*#ref[\s]*}}(?:\n)?([^\\\n]*)(?:\n)?{{[\s]*#endref[\s]*}}'
regex = r'{{[\s]*#ref[\s]*}}(?:\n)?([^\\\n#]*(?:#(.*))?)(?:\n)?{{[\s]*#endref[\s]*}}'
new_content = re.sub(regex, ref, chapter['content'])
regex = r'{{[\s]*#file[\s]*}}(?:\n)?([^\\\n]*)(?:\n)?{{[\s]*#endfile[\s]*}}'
new_content = re.sub(regex, files, new_content)
new_content = add_read_time(new_content)
chapter['content'] = new_content
content = json.dumps(book)
logger.debug(content)
print(content)