-
Notifications
You must be signed in to change notification settings - Fork 18
/
server.py
176 lines (141 loc) · 5.23 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/usr/bin/env python3
"""
weasyprint server
A tiny aiohttp-based web server that wraps weasyprint.
It expects a multipart/form-data upload containing an HTML file, an optional
CSS file and optional attachments.
"""
from aiohttp import web
from re import match
from urllib.parse import unquote
from urllib.parse import urlparse
from weasyprint import CSS
from weasyprint import DEFAULT_OPTIONS
from weasyprint import default_url_fetcher
from weasyprint import HTML
import json
import logging
import os.path
import tempfile
# Number of bytes read per chunk, both when saving uploaded parts to disk and
# when streaming the generated PDF back to the client.
CHUNK_SIZE = 65536
# Logs under the fixed 'weasyprint' logger name rather than this module's name.
logger = logging.getLogger('weasyprint')
class URLFetcher:
    """URL fetcher that only allows data URLs and known files.

    Instances are callables meant to be passed as the ``url_fetcher``
    argument of ``HTML``/``CSS``. A URL is delegated to
    ``default_url_fetcher`` when it

    * matches the configured ``allowed_urls_pattern`` (if one is set),
    * is a ``data:`` URL, or
    * refers to a local file whose absolute path is in ``valid_paths``.

    Everything else is rejected with ``ValueError``.
    """

    def __init__(self, valid_paths):
        """Store the collection of absolute file paths that may be read."""
        self.valid_paths = valid_paths

    def __call__(self, url):
        """Fetch ``url`` with the default fetcher if it is allowed.

        Raises:
            ValueError: if ``url`` points to a local path that is not in
                ``valid_paths``, or to any other external resource.
        """
        allowed_pattern = config['allowed_urls_pattern']
        if allowed_pattern and match(allowed_pattern, url):
            return default_url_fetcher(url)

        parsed = urlparse(url)
        if parsed.scheme == 'data':
            return default_url_fetcher(url)

        if parsed.scheme in ('', 'file') and parsed.path:
            # URL paths are percent-encoded (a space arrives as "%20", an
            # encoded slash as "%2F") while valid_paths holds plain
            # filesystem paths. Decode *before* normalising and comparing so
            # the path being validated is the path that ends up being
            # opened: otherwise names with spaces are wrongly rejected and
            # encoded "../" sequences slip past the check.
            local_path = os.path.abspath(unquote(parsed.path))
            if local_path in self.valid_paths:
                return default_url_fetcher(url)
            raise ValueError('Only known path allowed')

        raise ValueError('External resources are not allowed')
async def render_pdf(request):
    """Render an uploaded HTML document to PDF and stream it back.

    The request must be ``multipart/form-data``. Parts named ``html``,
    ``css``, ``attachment.*`` and ``asset.*`` are saved into a temporary
    directory. A part named ``options`` is parsed as JSON and reduced to the
    known, safe WeasyPrint options. Any other part is ignored.

    Args:
        request: the incoming aiohttp request.

    Returns:
        The streamed PDF response on success, a 400 response when the
        request is malformed, or a 500 response when PDF generation fails.
    """
    form_data = {}
    options = {}

    if request.content_type != 'multipart/form-data':
        logger.info(
            'Bad request. Received content type %s instead of multipart/form-data.',
            request.content_type,
        )
        return web.Response(status=400, text="Multipart request required.")

    reader = await request.multipart()

    # The whole request, including streaming the result, is handled inside
    # the ``with`` block because the directory is removed when it exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        while True:
            part = await reader.next()
            if part is None:
                break

            # A part may come without a name; treat it like any other
            # unknown part instead of failing on ``None.startswith``.
            name = part.name or ''

            if name in ('html', 'css') or name.startswith(
                ('attachment.', 'asset.')
            ):
                if not part.filename:
                    logger.info('Bad request. Part %s has no filename.', name)
                    return web.Response(
                        status=400, text="File name required for file parts.")
                form_data[name] = await save_part_to_file(part, temp_dir)
            elif name == 'options':
                try:
                    options = json.loads(await part.text())
                except ValueError:
                    # Covers both json.JSONDecodeError and a payload that is
                    # not valid text (UnicodeDecodeError).
                    logger.exception('Failed decoding options.')
                    return web.Response(
                        status=400, text="Invalid JSON in options part.")
                if not isinstance(options, dict):
                    logger.info('Bad request. Options is not a JSON object.')
                    return web.Response(
                        status=400, text="Options part must be a JSON object.")
                # Ignore unknown and unsafe options
                allowed_options = DEFAULT_OPTIONS.keys() - {
                    'stylesheets', 'attachments', 'cache'}
                options = {
                    key: value
                    for key, value in options.items()
                    if key in allowed_options
                }

        if 'html' not in form_data:
            logger.info('Bad request. No html file provided.')
            return web.Response(status=400, text="No html file provided.")

        attachments = [
            path for name, path in form_data.items()
            if name.startswith('attachment.')
        ]
        pdf_filename = os.path.join(temp_dir, 'output.pdf')

        try:
            # Only the files uploaded with this request may be referenced.
            url_fetcher = URLFetcher(set(form_data.values()))
            html = HTML(filename=form_data['html'], url_fetcher=url_fetcher)
            if 'css' in form_data:
                css = CSS(filename=form_data['css'], url_fetcher=url_fetcher)
            else:
                css = CSS(string='@page { size: A4; margin: 2cm 2.5cm; }')
            # NOTE(review): write_pdf is a synchronous call, so other
            # requests are not served while it runs.
            html.write_pdf(
                pdf_filename, stylesheets=[css], attachments=attachments,
                **options)
        except Exception:
            logger.exception('PDF generation failed')
            return web.Response(
                status=500, text="PDF generation failed.")

        return await stream_file(request, pdf_filename, 'application/pdf')
async def save_part_to_file(part, directory):
    """Stream a multipart body part into a file inside ``directory``.

    Args:
        part: multipart body part; its ``filename`` attribute names the
            target file and ``read_chunk(size)`` is awaited for the content.
        directory: existing directory the file is written into.

    Returns:
        The path of the written file.

    Raises:
        ValueError: if the part carries no usable file name.
    """
    # ``part.filename`` is supplied by the client. Keep only its last path
    # component so that absolute paths or "../" sequences cannot make the
    # upload land outside ``directory``.
    safe_name = os.path.basename(part.filename or '')
    if safe_name in ('', '.', '..'):
        raise ValueError(f'Invalid file name in part: {part.filename!r}')

    filename = os.path.join(directory, safe_name)
    with open(filename, 'wb') as file_:
        while True:
            chunk = await part.read_chunk(CHUNK_SIZE)
            if not chunk:
                break
            file_.write(chunk)
    return filename
async def stream_file(request, filename, content_type):
    """Send the file at ``filename`` to the client as a chunked download.

    The response advertises ``content_type`` and offers the file under its
    base name as an attachment. Returns the finished stream response.
    """
    disposition = f'attachment; filename="{os.path.basename(filename)}"'
    headers = {
        'Content-Type': content_type,
        'Content-Disposition': disposition,
    }
    response = web.StreamResponse(status=200, reason='OK', headers=headers)
    await response.prepare(request)

    with open(filename, 'rb') as source:
        # read() returns b'' at end of file, which ends the iteration.
        for data in iter(lambda: source.read(CHUNK_SIZE), b''):
            await response.write(data)
        await response.write_eof()

    return response
async def healthcheck(request):
    """Liveness probe: always answer 200 with the body ``OK``."""
    response = web.Response(text="OK", status=200)
    return response
def get_config():
    """Build the server configuration from environment variables.

    Returns a dict with the single key ``allowed_urls_pattern``, taken from
    ``WEASYPRINT_ALLOWED_URLS_PATTERN`` (``None`` when the variable is unset).
    """
    return {
        'allowed_urls_pattern': os.environ.get(
            'WEASYPRINT_ALLOWED_URLS_PATTERN'),
    }
# Module-level configuration, read once at import time; URLFetcher consults
# it on every call.
config = get_config()

if __name__ == '__main__':
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s %(name)s %(message)s',
    )
    application = web.Application()
    # POST / renders a PDF, GET /healthcheck is the liveness probe.
    application.add_routes([
        web.post('/', render_pdf),
        web.get('/healthcheck', healthcheck),
    ])
    web.run_app(application)