Skip to content

Commit 9beb7f4

Browse files
committed
removing metadata modification
1 parent 93ee5c1 commit 9beb7f4

File tree

6 files changed

+15
-71
lines changed

6 files changed

+15
-71
lines changed

HISTORY.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
History
44
-------
55

6+
v0.33.0 (2017-05-24)
7+
....................
8+
* remove PDF metadata modification as it can break some PDF viewers
9+
610
v0.32.0 (2017-05-24)
711
....................
812
* set ``cache_dir`` for ``generate_pdf`` by default

benchmark/invoice.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<html lang="en">
33
<head>
44
<meta charset="UTF-8">
5-
<title>Invoice INV-1</title>
5+
<title>Invoice INV-123</title>
66
<link rel="stylesheet" href="https://secure.tutorcruncher.com/static/css/libraries.css">
77
<link rel="stylesheet" href="https://secure.tutorcruncher.com/static/css/pdf_styles.css">
88
</head>

benchmark/run.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@ def go_sync():
1717
for i in range(count):
1818
pdf = generate_pdf(
1919
html,
20-
title='Benchmark',
21-
author='Samuel Colvin',
22-
subject='Mock Invoice',
2320
page_size='A4',
2421
zoom='1.25',
2522
margin_left='8mm',
@@ -41,9 +38,6 @@ async def go_async():
4138
async def gen(i_):
4239
pdf = await apydf.generate_pdf(
4340
html,
44-
title='Benchmark',
45-
author='Samuel Colvin',
46-
subject='Mock Invoice',
4741
page_size='A4',
4842
zoom='1.25',
4943
margin_left='8mm',

pydf/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from distutils.version import StrictVersion
22

3-
VERSION = StrictVersion('0.32.0')
3+
VERSION = StrictVersion('0.33.0')

pydf/wkhtmltopdf.py

Lines changed: 3 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import asyncio
2-
import re
32
import subprocess
43
import tempfile
54

@@ -47,20 +46,6 @@ def _convert_args(**py_args):
4746
return cmd_args
4847

4948

50-
def _set_meta_data(pdf_content, **kwargs):
51-
fields = [
52-
('Title', kwargs.get('title')),
53-
('Author', kwargs.get('author')),
54-
('Subject', kwargs.get('subject')),
55-
('Creator', kwargs.get('creator')),
56-
('Producer', kwargs.get('producer')),
57-
]
58-
metadata = '\n'.join(f'/{name} ({value})' for name, value in fields if value)
59-
if metadata:
60-
pdf_content = re.sub(b'/Title.*\n.*\n/Producer.*', metadata.encode(), pdf_content, count=1)
61-
return pdf_content
62-
63-
6449
class AsyncPydf:
6550
def __init__(self, *, max_processes=20, loop=None, cache_dir=DFT_CACHE_DIR):
6651
self.semaphore = asyncio.Semaphore(value=max_processes, loop=loop)
@@ -69,14 +54,7 @@ def __init__(self, *, max_processes=20, loop=None, cache_dir=DFT_CACHE_DIR):
6954
Path.mkdir(cache_dir)
7055
self.cache_dir = cache_dir
7156

72-
async def generate_pdf(self,
73-
html,
74-
title=None,
75-
author=None,
76-
subject=None,
77-
creator=None,
78-
producer=None,
79-
**cmd_args):
57+
async def generate_pdf(self, html, **cmd_args):
8058
cmd_args = [WK_PATH] + _convert_args(cache_dir=self.cache_dir, **cmd_args)
8159
async with self.semaphore:
8260
p = await asyncio.create_subprocess_exec(
@@ -94,24 +72,10 @@ async def generate_pdf(self,
9472
stderr = await p.stderr.read()
9573
raise RuntimeError('error running wkhtmltopdf, command: {!r}\n'
9674
'response: "{}"'.format(cmd_args, stderr.strip()))
97-
98-
return _set_meta_data(
99-
pdf_content,
100-
title=title,
101-
author=author,
102-
subject=subject,
103-
creator=creator,
104-
producer=producer,
105-
)
75+
return pdf_content
10676

10777

10878
def generate_pdf(html, *,
109-
title: str=None,
110-
author: str=None,
111-
subject: str=None,
112-
creator: str=None,
113-
producer: str=None,
114-
# from here on arguments are passed via the commandline to wkhtmltopdf
11579
cache_dir: Path=DFT_CACHE_DIR,
11680
grayscale: bool=False,
11781
lowquality: bool=False,
@@ -186,15 +150,7 @@ def generate_pdf(html, *,
186150
if p.returncode != 0 and pdf_content[:4] != b'%PDF':
187151
raise RuntimeError('error running wkhtmltopdf, command: {!r}\n'
188152
'response: "{}"'.format(cmd_args, p.stderr.strip()))
189-
190-
return _set_meta_data(
191-
pdf_content,
192-
title=title,
193-
author=author,
194-
subject=subject,
195-
creator=creator,
196-
producer=producer,
197-
)
153+
return pdf_content
198154

199155

200156
def _string_execute(*args):

tests/test_sync.py

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,13 @@ def test_generate_pdf_with_html():
1111
assert 'Is this thing on?\n\n\x0c' == text
1212

1313

14-
def test_generate_pdf_with_html_meta_data():
15-
pdf_content = generate_pdf(
16-
'<html><body>Is this thing on?</body></html>',
17-
title='title foobar',
18-
subject='the subject',
19-
author='Samuel Colvin',
20-
creator='this is the creator'
21-
)
14+
def test_pdf_title():
15+
pdf_content = generate_pdf('<html><head><title>the title</title></head><body>hello</body></html>')
2216
assert pdf_content[:4] == b'%PDF'
23-
beginning = pdf_content.decode('utf8', 'ignore')[:300]
24-
print(beginning)
25-
assert """
26-
<<
27-
/Title (title foobar)
28-
/Author (Samuel Colvin)
29-
/Subject (the subject)
30-
/Creator (this is the creator)""" in beginning
17+
text = pdf_text(pdf_content)
18+
title = 'the title'.encode('utf-16be')
19+
assert b'\n/Title (\xfe\xff%s)\n' % title in pdf_content
20+
assert 'hello\n\n\x0c' == text
3121

3222

3323
def test_unicode():

0 commit comments

Comments
 (0)