Skip to content

Commit 012709f

Browse files
authored
TST: Increase Test coverage (#756)
Adding unit tests: * xmp * ConvertFunctionsToVirtualList * PyPDF2.utils.hexStr * Page operations with encoded file * merging encrypted * images DOC: Comments to docstrings STY: Remove vim comments BUG: CCITTFaxDecode decodeParms can be an ArrayObject. I don't know what a good solution would look like. Now it doesn't throw an error, but the result might be wrong. BUG: struct was not imported for Python 2.X
1 parent 9d53ee8 commit 012709f

20 files changed

+296
-119
lines changed

PyPDF2/filters.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
# vim: sw=4:expandtab:foldmethod=marker
2-
#
31
# Copyright (c) 2006, Mathieu Fenniak
42
# All rights reserved.
53
#
@@ -40,7 +38,7 @@
4038
from cStringIO import StringIO
4139
else:
4240
from io import StringIO
43-
import struct
41+
import struct
4442

4543
try:
4644
import zlib
@@ -356,6 +354,10 @@ def decode(data, decodeParms=None):
356354
class CCITTFaxDecode(object):
357355
def decode(data, decodeParms=None, height=0):
358356
if decodeParms:
357+
from PyPDF2.generic import ArrayObject
358+
if isinstance(decodeParms, ArrayObject):
359+
if len(decodeParms) == 1:
360+
decodeParms = decodeParms[0]
359361
if decodeParms.get("/K", 1) == -1:
360362
CCITTgroup = 4
361363
else:
@@ -451,6 +453,10 @@ def _xobj_to_image(x_object_obj):
451453
img_byte_arr = io.BytesIO()
452454
img.save(img_byte_arr, format="PNG")
453455
data = img_byte_arr.getvalue()
456+
elif x_object_obj["/Filter"] in (["/LZWDecode"], ['/ASCII85Decode'], ['/CCITTFaxDecode']):
457+
from PyPDF2.utils import b_
458+
extension = ".png"
459+
data = b_(data)
454460
elif x_object_obj["/Filter"] == "/DCTDecode":
455461
extension = ".jpg"
456462
elif x_object_obj["/Filter"] == "/JPXDecode":

PyPDF2/generic.py

+25-24
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
import decimal
4545
import codecs
4646

47+
from PyPDF2.utils import ERR_STREAM_TRUNCATED_PREMATURELY
48+
4749
ObjectPrefix = b_('/<[tf(n%')
4850
NumberSigns = b_('+-')
4951
IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
@@ -199,17 +201,15 @@ def readFromStream(stream, pdf):
199201
while True:
200202
tok = stream.read(1)
201203
if not tok:
202-
# stream has truncated prematurely
203-
raise PdfStreamError("Stream has ended unexpectedly")
204+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
204205
if tok.isspace():
205206
break
206207
idnum += tok
207208
generation = b_("")
208209
while True:
209210
tok = stream.read(1)
210211
if not tok:
211-
# stream has truncated prematurely
212-
raise PdfStreamError("Stream has ended unexpectedly")
212+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
213213
if tok.isspace():
214214
if not generation:
215215
continue
@@ -273,10 +273,11 @@ def readFromStream(stream):
273273
readFromStream = staticmethod(readFromStream)
274274

275275

276-
##
277-
# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
278-
# TextStringObject to represent the string.
279276
def createStringObject(string):
277+
"""
278+
Given a string (either a "str" or "unicode"), create a ByteStringObject or a
279+
TextStringObject to represent the string.
280+
"""
280281
if isinstance(string, utils.string_type):
281282
return TextStringObject(string)
282283
elif isinstance(string, utils.bytes_type):
@@ -306,8 +307,7 @@ def readHexStringFromStream(stream):
306307
while True:
307308
tok = readNonWhitespace(stream)
308309
if not tok:
309-
# stream has truncated prematurely
310-
raise PdfStreamError("Stream has ended unexpectedly")
310+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
311311
if tok == b_(">"):
312312
break
313313
x += tok
@@ -328,8 +328,7 @@ def readStringFromStream(stream):
328328
while True:
329329
tok = stream.read(1)
330330
if not tok:
331-
# stream has truncated prematurely
332-
raise PdfStreamError("Stream has ended unexpectedly")
331+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
333332
if tok == b_("("):
334333
parens += 1
335334
elif tok == b_(")"):
@@ -392,16 +391,17 @@ def readStringFromStream(stream):
392391
return createStringObject(txt)
393392

394393

395-
##
396-
# Represents a string object where the text encoding could not be determined.
397-
# This occurs quite often, as the PDF spec doesn't provide an alternate way to
398-
# represent strings -- for example, the encryption data stored in files (like
399-
# /O) is clearly not text, but is still stored in a "String" object.
400394
class ByteStringObject(utils.bytes_type, PdfObject):
395+
"""
396+
Represents a string object where the text encoding could not be determined.
397+
This occurs quite often, as the PDF spec doesn't provide an alternate way to
398+
represent strings -- for example, the encryption data stored in files (like
399+
/O) is clearly not text, but is still stored in a "String" object.
400+
"""
401401

402402
##
403403
# For compatibility with TextStringObject.original_bytes. This method
404-
# returns self.
404+
# returns self.
405405
original_bytes = property(lambda self: self)
406406

407407
def writeToStream(self, stream, encryption_key):
@@ -413,12 +413,14 @@ def writeToStream(self, stream, encryption_key):
413413
stream.write(b_(">"))
414414

415415

416-
##
417-
# Represents a string object that has been decoded into a real unicode string.
418-
# If read from a PDF document, this string appeared to match the
419-
# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
420-
# occur.
421416
class TextStringObject(utils.string_type, PdfObject):
417+
"""
418+
Represents a string object that has been decoded into a real unicode string.
419+
If read from a PDF document, this string appeared to match the
420+
PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
421+
occur.
422+
"""
423+
422424
autodetect_pdfdocencoding = False
423425
autodetect_utf16 = False
424426

@@ -569,8 +571,7 @@ def readFromStream(stream, pdf):
569571
skipOverComment(stream)
570572
continue
571573
if not tok:
572-
# stream has truncated prematurely
573-
raise PdfStreamError("Stream has ended unexpectedly")
574+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
574575

575576
if debug: print(("Tok:", tok))
576577
if tok == b_(">"):

PyPDF2/merger.py

-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
# vim: sw=4:expandtab:foldmethod=marker
2-
#
31
# Copyright (c) 2006, Mathieu Fenniak
42
# All rights reserved.
53
#

PyPDF2/pdf.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
# -*- coding: utf-8 -*-
22
#
3-
# vim: sw=4:expandtab:foldmethod=marker
4-
#
53
# Copyright (c) 2006, Mathieu Fenniak
64
# Copyright (c) 2007, Ashish Kulkarni <[email protected]>
75
#
@@ -1637,7 +1635,7 @@ def _getObjectFromStream(self, indirectReference):
16371635
streamData.seek(0, 0)
16381636
lines = streamData.readlines()
16391637
for i in range(0, len(lines)):
1640-
print((lines[i]))
1638+
print(lines[i])
16411639
streamData.seek(pos, 0)
16421640
try:
16431641
obj = readObject(streamData, self)
@@ -2588,11 +2586,6 @@ def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty, expan
25882586
ctm[1][0], ctm[1][1],
25892587
ctm[2][0], ctm[2][1]], expand)
25902588

2591-
##
2592-
# Applys a transformation matrix the page.
2593-
#
2594-
# @param ctm A 6 elements tuple containing the operands of the
2595-
# transformation matrix
25962589
def addTransformation(self, ctm):
25972590
"""
25982591
Applies a transformation matrix to the page.

PyPDF2/utils.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
except ImportError: # Py3
4040
import builtins
4141

42-
42+
ERR_STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly"
4343
xrange_fn = getattr(builtins, "xrange", range)
4444
_basestring = getattr(builtins, "basestring", str)
4545

@@ -122,7 +122,7 @@ def skipOverComment(stream):
122122
def readUntilRegex(stream, regex, ignore_eof=False):
123123
"""
124124
Reads until the regular expression pattern is matched (the match itself is ignored)
125-
Raise PdfStreamError on premature end-of-file.
125+
:raises PdfStreamError: on premature end-of-file
126126
:param bool ignore_eof: If true, ignore end-of-file and return immediately
127127
"""
128128
name = b_('')
@@ -133,7 +133,7 @@ def readUntilRegex(stream, regex, ignore_eof=False):
133133
if ignore_eof:
134134
return name
135135
else:
136-
raise PdfStreamError("Stream has ended unexpectedly")
136+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
137137
m = regex.search(tok)
138138
if m is not None:
139139
name += tok[:m.start()]
@@ -242,7 +242,6 @@ def b_(s):
242242
bc[s] = r
243243
return r
244244
except Exception:
245-
print(s)
246245
r = s.encode('utf-8')
247246
if len(s) < 2:
248247
bc[s] = r
2.78 KB
Binary file not shown.
1.84 KB
Binary file not shown.

Resources/imagemagick-images.pdf

15.6 KB
Binary file not shown.

Resources/imagemagick-lzw.pdf

2.62 KB
Binary file not shown.

Resources/metadata.pdf

13 KB
Binary file not shown.

Tests/test_basic_features.py

+17-17
Original file line numberDiff line numberDiff line change
@@ -2,60 +2,60 @@
22

33
import pytest
44

5-
from PyPDF2 import PdfFileWriter, PdfFileReader
6-
from PyPDF2.utils import PdfReadError
5+
from PyPDF2 import PdfFileReader, PdfFileWriter
76
from PyPDF2.pdf import convertToInt
7+
from PyPDF2.utils import PdfReadError
88

99
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
1010
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
1111
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")
1212

1313

1414
def test_basic_features():
15-
output = PdfFileWriter()
16-
document1 = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
17-
input1 = PdfFileReader(document1)
15+
writer = PdfFileWriter()
16+
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
17+
reader = PdfFileReader(pdf_path)
1818

1919
# print how many pages the reader has:
20-
print("document1.pdf has %d pages." % input1.getNumPages())
20+
print("document1.pdf has %d pages." % reader.getNumPages())
2121

2222
# add page 1 from the reader to the writer, unchanged
23-
output.addPage(input1.getPage(0))
23+
writer.addPage(reader.getPage(0))
2424

2525
# add page 2 from the reader, but rotated clockwise 90 degrees
26-
output.addPage(input1.getPage(0).rotateClockwise(90))
26+
writer.addPage(reader.getPage(0).rotateClockwise(90))
2727

2828
# add page 3 from the reader, rotated the other way:
29-
output.addPage(input1.getPage(0).rotateCounterClockwise(90))
29+
writer.addPage(reader.getPage(0).rotateCounterClockwise(90))
3030
# alt: writer.addPage(reader.getPage(0).rotateClockwise(270))
3131

3232
# add page 4 from the reader, but first add a watermark from another PDF:
33-
page4 = input1.getPage(0)
34-
watermark_pdf = document1
33+
page4 = reader.getPage(0)
34+
watermark_pdf = pdf_path
3535
watermark = PdfFileReader(watermark_pdf)
3636
page4.mergePage(watermark.getPage(0))
37-
output.addPage(page4)
37+
writer.addPage(page4)
3838

3939
# add page 5 from the reader, but crop it to half size:
40-
page5 = input1.getPage(0)
40+
page5 = reader.getPage(0)
4141
page5.mediaBox.upperRight = (
4242
page5.mediaBox.getUpperRight_x() / 2,
4343
page5.mediaBox.getUpperRight_y() / 2,
4444
)
45-
output.addPage(page5)
45+
writer.addPage(page5)
4646

4747
# add some Javascript to launch the print window on opening this PDF.
4848
# the password dialog may prevent the print dialog from being shown,
4949
# comment out the encryption lines, if that's the case, to try this out
50-
output.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
50+
writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
5151

5252
# encrypt your new PDF and add a password
5353
password = "secret"
54-
output.encrypt(password)
54+
writer.encrypt(password)
5555

5656
# finally, write the writer's contents to PyPDF2-output.pdf
5757
with open("PyPDF2-output.pdf", "wb") as outputStream:
58-
output.write(outputStream)
58+
writer.write(outputStream)
5959

6060

6161
def test_convertToInt():

Tests/test_javascript.py

+21-15
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
23
import pytest
34

45
from PyPDF2 import PdfFileReader, PdfFileWriter
@@ -8,21 +9,28 @@
89
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
910
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")
1011

12+
1113
@pytest.fixture
1214
def pdf_file_writer():
13-
ipdf = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))
15+
reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))
1416
pdf_file_writer = PdfFileWriter()
15-
pdf_file_writer.appendPagesFromReader(ipdf)
17+
pdf_file_writer.appendPagesFromReader(reader)
1618
yield pdf_file_writer
1719

20+
1821
def test_add_js(pdf_file_writer):
19-
pdf_file_writer.addJS(
20-
"this.print({bUI:true,bSilent:false,bShrinkToFit:true});"
21-
)
22+
pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
23+
24+
assert (
25+
"/Names" in pdf_file_writer._root_object
26+
), "addJS should add a name catalog in the root object."
27+
assert (
28+
"/JavaScript" in pdf_file_writer._root_object["/Names"]
29+
), "addJS should add a JavaScript name tree under the name catalog."
30+
assert (
31+
"/OpenAction" in pdf_file_writer._root_object
32+
), "addJS should add an OpenAction to the catalog."
2233

23-
assert "/Names" in pdf_file_writer._root_object, "addJS should add a name catalog in the root object."
24-
assert "/JavaScript" in pdf_file_writer._root_object["/Names"], "addJS should add a JavaScript name tree under the name catalog."
25-
assert "/OpenAction" in pdf_file_writer._root_object, "addJS should add an OpenAction to the catalog."
2634

2735
def test_overwrite_js(pdf_file_writer):
2836
def get_javascript_name():
@@ -31,14 +39,12 @@ def get_javascript_name():
3139
assert "/Names" in pdf_file_writer._root_object["/Names"]["/JavaScript"]
3240
return pdf_file_writer._root_object["/Names"]["/JavaScript"]["/Names"][0]
3341

34-
pdf_file_writer.addJS(
35-
"this.print({bUI:true,bSilent:false,bShrinkToFit:true});"
36-
)
42+
pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
3743
first_js = get_javascript_name()
3844

39-
pdf_file_writer.addJS(
40-
"this.print({bUI:true,bSilent:false,bShrinkToFit:true});"
41-
)
45+
pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
4246
second_js = get_javascript_name()
4347

44-
assert first_js != second_js, "addJS should overwrite the previous script in the catalog."
48+
assert (
49+
first_js != second_js
50+
), "addJS should overwrite the previous script in the catalog."

0 commit comments

Comments
 (0)