Skip to content

Commit 012709f

Browse files
authored
TST: Increase Test coverage (#756)
Adding unit Tests: * xmp * ConvertFunctionsToVirtualList * PyPDF2.utils.hexStr * Page operations with encoded file * merging encrypted * images DOC: Comments to docstrings STY: Remove vim comments BUG: CCITTFaxDecode decodeParms can be an ArrayObject. I don't know what a good solution would look like. Now it doesn't throw an error, but the result might be wrong. BUG: struct was not imported for Python 2.X
1 parent 9d53ee8 commit 012709f

20 files changed

+296
-119
lines changed

PyPDF2/filters.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
# vim: sw=4:expandtab:foldmethod=marker
2-
#
31
# Copyright (c) 2006, Mathieu Fenniak
42
# All rights reserved.
53
#
@@ -40,7 +38,7 @@
4038
from cStringIO import StringIO
4139
else:
4240
from io import StringIO
43-
import struct
41+
import struct
4442

4543
try:
4644
import zlib
@@ -356,6 +354,10 @@ def decode(data, decodeParms=None):
356354
class CCITTFaxDecode(object):
357355
def decode(data, decodeParms=None, height=0):
358356
if decodeParms:
357+
from PyPDF2.generic import ArrayObject
358+
if isinstance(decodeParms, ArrayObject):
359+
if len(decodeParms) == 1:
360+
decodeParms = decodeParms[0]
359361
if decodeParms.get("/K", 1) == -1:
360362
CCITTgroup = 4
361363
else:
@@ -451,6 +453,10 @@ def _xobj_to_image(x_object_obj):
451453
img_byte_arr = io.BytesIO()
452454
img.save(img_byte_arr, format="PNG")
453455
data = img_byte_arr.getvalue()
456+
elif x_object_obj["/Filter"] in (["/LZWDecode"], ['/ASCII85Decode'], ['/CCITTFaxDecode']):
457+
from PyPDF2.utils import b_
458+
extension = ".png"
459+
data = b_(data)
454460
elif x_object_obj["/Filter"] == "/DCTDecode":
455461
extension = ".jpg"
456462
elif x_object_obj["/Filter"] == "/JPXDecode":

PyPDF2/generic.py

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
import decimal
4545
import codecs
4646

47+
from PyPDF2.utils import ERR_STREAM_TRUNCATED_PREMATURELY
48+
4749
ObjectPrefix = b_('/<[tf(n%')
4850
NumberSigns = b_('+-')
4951
IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
@@ -199,17 +201,15 @@ def readFromStream(stream, pdf):
199201
while True:
200202
tok = stream.read(1)
201203
if not tok:
202-
# stream has truncated prematurely
203-
raise PdfStreamError("Stream has ended unexpectedly")
204+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
204205
if tok.isspace():
205206
break
206207
idnum += tok
207208
generation = b_("")
208209
while True:
209210
tok = stream.read(1)
210211
if not tok:
211-
# stream has truncated prematurely
212-
raise PdfStreamError("Stream has ended unexpectedly")
212+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
213213
if tok.isspace():
214214
if not generation:
215215
continue
@@ -273,10 +273,11 @@ def readFromStream(stream):
273273
readFromStream = staticmethod(readFromStream)
274274

275275

276-
##
277-
# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
278-
# TextStringObject to represent the string.
279276
def createStringObject(string):
277+
"""
278+
Given a string (either a "str" or "unicode"), create a ByteStringObject or a
279+
TextStringObject to represent the string.
280+
"""
280281
if isinstance(string, utils.string_type):
281282
return TextStringObject(string)
282283
elif isinstance(string, utils.bytes_type):
@@ -306,8 +307,7 @@ def readHexStringFromStream(stream):
306307
while True:
307308
tok = readNonWhitespace(stream)
308309
if not tok:
309-
# stream has truncated prematurely
310-
raise PdfStreamError("Stream has ended unexpectedly")
310+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
311311
if tok == b_(">"):
312312
break
313313
x += tok
@@ -328,8 +328,7 @@ def readStringFromStream(stream):
328328
while True:
329329
tok = stream.read(1)
330330
if not tok:
331-
# stream has truncated prematurely
332-
raise PdfStreamError("Stream has ended unexpectedly")
331+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
333332
if tok == b_("("):
334333
parens += 1
335334
elif tok == b_(")"):
@@ -392,16 +391,17 @@ def readStringFromStream(stream):
392391
return createStringObject(txt)
393392

394393

395-
##
396-
# Represents a string object where the text encoding could not be determined.
397-
# This occurs quite often, as the PDF spec doesn't provide an alternate way to
398-
# represent strings -- for example, the encryption data stored in files (like
399-
# /O) is clearly not text, but is still stored in a "String" object.
400394
class ByteStringObject(utils.bytes_type, PdfObject):
395+
"""
396+
Represents a string object where the text encoding could not be determined.
397+
This occurs quite often, as the PDF spec doesn't provide an alternate way to
398+
represent strings -- for example, the encryption data stored in files (like
399+
/O) is clearly not text, but is still stored in a "String" object.
400+
"""
401401

402402
##
403403
# For compatibility with TextStringObject.original_bytes. This method
404-
# returns self.
404+
# returns self.
405405
original_bytes = property(lambda self: self)
406406

407407
def writeToStream(self, stream, encryption_key):
@@ -413,12 +413,14 @@ def writeToStream(self, stream, encryption_key):
413413
stream.write(b_(">"))
414414

415415

416-
##
417-
# Represents a string object that has been decoded into a real unicode string.
418-
# If read from a PDF document, this string appeared to match the
419-
# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
420-
# occur.
421416
class TextStringObject(utils.string_type, PdfObject):
417+
"""
418+
Represents a string object that has been decoded into a real unicode string.
419+
If read from a PDF document, this string appeared to match the
420+
PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
421+
occur.
422+
"""
423+
422424
autodetect_pdfdocencoding = False
423425
autodetect_utf16 = False
424426

@@ -569,8 +571,7 @@ def readFromStream(stream, pdf):
569571
skipOverComment(stream)
570572
continue
571573
if not tok:
572-
# stream has truncated prematurely
573-
raise PdfStreamError("Stream has ended unexpectedly")
574+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
574575

575576
if debug: print(("Tok:", tok))
576577
if tok == b_(">"):

PyPDF2/merger.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
# vim: sw=4:expandtab:foldmethod=marker
2-
#
31
# Copyright (c) 2006, Mathieu Fenniak
42
# All rights reserved.
53
#

PyPDF2/pdf.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
# -*- coding: utf-8 -*-
22
#
3-
# vim: sw=4:expandtab:foldmethod=marker
4-
#
53
# Copyright (c) 2006, Mathieu Fenniak
64
# Copyright (c) 2007, Ashish Kulkarni <[email protected]>
75
#
@@ -1637,7 +1635,7 @@ def _getObjectFromStream(self, indirectReference):
16371635
streamData.seek(0, 0)
16381636
lines = streamData.readlines()
16391637
for i in range(0, len(lines)):
1640-
print((lines[i]))
1638+
print(lines[i])
16411639
streamData.seek(pos, 0)
16421640
try:
16431641
obj = readObject(streamData, self)
@@ -2588,11 +2586,6 @@ def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty, expan
25882586
ctm[1][0], ctm[1][1],
25892587
ctm[2][0], ctm[2][1]], expand)
25902588

2591-
##
2592-
# Applys a transformation matrix the page.
2593-
#
2594-
# @param ctm A 6 elements tuple containing the operands of the
2595-
# transformation matrix
25962589
def addTransformation(self, ctm):
25972590
"""
25982591
Applies a transformation matrix to the page.

PyPDF2/utils.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
except ImportError: # Py3
4040
import builtins
4141

42-
42+
ERR_STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly"
4343
xrange_fn = getattr(builtins, "xrange", range)
4444
_basestring = getattr(builtins, "basestring", str)
4545

@@ -122,7 +122,7 @@ def skipOverComment(stream):
122122
def readUntilRegex(stream, regex, ignore_eof=False):
123123
"""
124124
Reads until the regular expression pattern matched (ignore the match)
125-
Raise PdfStreamError on premature end-of-file.
125+
:raises PdfStreamError: on premature end-of-file
126126
:param bool ignore_eof: If true, ignore end-of-file and return immediately
127127
"""
128128
name = b_('')
@@ -133,7 +133,7 @@ def readUntilRegex(stream, regex, ignore_eof=False):
133133
if ignore_eof:
134134
return name
135135
else:
136-
raise PdfStreamError("Stream has ended unexpectedly")
136+
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
137137
m = regex.search(tok)
138138
if m is not None:
139139
name += tok[:m.start()]
@@ -242,7 +242,6 @@ def b_(s):
242242
bc[s] = r
243243
return r
244244
except Exception:
245-
print(s)
246245
r = s.encode('utf-8')
247246
if len(s) < 2:
248247
bc[s] = r
2.78 KB
Binary file not shown.
1.84 KB
Binary file not shown.

Resources/imagemagick-images.pdf

15.6 KB
Binary file not shown.

Resources/imagemagick-lzw.pdf

2.62 KB
Binary file not shown.

Resources/metadata.pdf

13 KB
Binary file not shown.

0 commit comments

Comments
 (0)