-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Closed
Description
After upgrading pytest to version 8.0.0, a few warning-related tests started failing:
Full test output
$ python -m pytest -m 'not enable_socket'
========================================================= test session starts =========================================================
platform linux -- Python 3.11.7, pytest-8.0.0, pluggy-1.4.0
rootdir: /tmp/pypdf
configfile: pyproject.toml
testpaths: tests
plugins: socket-0.7.0, cov-4.1.0, timeout-2.2.0
collected 865 items / 273 deselected / 592 selected
tests/test_annotations.py .. [ 0%]
tests/test_constants.py ... [ 0%]
tests/test_encryption.py .................................... [ 6%]
tests/test_filters.py ...................x..... [ 11%]
tests/test_generic.py ................................................................................................. [ 27%]
tests/test_images.py ....... [ 28%]
tests/test_javascript.py .. [ 29%]
tests/test_merger.py ............. [ 31%]
tests/test_page.py ......................................................................s.. [ 43%]
tests/test_page_labels.py ................... [ 46%]
tests/test_pagerange.py ...................... [ 50%]
tests/test_papersizes.py .................. [ 53%]
tests/test_pdfa.py . [ 53%]
tests/test_protocols.py . [ 53%]
tests/test_reader.py ...............xx...F..FF....................F.F....F.......................... [ 67%]
tests/test_text_extraction.py ..... [ 68%]
tests/test_utils.py ........................................................................................................... [ 86%]
[ 86%]
tests/test_workflows.py ................... [ 89%]
tests/test_writer.py ..................................................s [ 97%]
tests/test_xmp.py ........... [ 99%]
tests/test_xobject_image_helpers.py . [100%]
============================================================== FAILURES ===============================================================
_______________________________________________ test_get_images_raw[True-True--1-True-] _______________________________________________
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f2f3e9ef150>, strict = True, with_prev_0 = True, startx_correction = -1
should_fail = True, warning_msgs = ''
@pytest.mark.parametrize(
("strict", "with_prev_0", "startx_correction", "should_fail", "warning_msgs"),
[
(
True,
False,
-1,
False,
[
"startxref on same line as offset",
"Xref table not zero-indexed. "
"ID numbers for objects will be corrected.",
],
), # all nominal => no fail
(True, True, -1, True, ""), # Prev=0 => fail expected
(
False,
False,
-1,
False,
["startxref on same line as offset"],
),
(
False,
True,
-1,
False,
[
"startxref on same line as offset",
"/Prev=0 in the trailer - assuming there is no previous xref table",
],
), # Prev =0 => no strict so tolerant
(True, False, 0, True, ""), # error on startxref, in strict => fail expected
(True, True, 0, True, ""),
(
False,
False,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
), # error on startxref, but no strict => xref rebuilt,no fail
(
False,
True,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
),
],
)
def test_get_images_raw(
caplog, strict, with_prev_0, startx_correction, should_fail, warning_msgs
):
pdf_data = (
b"%%PDF-1.7\n"
b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
b"2 0 obj << >> endobj\n"
b"3 0 obj << >> endobj\n"
b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
b" /Resources << /Font << >> >>"
b" /Rotate 0 /Type /Page >> endobj\n"
b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
b"xref 1 5\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"trailer << %s/Root 5 0 R /Size 6 >>\n"
b"startxref %d\n"
b"%%%%EOF"
)
pdf_data = pdf_data % (
pdf_data.find(b"1 0 obj"),
pdf_data.find(b"2 0 obj"),
pdf_data.find(b"3 0 obj"),
pdf_data.find(b"4 0 obj"),
pdf_data.find(b"5 0 obj"),
b"/Prev 0 " if with_prev_0 else b"",
# startx_correction should be -1 due to double % at the beginning
# inducing an error on startxref computation
pdf_data.find(b"xref") + startx_correction,
)
pdf_stream = io.BytesIO(pdf_data)
if should_fail:
with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
> PdfReader(pdf_stream, strict=strict)
tests/test_reader.py:339:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pypdf/_reader.py:318: in __init__
self.read(stream)
pypdf/_reader.py:1346: in read
self._read_xref_tables_and_trailers(stream, startxref, xref_issue_nr)
pypdf/_reader.py:1589: in _read_xref_tables_and_trailers
startxref = self._read_xref_other_error(stream, startxref)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pypdf._reader.PdfReader object at 0x7f2f3e95ff90>, stream = <_io.BytesIO object at 0x7f2f3e08b6a0>, startxref = 0
def _read_xref_other_error(
self, stream: StreamType, startxref: int
) -> Optional[int]:
# some PDFs have /Prev=0 in the trailer, instead of no /Prev
if startxref == 0:
if self.strict:
> raise PdfReadError(
"/Prev=0 in the trailer (try opening with strict=False)"
)
E pypdf.errors.PdfReadError: /Prev=0 in the trailer (try opening with strict=False)
pypdf/_reader.py:1622: PdfReadError
During handling of the above exception, another exception occurred:
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f2f3e9ef150>, strict = True, with_prev_0 = True, startx_correction = -1
should_fail = True, warning_msgs = ''
@pytest.mark.parametrize(
("strict", "with_prev_0", "startx_correction", "should_fail", "warning_msgs"),
[
(
True,
False,
-1,
False,
[
"startxref on same line as offset",
"Xref table not zero-indexed. "
"ID numbers for objects will be corrected.",
],
), # all nominal => no fail
(True, True, -1, True, ""), # Prev=0 => fail expected
(
False,
False,
-1,
False,
["startxref on same line as offset"],
),
(
False,
True,
-1,
False,
[
"startxref on same line as offset",
"/Prev=0 in the trailer - assuming there is no previous xref table",
],
), # Prev =0 => no strict so tolerant
(True, False, 0, True, ""), # error on startxref, in strict => fail expected
(True, True, 0, True, ""),
(
False,
False,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
), # error on startxref, but no strict => xref rebuilt,no fail
(
False,
True,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
),
],
)
def test_get_images_raw(
caplog, strict, with_prev_0, startx_correction, should_fail, warning_msgs
):
pdf_data = (
b"%%PDF-1.7\n"
b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
b"2 0 obj << >> endobj\n"
b"3 0 obj << >> endobj\n"
b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
b" /Resources << /Font << >> >>"
b" /Rotate 0 /Type /Page >> endobj\n"
b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
b"xref 1 5\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"trailer << %s/Root 5 0 R /Size 6 >>\n"
b"startxref %d\n"
b"%%%%EOF"
)
pdf_data = pdf_data % (
pdf_data.find(b"1 0 obj"),
pdf_data.find(b"2 0 obj"),
pdf_data.find(b"3 0 obj"),
pdf_data.find(b"4 0 obj"),
pdf_data.find(b"5 0 obj"),
b"/Prev 0 " if with_prev_0 else b"",
# startx_correction should be -1 due to double % at the beginning
# inducing an error on startxref computation
pdf_data.find(b"xref") + startx_correction,
)
pdf_stream = io.BytesIO(pdf_data)
if should_fail:
> with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
E Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
E Emitted warnings: [].
tests/test_reader.py:338: Failed
---------------------------------------------------------- Captured log call ----------------------------------------------------------
WARNING pypdf._reader:_utils.py:477 startxref on same line as offset
WARNING pypdf._reader:_utils.py:477 Xref table not zero-indexed. ID numbers for objects will be corrected.
_______________________________________________ test_get_images_raw[True-False-0-True-] _______________________________________________
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f2f3f871410>, strict = True, with_prev_0 = False, startx_correction = 0
should_fail = True, warning_msgs = ''
@pytest.mark.parametrize(
("strict", "with_prev_0", "startx_correction", "should_fail", "warning_msgs"),
[
(
True,
False,
-1,
False,
[
"startxref on same line as offset",
"Xref table not zero-indexed. "
"ID numbers for objects will be corrected.",
],
), # all nominal => no fail
(True, True, -1, True, ""), # Prev=0 => fail expected
(
False,
False,
-1,
False,
["startxref on same line as offset"],
),
(
False,
True,
-1,
False,
[
"startxref on same line as offset",
"/Prev=0 in the trailer - assuming there is no previous xref table",
],
), # Prev =0 => no strict so tolerant
(True, False, 0, True, ""), # error on startxref, in strict => fail expected
(True, True, 0, True, ""),
(
False,
False,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
), # error on startxref, but no strict => xref rebuilt,no fail
(
False,
True,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
),
],
)
def test_get_images_raw(
caplog, strict, with_prev_0, startx_correction, should_fail, warning_msgs
):
pdf_data = (
b"%%PDF-1.7\n"
b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
b"2 0 obj << >> endobj\n"
b"3 0 obj << >> endobj\n"
b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
b" /Resources << /Font << >> >>"
b" /Rotate 0 /Type /Page >> endobj\n"
b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
b"xref 1 5\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"trailer << %s/Root 5 0 R /Size 6 >>\n"
b"startxref %d\n"
b"%%%%EOF"
)
pdf_data = pdf_data % (
pdf_data.find(b"1 0 obj"),
pdf_data.find(b"2 0 obj"),
pdf_data.find(b"3 0 obj"),
pdf_data.find(b"4 0 obj"),
pdf_data.find(b"5 0 obj"),
b"/Prev 0 " if with_prev_0 else b"",
# startx_correction should be -1 due to double % at the beginning
# inducing an error on startxref computation
pdf_data.find(b"xref") + startx_correction,
)
pdf_stream = io.BytesIO(pdf_data)
if should_fail:
with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
> PdfReader(pdf_stream, strict=strict)
tests/test_reader.py:339:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pypdf/_reader.py:318: in __init__
self.read(stream)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pypdf._reader.PdfReader object at 0x7f2f3f872f90>, stream = <_io.BytesIO object at 0x7f2f3f5025c0>
def read(self, stream: StreamType) -> None:
self._basic_validation(stream)
self._find_eof_marker(stream)
startxref = self._find_startxref_pos(stream)
# check and eventually correct the startxref only in not strict
xref_issue_nr = self._get_xref_issues(stream, startxref)
if xref_issue_nr != 0:
if self.strict and xref_issue_nr:
> raise PdfReadError("Broken xref table")
E pypdf.errors.PdfReadError: Broken xref table
pypdf/_reader.py:1342: PdfReadError
During handling of the above exception, another exception occurred:
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f2f3f871410>, strict = True, with_prev_0 = False, startx_correction = 0
should_fail = True, warning_msgs = ''
@pytest.mark.parametrize(
("strict", "with_prev_0", "startx_correction", "should_fail", "warning_msgs"),
[
(
True,
False,
-1,
False,
[
"startxref on same line as offset",
"Xref table not zero-indexed. "
"ID numbers for objects will be corrected.",
],
), # all nominal => no fail
(True, True, -1, True, ""), # Prev=0 => fail expected
(
False,
False,
-1,
False,
["startxref on same line as offset"],
),
(
False,
True,
-1,
False,
[
"startxref on same line as offset",
"/Prev=0 in the trailer - assuming there is no previous xref table",
],
), # Prev =0 => no strict so tolerant
(True, False, 0, True, ""), # error on startxref, in strict => fail expected
(True, True, 0, True, ""),
(
False,
False,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
), # error on startxref, but no strict => xref rebuilt,no fail
(
False,
True,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
),
],
)
def test_get_images_raw(
caplog, strict, with_prev_0, startx_correction, should_fail, warning_msgs
):
pdf_data = (
b"%%PDF-1.7\n"
b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
b"2 0 obj << >> endobj\n"
b"3 0 obj << >> endobj\n"
b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
b" /Resources << /Font << >> >>"
b" /Rotate 0 /Type /Page >> endobj\n"
b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
b"xref 1 5\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"trailer << %s/Root 5 0 R /Size 6 >>\n"
b"startxref %d\n"
b"%%%%EOF"
)
pdf_data = pdf_data % (
pdf_data.find(b"1 0 obj"),
pdf_data.find(b"2 0 obj"),
pdf_data.find(b"3 0 obj"),
pdf_data.find(b"4 0 obj"),
pdf_data.find(b"5 0 obj"),
b"/Prev 0 " if with_prev_0 else b"",
# startx_correction should be -1 due to double % at the beginning
# inducing an error on startxref computation
pdf_data.find(b"xref") + startx_correction,
)
pdf_stream = io.BytesIO(pdf_data)
if should_fail:
> with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
E Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
E Emitted warnings: [].
tests/test_reader.py:338: Failed
---------------------------------------------------------- Captured log call ----------------------------------------------------------
WARNING pypdf._reader:_utils.py:477 startxref on same line as offset
_______________________________________________ test_get_images_raw[True-True-0-True-] ________________________________________________
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f2f3f673510>, strict = True, with_prev_0 = True, startx_correction = 0
should_fail = True, warning_msgs = ''
@pytest.mark.parametrize(
("strict", "with_prev_0", "startx_correction", "should_fail", "warning_msgs"),
[
(
True,
False,
-1,
False,
[
"startxref on same line as offset",
"Xref table not zero-indexed. "
"ID numbers for objects will be corrected.",
],
), # all nominal => no fail
(True, True, -1, True, ""), # Prev=0 => fail expected
(
False,
False,
-1,
False,
["startxref on same line as offset"],
),
(
False,
True,
-1,
False,
[
"startxref on same line as offset",
"/Prev=0 in the trailer - assuming there is no previous xref table",
],
), # Prev =0 => no strict so tolerant
(True, False, 0, True, ""), # error on startxref, in strict => fail expected
(True, True, 0, True, ""),
(
False,
False,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
), # error on startxref, but no strict => xref rebuilt,no fail
(
False,
True,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
),
],
)
def test_get_images_raw(
caplog, strict, with_prev_0, startx_correction, should_fail, warning_msgs
):
pdf_data = (
b"%%PDF-1.7\n"
b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
b"2 0 obj << >> endobj\n"
b"3 0 obj << >> endobj\n"
b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
b" /Resources << /Font << >> >>"
b" /Rotate 0 /Type /Page >> endobj\n"
b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
b"xref 1 5\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"trailer << %s/Root 5 0 R /Size 6 >>\n"
b"startxref %d\n"
b"%%%%EOF"
)
pdf_data = pdf_data % (
pdf_data.find(b"1 0 obj"),
pdf_data.find(b"2 0 obj"),
pdf_data.find(b"3 0 obj"),
pdf_data.find(b"4 0 obj"),
pdf_data.find(b"5 0 obj"),
b"/Prev 0 " if with_prev_0 else b"",
# startx_correction should be -1 due to double % at the beginning
# inducing an error on startxref computation
pdf_data.find(b"xref") + startx_correction,
)
pdf_stream = io.BytesIO(pdf_data)
if should_fail:
with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
> PdfReader(pdf_stream, strict=strict)
tests/test_reader.py:339:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pypdf/_reader.py:318: in __init__
self.read(stream)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pypdf._reader.PdfReader object at 0x7f2f3f670390>, stream = <_io.BytesIO object at 0x7f2f3f4750d0>
def read(self, stream: StreamType) -> None:
self._basic_validation(stream)
self._find_eof_marker(stream)
startxref = self._find_startxref_pos(stream)
# check and eventually correct the startxref only in not strict
xref_issue_nr = self._get_xref_issues(stream, startxref)
if xref_issue_nr != 0:
if self.strict and xref_issue_nr:
> raise PdfReadError("Broken xref table")
E pypdf.errors.PdfReadError: Broken xref table
pypdf/_reader.py:1342: PdfReadError
During handling of the above exception, another exception occurred:
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f2f3f673510>, strict = True, with_prev_0 = True, startx_correction = 0
should_fail = True, warning_msgs = ''
@pytest.mark.parametrize(
("strict", "with_prev_0", "startx_correction", "should_fail", "warning_msgs"),
[
(
True,
False,
-1,
False,
[
"startxref on same line as offset",
"Xref table not zero-indexed. "
"ID numbers for objects will be corrected.",
],
), # all nominal => no fail
(True, True, -1, True, ""), # Prev=0 => fail expected
(
False,
False,
-1,
False,
["startxref on same line as offset"],
),
(
False,
True,
-1,
False,
[
"startxref on same line as offset",
"/Prev=0 in the trailer - assuming there is no previous xref table",
],
), # Prev =0 => no strict so tolerant
(True, False, 0, True, ""), # error on startxref, in strict => fail expected
(True, True, 0, True, ""),
(
False,
False,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
), # error on startxref, but no strict => xref rebuilt,no fail
(
False,
True,
0,
False,
["startxref on same line as offset", "incorrect startxref pointer(1)"],
),
],
)
def test_get_images_raw(
caplog, strict, with_prev_0, startx_correction, should_fail, warning_msgs
):
pdf_data = (
b"%%PDF-1.7\n"
b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
b"2 0 obj << >> endobj\n"
b"3 0 obj << >> endobj\n"
b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
b" /Resources << /Font << >> >>"
b" /Rotate 0 /Type /Page >> endobj\n"
b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
b"xref 1 5\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"trailer << %s/Root 5 0 R /Size 6 >>\n"
b"startxref %d\n"
b"%%%%EOF"
)
pdf_data = pdf_data % (
pdf_data.find(b"1 0 obj"),
pdf_data.find(b"2 0 obj"),
pdf_data.find(b"3 0 obj"),
pdf_data.find(b"4 0 obj"),
pdf_data.find(b"5 0 obj"),
b"/Prev 0 " if with_prev_0 else b"",
# startx_correction should be -1 due to double % at the beginning
# inducing an error on startxref computation
pdf_data.find(b"xref") + startx_correction,
)
pdf_stream = io.BytesIO(pdf_data)
if should_fail:
> with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
E Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
E Emitted warnings: [].
tests/test_reader.py:338: Failed
---------------------------------------------------------- Captured log call ----------------------------------------------------------
WARNING pypdf._reader:_utils.py:477 startxref on same line as offset
______________________________________________________ test_read_prev_0_trailer _______________________________________________________
def test_read_prev_0_trailer():
pdf_data = (
b"%%PDF-1.7\n"
b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
b"2 0 obj << >> endobj\n"
b"3 0 obj << >> endobj\n"
b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
b" /Resources << /Font << >> >>"
b" /Rotate 0 /Type /Page >> endobj\n"
b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
b"xref 1 5\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"trailer << %s/Root 5 0 R /Size 6 >>\n"
b"startxref %d\n"
b"%%%%EOF"
)
with_prev_0 = True
pdf_data = pdf_data % (
pdf_data.find(b"1 0 obj"),
pdf_data.find(b"2 0 obj"),
pdf_data.find(b"3 0 obj"),
pdf_data.find(b"4 0 obj"),
pdf_data.find(b"5 0 obj"),
b"/Prev 0 " if with_prev_0 else b"",
pdf_data.find(b"xref") - 1,
)
pdf_stream = io.BytesIO(pdf_data)
with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
> PdfReader(pdf_stream, strict=True)
tests/test_reader.py:534:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pypdf/_reader.py:318: in __init__
self.read(stream)
pypdf/_reader.py:1346: in read
self._read_xref_tables_and_trailers(stream, startxref, xref_issue_nr)
pypdf/_reader.py:1589: in _read_xref_tables_and_trailers
startxref = self._read_xref_other_error(stream, startxref)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pypdf._reader.PdfReader object at 0x7f2f3e9e52d0>, stream = <_io.BytesIO object at 0x7f2f3f921e40>, startxref = 0
def _read_xref_other_error(
self, stream: StreamType, startxref: int
) -> Optional[int]:
# some PDFs have /Prev=0 in the trailer, instead of no /Prev
if startxref == 0:
if self.strict:
> raise PdfReadError(
"/Prev=0 in the trailer (try opening with strict=False)"
)
E pypdf.errors.PdfReadError: /Prev=0 in the trailer (try opening with strict=False)
pypdf/_reader.py:1622: PdfReadError
During handling of the above exception, another exception occurred:
def test_read_prev_0_trailer():
pdf_data = (
b"%%PDF-1.7\n"
b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
b"2 0 obj << >> endobj\n"
b"3 0 obj << >> endobj\n"
b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
b" /Resources << /Font << >> >>"
b" /Rotate 0 /Type /Page >> endobj\n"
b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
b"xref 1 5\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"trailer << %s/Root 5 0 R /Size 6 >>\n"
b"startxref %d\n"
b"%%%%EOF"
)
with_prev_0 = True
pdf_data = pdf_data % (
pdf_data.find(b"1 0 obj"),
pdf_data.find(b"2 0 obj"),
pdf_data.find(b"3 0 obj"),
pdf_data.find(b"4 0 obj"),
pdf_data.find(b"5 0 obj"),
b"/Prev 0 " if with_prev_0 else b"",
pdf_data.find(b"xref") - 1,
)
pdf_stream = io.BytesIO(pdf_data)
> with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
E Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
E Emitted warnings: [].
tests/test_reader.py:533: Failed
---------------------------------------------------------- Captured log call ----------------------------------------------------------
WARNING pypdf._reader:_utils.py:477 startxref on same line as offset
WARNING pypdf._reader:_utils.py:477 Xref table not zero-indexed. ID numbers for objects will be corrected.
____________________________________________________ test_read_unknown_zero_pages _____________________________________________________
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f2f3ebf7c90>
def test_read_unknown_zero_pages(caplog):
pdf_data = (
b"%%PDF-1.7\n"
b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
b"2 0 obj << >> endobj\n"
b"3 0 obj << >> endobj\n"
b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
b" /Resources << /Font << >> >>"
b" /Rotate 0 /Type /Page >> endobj\n"
# Pages 0 0 is the key point:
b"5 0 obj << /Pages 0 0 R /Type /Catalog >> endobj\n"
b"xref 1 5\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"trailer << /Root 5 1 R /Size 6 >>\n"
b"startxref %d\n"
b"%%%%EOF"
)
pdf_data = pdf_data % (
pdf_data.find(b"1 0 obj"),
pdf_data.find(b"2 0 obj"),
pdf_data.find(b"3 0 obj"),
pdf_data.find(b"4 0 obj"),
pdf_data.find(b"5 0 obj"),
pdf_data.find(b"xref") - 1,
)
pdf_stream = io.BytesIO(pdf_data)
reader = PdfReader(pdf_stream, strict=True)
warnings = [
"startxref on same line as offset",
"Xref table not zero-indexed. ID numbers for objects will be corrected.",
]
assert normalize_warnings(caplog.text) == warnings
with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
> len(reader.pages)
tests/test_reader.py:611:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pypdf/_page.py:2201: in __len__
return self.length_function()
pypdf/_reader.py:439: in _get_num_pages
self._flatten()
pypdf/_reader.py:1052: in _flatten
catalog = self.trailer[TK.ROOT].get_object()
pypdf/generic/_data_structures.py:319: in __getitem__
return dict.__getitem__(self, key).get_object()
pypdf/generic/_base.py:284: in get_object
obj = self.pdf.get_object(self)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pypdf._reader.PdfReader object at 0x7f2f3ebf4590>, indirect_reference = IndirectObject(5, 1, 139840892913040)
def get_object(
self, indirect_reference: Union[int, IndirectObject]
) -> Optional[PdfObject]:
if isinstance(indirect_reference, int):
indirect_reference = IndirectObject(indirect_reference, 0, self)
retval = self.cache_get_indirect_object(
indirect_reference.generation, indirect_reference.idnum
)
if retval is not None:
return retval
if (
indirect_reference.generation == 0
and indirect_reference.idnum in self.xref_objStm
):
retval = self._get_object_from_stream(indirect_reference) # type: ignore
elif (
indirect_reference.generation in self.xref
and indirect_reference.idnum in self.xref[indirect_reference.generation]
):
if self.xref_free_entry.get(indirect_reference.generation, {}).get(
indirect_reference.idnum, False
):
return NullObject()
start = self.xref[indirect_reference.generation][indirect_reference.idnum]
self.stream.seek(start, 0)
try:
idnum, generation = self.read_object_header(self.stream)
except Exception:
if hasattr(self.stream, "getbuffer"):
buf = bytes(self.stream.getbuffer())
else:
p = self.stream.tell()
self.stream.seek(0, 0)
buf = self.stream.read(-1)
self.stream.seek(p, 0)
m = re.search(
rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
buf,
)
if m is not None:
logger_warning(
f"Object ID {indirect_reference.idnum},{indirect_reference.generation} ref repaired",
__name__,
)
self.xref[indirect_reference.generation][
indirect_reference.idnum
] = (m.start(0) + 1)
self.stream.seek(m.start(0) + 1)
idnum, generation = self.read_object_header(self.stream)
else:
idnum = -1 # exception will be raised below
if idnum != indirect_reference.idnum and self.xref_index:
# Xref table probably had bad indexes due to not being zero-indexed
if self.strict:
raise PdfReadError(
f"Expected object ID ({indirect_reference.idnum} {indirect_reference.generation}) "
f"does not match actual ({idnum} {generation}); "
"xref table not zero-indexed."
)
# xref table is corrected in non-strict mode
elif idnum != indirect_reference.idnum and self.strict:
# some other problem
raise PdfReadError(
f"Expected object ID ({indirect_reference.idnum} "
f"{indirect_reference.generation}) does not match actual "
f"({idnum} {generation})."
)
if self.strict:
assert generation == indirect_reference.generation
retval = read_object(self.stream, self) # type: ignore
# override encryption is used for the /Encrypt dictionary
if not self._override_encryption and self._encryption is not None:
# if we don't have the encryption key:
if not self._encryption.is_decrypted():
raise FileNotDecryptedError("File has not been decrypted")
# otherwise, decrypt here...
retval = cast(PdfObject, retval)
retval = self._encryption.decrypt_object(
retval, indirect_reference.idnum, indirect_reference.generation
)
else:
if hasattr(self.stream, "getbuffer"):
buf = bytes(self.stream.getbuffer())
else:
p = self.stream.tell()
self.stream.seek(0, 0)
buf = self.stream.read(-1)
self.stream.seek(p, 0)
m = re.search(
rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
buf,
)
if m is not None:
logger_warning(
f"Object {indirect_reference.idnum} {indirect_reference.generation} found",
__name__,
)
if indirect_reference.generation not in self.xref:
self.xref[indirect_reference.generation] = {}
self.xref[indirect_reference.generation][indirect_reference.idnum] = (
m.start(0) + 1
)
self.stream.seek(m.end(0) + 1)
skip_over_whitespace(self.stream)
self.stream.seek(-1, 1)
retval = read_object(self.stream, self) # type: ignore
# override encryption is used for the /Encrypt dictionary
if not self._override_encryption and self._encryption is not None:
# if we don't have the encryption key:
if not self._encryption.is_decrypted():
raise FileNotDecryptedError("File has not been decrypted")
# otherwise, decrypt here...
retval = cast(PdfObject, retval)
retval = self._encryption.decrypt_object(
retval, indirect_reference.idnum, indirect_reference.generation
)
else:
logger_warning(
f"Object {indirect_reference.idnum} {indirect_reference.generation} not defined.",
__name__,
)
if self.strict:
> raise PdfReadError("Could not find object.")
E pypdf.errors.PdfReadError: Could not find object.
pypdf/_reader.py:1281: PdfReadError
During handling of the above exception, another exception occurred:
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f2f3ebf7c90>
def test_read_unknown_zero_pages(caplog):
pdf_data = (
b"%%PDF-1.7\n"
b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
b"2 0 obj << >> endobj\n"
b"3 0 obj << >> endobj\n"
b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
b" /Resources << /Font << >> >>"
b" /Rotate 0 /Type /Page >> endobj\n"
# Pages 0 0 is the key point:
b"5 0 obj << /Pages 0 0 R /Type /Catalog >> endobj\n"
b"xref 1 5\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"%010d 00000 n\n"
b"trailer << /Root 5 1 R /Size 6 >>\n"
b"startxref %d\n"
b"%%%%EOF"
)
pdf_data = pdf_data % (
pdf_data.find(b"1 0 obj"),
pdf_data.find(b"2 0 obj"),
pdf_data.find(b"3 0 obj"),
pdf_data.find(b"4 0 obj"),
pdf_data.find(b"5 0 obj"),
pdf_data.find(b"xref") - 1,
)
pdf_stream = io.BytesIO(pdf_data)
reader = PdfReader(pdf_stream, strict=True)
warnings = [
"startxref on same line as offset",
"Xref table not zero-indexed. ID numbers for objects will be corrected.",
]
assert normalize_warnings(caplog.text) == warnings
> with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
E Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
E Emitted warnings: [].
tests/test_reader.py:610: Failed
---------------------------------------------------------- Captured log call ----------------------------------------------------------
WARNING pypdf._reader:_utils.py:477 startxref on same line as offset
WARNING pypdf._reader:_utils.py:477 Xref table not zero-indexed. ID numbers for objects will be corrected.
WARNING pypdf._reader:_utils.py:477 Object 5 1 not defined.
_________________________________________________________ test_issue604[True] _________________________________________________________
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f2f3e964290>, strict = True
@pytest.mark.parametrize(
"strict",
[True, False],
)
def test_issue604(caplog, strict):
"""Test with invalid destinations.""" # TODO
with open(RESOURCE_ROOT / "issue-604.pdf", "rb") as f:
pdf = None
outline = None
if strict:
pdf = PdfReader(f, strict=strict)
with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
> outline = pdf.outline
tests/test_reader.py:691:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pypdf/_reader.py:748: in outline
return self._get_outline()
pypdf/_reader.py:774: in _get_outline
outline_obj = self._build_outline_item(node)
pypdf/_reader.py:917: in _build_outline_item
outline_item = self._build_destination(title, dest)
pypdf/_reader.py:881: in _build_destination
return Destination(title, page, Fit(fit_type=typ, fit_args=array)) # type: ignore
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = {'/Title': 'ms_Thyroid_2_2020_071520_watermarked.pdf', '/Page': NullObject, '/Type': '0'}
title = 'ms_Thyroid_2_2020_071520_watermarked.pdf', page = NullObject, fit = <pypdf.generic._fit.Fit object at 0x7f2f3f970810>
def __init__(
self,
title: str,
page: Union[NumberObject, IndirectObject, NullObject, DictionaryObject],
fit: Fit,
) -> None:
typ = fit.fit_type
args = fit.fit_args
DictionaryObject.__init__(self)
self[NameObject("/Title")] = TextStringObject(title)
self[NameObject("/Page")] = page
self[NameObject("/Type")] = typ
# from table 8.2 of the PDF 1.7 reference.
if typ == "/XYZ":
if len(args) < 1: # left is missing : should never occur
args.append(NumberObject(0.0))
if len(args) < 2: # top is missing
args.append(NumberObject(0.0))
if len(args) < 3: # zoom is missing
args.append(NumberObject(0.0))
(
self[NameObject(TA.LEFT)],
self[NameObject(TA.TOP)],
self[NameObject("/Zoom")],
) = args
elif len(args) == 0:
pass
elif typ == TF.FIT_R:
(
self[NameObject(TA.LEFT)],
self[NameObject(TA.BOTTOM)],
self[NameObject(TA.RIGHT)],
self[NameObject(TA.TOP)],
) = args
elif typ in [TF.FIT_H, TF.FIT_BH]:
try: # Preferred to be more robust not only to null parameters
(self[NameObject(TA.TOP)],) = args
except Exception:
(self[NameObject(TA.TOP)],) = (NullObject(),)
elif typ in [TF.FIT_V, TF.FIT_BV]:
try: # Preferred to be more robust not only to null parameters
(self[NameObject(TA.LEFT)],) = args
except Exception:
(self[NameObject(TA.LEFT)],) = (NullObject(),)
elif typ in [TF.FIT, TF.FIT_B]:
pass
else:
> raise PdfReadError(f"Unknown Destination Type: {typ!r}")
E pypdf.errors.PdfReadError: Unknown Destination Type: '0'
pypdf/generic/_data_structures.py:1404: PdfReadError
During handling of the above exception, another exception occurred:
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f2f3e964290>, strict = True
@pytest.mark.parametrize(
"strict",
[True, False],
)
def test_issue604(caplog, strict):
"""Test with invalid destinations.""" # TODO
with open(RESOURCE_ROOT / "issue-604.pdf", "rb") as f:
pdf = None
outline = None
if strict:
pdf = PdfReader(f, strict=strict)
> with pytest.raises(PdfReadError) as exc, pytest.warns(PdfReadWarning):
E Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
E Emitted warnings: [].
tests/test_reader.py:690: Failed
---------------------------------------------------------- Captured log call ----------------------------------------------------------
WARNING pypdf._reader:_utils.py:477 Unknown destination: ms_Thyroid_2_2020_071520_watermarked.pdf [0, 1]
======================================================= short test summary info =======================================================
FAILED tests/test_reader.py::test_get_images_raw[True-True--1-True-] - Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
FAILED tests/test_reader.py::test_get_images_raw[True-False-0-True-] - Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
FAILED tests/test_reader.py::test_get_images_raw[True-True-0-True-] - Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
FAILED tests/test_reader.py::test_read_prev_0_trailer - Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
FAILED tests/test_reader.py::test_read_unknown_zero_pages - Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
FAILED tests/test_reader.py::test_issue604[True] - Failed: DID NOT WARN. No warnings of type (<class 'pypdf.errors.PdfReadWarning'>,) were emitted.
================================ 6 failed, 581 passed, 2 skipped, 273 deselected, 3 xfailed in 30.19s =================================
Environment
Which environment were you using when you encountered the problem?
$ python -m platform
Linux-6.7.2-gentoo-dist-x86_64-AMD_Ryzen_5_3600_6-Core_Processor-with-glibc2.38
$ python -c "import pypdf;print(pypdf._debug_versions)"
pypdf==4.0.1, crypt_provider=('pycryptodome', '3.20.0'), PIL=10.2.0
Confirmed on top of commit 7579329.
Metadata
Assignees
Labels
No labels