Skip to content

bpo-45006 Add a data_offset field to ZipInfo #27961

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Lib/test/test_zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3196,6 +3196,19 @@ def test_inheritance(self, alpharep):
file = cls(alpharep).joinpath('some dir').parent
assert isinstance(file, cls)

def test_dataoffset(self):
    """Check that each entry's data_offset locates its payload in the archive."""
    members = (
        ("a/b/c.txt", "random data for c"),
        ("a/b/b.txt", "random data for b"),
        ("a/b/c/d.txt", "random data for d"),
        ("a.txt", "random data for a"),
    )
    data = io.BytesIO()
    # ZIP_STORED writes every payload verbatim, so each payload can be
    # found by a plain byte search over the finished archive.
    with zipfile.ZipFile(data, 'w', compression=zipfile.ZIP_STORED) as archive:
        for arcname, payload in members:
            archive.writestr(arcname, payload)
    raw_archive = data.getvalue()
    with zipfile.ZipFile(data, 'r') as archive:
        for info in archive.infolist():
            stored_bytes = archive.read(info)
            self.assertEqual(info.data_offset, raw_archive.index(stored_bytes))


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
unittest.main()
13 changes: 13 additions & 0 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ class ZipInfo (object):
'CRC',
'compress_size',
'file_size',
'data_offset',
'_raw_time',
)

Expand Down Expand Up @@ -406,6 +407,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.external_attr = 0 # External file attributes
self.compress_size = 0 # Size of the compressed file
self.file_size = 0 # Size of the uncompressed file
self.data_offset = None # Offset to beginning of compressed data
# Other attributes are set by class ZipFile:
# header_offset Byte offset to the file header
# CRC CRC-32 of the uncompressed file
Expand Down Expand Up @@ -1340,6 +1342,16 @@ def __repr__(self):
result.append('>')
return ''.join(result)

def _ComputeDataOffset(self, zinfo: ZipInfo):
    """Return the absolute offset of the first data byte of *zinfo*.

    The local file header located at ``zinfo.header_offset`` is read from
    ``self.fp``.  The data offset is that header offset plus the fixed
    header size plus the file name and extra field lengths recorded in
    the header.

    Return None when the offset cannot be determined: ``self.fp`` is not
    seekable, fewer than ``sizeFileHeader`` bytes are available at the
    header offset (truncated archive), or the bytes found there do not
    start with the local file header signature.

    The position of ``self.fp`` is restored before returning.
    """
    fp = self.fp
    if not fp.seekable():
        return None

    # This helper shares the file object with the rest of the class, so
    # put the file pointer back where it was.
    saved_pos = fp.tell()
    try:
        fp.seek(zinfo.header_offset)
        raw_header = fp.read(sizeFileHeader)
    finally:
        fp.seek(saved_pos)

    # A short read would make struct.unpack() raise struct.error; treat a
    # truncated header like any other unreadable header instead.
    if len(raw_header) != sizeFileHeader:
        return None

    fheader = struct.unpack(structFileHeader, raw_header)
    if fheader[_FH_SIGNATURE] != stringFileHeader:
        return None

    return (zinfo.header_offset + sizeFileHeader
            + fheader[_FH_FILENAME_LENGTH]
            + fheader[_FH_EXTRA_FIELD_LENGTH])

def _RealGetContents(self):
"""Read in the table of contents for the ZIP file."""
fp = self.fp
Expand Down Expand Up @@ -1406,6 +1418,7 @@ def _RealGetContents(self):

x._decodeExtra()
x.header_offset = x.header_offset + concat
x.data_offset = self._ComputeDataOffset(x)
self.filelist.append(x)
self.NameToInfo[x.filename] = x

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
``zipfile.ZipInfo`` now exposes a ``data_offset`` attribute giving the offset of the beginning of the entry's compressed data.