Skip to content

Commit c890673

Browse files
committed
Add a data_offset field to ZipInfo
1 parent f56268a commit c890673

File tree

2 files changed

+26
-0
lines changed

2 files changed

+26
-0
lines changed

Lib/test/test_zipfile.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3196,6 +3196,19 @@ def test_inheritance(self, alpharep):
31963196
file = cls(alpharep).joinpath('some dir').parent
31973197
assert isinstance(file, cls)
31983198

3199+
def test_dataoffset(self):
    # Each entry's data_offset must equal the position of that entry's
    # payload inside the raw archive bytes.  The members are written with
    # ZIP_STORED, so every payload appears verbatim in the archive and can
    # be located with bytes.index().
    members = (
        ("a/b/c.txt", "random data for c"),
        ("a/b/b.txt", "random data for b"),
        ("a/b/c/d.txt", "random data for d"),
        ("a.txt", "random data for a"),
    )
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, 'w', compression=zipfile.ZIP_STORED) as archive:
        for member_name, member_text in members:
            archive.writestr(member_name, member_text)
    raw_bytes = buffer.getvalue()
    with zipfile.ZipFile(buffer, 'r') as archive:
        for info in archive.infolist():
            payload = archive.read(info)
            payload_start = raw_bytes.index(payload)
            self.assertEqual(info.data_offset, payload_start)
3211+
31993212

32003213
if __name__ == "__main__":
32013214
unittest.main()

Lib/zipfile.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ class ZipInfo (object):
364364
'CRC',
365365
'compress_size',
366366
'file_size',
367+
'data_offset',
367368
'_raw_time',
368369
)
369370

@@ -406,6 +407,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
406407
self.external_attr = 0 # External file attributes
407408
self.compress_size = 0 # Size of the compressed file
408409
self.file_size = 0 # Size of the uncompressed file
410+
self.data_offset = None # Offset to beginning of compressed data
409411
# Other attributes are set by class ZipFile:
410412
# header_offset Byte offset to the file header
411413
# CRC CRC-32 of the uncompressed file
@@ -1340,6 +1342,16 @@ def __repr__(self):
13401342
result.append('>')
13411343
return ''.join(result)
13421344

1345+
def _ComputeDataOffset(self, zinfo: ZipInfo):
    """Return the offset in self.fp where zinfo's member data begins.

    The local file header at ``zinfo.header_offset`` is read from
    ``self.fp``; the result is that offset plus the fixed header size plus
    the file name and extra field lengths recorded in the header.

    Returns None when the offset cannot be determined: ``self.fp`` is not
    seekable, fewer than ``sizeFileHeader`` bytes are available at
    ``zinfo.header_offset``, or the bytes found there do not carry the
    local file header signature.

    Note that this moves the current position of ``self.fp``.
    """
    fp = self.fp
    if not fp.seekable():
        return None
    fp.seek(zinfo.header_offset)
    raw_header = fp.read(sizeFileHeader)
    # A truncated archive or an out-of-range header_offset produces a short
    # read.  struct.unpack() would raise struct.error on it, so treat it
    # like any other unreadable header and report the offset as unknown.
    if len(raw_header) != sizeFileHeader:
        return None
    fheader = struct.unpack(structFileHeader, raw_header)
    if fheader[_FH_SIGNATURE] != stringFileHeader:
        return None
    return (zinfo.header_offset
            + sizeFileHeader
            + fheader[_FH_FILENAME_LENGTH]
            + fheader[_FH_EXTRA_FIELD_LENGTH])
1354+
13431355
def _RealGetContents(self):
13441356
"""Read in the table of contents for the ZIP file."""
13451357
fp = self.fp
@@ -1406,6 +1418,7 @@ def _RealGetContents(self):
14061418

14071419
x._decodeExtra()
14081420
x.header_offset = x.header_offset + concat
1421+
x.data_offset = self._ComputeDataOffset(x)
14091422
self.filelist.append(x)
14101423
self.NameToInfo[x.filename] = x
14111424

0 commit comments

Comments
 (0)