@@ -403,6 +403,59 @@ def __iter__(self):
403
403
return self ._buffer .__iter__ ()
404
404
405
405
406
def _read_exact(fp, n):
    '''Return exactly *n* bytes read from `fp`.

    A single read() on an unbuffered file object may hand back fewer bytes
    than requested, so keep reading until the request is satisfied.

    Raises EOFError if `fp` is exhausted before *n* bytes were collected.
    '''
    buf = fp.read(n)
    missing = n - len(buf)
    while missing > 0:
        chunk = fp.read(missing)
        if not chunk:
            # An empty read means end of file: the stream is truncated.
            raise EOFError("Compressed file ended before the "
                           "end-of-stream marker was reached")
        buf += chunk
        missing -= len(chunk)
    return buf
420
+
421
+
422
def _read_gzip_header(fp):
    '''Read a gzip header from `fp` and progress to the end of the header.

    Returns the mtime stored in the header if a header was present, or
    None if `fp` yielded no data at all (end of file).

    Raises BadGzipFile if the magic bytes are wrong or the compression
    method is not deflate (8), and EOFError (from _read_exact) if the file
    ends inside one of the fixed-size header fields.
    '''
    magic = fp.read(2)
    if magic == b'':
        return None

    if magic != b'\037\213':
        raise BadGzipFile('Not a gzipped file (%r)' % magic)

    # Fixed part after the magic: method (1 byte), flags (1 byte),
    # mtime (4 bytes, little-endian); the two pad bytes skip XFL and OS.
    (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
    if method != 8:
        raise BadGzipFile('Unknown compression method')

    if flag & FEXTRA:
        # Read & discard the extra field, if present
        extra_len, = struct.unpack("<H", _read_exact(fp, 2))
        _read_exact(fp, extra_len)
    # The filename and the comment are both stored as null-terminated
    # strings, filename first; read and discard whichever are flagged.
    for string_flag in (FNAME, FCOMMENT):
        if flag & string_flag:
            while True:
                s = fp.read(1)
                if not s or s == b'\000':
                    break
    if flag & FHCRC:
        _read_exact(fp, 2)  # Read & discard the 16-bit header CRC
    return last_mtime
457
+
458
+
406
459
class _GzipReader (_compression .DecompressReader ):
407
460
def __init__ (self , fp ):
408
461
super ().__init__ (_PaddedFile (fp ), zlib .decompressobj ,
@@ -415,53 +468,11 @@ def _init_read(self):
415
468
self ._crc = zlib .crc32 (b"" )
416
469
self ._stream_size = 0 # Decompressed size of unconcatenated stream
417
470
418
- def _read_exact (self , n ):
419
- '''Read exactly *n* bytes from `self._fp`
420
-
421
- This method is required because self._fp may be unbuffered,
422
- i.e. return short reads.
423
- '''
424
-
425
- data = self ._fp .read (n )
426
- while len (data ) < n :
427
- b = self ._fp .read (n - len (data ))
428
- if not b :
429
- raise EOFError ("Compressed file ended before the "
430
- "end-of-stream marker was reached" )
431
- data += b
432
- return data
433
-
434
471
def _read_gzip_header(self):
    """Parse the next member header from self._fp.

    Delegates to the module-level _read_gzip_header(). Returns False when
    that helper reports no header (it returned None); otherwise stores the
    returned mtime in self._last_mtime and returns True.
    """
    mtime = _read_gzip_header(self._fp)
    header_found = mtime is not None
    if header_found:
        self._last_mtime = mtime
    return header_found
466
477
467
478
def read (self , size = - 1 ):
@@ -524,7 +535,7 @@ def _read_eof(self):
524
535
# We check that the computed CRC and size of the
525
536
# uncompressed data matches the stored values. Note that the size
526
537
# stored is the true file size mod 2**32.
527
- crc32 , isize = struct .unpack ("<II" , self . _read_exact (8 ))
538
+ crc32 , isize = struct .unpack ("<II" , _read_exact (self . _fp , 8 ))
528
539
if crc32 != self ._crc :
529
540
raise BadGzipFile ("CRC check failed %s != %s" % (hex (crc32 ),
530
541
hex (self ._crc )))
@@ -544,21 +555,65 @@ def _rewind(self):
544
555
super ()._rewind ()
545
556
self ._new_member = True
546
557
558
+
559
def _create_simple_gzip_header(compresslevel: int,
                               mtime=None) -> bytes:
    """
    Build a minimal 10-byte gzip header without any optional fields.
    :param compresslevel: Compression level; selects the XFL byte.
    :param mtime: Modification time (must support conversion to a 32-bit
                  integer). The current time is used when None.
    :return: The gzip header as a bytes object.
    """
    # XFL hints at the effort spent: 2 for best compression, 4 for
    # fastest, 0 for every other level.
    xfl = 0
    if compresslevel == _COMPRESS_LEVEL_BEST:
        xfl = 2
    elif compresslevel == _COMPRESS_LEVEL_FAST:
        xfl = 4
    timestamp = time.time() if mtime is None else mtime
    # Layout: ID1, ID2 (magic), method (8=deflate), flags (0 = no optional
    # fields), mtime, xfl, os (255 for unknown OS).
    return struct.pack("<BBBBLBB",
                       0x1f, 0x8b, 8, 0, int(timestamp), xfl, 255)
578
+
579
+
547
580
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed string.

    compresslevel sets the compression level in range of 0-9.
    mtime can be used to set the modification time. The modification time is
    set to the current time by default.
    """
    if mtime == 0:
        # Use zlib as it creates the header with 0 mtime by default.
        # This is faster and with less overhead.
        return zlib.compress(data, level=compresslevel, wbits=31)
    header = _create_simple_gzip_header(compresslevel, mtime)
    # Trailer: CRC-32 of the input followed by its length modulo 2**32.
    trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
    # Wbits=-15 creates a raw deflate block. The level must be passed here
    # too, otherwise the data is compressed at zlib's default level while
    # the header's XFL byte advertises the requested one.
    return (header + zlib.compress(data, level=compresslevel, wbits=-15) +
            trailer)
595
+
555
596
556
597
def decompress(data):
    """Decompress a gzip compressed string in one shot.
    Return the decompressed string.

    All concatenated gzip members in *data* are decompressed and joined;
    zero padding after a member is skipped.

    Raises EOFError if a member is truncated (the deflate stream does not
    finish or the 8-byte trailer is incomplete) and BadGzipFile if a header
    is invalid or the stored CRC/length does not match the data.
    """
    decompressed_members = []
    while True:
        fp = io.BytesIO(data)
        if _read_gzip_header(fp) is None:
            # No bytes left: every member has been processed.
            return b"".join(decompressed_members)
        # Use a zlib raw deflate decompressor; the gzip header has already
        # been consumed above.
        do = zlib.decompressobj(wbits=-zlib.MAX_WBITS)
        # Read all the data except the header
        decompressed = do.decompress(data[fp.tell():])
        # Without this check a truncated member would surface as an opaque
        # struct.error (short trailer) or go unnoticed (unfinished stream).
        if not do.eof or len(do.unused_data) < 8:
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")
        crc, length = struct.unpack("<II", do.unused_data[:8])
        if crc != zlib.crc32(decompressed):
            raise BadGzipFile("CRC check failed")
        if length != (len(decompressed) & 0xffffffff):
            raise BadGzipFile("Incorrect length of data produced")
        decompressed_members.append(decompressed)
        # Continue with whatever follows the trailer, ignoring zero padding.
        data = do.unused_data[8:].lstrip(b"\x00")
562
617
563
618
564
619
def main ():
0 commit comments