Skip to content

Commit df3bb1a

Browse files
authored
Port ZipStreamReader from adamziel/wxr-normalize (#116)
This new ZipStreamReader opens its own file handles, which means it can be paused and resumed, and is more reliable. The original implementation was built as a part of adamziel/wxr-normalize#1. This is all new code, so there are no testing instructions. Eventually this implementation will replace the existing ZipStreamReader.
1 parent 3b8943b commit df3bb1a

File tree

1 file changed

+356
-0
lines changed

1 file changed

+356
-0
lines changed
Lines changed: 356 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,356 @@
1+
<?php
2+
3+
namespace WordPress\Zip;
4+
5+
/**
 * Improves on ZipStreamReader – it keeps track of its own parsing state
 * without relying on $fp objects, which enables pausing and resuming.
 *
 * The reader walks the archive front to back. Every successful call to
 * next() surfaces one of: a chunk of a file entry body, a central directory
 * entry, or the end-of-central-directory record. The current record is
 * exposed through get_state(), get_header(), get_file_path() and
 * get_file_body_chunk().
 *
 * Known limitation visible in this class: a file entry whose local header
 * reports a compressed size of zero is consumed without ever being surfaced
 * by next().
 *
 * @TODO: Replace ZipStreamReader with this class once the consumers of
 *        ZipStreamReader have been updated to use the new interface.
 */
class NewZipStreamReader {

    const SIGNATURE_FILE                  = 0x04034b50;
    const SIGNATURE_CENTRAL_DIRECTORY     = 0x02014b50;
    const SIGNATURE_CENTRAL_DIRECTORY_END = 0x06054b50;
    const COMPRESSION_DEFLATE             = 8;

    const STATE_SCAN                              = 'scan';
    const STATE_FILE_ENTRY                        = 'file-entry';
    const STATE_CENTRAL_DIRECTORY_ENTRY           = 'central-directory-entry';
    const STATE_CENTRAL_DIRECTORY_ENTRY_EXTRA     = 'central-directory-entry-extra';
    const STATE_END_CENTRAL_DIRECTORY_ENTRY       = 'end-central-directory-entry';
    const STATE_END_CENTRAL_DIRECTORY_ENTRY_EXTRA = 'end-central-directory-entry-extra';
    const STATE_COMPLETE                          = 'complete';
    const STATE_ERROR                             = 'error';

    /** Path of the zip archive on disk. */
    private $file_path;

    /** Number of archive bytes actually consumed so far (also the read offset). */
    private $zip_file_bytes_parsed_so_far = 0;

    /** Number of compressed body bytes consumed from the current file entry. */
    private $file_entry_body_bytes_parsed_so_far = 0;

    /** One of the STATE_* constants. */
    private $state = self::STATE_SCAN;

    /** Header of the record currently being parsed (array) or null. */
    private $header = null;

    /** Most recently produced (decompressed) body chunk or null. */
    private $file_body_chunk = null;

    /** True when parsing stopped because the input ended prematurely. */
    private $paused_incomplete_input = false;

    /** Message describing the last error, or null. */
    private $error_message;

    /** Incremental inflate context for the current deflated entry, or null. */
    private $inflate_handle;

    /** Open read handle on the archive, or null when not (or no longer) open. */
    private $fp;

    /**
     * Exports the parsing state so that it can be handed to resume() later,
     * possibly on a different instance.
     *
     * @return array Snapshot of the reader state.
     */
    public function pause() {
        return [
            'file_path'                           => $this->file_path,
            'zip_file_bytes_parsed_so_far'        => $this->zip_file_bytes_parsed_so_far,
            'file_entry_body_bytes_parsed_so_far' => $this->file_entry_body_bytes_parsed_so_far,
            'state'                               => $this->state,
            'header'                              => $this->header,
            'file_body_chunk'                     => $this->file_body_chunk,
            'paused_incomplete_input'             => $this->paused_incomplete_input,
        ];
    }

    /**
     * Restores a snapshot produced by pause().
     *
     * When the snapshot was taken in the middle of a deflated file entry, the
     * already-consumed part of the body is re-read from the start of the entry
     * so that the inflate context is rebuilt. In every other case the file
     * handle is simply positioned at the recorded offset.
     *
     * If the file cannot be opened, or the replay fails, the reader is left in
     * the error/complete state set by the failing operation.
     *
     * @param array $paused Snapshot returned by pause().
     */
    public function resume($paused) {
        // Drop any handle left over from previous use of this instance.
        $this->close_file_handle();

        $this->file_path                           = $paused['file_path'];
        $this->state                               = $paused['state'];
        $this->header                              = $paused['header'];
        $this->error_message                       = null;
        $this->inflate_handle                      = null;
        $this->file_entry_body_bytes_parsed_so_far = 0;

        $zip_bytes  = $paused['zip_file_bytes_parsed_so_far'];
        $body_bytes = $paused['file_entry_body_bytes_parsed_so_far'];

        if ($body_bytes > 0 && $this->is_deflated_entry()) {
            // Rewind to the first body byte of the entry and replay it.
            $this->zip_file_bytes_parsed_so_far = $zip_bytes - $body_bytes;
            if (!$this->open_file_handle()) {
                return;
            }

            while ($this->file_entry_body_bytes_parsed_so_far < $body_bytes) {
                $parsed_before = $this->file_entry_body_bytes_parsed_so_far;
                $missing_bytes = min(4096, $body_bytes - $parsed_before);
                if (false === $this->read_file_entry_body_chunk($missing_bytes)) {
                    break;
                }
                // Guard against snapshots that claim more body bytes than the entry has.
                if ($this->file_entry_body_bytes_parsed_so_far <= $parsed_before) {
                    break;
                }
            }

            if ($this->file_entry_body_bytes_parsed_so_far < $body_bytes) {
                if (self::STATE_ERROR !== $this->state && self::STATE_COMPLETE !== $this->state) {
                    $this->set_error('Failed to replay the file entry body while resuming');
                }
                return;
            }
        } else {
            $this->zip_file_bytes_parsed_so_far        = $zip_bytes;
            $this->file_entry_body_bytes_parsed_so_far = $body_bytes > 0 ? $body_bytes : 0;
            if (!$this->open_file_handle()) {
                return;
            }
        }

        $this->file_body_chunk         = $paused['file_body_chunk'];
        $this->paused_incomplete_input = $paused['paused_incomplete_input'];
    }

    /**
     * @param string $file_path Path of the zip archive. The file is opened
     *                          lazily on the first read.
     */
    public function __construct($file_path) {
        $this->file_path = $file_path;
    }

    /**
     * @return bool Whether parsing stopped because the input ended before the
     *              requested bytes could be read.
     */
    public function is_paused_at_incomplete_input(): bool {
        return $this->paused_incomplete_input;
    }

    /**
     * @return bool True once the reader reached the complete or error state.
     */
    public function is_finished(): bool {
        return self::STATE_COMPLETE === $this->state || self::STATE_ERROR === $this->state;
    }

    /**
     * @return string One of the STATE_* constants.
     */
    public function get_state() {
        return $this->state;
    }

    /**
     * @return array|null Header of the current record, or null.
     */
    public function get_header() {
        return $this->header;
    }

    /**
     * @return string|null Path stored in the current header, or null when
     *                     there is no header or it carries no path.
     */
    public function get_file_path() {
        if (!is_array($this->header) || !isset($this->header['path'])) {
            return null;
        }

        return $this->header['path'];
    }

    /**
     * @return string|null The body chunk produced by the last next() call.
     */
    public function get_file_body_chunk() {
        return $this->file_body_chunk;
    }

    /**
     * @return string|null Message of the last error, if any.
     */
    public function get_last_error(): ?string {
        return $this->error_message;
    }

    /**
     * Advances to the next record or file body chunk.
     *
     * @return bool True when a new record/chunk is available, false when the
     *              reader is finished, failed, or ran out of input.
     */
    public function next() {
        do {
            if (self::STATE_SCAN === $this->state) {
                if (false === $this->scan()) {
                    return false;
                }
            }

            switch ($this->state) {
                case self::STATE_ERROR:
                case self::STATE_COMPLETE:
                    return false;

                case self::STATE_FILE_ENTRY:
                    if (false === $this->read_file_entry()) {
                        return false;
                    }
                    break;

                case self::STATE_CENTRAL_DIRECTORY_ENTRY:
                    if (false === $this->read_central_directory_entry()) {
                        return false;
                    }
                    break;

                case self::STATE_END_CENTRAL_DIRECTORY_ENTRY:
                    if (false === $this->read_end_central_directory_entry()) {
                        return false;
                    }
                    break;

                default:
                    return false;
            }
            // A handler that finished its record switches back to STATE_SCAN,
            // in which case we immediately look for the next signature.
        } while (self::STATE_SCAN === $this->state);

        return true;
    }

    /**
     * Reads a central directory entry (the 42 bytes following the signature,
     * then path, extra field and file comment).
     *
     * @return bool False on error or incomplete input, true otherwise.
     */
    private function read_central_directory_entry() {
        // The entry was already surfaced by the previous next() call – move on.
        if (is_array($this->header) && isset($this->header['path'])) {
            $this->header = null;
            $this->state  = self::STATE_SCAN;
            return true;
        }

        if (!is_array($this->header)) {
            $data = $this->consume_bytes(42);
            if (false === $data) {
                return $this->pause_for_incomplete_input();
            }
            $header = unpack(
                'vversionCreated/vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength/vfileCommentLength/vdiskNumber/vinternalAttributes/VexternalAttributes/VfirstByteAt',
                $data
            );
            if (false === $header) {
                $this->set_error('Malformed central directory entry');
                return false;
            }
            $this->header = $header;
        }

        $path_length    = $this->header['pathLength'];
        $extra_length   = $this->header['extraLength'];
        $comment_length = $this->header['fileCommentLength'];

        // Read all three variable-length fields in one go so that a truncated
        // archive cannot leave the header half populated.
        $data = $this->consume_bytes($path_length + $extra_length + $comment_length);
        if (false === $data) {
            return $this->pause_for_incomplete_input();
        }

        $this->header['path']        = (string) substr($data, 0, $path_length);
        $this->header['extra']       = (string) substr($data, $path_length, $extra_length);
        $this->header['fileComment'] = (string) substr($data, $path_length + $extra_length, $comment_length);

        // Compare against '' explicitly: a file named "0" is a valid path.
        if ('' === $this->header['path']) {
            $this->set_error('Empty path in central directory entry');
            return false;
        }

        return true;
    }

    /**
     * Reads the end-of-central-directory record (the 18 bytes following the
     * signature, then the optional archive comment).
     *
     * @return bool False on error or incomplete input, true otherwise.
     */
    private function read_end_central_directory_entry() {
        // The record was already surfaced by the previous next() call – move on.
        if (
            is_array($this->header)
            && (isset($this->header['comment']) || 0 === $this->header['commentLength'])
        ) {
            $this->header = null;
            $this->state  = self::STATE_SCAN;
            return true;
        }

        if (!is_array($this->header)) {
            $data = $this->consume_bytes(18);
            if (false === $data) {
                return $this->pause_for_incomplete_input();
            }
            $header = unpack(
                'vdiskNumber/vcentralDirectoryStartDisk/vnumberCentralDirectoryRecordsOnThisDisk/vnumberCentralDirectoryRecords/VcentralDirectorySize/VcentralDirectoryOffset/vcommentLength',
                $data
            );
            if (false === $header) {
                $this->set_error('Malformed end of central directory record');
                return false;
            }
            $this->header = $header;
        }

        if (!isset($this->header['comment']) && $this->header['commentLength'] > 0) {
            $comment = $this->consume_bytes($this->header['commentLength']);
            if (false === $comment) {
                return $this->pause_for_incomplete_input();
            }
            $this->header['comment'] = $comment;
        }

        return true;
    }

    /**
     * Reads the next 4-byte record signature and switches to the matching state.
     *
     * @return bool False when the input ended or the signature is unknown.
     */
    private function scan() {
        $signature = $this->consume_bytes(4);
        if (false === $signature) {
            return $this->pause_for_incomplete_input();
        }

        $signature = unpack('V', $signature)[1];
        switch ($signature) {
            case self::SIGNATURE_FILE:
                $this->state = self::STATE_FILE_ENTRY;
                break;
            case self::SIGNATURE_CENTRAL_DIRECTORY:
                $this->state = self::STATE_CENTRAL_DIRECTORY_ENTRY;
                break;
            case self::SIGNATURE_CENTRAL_DIRECTORY_END:
                $this->state = self::STATE_END_CENTRAL_DIRECTORY_ENTRY;
                break;
            default:
                $this->set_error('Invalid signature ' . $signature);
                return false;
        }

        return true;
    }

    /**
     * Reads a file entry from a zip file.
     *
     * The file entry is structured as follows:
     *
     * ```
     * Offset    Bytes    Description
     *   0        4    Local file header signature = 0x04034b50 (PK♥♦ or "PK\3\4")
     *   4        2    Version needed to extract (minimum)
     *   6        2    General purpose bit flag
     *   8        2    Compression method; e.g. none = 0, DEFLATE = 8 (or "\0x08\0x00")
     *   10       2    File last modification time
     *   12       2    File last modification date
     *   14       4    CRC-32 of uncompressed data
     *   18       4    Compressed size (or 0xffffffff for ZIP64)
     *   22       4    Uncompressed size (or 0xffffffff for ZIP64)
     *   26       2    File name length (n)
     *   28       2    Extra field length (m)
     *   30       n    File name
     *   30+n     m    Extra field
     * ```
     *
     * The header is read once per entry; every call then produces one more
     * chunk of the body.
     *
     * @return bool False on error or incomplete input, true otherwise.
     */
    private function read_file_entry() {
        if (false === $this->read_file_entry_header()) {
            return false;
        }

        return $this->read_file_entry_body_chunk();
    }

    /**
     * Reads the local file header (if it has not been read yet) together with
     * the file name and the extra field.
     *
     * @return bool False on error or incomplete input, true otherwise.
     */
    private function read_file_entry_header() {
        if (!is_array($this->header)) {
            $data = $this->consume_bytes(26);
            if (false === $data) {
                return $this->pause_for_incomplete_input();
            }
            $header = unpack(
                'vversionNeeded/vgeneralPurpose/vcompressionMethod/vlastModifiedTime/vlastModifiedDate/Vcrc/VcompressedSize/VuncompressedSize/vpathLength/vextraLength',
                $data
            );
            if (false === $header) {
                $this->set_error('Malformed file entry header');
                return false;
            }
            $this->header                              = $header;
            $this->file_entry_body_bytes_parsed_so_far = 0;
            $this->inflate_handle                      = null;
        }

        // isset() rather than empty(): a file named "0" (or a zero-length name)
        // must not be treated as "path not read yet", otherwise the name would
        // be re-read from the body bytes on every call.
        if (!isset($this->header['path'])) {
            $path_length  = $this->header['pathLength'];
            $extra_length = $this->header['extraLength'];

            $data = $this->consume_bytes($path_length + $extra_length);
            if (false === $data) {
                return $this->pause_for_incomplete_input();
            }

            $this->header['path']  = (string) substr($data, 0, $path_length);
            $this->header['extra'] = (string) substr($data, $path_length, $extra_length);
        }

        return true;
    }

    /**
     * Reads (and, for deflated entries, decompresses) the next chunk of the
     * current file entry body into $this->file_body_chunk.
     *
     * When the whole body has been consumed, the entry is closed and the state
     * switches back to STATE_SCAN without producing a chunk.
     *
     * @param int $max_bytes_to_read Upper bound for the number of compressed
     *                               bytes to consume in this call.
     * @return bool False on error or incomplete input, true otherwise.
     */
    private function read_file_entry_body_chunk($max_bytes_to_read = 4096) {
        $this->file_body_chunk = null;

        $compressed_size      = is_array($this->header) ? $this->header['compressedSize'] : 0;
        $file_body_bytes_left = $compressed_size - $this->file_entry_body_bytes_parsed_so_far;
        if ($file_body_bytes_left <= 0) {
            $this->header                              = null;
            $this->inflate_handle                      = null;
            $this->file_entry_body_bytes_parsed_so_far = 0;
            $this->state                               = self::STATE_SCAN;
            return true;
        }

        $chunk_size = min($max_bytes_to_read, $file_body_bytes_left);
        // A short read is acceptable here – the remainder is requested next time.
        $compressed_bytes = $this->consume_bytes($chunk_size, true);
        if (false === $compressed_bytes) {
            return $this->pause_for_incomplete_input();
        }
        $this->file_entry_body_bytes_parsed_so_far += strlen($compressed_bytes);

        if (!$this->is_deflated_entry()) {
            $this->file_body_chunk = $compressed_bytes;
            return true;
        }

        if (null === $this->inflate_handle) {
            $inflate_handle = inflate_init(ZLIB_ENCODING_RAW);
            if (false === $inflate_handle) {
                $this->set_error('Failed to inflate');
                return false;
            }
            $this->inflate_handle = $inflate_handle;
        }

        $uncompressed_bytes = inflate_add($this->inflate_handle, $compressed_bytes, ZLIB_PARTIAL_FLUSH);
        if (false === $uncompressed_bytes || false === inflate_get_status($this->inflate_handle)) {
            $this->set_error('Failed to inflate');
            return false;
        }

        $this->file_body_chunk = $uncompressed_bytes;
        return true;
    }

    /**
     * @return bool Whether the current header describes a DEFLATE-compressed entry.
     */
    private function is_deflated_entry() {
        return is_array($this->header)
            && isset($this->header['compressionMethod'])
            && self::COMPRESSION_DEFLATE === $this->header['compressionMethod'];
    }

    /**
     * Switches to the error state and records the message.
     *
     * @param string $message Human-readable description of the failure.
     */
    private function set_error($message) {
        $this->state                   = self::STATE_ERROR;
        $this->error_message           = $message;
        $this->paused_incomplete_input = false;
    }

    /**
     * Flags that parsing stopped for lack of input – unless a real error was
     * recorded, which takes precedence.
     *
     * @return bool Always false, so callers can `return` the result directly.
     */
    private function pause_for_incomplete_input() {
        if (self::STATE_ERROR !== $this->state) {
            $this->paused_incomplete_input = true;
        }
        return false;
    }

    /**
     * Makes sure $this->fp is an open handle positioned at
     * $this->zip_file_bytes_parsed_so_far.
     *
     * @return bool False (with the error state set) when opening or seeking fails.
     */
    private function open_file_handle() {
        if (is_resource($this->fp)) {
            return true;
        }

        $fp = fopen($this->file_path, 'rb');
        if (false === $fp) {
            $this->fp = null;
            $this->set_error('Failed to open ' . $this->file_path);
            return false;
        }

        if ($this->zip_file_bytes_parsed_so_far > 0 && 0 !== fseek($fp, $this->zip_file_bytes_parsed_so_far)) {
            fclose($fp);
            $this->fp = null;
            $this->set_error('Failed to seek in ' . $this->file_path);
            return false;
        }

        $this->fp = $fp;
        return true;
    }

    /**
     * Closes the file handle, if open, and forgets it so that it is never
     * read from again after being closed.
     */
    private function close_file_handle() {
        if (is_resource($this->fp)) {
            fclose($this->fp);
        }
        $this->fp = null;
    }

    /**
     * Reads up to $n bytes from the archive.
     *
     * When the input ends before the request can be satisfied, the handle is
     * closed, the state switches to STATE_COMPLETE and false is returned.
     * The byte counter only advances by the number of bytes actually returned.
     *
     * @param int  $n                Number of bytes to read.
     * @param bool $allow_short_read When true, fewer than $n (but at least one)
     *                               bytes are an acceptable result.
     * @return string|false The bytes read, '' when $n is not positive, or
     *                      false on end of input / failure to open the file.
     */
    private function consume_bytes($n, $allow_short_read = false) {
        if ($n <= 0) {
            return '';
        }
        if (!$this->open_file_handle()) {
            return false;
        }

        // fread() may return less than requested, so keep reading until the
        // request is satisfied or the input is exhausted.
        $bytes = '';
        while (strlen($bytes) < $n && !feof($this->fp)) {
            $piece = fread($this->fp, $n - strlen($bytes));
            if (false === $piece || '' === $piece) {
                break;
            }
            $bytes .= $piece;
        }

        $bytes_read = strlen($bytes);
        if (0 === $bytes_read || (!$allow_short_read && $bytes_read < $n)) {
            $this->close_file_handle();
            $this->state = self::STATE_COMPLETE;
            return false;
        }

        $this->zip_file_bytes_parsed_so_far += $bytes_read;
        return $bytes;
    }

}

0 commit comments

Comments
 (0)