Skip to content

Commit 21d36d8

Browse files
authored
Merge pull request #350 from clue-labs/header-parser
Change parser to use single regular expression to match all headers
2 parents 369d495 + 35cc108 commit 21d36d8

File tree

2 files changed

+123
-100
lines changed

2 files changed

+123
-100
lines changed

src/Io/RequestHeaderParser.php

Lines changed: 78 additions & 99 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
use Evenement\EventEmitter;
66
use Psr\Http\Message\ServerRequestInterface;
77
use React\Socket\ConnectionInterface;
8-
use RingCentral\Psr7 as g7;
98
use Exception;
109

1110
/**
@@ -56,7 +55,7 @@ public function handle(ConnectionInterface $conn)
5655

5756
try {
5857
$request = $that->parseRequest(
59-
(string)\substr($buffer, 0, $endOfHeader),
58+
(string)\substr($buffer, 0, $endOfHeader + 2),
6059
$conn->getRemoteAddress(),
6160
$conn->getLocalAddress()
6261
);
@@ -124,32 +123,36 @@ public function parseRequest($headers, $remoteSocketUri, $localSocketUri)
124123
{
125124
// additional, stricter safe-guard for request line
126125
// because request parser doesn't properly cope with invalid ones
127-
if (!\preg_match('#^[^ ]+ [^ ]+ HTTP/\d\.\d#m', $headers)) {
126+
$start = array();
127+
if (!\preg_match('#^(?<method>[^ ]+) (?<target>[^ ]+) HTTP/(?<version>\d\.\d)#m', $headers, $start)) {
128128
throw new \InvalidArgumentException('Unable to parse invalid request-line');
129129
}
130130

131-
// parser does not support asterisk-form and authority-form
132-
// remember original target and temporarily replace and re-apply below
133-
$originalTarget = null;
134-
if (\strncmp($headers, 'OPTIONS * ', 10) === 0) {
135-
$originalTarget = '*';
136-
$headers = 'OPTIONS / ' . \substr($headers, 10);
137-
} elseif (\strncmp($headers, 'CONNECT ', 8) === 0) {
138-
$parts = \explode(' ', $headers, 3);
139-
$uri = \parse_url('tcp://' . $parts[1]);
131+
// only support HTTP/1.1 and HTTP/1.0 requests
132+
if ($start['version'] !== '1.1' && $start['version'] !== '1.0') {
133+
throw new \InvalidArgumentException('Received request with invalid protocol version', 505);
134+
}
140135

141-
// check this is a valid authority-form request-target (host:port)
142-
if (isset($uri['scheme'], $uri['host'], $uri['port']) && count($uri) === 3) {
143-
$originalTarget = $parts[1];
144-
$parts[1] = 'http://' . $parts[1] . '/';
145-
$headers = implode(' ', $parts);
146-
} else {
147-
throw new \InvalidArgumentException('CONNECT method MUST use authority-form request target');
148-
}
136+
// match all request header fields into array, thanks to @kelunik for checking the HTTP specs and coming up with this regex
137+
$matches = array();
138+
$n = \preg_match_all('/^([^()<>@,;:\\\"\/\[\]?={}\x01-\x20\x7F]++):[\x20\x09]*+((?:[\x20\x09]*+[\x21-\x7E\x80-\xFF]++)*+)[\x20\x09]*+[\r]?+\n/m', $headers, $matches, \PREG_SET_ORDER);
139+
140+
// check number of valid header fields matches number of lines + request line
141+
if (\substr_count($headers, "\n") !== $n + 1) {
142+
throw new \InvalidArgumentException('Unable to parse invalid request header fields');
149143
}
150144

151-
// parse request headers into obj implementing RequestInterface
152-
$request = g7\parse_request($headers);
145+
// format all header fields into associative array
146+
$host = null;
147+
$fields = array();
148+
foreach ($matches as $match) {
149+
$fields[$match[1]][] = $match[2];
150+
151+
// match `Host` request header
152+
if ($host === null && \strtolower($match[1]) === 'host') {
153+
$host = $match[2];
154+
}
155+
}
153156

154157
// create new obj implementing ServerRequestInterface by preserving all
155158
// previous properties and restoring original request-target
@@ -158,6 +161,48 @@ public function parseRequest($headers, $remoteSocketUri, $localSocketUri)
158161
'REQUEST_TIME_FLOAT' => \microtime(true)
159162
);
160163

164+
// scheme is `http` unless TLS is used
165+
$localParts = \parse_url($localSocketUri);
166+
if (isset($localParts['scheme']) && $localParts['scheme'] === 'tls') {
167+
$scheme = 'https://';
168+
$serverParams['HTTPS'] = 'on';
169+
} else {
170+
$scheme = 'http://';
171+
}
172+
173+
// default host if unset comes from local socket address or defaults to localhost
174+
if ($host === null) {
175+
$host = isset($localParts['host'], $localParts['port']) ? $localParts['host'] . ':' . $localParts['port'] : '127.0.0.1';
176+
}
177+
178+
if ($start['method'] === 'OPTIONS' && $start['target'] === '*') {
179+
// support asterisk-form for `OPTIONS *` request line only
180+
$uri = $scheme . $host;
181+
} elseif ($start['method'] === 'CONNECT') {
182+
$parts = \parse_url('tcp://' . $start['target']);
183+
184+
// check this is a valid authority-form request-target (host:port)
185+
if (!isset($parts['scheme'], $parts['host'], $parts['port']) || \count($parts) !== 3) {
186+
throw new \InvalidArgumentException('CONNECT method MUST use authority-form request target');
187+
}
188+
$uri = $scheme . $start['target'];
189+
} else {
190+
// support absolute-form or origin-form for proxy requests
191+
if ($start['target'][0] === '/') {
192+
$uri = $scheme . $host . $start['target'];
193+
} else {
194+
// ensure absolute-form request-target contains a valid URI
195+
$parts = \parse_url($start['target']);
196+
197+
// make sure value contains valid host component (IP or hostname), but no fragment
198+
if (!isset($parts['scheme'], $parts['host']) || $parts['scheme'] !== 'http' || isset($parts['fragment'])) {
199+
throw new \InvalidArgumentException('Invalid absolute-form request-target');
200+
}
201+
202+
$uri = $start['target'];
203+
}
204+
}
205+
161206
// apply REMOTE_ADDR and REMOTE_PORT if source address is known
162207
// address should always be known, unless this is over Unix domain sockets (UDS)
163208
if ($remoteSocketUri !== null) {
@@ -169,51 +214,23 @@ public function parseRequest($headers, $remoteSocketUri, $localSocketUri)
169214
// apply SERVER_ADDR and SERVER_PORT if server address is known
170215
// address should always be known, even for Unix domain sockets (UDS)
171216
// but skip UDS as it doesn't have a concept of host/port.
172-
if ($localSocketUri !== null) {
173-
$localAddress = \parse_url($localSocketUri);
174-
if (isset($localAddress['host'], $localAddress['port'])) {
175-
$serverParams['SERVER_ADDR'] = $localAddress['host'];
176-
$serverParams['SERVER_PORT'] = $localAddress['port'];
177-
}
178-
if (isset($localAddress['scheme']) && $localAddress['scheme'] === 'tls') {
179-
$serverParams['HTTPS'] = 'on';
180-
}
217+
if ($localSocketUri !== null && isset($localParts['host'], $localParts['port'])) {
218+
$serverParams['SERVER_ADDR'] = $localParts['host'];
219+
$serverParams['SERVER_PORT'] = $localParts['port'];
181220
}
182221

183-
$target = $request->getRequestTarget();
184222
$request = new ServerRequest(
185-
$request->getMethod(),
186-
$request->getUri(),
187-
$request->getHeaders(),
188-
$request->getBody(),
189-
$request->getProtocolVersion(),
223+
$start['method'],
224+
$uri,
225+
$fields,
226+
null,
227+
$start['version'],
190228
$serverParams
191229
);
192-
$request = $request->withRequestTarget($target);
193-
194-
// re-apply actual request target from above
195-
if ($originalTarget !== null) {
196-
$request = $request->withUri(
197-
$request->getUri()->withPath(''),
198-
true
199-
)->withRequestTarget($originalTarget);
200-
}
201-
202-
// only support HTTP/1.1 and HTTP/1.0 requests
203-
$protocolVersion = $request->getProtocolVersion();
204-
if ($protocolVersion !== '1.1' && $protocolVersion !== '1.0') {
205-
throw new \InvalidArgumentException('Received request with invalid protocol version', 505);
206-
}
207-
208-
// ensure absolute-form request-target contains a valid URI
209-
$requestTarget = $request->getRequestTarget();
210-
if (\strpos($requestTarget, '://') !== false && \substr($requestTarget, 0, 1) !== '/') {
211-
$parts = \parse_url($requestTarget);
212230

213-
// make sure value contains valid host component (IP or hostname), but no fragment
214-
if (!isset($parts['scheme'], $parts['host']) || $parts['scheme'] !== 'http' || isset($parts['fragment'])) {
215-
throw new \InvalidArgumentException('Invalid absolute-form request-target');
216-
}
231+
// only assign request target if it is not in origin-form (happy path for most normal requests)
232+
if ($start['target'][0] !== '/') {
233+
$request = $request->withRequestTarget($start['target']);
217234
}
218235

219236
// Optional Host header value MUST be valid (host and optional port)
@@ -252,44 +269,6 @@ public function parseRequest($headers, $remoteSocketUri, $localSocketUri)
252269
}
253270
}
254271

255-
// set URI components from socket address if not already filled via Host header
256-
if ($request->getUri()->getHost() === '') {
257-
$parts = \parse_url($localSocketUri);
258-
if (!isset($parts['host'], $parts['port'])) {
259-
$parts = array('host' => '127.0.0.1', 'port' => 80);
260-
}
261-
262-
$request = $request->withUri(
263-
$request->getUri()->withScheme('http')->withHost($parts['host'])->withPort($parts['port']),
264-
true
265-
);
266-
}
267-
268-
// Do not assume this is HTTPS when this happens to be port 443
269-
// detecting HTTPS is left up to the socket layer (TLS detection)
270-
if ($request->getUri()->getScheme() === 'https') {
271-
$request = $request->withUri(
272-
$request->getUri()->withScheme('http')->withPort(443),
273-
true
274-
);
275-
}
276-
277-
// Update request URI to "https" scheme if the connection is encrypted
278-
$parts = \parse_url($localSocketUri);
279-
if (isset($parts['scheme']) && $parts['scheme'] === 'tls') {
280-
// The request URI may omit default ports here, so try to parse port
281-
// from Host header field (if possible)
282-
$port = $request->getUri()->getPort();
283-
if ($port === null) {
284-
$port = \parse_url('tcp://' . $request->getHeaderLine('Host'), PHP_URL_PORT); // @codeCoverageIgnore
285-
}
286-
287-
$request = $request->withUri(
288-
$request->getUri()->withScheme('https')->withPort($port),
289-
true
290-
);
291-
}
292-
293272
// always sanitize Host header because it contains critical routing information
294273
$request = $request->withUri($request->getUri()->withUserInfo('u')->withUserInfo(''));
295274

tests/Io/RequestHeaderParserTest.php

Lines changed: 45 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -340,6 +340,50 @@ public function testInvalidMalformedRequestLineParseException()
340340
$this->assertSame('Unable to parse invalid request-line', $error->getMessage());
341341
}
342342

343+
/**
344+
* @group a
345+
*/
346+
public function testInvalidMalformedRequestHeadersThrowsParseException()
347+
{
348+
$error = null;
349+
350+
$parser = new RequestHeaderParser();
351+
$parser->on('headers', $this->expectCallableNever());
352+
$parser->on('error', function ($message) use (&$error) {
353+
$error = $message;
354+
});
355+
356+
$connection = $this->getMockBuilder('React\Socket\Connection')->disableOriginalConstructor()->setMethods(null)->getMock();
357+
$parser->handle($connection);
358+
359+
$connection->emit('data', array("GET / HTTP/1.1\r\nHost : yes\r\n\r\n"));
360+
361+
$this->assertInstanceOf('InvalidArgumentException', $error);
362+
$this->assertSame('Unable to parse invalid request header fields', $error->getMessage());
363+
}
364+
365+
/**
366+
* @group a
367+
*/
368+
public function testInvalidMalformedRequestHeadersWhitespaceThrowsParseException()
369+
{
370+
$error = null;
371+
372+
$parser = new RequestHeaderParser();
373+
$parser->on('headers', $this->expectCallableNever());
374+
$parser->on('error', function ($message) use (&$error) {
375+
$error = $message;
376+
});
377+
378+
$connection = $this->getMockBuilder('React\Socket\Connection')->disableOriginalConstructor()->setMethods(null)->getMock();
379+
$parser->handle($connection);
380+
381+
$connection->emit('data', array("GET / HTTP/1.1\r\nHost: yes\rFoo: bar\r\n\r\n"));
382+
383+
$this->assertInstanceOf('InvalidArgumentException', $error);
384+
$this->assertSame('Unable to parse invalid request header fields', $error->getMessage());
385+
}
386+
343387
public function testInvalidAbsoluteFormSchemeEmitsError()
344388
{
345389
$error = null;
@@ -400,7 +444,7 @@ public function testUriStartingWithColonSlashSlashFails()
400444
$connection->emit('data', array("GET ://example.com:80/ HTTP/1.0\r\n\r\n"));
401445

402446
$this->assertInstanceOf('InvalidArgumentException', $error);
403-
$this->assertSame('Invalid request string', $error->getMessage());
447+
$this->assertSame('Invalid absolute-form request-target', $error->getMessage());
404448
}
405449

406450
public function testInvalidAbsoluteFormWithFragmentEmitsError()

0 commit comments

Comments
 (0)