Skip to content
This repository was archived by the owner on Dec 18, 2018. It is now read-only.

Commit 2d061a0

Browse files
committed
Unescape string in memory
1. In place unescape; 2. UTF-8 verification; 3. MemoryPoolIterator2.Put; 4. Tests
1 parent 8d107b2 commit 2d061a0

File tree

4 files changed

+546
-6
lines changed

4 files changed

+546
-6
lines changed

src/Microsoft.AspNet.Server.Kestrel/Http/Frame.cs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
using Microsoft.AspNet.Server.Kestrel.Infrastructure;
1313
using Microsoft.Extensions.Logging;
1414
using Microsoft.Extensions.Primitives;
15+
using Microsoft.Extensions.WebEncoders;
1516

1617
// ReSharper disable AccessToModifiedClosure
1718

@@ -420,8 +421,8 @@ public async Task ProduceStartAndFireOnStarting(bool immediate = true)
420421
if (_responseStarted) return;
421422

422423
await FireOnStarting();
423-
424-
if (_applicationException != null)
424+
425+
if (_applicationException != null)
425426
{
426427
throw new ObjectDisposedException(
427428
"The response has been aborted due to an unhandled application exception.",
@@ -596,7 +597,17 @@ private bool TakeStartLine(SocketInput input)
596597
{
597598
return false;
598599
}
599-
var requestUri = begin.GetString(scan);
600+
601+
var needDecode = true;
602+
var percent = begin;
603+
var percentFound = percent.Seek('%');
604+
if (percentFound == -1)
605+
{
606+
needDecode = false;
607+
}
608+
609+
var pathBegin = begin;
610+
var pathEnd = scan;
600611

601612
var queryString = "";
602613
if (chFound == '?')
@@ -623,6 +634,13 @@ private bool TakeStartLine(SocketInput input)
623634
return false;
624635
}
625636

637+
if (needDecode)
638+
{
639+
pathEnd = UrlPathDecoder.Unescape(pathBegin, pathEnd);
640+
}
641+
642+
var requestUri = pathBegin.GetString(pathEnd);
643+
626644
consumed = scan;
627645
Method = method;
628646
RequestUri = requestUri;
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
3+
4+
using Microsoft.AspNet.Server.Kestrel.Infrastructure;
5+
6+
namespace Microsoft.AspNet.Server.Kestrel.Http
7+
{
8+
public class UrlPathDecoder
9+
{
10+
/// <summary>
/// Unescapes, in place, the percent-encoded bytes found between two memory iterators.
/// </summary>
/// <param name="start">Iterator positioned at the first byte of the sequence.</param>
/// <param name="end">Iterator positioned just past the last byte of the sequence.</param>
/// <returns>Iterator positioned just past the last byte written by the unescape pass.</returns>
public static MemoryPoolIterator2 Unescape(MemoryPoolIterator2 start, MemoryPoolIterator2 end)
{
    // Both cursors begin at the same spot: one consumes input, the other receives output.
    var source = start;
    var destination = start;

    while (!CompareIterators(ref source, ref end))
    {
        if (source.Peek() != '%')
        {
            // Ordinary byte: carry it over unchanged.
            destination.Put(source.Take());
            continue;
        }

        // Attempt the decode on a copy of the read cursor so the original position
        // is still known if the escape sequence turns out not to be decodable.
        var lookahead = source;
        var decoded = DecodeCore(ref lookahead, ref destination, end);
        if (!decoded)
        {
            // Everything that was scanned during the failed attempt is emitted untouched.
            Copy(source, lookahead, ref destination);
        }

        // Resume at the first byte the decode attempt did not scan.
        source = lookahead;
    }

    return destination;
}
55+
56+
/// <summary>
/// Decodes one character expressed as percent-encoded UTF-8 octets and, only when the
/// whole octet sequence is valid, writes the raw octets through <paramref name="writer"/>.
/// Nothing is written when the method returns false.
/// </summary>
/// <param name="reader">Iterator positioned at the leading % character; it is advanced over whatever was scanned.</param>
/// <param name="writer">Iterator receiving the decoded octets.</param>
/// <param name="end">Iterator positioned just past the end of the sequence.</param>
/// <returns>True when a character was decoded and written; otherwise false.</returns>
private static bool DecodeCore(ref MemoryPoolIterator2 reader, ref MemoryPoolIterator2 writer, MemoryPoolIterator2 end)
{
    var lead = UnescapePercentEncoding(ref reader, end);
    if (lead == -1)
    {
        return false;
    }

    if (lead <= 0x7F)
    {
        // 0xxx xxxx: a single-octet (ASCII) character
        writer.Put(lead);
        return true;
    }

    // Classify the lead octet: payload bits, total sequence length, and the smallest
    // code point that legitimately needs a sequence of that length.
    int codePoint;
    int length;
    int smallestValue;
    if ((lead & 0xE0) == 0xC0)
    {
        // 110x xxxx: one continuation octet follows
        codePoint = lead & 0x1F;
        length = 2;
        smallestValue = 0x80;
    }
    else if ((lead & 0xF0) == 0xE0)
    {
        // 1110 xxxx: two continuation octets follow
        codePoint = lead & 0x0F;
        length = 3;
        smallestValue = 0x800;
    }
    else if ((lead & 0xF8) == 0xF0)
    {
        // 1111 0xxx: three continuation octets follow
        codePoint = lead & 0x07;
        length = 4;
        smallestValue = 0x10000;
    }
    else
    {
        // not a legal lead octet
        return false;
    }

    int second = 0, third = 0, fourth = 0;

    // position is the 1-based index, within the sequence, of the octet being read
    for (var position = 2; position <= length; position++)
    {
        if (CompareIterators(ref reader, ref end))
        {
            // input ended before the sequence was complete
            return false;
        }

        var trail = UnescapePercentEncoding(ref reader, end);
        if (trail == -1 || (trail & 0xC0) != 0x80)
        {
            // either not a percent-encoding, or not of the form 10xx xxxx
            return false;
        }

        codePoint = (codePoint << 6) | (trail & 0x3F);
        var pending = length - position;

        if (pending == 1 && codePoint >= 0x360 && codePoint <= 0x37F)
        {
            // one more octet would land in 0xD800-0xDFFF, the UTF-16 surrogate
            // range, which is not allowed in UTF-8
            return false;
        }

        if (pending == 2 && codePoint >= 0x110)
        {
            // two more octets would exceed the Unicode upper bound 0x10FFFF
            return false;
        }

        switch (position)
        {
            case 2:
                second = trail;
                break;
            case 3:
                third = trail;
                break;
            case 4:
                fourth = trail;
                break;
        }
    }

    if (codePoint < smallestValue)
    {
        // overlong encoding (more octets used than the value requires)
        return false;
    }

    // Every octet has been validated; emit them. A multi-octet sequence always has
    // at least a lead and a second octet.
    writer.Put(lead);
    writer.Put(second);
    if (length > 2)
    {
        writer.Put(third);
    }
    if (length > 3)
    {
        writer.Put(fourth);
    }

    return true;
}
189+
190+
// Transfers every byte from head up to (but excluding) tail through writer, one at a time.
// head and tail are local copies, so the caller's iterators are not moved; writer is
// passed by reference and is the one the bytes are put through.
private static void Copy(MemoryPoolIterator2 head, MemoryPoolIterator2 tail, ref MemoryPoolIterator2 writer)
{
    while (true)
    {
        if (CompareIterators(ref head, ref tail))
        {
            return;
        }

        var current = head.Take();
        writer.Put(current);
    }
}
197+
198+
/// <summary>
/// Reads one percent-encoding (a % followed by two hexadecimal digits) and returns
/// the byte value it represents.
///
/// The character at <paramref name="scan"/> is always taken. If it is not %,
/// -1 is returned with <paramref name="scan"/> left after that character.
///
/// If it is % but the next two characters are not both hexadecimal digits, or
/// the pair is one that <c>SkipUnescape</c> says must stay encoded, -1 is returned
/// with <paramref name="scan"/> left immediately after the %.
///
/// On success <paramref name="scan"/> is left after the second hexadecimal digit.
/// </summary>
/// <param name="scan">The iterator to read from.</param>
/// <param name="end">The end of the sequence.</param>
/// <returns>The unescaped byte value on success; otherwise -1.</returns>
private static int UnescapePercentEncoding(ref MemoryPoolIterator2 scan, MemoryPoolIterator2 end)
{
    var marker = scan.Take();
    if (marker != '%')
    {
        return -1;
    }

    // Read the digits through a copy so that scan only moves past them on success.
    var lookahead = scan;

    var high = ReadHex(ref lookahead, end);
    if (high != -1)
    {
        var low = ReadHex(ref lookahead, end);
        if (low != -1 && !SkipUnescape(high, low))
        {
            scan = lookahead;
            return (high << 4) | low;
        }
    }

    return -1;
}
246+
247+
/// <summary>
/// Takes the next character and converts it to the value of the hexadecimal
/// digit it denotes.
///
/// Unless <paramref name="scan"/> is already at <paramref name="end"/>, one
/// character is taken from it whether or not that character is a valid digit.
/// </summary>
/// <param name="scan">The iterator to read from.</param>
/// <param name="end">The end of the sequence.</param>
/// <returns>The digit's value (0-15), or -1 at the end of the sequence or for a non-hexadecimal character.</returns>
private static int ReadHex(ref MemoryPoolIterator2 scan, MemoryPoolIterator2 end)
{
    if (CompareIterators(ref scan, ref end))
    {
        return -1;
    }

    var ch = scan.Take();

    if (ch >= '0' && ch <= '9')
    {
        return ch - '0';
    }

    if (ch >= 'A' && ch <= 'F')
    {
        return (ch - 'A') + 10;
    }

    if (ch >= 'a' && ch <= 'f')
    {
        return (ch - 'a') + 10;
    }

    // anything else is not a hexadecimal digit
    return -1;
}
286+
287+
// Reports whether the percent-encoding made of the two hexadecimal digit values
// must be left encoded. Only %2F (high digit 2, low digit F) is skipped.
private static bool SkipUnescape(int value1, int value2)
{
    return value1 == 0x2 && value2 == 0xF;
}
297+
298+
// Returns true when both iterators have the same Block and the same Index.
// The arguments are taken by ref only to avoid copying them; neither is modified.
private static bool CompareIterators(ref MemoryPoolIterator2 lhs, ref MemoryPoolIterator2 rhs)
{
    var sameBlock = lhs.Block == rhs.Block;
    return sameBlock && (lhs.Index == rhs.Index);
}
303+
}
304+
}

0 commit comments

Comments
 (0)