Skip to content
This repository was archived by the owner on Dec 18, 2018. It is now read-only.

Commit 9e1c054

Browse files
committed
Unescape string in memory
1. In-place unescape; 2. UTF-8 verification; 3. MemoryPoolIterator2.Put; 4. Tests
1 parent eb2c3a1 commit 9e1c054

File tree

5 files changed

+802
-9
lines changed

5 files changed

+802
-9
lines changed

src/Microsoft.AspNet.Server.Kestrel/Http/Frame.cs

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,8 @@ public async Task ProduceStartAndFireOnStarting(bool immediate = true)
420420
if (_responseStarted) return;
421421

422422
await FireOnStarting();
423-
424-
if (_applicationException != null)
423+
424+
if (_applicationException != null)
425425
{
426426
throw new ObjectDisposedException(
427427
"The response has been aborted due to an unhandled application exception.",
@@ -591,12 +591,38 @@ private bool TakeStartLine(SocketInput input)
591591

592592
scan.Take();
593593
begin = scan;
594-
var chFound = scan.Seek(' ', '?');
595-
if (chFound == -1)
594+
595+
int chFound = -1;
596+
bool needDecode = false;
597+
while (true)
596598
{
597-
return false;
599+
if (!needDecode)
600+
{
601+
chFound = scan.Seek(' ', '?', '%');
602+
}
603+
else
604+
{
605+
// The path is already known to need decoding, so '%' no longer has to be searched for.
606+
chFound = scan.Seek(' ', '?');
607+
}
608+
609+
if (chFound == -1)
610+
{
611+
return false;
612+
}
613+
else if (chFound == '%')
614+
{
615+
needDecode = true;
616+
continue;
617+
}
618+
else
619+
{
620+
break;
621+
}
598622
}
599-
var requestUri = begin.GetString(scan);
623+
624+
var pathBegin = begin;
625+
var pathEnd = scan;
600626

601627
var queryString = "";
602628
if (chFound == '?')
@@ -623,6 +649,13 @@ private bool TakeStartLine(SocketInput input)
623649
return false;
624650
}
625651

652+
if (needDecode)
653+
{
654+
pathEnd = UrlPathDecoder.Unescape(pathBegin, pathEnd);
655+
}
656+
657+
var requestUri = pathBegin.GetString(pathEnd);
658+
626659
consumed = scan;
627660
Method = method;
628661
RequestUri = requestUri;
Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,306 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
3+
4+
using Microsoft.AspNet.Server.Kestrel.Infrastructure;
5+
6+
namespace Microsoft.AspNet.Server.Kestrel.Http
7+
{
8+
public class UrlPathDecoder
{
    /// <summary>
    /// Decodes, in place, the percent-encoded octets found between two memory iterators.
    /// Percent-encodings that do not form a well-formed UTF-8 sequence, as well as the
    /// encoded slash (%2F), are left in the output exactly as they appeared in the input.
    /// </summary>
    /// <param name="start">Iterator positioned on the first byte of the sequence.</param>
    /// <param name="end">Iterator positioned one byte past the last byte of the sequence.</param>
    /// <returns>Iterator positioned one byte past the last byte of the decoded sequence.</returns>
    public static MemoryPoolIterator2 Unescape(MemoryPoolIterator2 start, MemoryPoolIterator2 end)
    {
        // Cursor over the bytes that still have to be examined.
        var source = start;

        // Cursor over the slot that receives the next output byte. It starts on the same
        // byte as the source cursor because the result overwrites the input.
        var target = source;

        while (!CompareIterators(ref source, ref end))
        {
            if (source.Peek() != '%')
            {
                // Ordinary byte: carry it over unchanged.
                target.Put((byte)source.Take());
                continue;
            }

            // Decode on a copy of the source cursor. DecodeCore always leaves the copy on
            // the first byte it has not consumed. When decoding succeeds the decoded bytes
            // have already been written through the target cursor; when it fails, the raw
            // bytes lying between the two cursors are copied to the output untouched.
            var lookahead = source;
            if (!DecodeCore(ref lookahead, ref target, end))
            {
                Copy(source, lookahead, ref target);
            }

            source = lookahead;
        }

        return target;
    }

    /// <summary>
    /// Decodes the run of percent-encodings starting at <paramref name="reader"/> and
    /// writes the resulting bytes only if they form a single ASCII byte or one
    /// well-formed multi-byte UTF-8 sequence.
    /// </summary>
    /// <param name="reader">
    /// Iterator positioned on the first % character. On return it is positioned on the
    /// first byte that was not consumed, whether or not decoding succeeded.
    /// </param>
    /// <param name="writer">Iterator positioned on the slot to write to; used only on success.</param>
    /// <param name="end">The end of the sequence.</param>
    /// <returns>
    /// True if the decoded bytes were written; false if nothing was written and the
    /// consumed input has to be copied to the output as is by the caller.
    /// </returns>
    private static bool DecodeCore(ref MemoryPoolIterator2 reader, ref MemoryPoolIterator2 writer, MemoryPoolIterator2 end)
    {
        var leadByte = UnescapePercentEncoding(ref reader, end);
        if (leadByte == -1)
        {
            return false;
        }

        if (leadByte <= 0x7F)
        {
            // 0xxx xxxx: a single-byte (ASCII) character, nothing more to verify.
            writer.Put((byte)leadByte);
            return true;
        }

        // Classify the lead byte: total length of the sequence, the payload bits carried by
        // the lead byte, and the smallest code point that legitimately needs that length.
        int sequenceLength;
        int codePoint;
        int smallestAllowed;
        if ((leadByte & 0xE0) == 0xC0)
        {
            // 110x xxxx: one continuation byte follows.
            sequenceLength = 2;
            codePoint = leadByte & 0x1F;
            smallestAllowed = 0x80;
        }
        else if ((leadByte & 0xF0) == 0xE0)
        {
            // 1110 xxxx: two continuation bytes follow.
            sequenceLength = 3;
            codePoint = leadByte & 0x0F;
            smallestAllowed = 0x800;
        }
        else if ((leadByte & 0xF8) == 0xF0)
        {
            // 1111 0xxx: three continuation bytes follow.
            sequenceLength = 4;
            codePoint = leadByte & 0x07;
            smallestAllowed = 0x10000;
        }
        else
        {
            // Not a valid UTF-8 lead byte.
            return false;
        }

        int second = 0, third = 0, fourth = 0;

        // 'position' is the 1-based index, within the UTF-8 sequence, of the byte being read.
        for (var position = 2; position <= sequenceLength; position++)
        {
            if (CompareIterators(ref reader, ref end))
            {
                // Input ended in the middle of the sequence.
                return false;
            }

            // Read through a copy so that 'reader' only advances past bytes that passed
            // every check below.
            var probe = reader;
            var octet = UnescapePercentEncoding(ref probe, end);
            if (octet == -1)
            {
                return false;
            }

            if ((octet & 0xC0) != 0x80)
            {
                // Continuation bytes must have the form 10xx xxxx.
                return false;
            }

            codePoint = (codePoint << 6) | (octet & 0x3F);
            var stillExpected = sequenceLength - position;

            if (stillExpected == 1 && codePoint >= 0x360 && codePoint <= 0x37F)
            {
                // With one byte still to come, this prefix of a three-byte sequence can only
                // end up in U+D800-U+DFFF, the UTF-16 surrogates, which UTF-8 forbids.
                // (In a four-byte sequence the same prefix belongs to an overlong form,
                // which is invalid as well.)
                return false;
            }

            if (stillExpected == 2 && codePoint >= 0x110)
            {
                // With two bytes still to come, the result would exceed U+10FFFF.
                return false;
            }

            reader = probe;
            switch (position)
            {
                case 2:
                    second = octet;
                    break;
                case 3:
                    third = octet;
                    break;
                default:
                    fourth = octet;
                    break;
            }
        }

        if (codePoint < smallestAllowed)
        {
            // Overlong encoding, e.g. two bytes used for a value that fits in one.
            return false;
        }

        // The whole sequence is verified; emit its bytes. A multi-byte sequence always has
        // at least a lead byte and one continuation byte.
        writer.Put((byte)leadByte);
        writer.Put((byte)second);
        if (sequenceLength > 2)
        {
            writer.Put((byte)third);
        }
        if (sequenceLength > 3)
        {
            writer.Put((byte)fourth);
        }

        return true;
    }

    /// <summary>
    /// Copies the bytes between <paramref name="head"/> (inclusive) and
    /// <paramref name="tail"/> (exclusive) to <paramref name="writer"/>.
    /// </summary>
    /// <param name="head">Iterator positioned on the first byte to copy.</param>
    /// <param name="tail">Iterator positioned one byte past the last byte to copy.</param>
    /// <param name="writer">Iterator positioned on the slot to write to.</param>
    private static void Copy(MemoryPoolIterator2 head, MemoryPoolIterator2 tail, ref MemoryPoolIterator2 writer)
    {
        while (true)
        {
            if (CompareIterators(ref head, ref tail))
            {
                return;
            }

            writer.Put((byte)head.Take());
        }
    }

    /// <summary>
    /// Reads one percent-encoding (a % followed by two hexadecimal digits) and unescapes it.
    ///
    /// The character under <paramref name="scan"/> is always consumed. If it is not %,
    /// -1 is returned and <paramref name="scan"/> stays just past that character.
    ///
    /// If the two characters following the % are not both hexadecimal digits, or the
    /// encoding is one that must stay escaped (%2F), -1 is returned and
    /// <paramref name="scan"/> stays just past the %.
    ///
    /// Otherwise <paramref name="scan"/> is moved past both digits and the unescaped
    /// value is returned.
    /// </summary>
    /// <param name="scan">Iterator positioned on the character to read.</param>
    /// <param name="end">The end of the sequence.</param>
    /// <returns>The unescaped byte on success; otherwise -1.</returns>
    private static int UnescapePercentEncoding(ref MemoryPoolIterator2 scan, MemoryPoolIterator2 end)
    {
        if (scan.Take() != '%')
        {
            return -1;
        }

        // Read the digits through a copy so that 'scan' is only advanced past them when
        // the whole encoding is accepted.
        var cursor = scan;

        var high = ReadHex(ref cursor, end);
        if (high == -1)
        {
            return -1;
        }

        var low = ReadHex(ref cursor, end);
        if (low == -1)
        {
            return -1;
        }

        if (SkipUnescape(high, low))
        {
            return -1;
        }

        scan = cursor;
        return (high << 4) | low;
    }

    /// <summary>
    /// Reads the next character and converts it to the value of the hexadecimal digit
    /// it represents.
    ///
    /// Unless <paramref name="scan"/> is already at <paramref name="end"/>, the
    /// character is consumed whether or not it is a hexadecimal digit.
    /// </summary>
    /// <param name="scan">Iterator positioned on the character to read.</param>
    /// <param name="end">The end of the sequence.</param>
    /// <returns>The digit's value (0-15) on success; otherwise -1.</returns>
    private static int ReadHex(ref MemoryPoolIterator2 scan, MemoryPoolIterator2 end)
    {
        if (CompareIterators(ref scan, ref end))
        {
            return -1;
        }

        var ch = scan.Take();

        if ((ch >= '0') && (ch <= '9'))
        {
            return ch - '0';
        }

        if ((ch >= 'A') && (ch <= 'F'))
        {
            return (ch - 'A') + 10;
        }

        if ((ch >= 'a') && (ch <= 'f'))
        {
            return (ch - 'a') + 10;
        }

        return -1;
    }

    /// <summary>
    /// Tells whether the percent-encoding made of the two given hexadecimal digit
    /// values must be left escaped.
    /// </summary>
    /// <param name="value1">Value of the first (high) hexadecimal digit.</param>
    /// <param name="value2">Value of the second (low) hexadecimal digit.</param>
    /// <returns>True for %2F (the encoded slash); otherwise false.</returns>
    private static bool SkipUnescape(int value1, int value2)
    {
        // %2F is the only encoding that is kept as is.
        return value1 == 2 && value2 == 15;
    }

    /// <summary>
    /// Tells whether two iterators are on the same position, i.e. refer to the same
    /// block and the same index.
    /// </summary>
    private static bool CompareIterators(ref MemoryPoolIterator2 lhs, ref MemoryPoolIterator2 rhs)
    {
        // The arguments are taken by reference to avoid the cost of copying them.
        var sameBlock = lhs.Block == rhs.Block;
        return sameBlock && (lhs.Index == rhs.Index);
    }
}
306+
}

0 commit comments

Comments
 (0)