Skip to content
This repository was archived by the owner on Nov 20, 2018. It is now read-only.

Commit 60d50aa

Browse files
committed
Add Url Path Decoder
1 parent 221df71 commit 60d50aa

File tree

4 files changed

+287
-0
lines changed

4 files changed

+287
-0
lines changed
Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
3+
4+
namespace Microsoft.Extensions.WebEncoders
5+
{
6+
/// <summary>
/// Decodes percent-encoded request paths, interpreting the escaped bytes as UTF-8.
/// </summary>
public class PathDecoder
{
    /// <summary>
    /// Decodes the percent-encoded sequences found in <paramref name="raw"/>.
    /// </summary>
    /// <remarks>
    /// A '%' that does not start a complete, well-formed UTF-8 escape sequence (truncated sequence,
    /// non-hex digits, overlong encoding, encoded surrogate, value above U+10FFFF) is copied to the
    /// output unchanged. An escaped '/' (%2F) is intentionally left escaped.
    /// </remarks>
    /// <param name="raw">The raw, possibly percent-encoded, path.</param>
    /// <returns>
    /// The decoded path; the same <paramref name="raw"/> instance when it contains no '%'.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="raw"/> is null.</exception>
    public static string Decode(string raw)
    {
        if (raw == null)
        {
            throw new System.ArgumentNullException(nameof(raw));
        }

        if (raw.IndexOf('%') < 0)
        {
            // Nothing to decode: hand back the input without allocating.
            return raw;
        }

        // Decoding never grows the text: every escaped byte occupies three source characters and
        // even a four byte sequence (twelve characters) produces at most two UTF-16 code units.
        var buffer = new char[raw.Length];

        int length;
        DecodeCore(raw, buffer, out length);

        return new string(buffer, 0, length);
    }

    /// <summary>
    /// Walks <paramref name="source"/>, copying literal runs and decoding escape sequences into
    /// <paramref name="buffer"/>. <paramref name="length"/> receives the number of characters written.
    /// </summary>
    private static void DecodeCore(string source, char[] buffer, out int length)
    {
        var bufferPosition = 0;
        var sourcePosition = 0;

        while (sourcePosition < source.Length)
        {
            // Copy the literal run that precedes the next '%' (possibly empty).
            var next = NextPercentage(sourcePosition, source);
            Copy(source, ref sourcePosition, buffer, ref bufferPosition, next - sourcePosition);

            if (sourcePosition >= source.Length)
            {
                break;
            }

            int codePoint;
            var consumed = Unescape(source, sourcePosition, out codePoint);
            if (consumed == 0)
            {
                // The sequence following the '%' can't be decoded as UTF-8:
                // keep the '%' itself and carry on with whatever follows it.
                Copy(source, ref sourcePosition, buffer, ref bufferPosition, 1);
            }
            else if (SkipUnescape(codePoint))
            {
                // Characters that must stay escaped (e.g. '/'): copy the original sequence.
                Copy(source, ref sourcePosition, buffer, ref bufferPosition, consumed);
            }
            else
            {
                // Emit the decoded character and move past the escape sequence in the source.
                bufferPosition += WriteCodePoint(codePoint, buffer, bufferPosition);
                sourcePosition += consumed;
            }
        }

        length = bufferPosition;
    }

    /// <summary>
    /// Returns true for decoded characters that must be kept in their escaped form.
    /// </summary>
    private static bool SkipUnescape(int codePoint)
    {
        return codePoint == '/';
    }

    /// <summary>
    /// Writes <paramref name="codePoint"/> as UTF-16 into <paramref name="destination"/> at
    /// <paramref name="index"/> and returns the number of chars written: 1 for the Basic
    /// Multilingual Plane, 2 (a surrogate pair) for U+10000 and above.
    /// </summary>
    private static int WriteCodePoint(int codePoint, char[] destination, int index)
    {
        if (codePoint < 0x10000)
        {
            destination[index] = (char)codePoint;
            return 1;
        }

        // Supplementary planes need a high/low surrogate pair; emitting only the first
        // code unit would leave a lone surrogate and corrupt the string.
        var offset = codePoint - 0x10000;
        destination[index] = (char)(0xD800 + (offset >> 10));
        destination[index + 1] = (char)(0xDC00 + (offset & 0x3FF));
        return 2;
    }

    /// <summary>
    /// Unescapes the UTF-8 sequence that begins at the '%' located at <paramref name="start"/>.
    /// If the sequence is complete and well-formed, <paramref name="codePoint"/> is set to the
    /// decoded Unicode code point and the number of source characters making up the sequence
    /// (including every '%') is returned. Otherwise 0 is returned.
    /// </summary>
    private static int Unescape(string source, int start, out int codePoint)
    {
        codePoint = 0;

        int first;
        if (!TryReadEscapedByte(source, start, out first))
        {
            return 0;
        }

        if ((first & 0x80) == 0)
        {
            // 0xxx xxxx: single byte, plain ASCII
            codePoint = first;
            return 3;
        }

        // The lead byte tells how many continuation bytes follow and the smallest
        // code point that legitimately needs that many bytes.
        int decodedBits;
        int expectBytes;
        int expectValueMin;
        if ((first & 0xE0) == 0xC0)
        {
            // 110x xxxx, expect 1 more byte
            decodedBits = first & 0x1F;
            expectBytes = 1;
            expectValueMin = 0x80;
        }
        else if ((first & 0xF0) == 0xE0)
        {
            // 1110 xxxx, expect 2 more bytes
            decodedBits = first & 0x0F;
            expectBytes = 2;
            expectValueMin = 0x800;
        }
        else if ((first & 0xF8) == 0xF0)
        {
            // 1111 0xxx, expect 3 more bytes
            decodedBits = first & 0x07;
            expectBytes = 3;
            expectValueMin = 0x10000;
        }
        else
        {
            // A continuation byte or an invalid lead byte can't start a sequence.
            return 0;
        }

        for (var i = 1; i <= expectBytes; ++i)
        {
            int continuation;
            if (!TryReadEscapedByte(source, start + (i * 3), out continuation) ||
                (continuation & 0xC0) != 0x80)
            {
                // Truncated input, missing '%', bad hex digits or not a 10xx xxxx byte.
                return 0;
            }

            // append the lower 6 bits
            decodedBits = (decodedBits << 6) | (continuation & 0x3F);
        }

        if (decodedBits < expectValueMin)
        {
            // Overlong encoding (e.g. using 2 bytes to encode something that only needed 1).
            return 0;
        }

        if (decodedBits >= 0xD800 && decodedBits <= 0xDFFF)
        {
            // UTF-16 surrogates are not allowed in UTF-8.
            return 0;
        }

        if (decodedBits > 0x10FFFF)
        {
            // Beyond the upper bound of Unicode.
            return 0;
        }

        codePoint = decodedBits;
        return (expectBytes + 1) * 3;
    }

    /// <summary>
    /// Copies <paramref name="length"/> characters from the source string to the destination array
    /// and advances both positions past the copied characters.
    /// </summary>
    private static void Copy(string source, ref int sourceStart, char[] destination, ref int destinationStart, int length)
    {
        source.CopyTo(sourceStart, destination, destinationStart, length);
        sourceStart += length;
        destinationStart += length;
    }

    /// <summary>
    /// Finds the next '%' at or after <paramref name="start"/>. If none is found, returns the source length.
    /// </summary>
    private static int NextPercentage(int start, string source)
    {
        var index = source.IndexOf('%', start);
        return index < 0 ? source.Length : index;
    }

    /// <summary>
    /// Reads the three character escape "%XY" located at <paramref name="index"/>.
    /// Returns false when fewer than three characters remain, the first one is not '%',
    /// or X/Y are not hexadecimal digits; otherwise <paramref name="value"/> is in [0x00, 0xFF].
    /// </summary>
    private static bool TryReadEscapedByte(string source, int index, out int value)
    {
        value = 0;

        if (index + 2 >= source.Length || source[index] != '%')
        {
            return false;
        }

        var high = HexToDec(source[index + 1]);
        var low = HexToDec(source[index + 2]);
        if (high < 0 || low < 0)
        {
            return false;
        }

        value = (high << 4) | low;
        return true;
    }

    /// <summary>
    /// Converts a hexadecimal digit (0-9, A-F, a-f) to its numeric value, or returns -1
    /// when <paramref name="value"/> is not a hexadecimal digit.
    /// </summary>
    private static int HexToDec(char value)
    {
        if (value >= '0' && value <= '9')
        {
            return value - '0';
        }

        if (value >= 'A' && value <= 'F')
        {
            return (value - 'A') + 10;
        }

        if (value >= 'a' && value <= 'f')
        {
            return (value - 'a') + 10;
        }

        return -1;
    }
}
259+
}

src/Microsoft.Extensions.WebEncoders.Core/project.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
"dnxcore50": {
1616
"dependencies": {
1717
"System.ComponentModel": "4.0.1-beta-*",
18+
"System.Collections": "4.0.11-beta-*",
1819
"System.Diagnostics.Debug": "4.0.11-beta-*",
1920
"System.IO": "4.0.11-beta-*",
2021
"System.Reflection": "4.0.10-*",
2122
"System.Resources.ResourceManager": "4.0.1-beta-*",
2223
"System.Runtime.Extensions": "4.0.11-beta-*",
24+
"System.Text.Encoding": "4.0.11-beta-*",
2325
"System.Threading": "4.0.11-beta-*"
2426
}
2527
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
3+
4+
using Xunit;
5+
6+
namespace Microsoft.Extensions.WebEncoders.Tests
7+
{
8+
/// <summary>
/// Tests for <see cref="PathDecoder"/>.
/// </summary>
public class PathDecoderTests
{
    /// <summary>
    /// Checks that decoding <paramref name="raw"/> produces <paramref name="expect"/>. The cases cover
    /// paths without escapes, a plain ASCII escape, an escaped '/' that stays escaped, incomplete
    /// escapes that are passed through, and multi-byte UTF-8 sequences.
    /// </summary>
    [Theory]
    [InlineData("/Path", "/Path")]
    [InlineData("/Path%20space", "/Path space")]
    [InlineData("/Path%2Fspace", "/Path%2Fspace")]
    [InlineData("/Path/space%", "/Path/space%")]
    [InlineData("/Path/space%a", "/Path/space%a")]
    [InlineData("%C3%84ra%20Benetton", "Ära Benetton")]
    [InlineData("%E6%88%91%E8%87%AA%E6%A8%AA%E5%88%80%E5%90%91%E5%A4%A9%E7%AC%91%E5%8E%BB%E7%95%99%E8%82%9D%E8%83%86%E4%B8%A4%E6%98%86%E4%BB%91", "我自横刀向天笑去留肝胆两昆仑")]
    [InlineData("%", "%")]
    [InlineData("%%", "%%")]
    public void DecodeUri(string raw, string expect)
    {
        // Act
        var actual = PathDecoder.Decode(raw);

        // Assert
        Assert.Equal(expect, actual);
    }
}
25+
}

test/Microsoft.Extensions.WebEncoders.Tests/project.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"dnx451": { },
1616
"dnxcore50": {
1717
"dependencies": {
18+
"System.Text.Encoding": "4.0.11-beta-*",
1819
"System.Text.Encoding.Extensions": "4.0.11-beta-*"
1920
}
2021
}

0 commit comments

Comments
 (0)