Skip to content
This repository was archived by the owner on Nov 20, 2018. It is now read-only.

Commit 90eb1a0

Browse files
committed
Add a length boundary to decode method
1 parent 594a3f9 commit 90eb1a0

File tree

2 files changed

+142
-38
lines changed

2 files changed

+142
-38
lines changed

src/Microsoft.Extensions.WebEncoders.Core/UrlPathDecoder.cs

Lines changed: 119 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,39 @@ public class UrlPathDecoder
3030
/// </param>
3131
/// <returns>The length of the result.</returns>
3232
public static int DecodeInPlace(char[] buffer)
33+
{
34+
return DecodeInPlace(buffer, buffer.Length);
35+
}
36+
37+
/// <summary>
38+
/// Unescape a url char array in place. Returns the length of the result.
39+
///
40+
/// - Everything is unescaped except %2F ('/')
41+
/// - UTF8 bytes are tested for formatting, overlong encoding, surrogates and value ranges
42+
/// - Invalid escaped sequences are copied to the output as is
43+
/// - It doesn't check if the string contains query
44+
/// </summary>
45+
/// <param name="buffer">
46+
/// The char array containing the sequence of characters to be decoded. The
47+
/// result will be saved in the same array.
48+
/// </param>
49+
/// <param name="len">
50+
/// The length of the sequence of characters in buffer to be decoded.
51+
/// </param>
52+
/// <returns>The length of the result.</returns>
53+
public static int DecodeInPlace(char[] buffer, int len)
3354
{
3455
if (buffer == null)
3556
{
3657
throw new ArgumentNullException(nameof(buffer));
3758
}
3859

39-
if (GetNextEncoded(0, buffer) == buffer.Length)
60+
if (GetNextEncoded(buffer, 0, len) == len)
4061
{
41-
return buffer.Length;
62+
return len;
4263
}
4364

44-
return DecodeCore(buffer, buffer);
65+
return DecodeCore(buffer, len);
4566
}
4667

4768
/// <summary>
@@ -69,60 +90,72 @@ public static string Decode(string original)
6990
var buffer = original.ToCharArray();
7091

7192
// decode in place
72-
var len = DecodeCore(buffer, buffer);
93+
var len = DecodeCore(buffer, buffer.Length);
7394
return new string(buffer, 0, len);
7495
}
7596

7697
/// <summary>
77-
/// Unescape a url path string.
78-
///
79-
/// - The result is saved in place
98+
/// Decode the sequence of characters in the given char array.
8099
/// </summary>
81-
private static int DecodeCore(char[] source, char[] destination)
100+
/// <param name="buffer">
101+
/// The array of characters to be decoded. It's both the source and output of the
102+
/// operation which means the decode happens in place.
103+
/// </param>
104+
/// <param name="length">
105+
/// The length of the source sequence in the <paramref name="buffer"/> array to be decoded.
106+
/// </param>
107+
/// <returns>
108+
/// The length of the result.
109+
/// </returns>
110+
private static int DecodeCore(char[] buffer, int length)
82111
{
83-
var bufferPosition = 0;
84-
var sourcePosition = 0;
112+
// two indices to read and write
113+
var readerPosition = 0;
114+
var writerPosition = 0;
85115

116+
// operating buffer
86117
var unescapedChars = new char[1];
87118
var unescapedCharsCount = 0;
88119
var bytesBuffer = new byte[4];
89120

90-
while (sourcePosition < source.Length)
121+
while (readerPosition < length)
91122
{
92-
var next = GetNextEncoded(sourcePosition, source);
93-
var copyLength = next - sourcePosition;
94-
Copy(source, ref sourcePosition, destination, ref bufferPosition, copyLength);
123+
var next = GetNextEncoded(buffer, readerPosition, length);
124+
var copyLength = next - readerPosition;
125+
126+
CopyInPlace(buffer, length, copyLength, ref readerPosition, ref writerPosition);
95127

96-
if (sourcePosition >= source.Length)
128+
if (readerPosition >= length)
97129
{
98130
break;
99131
}
100132

101-
var consumed = Unescape(source, next, bytesBuffer, ref unescapedChars, ref unescapedCharsCount);
133+
var consumed = Unescape(buffer, length, next, bytesBuffer, ref unescapedChars, ref unescapedCharsCount);
102134
if (consumed == 0)
103135
{
104136
// Skip unescaping the % as the sequence that follows it can't be correctly
105137
// decoded under UTF8
106-
Copy(source, ref sourcePosition, destination, ref bufferPosition, 1);
138+
CopyInPlace(buffer, length, 1, ref readerPosition, ref writerPosition);
107139
}
108140
else if (unescapedCharsCount == 1 && SkipUnescape(unescapedChars[0]))
109141
{
110142
// Skip unescaping specified characters (eg. '/')
111143
// Copy the original sequence to destination
112-
Copy(source, ref sourcePosition, destination, ref bufferPosition, consumed);
144+
CopyInPlace(buffer, length, consumed, ref readerPosition, ref writerPosition);
113145
}
114146
else
115147
{
116148
// Copy unescaped character. Move to the next character in source.
117149
for (int i = 0; i < unescapedCharsCount; ++i)
118150
{
119-
destination[bufferPosition++] = unescapedChars[i];
151+
buffer[writerPosition++] = unescapedChars[i];
120152
}
121-
sourcePosition += consumed;
153+
154+
readerPosition += consumed;
122155
}
123156
}
124157

125-
return bufferPosition;
158+
return writerPosition;
126159
}
127160

128161
private static bool SkipUnescape(char charactor)
@@ -144,9 +177,9 @@ private static bool SkipUnescape(char charactor)
144177
/// - The length of the sequence, including the % character, will be returned.
145178
/// Otherwise 0 is returned.
146179
/// </summary>
147-
private static int Unescape(char[] source, int start, byte[] bytesBuffer, ref char[] output, ref int count)
180+
private static int Unescape(char[] source, int sourceBoundary, int start, byte[] bytesBuffer, ref char[] output, ref int count)
148181
{
149-
if (start + 2 >= source.Length)
182+
if (start + 2 >= sourceBoundary)
150183
{
151184
return 0;
152185
}
@@ -205,7 +238,7 @@ private static int Unescape(char[] source, int start, byte[] bytesBuffer, ref ch
205238
return 0;
206239
}
207240

208-
if (start + (bytesCount * 3) > source.Length)
241+
if (start + (bytesCount * 3) > sourceBoundary)
209242
{
210243
// less than expected bytes to decode
211244
return 0;
@@ -274,39 +307,88 @@ private static int Unescape(char[] source, int start, byte[] bytesBuffer, ref ch
274307
}
275308

276309
/// <summary>
277-
/// Copy characters in source string to target char array. After copied the indices points at the beginning
278-
/// offset of both source and destination are updated to point to the position after the last copied character
310+
/// Copy characters in an array in place.
279311
/// </summary>
280-
private static int Copy(char[] source, ref int sourceStart, char[] destination, ref int destinatonStart, int length)
312+
/// <param name="buffer">
313+
/// The array containing the characters to be copied. It is both the
314+
/// source and output of the operation.
315+
/// </param>
316+
/// <param name="sourceLength">
317+
/// The length of the source sequence
318+
/// </param>
319+
/// <param name="count">
320+
/// The count of the characters to be copied in the <paramref name="buffer"/>
321+
/// </param>
322+
/// <param name="readerPosition">
323+
/// The index where characters are copied from. The parameter will be
324+
/// set to the position right behind the last read character after copy.
325+
/// </param>
326+
/// <param name="writerPosition">
327+
/// The index where characters are copied to. The parameter will be set to
328+
/// the position right behind the last written character after copy.
329+
/// </param>
330+
/// <returns>The number of characters actually copied.</returns>
331+
private static int CopyInPlace(char[] buffer, int sourceLength, int count, ref int readerPosition, ref int writerPosition)
281332
{
282-
for (var i = 0; i < length; ++i)
333+
if (buffer == null)
334+
{
335+
throw new ArgumentNullException(nameof(buffer));
336+
}
337+
338+
if (sourceLength > buffer.Length)
339+
{
340+
throw new ArgumentOutOfRangeException(nameof(sourceLength), $"The length of the source sequence can't be longer than the size of the buffer.");
341+
}
342+
343+
if (readerPosition < 0 || readerPosition >= sourceLength)
344+
{
345+
throw new ArgumentOutOfRangeException(nameof(readerPosition), $"The index of the source sequence {readerPosition} is out of range.");
346+
}
347+
348+
if (writerPosition < 0 || writerPosition >= buffer.Length)
349+
{
350+
throw new ArgumentOutOfRangeException(nameof(writerPosition), $"The index of the output sequence {writerPosition} is out of range.");
351+
}
352+
353+
if (writerPosition > readerPosition)
354+
{
355+
throw new ArgumentException($"The index of output sequence {writerPosition} is behind the read sequence {readerPosition}.");
356+
}
357+
358+
for (var i = 0; i < count; ++i)
283359
{
284-
destination[destinatonStart++] = source[sourceStart++];
360+
buffer[writerPosition++] = buffer[readerPosition++];
285361

286-
if (destinatonStart >= destination.Length ||
287-
sourceStart >= source.Length)
362+
// when reader pointer surpass the boundary of the source sequence; or
363+
// writer pointer surpass the boundary of the buffer
364+
// return the count of the copied characters
365+
if (writerPosition >= buffer.Length || readerPosition >= sourceLength)
288366
{
289367
return i + 1;
290368
}
291369
}
292370

293-
return length;
371+
return count;
294372
}
295373

296374
/// <summary>
297-
/// Find the next % in the sequence. If % is not found, return the sequence length.
375+
/// Find the next % in the sequence of range [start, end)
298376
/// </summary>
299-
private static int GetNextEncoded(int start, char[] array)
377+
/// <param name="buffer">The array of characters in which the % is searched.</param>
378+
/// <param name="start">The start of the search range.</param>
379+
/// <param name="end">The end of the search range.</param>
380+
/// <returns>The index of the first %, or <paramref name="end"/> if % is not found.</returns>
381+
private static int GetNextEncoded(char[] buffer, int start, int end)
300382
{
301-
for (var i = start; i < array.Length; ++i)
383+
for (var i = start; i < end; ++i)
302384
{
303-
if (array[i] == '%')
385+
if (buffer[i] == '%')
304386
{
305387
return i;
306388
}
307389
}
308390

309-
return array.Length;
391+
return end;
310392
}
311393

312394
private static bool TryGetUnescapedByte(char[] buffer, int position, out byte result)

test/Microsoft.Extensions.WebEncoders.Tests/UrlPathDecoderTests.cs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ protected override void PositiveAssert(string raw, string expect)
124124
}
125125
}
126126

127-
public class UrlPathInPlaceDecoderTests: UrlPathDecoderTestBase
127+
public class UrlPathInPlaceDecoderTests : UrlPathDecoderTestBase
128128
{
129129
protected override void PositiveAssert(string raw, string expect)
130130
{
@@ -154,5 +154,27 @@ protected override void NegativeAssert(string raw)
154154
Assert.Equal(raw.Length, len);
155155
Assert.Equal(raw.ToCharArray(), buf);
156156
}
157+
158+
[Theory]
159+
[InlineData("/foo%2Fbar", 10, "/foo%2Fbar", 10)]
160+
[InlineData("/foo%2Fbar", 9, "/foo%2Fba", 9)]
161+
[InlineData("/foo%2Fbar", 8, "/foo%2Fb", 8)]
162+
[InlineData("%D0%A4", 6, "Ф", 1)]
163+
[InlineData("%D0%A4", 5, "%D0%A", 5)]
164+
[InlineData("%D0%A4", 4, "%D0%", 4)]
165+
[InlineData("%D0%A4", 3, "%D0", 3)]
166+
[InlineData("%D0%A4", 2, "%D", 2)]
167+
[InlineData("%D0%A4", 1, "%", 1)]
168+
[InlineData("%D0%A4", 0, "", 0)]
169+
[InlineData("%C2%B5%40%C3%9F%C3%B6%C3%A4%C3%BC%C3%A0%C3%A1", 45, "µ@ßöäüàá", 8)]
170+
[InlineData("%C2%B5%40%C3%9F%C3%B6%C3%A4%C3%BC%C3%A0%C3%A1", 44, "µ@ßöäüà%C3%A", 12)]
171+
public void DecodeWithBoundary(string raw, int rawLength, string expect, int expectLength)
172+
{
173+
var buf = raw.ToCharArray();
174+
var len = UrlPathDecoder.DecodeInPlace(buf, rawLength);
175+
176+
Assert.Equal(expectLength, len);
177+
Assert.Equal(expect.ToCharArray(), new ArraySegment<char>(buf, 0, expectLength));
178+
}
157179
}
158180
}

0 commit comments

Comments
 (0)