diff --git a/src/Microsoft.Extensions.WebEncoders.Core/Internals/UrlPathDecoder.cs b/src/Microsoft.Extensions.WebEncoders.Core/Internals/UrlPathDecoder.cs
new file mode 100644
index 00000000..93884083
--- /dev/null
+++ b/src/Microsoft.Extensions.WebEncoders.Core/Internals/UrlPathDecoder.cs
@@ -0,0 +1,431 @@
+// Copyright (c) .NET Foundation. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using System.Text;
+
+namespace Microsoft.Extensions.WebEncoders.Internals
+{
+ /// <summary>
+ /// UrlPathDecoder unescapes the percent-encoded sequences of a given URL path string.
+ /// </summary>
+ public class UrlPathDecoder
+ {
+ // Strict UTF-8: the exception fallbacks make invalid input throw instead of being substituted.
+ private static readonly Encoding _utf8 = Encoding.GetEncoding(
+     "utf-8",
+     new EncoderExceptionFallback(), new DecoderExceptionFallback());
+
+ /// <summary>
+ /// Unescapes a URL char array in place, treating the whole array as input. Everything is unescaped
+ /// except %2F ('/'); UTF-8 bytes are checked for formatting, overlong encoding, surrogates and value
+ /// ranges; invalid escape sequences are copied as is; the presence of a query string is not checked.
+ /// </summary>
+ /// <param name="buffer">The characters to decode; the result is written back into the same array.</param>
+ /// <returns>The length of the result.</returns>
+ /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
+ public static int DecodeInPlace(char[] buffer)
+ {
+     if (buffer == null)
+     {
+         throw new ArgumentNullException(nameof(buffer));
+     }
+
+     return DecodeInPlace(buffer, buffer.Length);
+ }
+
+ /// <summary>
+ /// Unescapes the first <paramref name="len"/> characters of a URL char array in place. Everything is
+ /// unescaped except %2F ('/'); UTF-8 bytes are checked for formatting, overlong encoding, surrogates and
+ /// value ranges; invalid escape sequences are copied as is; the presence of a query string is not checked.
+ /// </summary>
+ /// <param name="buffer">The characters to decode; the result is written back into the same array.</param>
+ /// <param name="len">The number of characters in <paramref name="buffer"/>, counted from index 0, to decode.</param>
+ /// <returns>The length of the result.</returns>
+ /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
+ /// <exception cref="ArgumentOutOfRangeException"><paramref name="len"/> is negative or exceeds the array length.</exception>
+ public static int DecodeInPlace(char[] buffer, int len)
+ {
+     if (buffer == null)
+     {
+         throw new ArgumentNullException(nameof(buffer));
+     }
+
+     if (len < 0 || len > buffer.Length)
+     {
+         throw new ArgumentOutOfRangeException(nameof(len), "The length must be between 0 and the size of the buffer.");
+     }
+
+     // Nothing is percent-encoded, so the input already is its own decoded form.
+     if (GetNextEncoded(buffer, 0, len) == len)
+     {
+         return len;
+     }
+
+     return DecodeCore(buffer, len);
+ }
+
+ /// <summary>
+ /// Unescapes a URL path string.
+ /// Everything is unescaped except %2F ('/'); UTF-8 bytes are checked for formatting, overlong
+ /// encoding, surrogates and value ranges; invalid escape sequences are copied as is; the
+ /// presence of a query string is not checked.
+ /// </summary>
+ /// <param name="original">The string to be decoded.</param>
+ /// <returns>The decoded result, or the same instance when the input contains no '%'.</returns>
+ /// <exception cref="ArgumentNullException"><paramref name="original"/> is null.</exception>
+ public static string Decode(string original)
+ {
+     if (original == null)
+     {
+         throw new ArgumentNullException(nameof(original));
+     }
+
+     var hasEscape = original.IndexOf('%') >= 0;
+     if (!hasEscape)
+     {
+         // Nothing to decode; hand back the same instance without copying.
+         return original;
+     }
+
+     // Work on a private copy; the decoded text never outgrows it.
+     var scratch = original.ToCharArray();
+     var decodedLength = DecodeCore(scratch, scratch.Length);
+     return new string(scratch, 0, decodedLength);
+ }
+
+ /// <summary>
+ /// Decodes the percent-encoded sequences found in the leading part of a char array, in place.
+ /// </summary>
+ /// <param name="buffer">
+ /// The characters to decode. The array is both input and output: decoded text overwrites
+ /// the encoded text starting at index 0.
+ /// </param>
+ /// <param name="length">
+ /// The number of characters, counted from index 0, that make up the encoded input.
+ /// </param>
+ /// <returns>
+ /// The length of the decoded result.
+ /// </returns>
+ private static int DecodeCore(char[] buffer, int length)
+ {
+     // Read and write cursors; the write cursor never runs ahead of the read cursor.
+     var source = 0;
+     var target = 0;
+
+     // Scratch storage shared by every Unescape call in the loop.
+     var utf8Bytes = new byte[4];
+     var decoded = new char[1];
+     var decodedCount = 0;
+
+     while (source < length)
+     {
+         // Slide the literal run in front of the next '%' down to the write cursor.
+         var escapeStart = GetNextEncoded(buffer, source, length);
+         CopyInPlace(buffer, length, escapeStart - source, ref source, ref target);
+
+         if (source >= length)
+         {
+             break;
+         }
+
+         var consumed = Unescape(buffer, length, escapeStart, utf8Bytes, ref decoded, ref decodedCount);
+
+         if (consumed == 0)
+         {
+             // Not decodable as UTF-8: emit the '%' itself and carry on right after it.
+             CopyInPlace(buffer, length, 1, ref source, ref target);
+             continue;
+         }
+
+         if (decodedCount == 1 && SkipUnescape(decoded[0]))
+         {
+             // Protected character (e.g. '/'): keep the escaped text exactly as written.
+             CopyInPlace(buffer, length, consumed, ref source, ref target);
+             continue;
+         }
+
+         // Store the decoded characters, then step over the escaped text they came from.
+         for (var offset = 0; offset < decodedCount; offset++)
+         {
+             buffer[target + offset] = decoded[offset];
+         }
+
+         target += decodedCount;
+         source += consumed;
+     }
+
+     return target;
+ }
+
+ /// <summary>
+ /// Tells whether an unescaped character has to stay percent-encoded in the output.
+ /// Only '/' qualifies, which is why %2F is the one escape the decoder leaves alone.
+ /// </summary>
+ /// <param name="charactor">The character obtained by unescaping a sequence.</param>
+ private static bool SkipUnescape(char charactor)
+ {
+     return charactor == '/';
+ }
+
+ /// <summary>
+ /// Unescapes one percent-encoded sequence. <paramref name="start"/> must be the index of the '%' that
+ /// begins the sequence in <paramref name="source"/>; nothing at or past <paramref name="sourceBoundary"/>
+ /// is read. <paramref name="bytesBuffer"/> is scratch space for the raw bytes of a multi-byte sequence.
+ /// On success the decoded chars are stored in <paramref name="output"/> (reallocated when too small),
+ /// <paramref name="count"/> is set to how many were stored, and the number of source characters
+ /// consumed, including each '%', is returned.
+ /// Otherwise 0 is returned.
+ /// </summary>
+ private static int Unescape(char[] source, int sourceBoundary, int start, byte[] bytesBuffer, ref char[] output, ref int count)
+ {
+ if (start + 2 >= sourceBoundary)
+ {
+ return 0;
+ }
+ // Decode the two hex digits that follow the '%' into the lead byte.
+ byte firstByte;
+ if (!TryGetUnescapedByte(source, start, out firstByte))
+ {
+ return 0;
+ }
+
+ if (firstByte <= 0x7F)
+ {
+ // first <= U+007F, single byte ASCII
+ if (output.Length < 1)
+ {
+ output = new char[] { (char)firstByte };
+ }
+ else
+ {
+ output[0] = (char)firstByte;
+ }
+
+ count = 1;
+ return 3;
+ }
+
+ // anticipate the byte count from the lead byte; expectValueMin is the smallest code point needing that many bytes
+ int currentDecodeBits = 0;
+ int bytesCount = 1;
+ int expectValueMin = 0;
+ bytesBuffer[0] = (byte)firstByte;
+ if ((firstByte & 0xE0) == 0xC0)
+ {
+ // 110x xxxx, expect 1 more byte
+ currentDecodeBits = firstByte & 0x1F;
+ bytesCount = 2;
+ expectValueMin = 0x80;
+ }
+ else if ((firstByte & 0xF0) == 0xE0)
+ {
+ // 1110 xxxx, expect 2 more bytes
+ currentDecodeBits = firstByte & 0x0F;
+ bytesCount = 3;
+ expectValueMin = 0x800;
+ }
+ else if ((firstByte & 0xF8) == 0xF0)
+ {
+ // 1111 0xxx, expect 3 more bytes
+ currentDecodeBits = firstByte & 0x07;
+ bytesCount = 4;
+ expectValueMin = 0x10000;
+ }
+ else
+ {
+ // invalid lead byte: either a stray continuation byte (10xx xxxx) or 1111 1xxx
+ return 0;
+ }
+
+ if (start + (bytesCount * 3) > sourceBoundary)
+ {
+ // fewer characters remain than the %XX escapes this sequence needs
+ return 0;
+ }
+
+ var remainingBytes = bytesCount - 1;
+ while (remainingBytes > 0)
+ {
+ start += 3;
+ if (source[start] != '%')
+ {
+ return 0;
+ }
+
+ byte v;
+ if (!TryGetUnescapedByte(source, start, out v))
+ {
+ return 0;
+ }
+ // a continuation byte must have the form 10xx xxxx
+ if ((v & 0xC0) != 0x80)
+ {
+ return 0;
+ }
+
+ // append the lower 6 bits of the continuation byte
+ currentDecodeBits = (currentDecodeBits << 6) | (v & 0x3F);
+
+ bytesBuffer[bytesCount - remainingBytes] = v;
+ remainingBytes--;
+
+ if (remainingBytes == 1 && currentDecodeBits >= 0x360 && currentDecodeBits <= 0x37F)
+ {
+ // One byte short of the full value, 0x360-0x37F leads into 0xD800-0xDFFF: UTF-16 surrogates, not allowed in UTF-8.
+ return 0;
+ }
+ if (remainingBytes == 2 && currentDecodeBits >= 0x110)
+ {
+ // Two bytes short of the full value, 0x110 and above leads past the upper Unicode bound 0x10FFFF.
+ return 0;
+ }
+ }
+
+ if (currentDecodeBits < expectValueMin)
+ {
+ // Overlong encoding (e.g. using 2 bytes to encode something that only needed 1).
+ return 0;
+ }
+
+ try
+ {
+ // Convert the collected UTF-8 bytes into chars, growing the output array when it is too small
+ count = _utf8.GetCharCount(bytesBuffer, 0, bytesCount);
+ if (count > output.Length)
+ {
+ output = new char[count];
+ }
+
+ _utf8.GetChars(bytesBuffer, 0, bytesCount, output, 0);
+ return bytesCount * 3;
+ }
+ catch (DecoderFallbackException)
+ {
+ return 0;
+ }
+ }
+
+ /// <summary>
+ /// Copies characters from one position of an array to an earlier (or equal) position of the same array.
+ /// </summary>
+ /// <param name="buffer">The array that is both the source and the destination of the copy.</param>
+ /// <param name="sourceLength">The length of the source sequence; reading never reaches this index.</param>
+ /// <param name="count">The number of characters requested to be copied.</param>
+ /// <param name="readerPosition">
+ /// The index characters are read from. On return it points just past the last character read.
+ /// </param>
+ /// <param name="writerPosition">
+ /// The index characters are written to. On return it points just past the last character written.
+ /// </param>
+ /// <returns>
+ /// The number of characters actually copied. Copying stops early when the reader reaches
+ /// <paramref name="sourceLength"/> or the writer reaches the end of the array.
+ /// </returns>
+ /// <exception cref="ArgumentException">An argument is null, out of range, or the writer is ahead of the reader.</exception>
+ private static int CopyInPlace(char[] buffer, int sourceLength, int count, ref int readerPosition, ref int writerPosition)
+ {
+     if (buffer == null)
+     {
+         throw new ArgumentNullException(nameof(buffer));
+     }
+
+     var capacity = buffer.Length;
+     if (capacity < sourceLength)
+     {
+         throw new ArgumentOutOfRangeException(nameof(sourceLength), "The length of the source sequence can't be longer than the size of the buffer.");
+     }
+
+     var readerInRange = readerPosition >= 0 && readerPosition < sourceLength;
+     if (!readerInRange)
+     {
+         throw new ArgumentOutOfRangeException(nameof(readerPosition), $"The index of the source sequence {readerPosition} is out of range.");
+     }
+
+     var writerInRange = writerPosition >= 0 && writerPosition < capacity;
+     if (!writerInRange)
+     {
+         throw new ArgumentOutOfRangeException(nameof(writerPosition), $"The index of the output sequence {writerPosition} is out of range.");
+     }
+
+     if (readerPosition < writerPosition)
+     {
+         throw new ArgumentException($"The index of output sequence {writerPosition} is behind the read sequence {readerPosition}.");
+     }
+
+     var copied = 0;
+     while (copied < count)
+     {
+         buffer[writerPosition] = buffer[readerPosition];
+         writerPosition++;
+         readerPosition++;
+         copied++;
+
+         // Stop early once either cursor runs off its range; report what was actually moved.
+         if (readerPosition >= sourceLength || writerPosition >= capacity)
+         {
+             return copied;
+         }
+     }
+
+     return count;
+ }
+
+ /// <summary>
+ /// Finds the next '%' in a char array, looking only at
+ /// the indices in the range [start, end).
+ /// </summary>
+ /// <param name="buffer">The array of characters in which the '%' is searched.</param>
+ /// <param name="start">The start (inclusive) of the search range.</param>
+ /// <param name="end">The end (exclusive) of the search range.</param>
+ /// <returns>The index of the first '%', or <paramref name="end"/> if no '%' is found.</returns>
+ private static int GetNextEncoded(char[] buffer, int start, int end)
+ {
+     var index = start;
+     while (index < end && buffer[index] != '%')
+     {
+         index++;
+     }
+
+     // Reaching (or starting beyond) the end of the range means no '%' was seen.
+     return index < end ? index : end;
+ }
+
+ // Reads the two hex digits after the '%' at position; the second is only inspected when the first is valid.
+ private static bool TryGetUnescapedByte(char[] buffer, int position, out byte result)
+ {
+     if (IsHex(buffer[position + 1]) && IsHex(buffer[position + 2]))
+     {
+         var high = HexToDec(buffer[position + 1]);
+         var low = HexToDec(buffer[position + 2]);
+         result = (byte)((high << 4) + low);
+         return true;
+     }
+     result = default(byte);
+     return false;
+ }
+
+ // True for the ASCII hex digits 0-9, A-F and a-f only.
+ private static bool IsHex(char value)
+ {
+     var isDigit = value >= '0' && value <= '9';
+     return isDigit || (value >= 'A' && value <= 'F') || (value >= 'a' && value <= 'f');
+ }
+
+ // Maps a hex digit to its numeric value; meaningful only for characters accepted by IsHex.
+ private static int HexToDec(char value)
+ {
+     if (value > 'F')
+     {
+         return (value - 'a') + 10;
+     }
+
+     if (value > '9')
+     {
+         return (value - 'A') + 10;
+     }
+
+     return value - '0';
+ }
+ }
+}
diff --git a/src/Microsoft.Extensions.WebEncoders.Core/project.json b/src/Microsoft.Extensions.WebEncoders.Core/project.json
index 0da3e708..921c9c75 100644
--- a/src/Microsoft.Extensions.WebEncoders.Core/project.json
+++ b/src/Microsoft.Extensions.WebEncoders.Core/project.json
@@ -15,11 +15,13 @@
"dnxcore50": {
"dependencies": {
"System.ComponentModel": "4.0.1-beta-*",
+ "System.Collections": "4.0.11-beta-*",
"System.Diagnostics.Debug": "4.0.11-beta-*",
"System.IO": "4.0.11-beta-*",
"System.Reflection": "4.0.10-*",
"System.Resources.ResourceManager": "4.0.1-beta-*",
"System.Runtime.Extensions": "4.0.11-beta-*",
+ "System.Text.Encoding": "4.0.11-beta-*",
"System.Threading": "4.0.11-beta-*"
}
}
diff --git a/test/Microsoft.Extensions.WebEncoders.Tests/UrlPathDecoderTests.cs b/test/Microsoft.Extensions.WebEncoders.Tests/UrlPathDecoderTests.cs
new file mode 100644
index 00000000..efc90480
--- /dev/null
+++ b/test/Microsoft.Extensions.WebEncoders.Tests/UrlPathDecoderTests.cs
@@ -0,0 +1,181 @@
+// Copyright (c) .NET Foundation. All rights reserved.
+// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
+
+using System;
+using Microsoft.Extensions.WebEncoders.Internals;
+using Xunit;
+
+namespace Microsoft.Extensions.WebEncoders.Tests
+{
+ public abstract class UrlPathDecoderTestBase
+ {
+     // Hooks supplied by each concrete fixture: the first requires an exact decoded value, the second
+     // only that decoding changed the input, and NegativeAssert that the input came back untouched.
+     protected abstract void PositiveAssert(string raw, string expect);
+     protected abstract void PositiveAssert(string raw);
+     protected abstract void NegativeAssert(string raw);
+
+     [Fact]
+     public void Empty()
+     {
+         PositiveAssert(string.Empty, string.Empty);
+     }
+
+     [Fact]
+     public void WhiteSpace()
+     {
+         PositiveAssert(" ", " ");
+     }
+
+     [Fact]
+     public void ThrowNullArgument()
+     {
+         Assert.Throws<ArgumentNullException>(() => UrlPathDecoder.Decode(null));
+     }
+
+     [Theory]
+     [InlineData("/foo/bar", "/foo/bar")]
+     [InlineData("/foo/BAR", "/foo/BAR")]
+     [InlineData("/foo/", "/foo/")]
+     [InlineData("/", "/")]
+     public void NormalCases(string raw, string expect)
+     {
+         PositiveAssert(raw, expect);
+     }
+
+     [Theory]
+     [InlineData("%2F", "%2F")]
+     [InlineData("/foo%2Fbar", "/foo%2Fbar")]
+     [InlineData("/foo%2F%20bar", "/foo%2F bar")]
+     public void SkipForwardSlash(string raw, string expect)
+     {
+         PositiveAssert(raw, expect);
+     }
+
+     [Theory]
+     [InlineData("%C3%84ra%20Benetton", "Ära Benetton")]
+     [InlineData("%E6%88%91%E8%87%AA%E6%A8%AA%E5%88%80%E5%90%91%E5%A4%A9%E7%AC%91%E5%8E%BB%E7%95%99%E8%82%9D%E8%83%86%E4%B8%A4%E6%98%86%E4%BB%91", "我自横刀向天笑去留肝胆两昆仑")]
+     public void Internationalized(string raw, string expect)
+     {
+         PositiveAssert(raw, expect);
+     }
+
+     [Theory]
+     [InlineData("%D0%A4", "Ф")]
+     [InlineData("%d0%a4", "Ф")]
+     [InlineData("%E0%A4%AD", "भ")]
+     [InlineData("%e0%A4%Ad", "भ")]
+     [InlineData("%F0%A4%AD%A2", "𤭢")]
+     [InlineData("%F0%a4%Ad%a2", "𤭢")]
+     [InlineData("%48%65%6C%6C%6F%20%57%6F%72%6C%64", "Hello World")]
+     [InlineData("%48%65%6C%6C%6F%2D%C2%B5%40%C3%9F%C3%B6%C3%A4%C3%BC%C3%A0%C3%A1", "Hello-µ@ßöäüàá")]
+     // The borderline shortest-form cases live in the single-argument overload below.
+     public void ValidUTF8(string raw, string expect)
+     {
+         PositiveAssert(raw, expect);
+     }
+
+     [Theory]
+     // Borderline cases next to the overlong range: smallest 2-, 3- and 4-byte forms, largest 2-byte form.
+     [InlineData("%C2%80")]
+     [InlineData("%DF%BF")]
+     [InlineData("%E0%A0%80")]
+     [InlineData("%F0%90%80%80")]
+     public void ValidUTF8(string raw)
+     {
+         PositiveAssert(raw);
+     }
+
+     [Theory]
+     // Overlong ASCII
+     [InlineData("%C0%A4")]
+     [InlineData("%C1%BF")]
+     [InlineData("%E0%80%AF")]
+     [InlineData("%E0%9F%BF")]
+     [InlineData("%F0%80%80%AF")]
+     [InlineData("%F0%8F%8F%BF")]
+     // Incomplete
+     [InlineData("%")]
+     [InlineData("%%")]
+     [InlineData("%A")]
+     [InlineData("%Y")]
+     // [InlineData("http://xn--9zt52a.example.org/%e2%80%ae")]
+     public void InvalidUTF8(string raw)
+     {
+         NegativeAssert(raw);
+     }
+ }
+
+ /// <summary>
+ /// Runs the shared decoder cases against the string-returning UrlPathDecoder.Decode entry point,
+ /// comparing the returned string with the raw input or the expected text.
+ /// </summary>
+ public class UrlPathDecoderTests : UrlPathDecoderTestBase
+ {
+     // The decoded string must be exactly the expected one.
+     protected override void PositiveAssert(string raw, string expect)
+         => Assert.Equal(expect, UrlPathDecoder.Decode(raw));
+
+     // Decoding must change the input in some way.
+     protected override void PositiveAssert(string raw)
+         => Assert.NotEqual(raw, UrlPathDecoder.Decode(raw));
+
+     // Invalid sequences must be left untouched.
+     protected override void NegativeAssert(string raw)
+         => Assert.Equal(raw, UrlPathDecoder.Decode(raw));
+ }
+
+ public class UrlPathInPlaceDecoderTests : UrlPathDecoderTestBase
+ {
+     // The decoded prefix of the buffer must match expect in both length and content.
+     protected override void PositiveAssert(string raw, string expect)
+     {
+         var buf = raw.ToCharArray();
+         var len = UrlPathDecoder.DecodeInPlace(buf);
+
+         Assert.Equal(expect.Length, len);
+         Assert.Equal(expect.ToCharArray(), new ArraySegment<char>(buf, 0, len));
+     }
+
+     // Decoding must shorten the input; the decoded content itself is not pinned.
+     protected override void PositiveAssert(string raw)
+     {
+         var buf = raw.ToCharArray();
+         var len = UrlPathDecoder.DecodeInPlace(buf);
+
+         Assert.NotEqual(raw.Length, len);
+     }
+
+     // Invalid input must keep its length and leave every character of the buffer as it was.
+     protected override void NegativeAssert(string raw)
+     {
+         var buf = raw.ToCharArray();
+         var len = UrlPathDecoder.DecodeInPlace(buf);
+
+         Assert.Equal(raw.Length, len);
+         Assert.Equal(raw.ToCharArray(), buf);
+     }
+
+     [Theory]
+     [InlineData("/foo%2Fbar", 10, "/foo%2Fbar", 10)]
+     [InlineData("/foo%2Fbar", 9, "/foo%2Fba", 9)]
+     [InlineData("/foo%2Fbar", 8, "/foo%2Fb", 8)]
+     [InlineData("%D0%A4", 6, "Ф", 1)]
+     [InlineData("%D0%A4", 5, "%D0%A", 5)]
+     [InlineData("%D0%A4", 4, "%D0%", 4)]
+     [InlineData("%D0%A4", 3, "%D0", 3)]
+     [InlineData("%D0%A4", 2, "%D", 2)]
+     [InlineData("%D0%A4", 1, "%", 1)]
+     [InlineData("%D0%A4", 0, "", 0)]
+     [InlineData("%C2%B5%40%C3%9F%C3%B6%C3%A4%C3%BC%C3%A0%C3%A1", 45, "µ@ßöäüàá", 8)]
+     [InlineData("%C2%B5%40%C3%9F%C3%B6%C3%A4%C3%BC%C3%A0%C3%A1", 44, "µ@ßöäüà%C3%A", 12)]
+     public void DecodeWithBoundary(string raw, int rawLength, string expect, int expectLength)
+     {
+         var buf = raw.ToCharArray();
+         var len = UrlPathDecoder.DecodeInPlace(buf, rawLength);
+         // Only the first rawLength chars are input; only the first expectLength chars of buf are compared.
+         Assert.Equal(expectLength, len);
+         Assert.Equal(expect.ToCharArray(), new ArraySegment<char>(buf, 0, expectLength));
+     }
+ }
+}
\ No newline at end of file
diff --git a/test/Microsoft.Extensions.WebEncoders.Tests/project.json b/test/Microsoft.Extensions.WebEncoders.Tests/project.json
index 87d9e98e..f35c5911 100644
--- a/test/Microsoft.Extensions.WebEncoders.Tests/project.json
+++ b/test/Microsoft.Extensions.WebEncoders.Tests/project.json
@@ -15,6 +15,7 @@
"dnx451": { },
"dnxcore50": {
"dependencies": {
+ "System.Text.Encoding": "4.0.11-beta-*",
"System.Text.Encoding.Extensions": "4.0.11-beta-*"
}
}