@@ -30,18 +30,39 @@ public class UrlPathDecoder
30
30
/// </param>
31
31
/// <returns>The length of the result.</returns>
32
32
public static int DecodeInPlace(char[] buffer)
{
    // Validate here instead of relying on the two-argument overload:
    // evaluating buffer.Length on a null array would raise a
    // NullReferenceException before that overload's own null check runs.
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer));
    }

    // The whole array is the sequence to decode.
    return DecodeInPlace(buffer, buffer.Length);
}
36
+
37
/// <summary>
/// Unescape a url char array in place. Returns the length of the result.
///
/// - Everything is unescaped except %2F ('/')
/// - UTF8 bytes are tested for formatting, overlong encoding, surrogates and value ranges
/// - Invalid escaped sequences are copied to output as is
/// - It doesn't check if the string contains query
/// </summary>
/// <param name="buffer">
/// The char array containing the sequence of characters to be decoded. The
/// result will be saved in the same array.
/// </param>
/// <param name="len">
/// The length of the sequence of characters in buffer to be decoded.
/// Must be between 0 and the size of <paramref name="buffer"/>, inclusive.
/// </param>
/// <returns>The length of the result.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="buffer"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="len"/> is negative or larger than the size of <paramref name="buffer"/>.
/// </exception>
public static int DecodeInPlace(char[] buffer, int len)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer));
    }

    // Reject an impossible length up front. Without this check a negative
    // value would be handed back unchanged as the "result length", and a
    // value past the end of the array would only fail later, inside the
    // scanning or copying helpers, with a less helpful error.
    if (len < 0 || len > buffer.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(len), "The length of the sequence must be between 0 and the size of the buffer.");
    }

    // Fast path: no '%' anywhere in the sequence means nothing to decode.
    if (GetNextEncoded(buffer, 0, len) == len)
    {
        return len;
    }

    return DecodeCore(buffer, len);
}
46
67
47
68
/// <summary>
@@ -69,60 +90,72 @@ public static string Decode(string original)
69
90
var buffer = original . ToCharArray ( ) ;
70
91
71
92
// decode in place
72
- var len = DecodeCore ( buffer , buffer ) ;
93
+ var len = DecodeCore ( buffer , buffer . Length ) ;
73
94
return new string ( buffer , 0 , len ) ;
74
95
}
75
96
76
97
/// <summary>
/// Decodes, in place, the first <paramref name="length"/> characters of a char array.
/// </summary>
/// <param name="buffer">
/// The characters to decode. The array is read and overwritten by the same
/// pass, so it serves as both input and output.
/// </param>
/// <param name="length">
/// How many characters at the start of <paramref name="buffer"/> form the input sequence.
/// </param>
/// <returns>
/// The number of characters written, i.e. the length of the decoded result.
/// </returns>
private static int DecodeCore(char[] buffer, int length)
{
    // Scratch storage handed to Unescape on every escape sequence.
    var decoded = new char[1];
    var decodedCount = 0;
    var utf8Bytes = new byte[4];

    // Read cursor and write cursor into the same array.
    var read = 0;
    var write = 0;

    while (read < length)
    {
        // Shift the literal run in front of the next '%' down to the write cursor.
        var percentIndex = GetNextEncoded(buffer, read, length);
        CopyInPlace(buffer, length, percentIndex - read, ref read, ref write);

        if (read >= length)
        {
            break;
        }

        var consumed = Unescape(buffer, length, percentIndex, utf8Bytes, ref decoded, ref decodedCount);

        if (consumed == 0)
        {
            // The sequence after '%' could not be decoded: keep the '%' itself
            // and carry on scanning from the following character.
            CopyInPlace(buffer, length, 1, ref read, ref write);
            continue;
        }

        if (decodedCount == 1 && SkipUnescape(decoded[0]))
        {
            // The decoded character is one that must stay escaped (eg. '/'),
            // so the original escaped text is kept verbatim.
            CopyInPlace(buffer, length, consumed, ref read, ref write);
            continue;
        }

        // Emit the decoded characters, then step over the escaped text.
        for (var i = 0; i < decodedCount; ++i)
        {
            buffer[write++] = decoded[i];
        }

        read += consumed;
    }

    return write;
}
127
160
128
161
private static bool SkipUnescape ( char charactor )
@@ -144,9 +177,9 @@ private static bool SkipUnescape(char charactor)
144
177
/// - The length of the sequence, including the % character, will be returned.
145
178
/// Otherwise 0 is returned.
146
179
/// </summary>
147
- private static int Unescape ( char [ ] source , int start , byte [ ] bytesBuffer , ref char [ ] output , ref int count )
180
+ private static int Unescape ( char [ ] source , int sourceBoundary , int start , byte [ ] bytesBuffer , ref char [ ] output , ref int count )
148
181
{
149
- if ( start + 2 >= source . Length )
182
+ if ( start + 2 >= sourceBoundary )
150
183
{
151
184
return 0 ;
152
185
}
@@ -205,7 +238,7 @@ private static int Unescape(char[] source, int start, byte[] bytesBuffer, ref ch
205
238
return 0 ;
206
239
}
207
240
208
- if ( start + ( bytesCount * 3 ) > source . Length )
241
+ if ( start + ( bytesCount * 3 ) > sourceBoundary )
209
242
{
210
243
// less than expected bytes to decode
211
244
return 0 ;
@@ -274,39 +307,88 @@ private static int Unescape(char[] source, int start, byte[] bytesBuffer, ref ch
274
307
}
275
308
276
309
/// <summary>
/// Moves a run of characters from one position to another inside the same array.
/// </summary>
/// <param name="buffer">
/// The array that is both read from and written to.
/// </param>
/// <param name="sourceLength">
/// The length of the source sequence held at the start of <paramref name="buffer"/>.
/// </param>
/// <param name="count">
/// The number of characters requested to be copied.
/// </param>
/// <param name="readerPosition">
/// The index to start reading from. On return it points just past the last
/// character that was read.
/// </param>
/// <param name="writerPosition">
/// The index to start writing to; it must not be ahead of
/// <paramref name="readerPosition"/>. On return it points just past the last
/// character that was written.
/// </param>
/// <returns>
/// The number of characters actually copied. This is smaller than
/// <paramref name="count"/> when the reader reaches <paramref name="sourceLength"/>
/// or the writer reaches the end of <paramref name="buffer"/> first.
/// </returns>
private static int CopyInPlace(char[] buffer, int sourceLength, int count, ref int readerPosition, ref int writerPosition)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer));
    }

    if (sourceLength > buffer.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(sourceLength), "The length of the source sequence can't be longer than the size of the buffer.");
    }

    if (readerPosition < 0 || readerPosition >= sourceLength)
    {
        throw new ArgumentOutOfRangeException(nameof(readerPosition), $"The index of the source sequence {readerPosition} is out of range.");
    }

    if (writerPosition < 0 || writerPosition >= buffer.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(writerPosition), $"The index of the output sequence {writerPosition} is out of range.");
    }

    if (writerPosition > readerPosition)
    {
        throw new ArgumentException($"The index of output sequence {writerPosition} is behind the read sequence {readerPosition} .");
    }

    var copied = 0;
    while (copied < count)
    {
        buffer[writerPosition] = buffer[readerPosition];
        writerPosition++;
        readerPosition++;
        copied++;

        // Stop early once either cursor has run off its range and report
        // how many characters were moved so far.
        if (readerPosition >= sourceLength || writerPosition >= buffer.Length)
        {
            return copied;
        }
    }

    return count;
}
295
373
296
374
/// <summary>
/// Locates the first '%' within the half-open range [start, end) of a char array.
/// </summary>
/// <param name="buffer">The characters to scan.</param>
/// <param name="start">Index of the first character to examine.</param>
/// <param name="end">Index one past the last character to examine.</param>
/// <returns>
/// The index of the first '%' in the range, or <paramref name="end"/> when the
/// range contains none.
/// </returns>
private static int GetNextEncoded(char[] buffer, int start, int end)
{
    var index = start;
    while (index < end && buffer[index] != '%')
    {
        index++;
    }

    // Falling out of the range (including an empty or inverted range)
    // reports the end marker rather than the cursor.
    return index < end ? index : end;
}
311
393
312
394
private static bool TryGetUnescapedByte ( char [ ] buffer , int position , out byte result )
0 commit comments