Skip to content

Commit 6eb7cee

Browse files
committed
Make URL strategy better at recognizing URLs. (#873)
1 parent a5d2005 commit 6eb7cee

File tree

9 files changed

+418
-123
lines changed

9 files changed

+418
-123
lines changed

pkgs/path/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
## 1.9.2-wip
22

3+
- Make `Url` style better at recognizing schemes and authorities.
4+
Only consider a path as being a schemed URL if it starts with a
5+
valid scheme.
6+
Recognize `#` and `?` as ending an authority or path. Remove
7+
queries and fragments when normalizing, include them
8+
in/as the last segment when splitting.
9+
310
## 1.9.1
411

512
- Require Dart 3.4

pkgs/path/lib/src/characters.dart

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,24 @@
22
// for details. All rights reserved. Use of this source code is governed by a
33
// BSD-style license that can be found in the LICENSE file.
44

5-
/// This library contains character-code definitions.
5+
// Character-code constants.
6+
7+
const hash = 0x23;
8+
const percent = 0x25;
69
const plus = 0x2b;
710
const minus = 0x2d;
811
const period = 0x2e;
912
const slash = 0x2f;
1013
const zero = 0x30;
1114
const nine = 0x39;
1215
const colon = 0x3a;
16+
const question = 0x3f;
1317
const upperA = 0x41;
1418
const upperZ = 0x5a;
1519
const lowerA = 0x61;
20+
const lowerE = 0x65;
21+
const lowerF = 0x66;
22+
const lowerI = 0x69;
23+
const lowerL = 0x6c;
1624
const lowerZ = 0x7a;
1725
const backslash = 0x5c;

pkgs/path/lib/src/context.dart

Lines changed: 66 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -372,18 +372,22 @@ class Context {
372372
/// Canonicalizes [path].
373373
///
374374
/// This is guaranteed to return the same path for two different input paths
375-
/// if and only if both input paths point to the same location. Unlike
375+
/// only if both input paths point to the same location. Unlike
376376
/// [normalize], it returns absolute paths when possible and canonicalizes
377-
/// ASCII case on Windows.
377+
/// ASCII case on Windows, and scheme and authority case for URLs (but does
378+
/// not normalize or canonicalize `%`-escapes).
378379
///
379380
/// Note that this does not resolve symlinks.
380381
///
381382
/// If you want a map that uses path keys, it's probably more efficient to use
382383
/// a Map with [equals] and [hash] specified as the callbacks to use for keys
383384
/// than it is to canonicalize every key.
385+
///
384386
String canonicalize(String path) {
385387
path = absolute(path);
386-
if (style != Style.windows && !_needsNormalization(path)) return path;
388+
// Windows and URL styles need to case-canonicalize, even if it doesn't
389+
// need to normalize anything.
390+
if (style == Style.posix && !_needsNormalization(path)) return path;
387391

388392
final parsed = _parse(path);
389393
parsed.normalize(canonicalize: true);
@@ -395,7 +399,7 @@ class Context {
395399
///
396400
/// Note that this is *not* guaranteed to return the same result for two
397401
/// equivalent input paths. For that, see [canonicalize]. Or, if you're using
398-
/// paths as map keys use [equals] and [hash] as the key callbacks.
402+
/// paths as map keys, use [equals] and [hash] as the key callbacks.
399403
///
400404
/// context.normalize('path/./to/..//file.text'); // -> 'path/file.txt'
401405
String normalize(String path) {
@@ -408,68 +412,76 @@ class Context {
408412

409413
/// Returns whether [path] needs to be normalized.
410414
bool _needsNormalization(String path) {
411-
var start = 0;
412-
final codeUnits = path.codeUnits;
413-
int? previousPrevious;
414-
int? previous;
415+
// Empty paths are normalized to ".".
416+
if (path.isEmpty) return true;
417+
418+
// At start, no previous separator.
419+
const stateStart = 0;
420+
421+
// Previous character was a separator.
422+
const stateSeparator = 1;
423+
424+
// Added to state for each `.` seen.
425+
const stateDotCount = 2;
426+
427+
// Path segment that contains anything other than nothing, `.` or `..`.
428+
//
429+
// Includes any value at or above this one.
430+
const stateNotDots = 6;
431+
432+
// Current state of the last few characters.
433+
//
434+
// Seeing a separator resets to [stateSeparator].
435+
// Seeing a `.` adds [stateDotCount].
436+
// Seeing any other character, or more than two dots, will
437+
// bring the value to or above [stateNotDots].
438+
// (The separator may be optional at the start, seeing one is fine,
439+
// and seeing dots will start counting.)
440+
// (That is, `/` has value 1, `/.` value 3, `/..` value 5, and anything
441+
// else is 6 or above, except at the very start where empty path, `.`
442+
// and `..` have values 0, 2 and 4.)
443+
var state = stateStart;
415444

416445
// Skip past the root before we start looking for snippets that need
417446
// normalization. We want to normalize "//", but not when it's part of
418447
// "http://".
419-
final root = style.rootLength(path);
420-
if (root != 0) {
421-
start = root;
422-
previous = chars.slash;
423-
448+
final start = style.rootLength(path);
449+
if (start != 0) {
450+
if (style.isSeparator(path.codeUnitAt(start - 1))) {
451+
state = stateSeparator;
452+
}
424453
// On Windows, the root still needs to be normalized if it contains a
425454
// forward slash.
426455
if (style == Style.windows) {
427-
for (var i = 0; i < root; i++) {
428-
if (codeUnits[i] == chars.slash) return true;
456+
for (var i = 0; i < start; i++) {
457+
if (path.codeUnitAt(i) == chars.slash) return true;
429458
}
430459
}
431460
}
432461

433-
for (var i = start; i < codeUnits.length; i++) {
434-
final codeUnit = codeUnits[i];
462+
for (var i = start; i < path.length; i++) {
463+
final codeUnit = path.codeUnitAt(i);
435464
if (style.isSeparator(codeUnit)) {
465+
// If this separator ends an empty, `.` or `..` path segment.
466+
if (state >= stateSeparator && state < stateNotDots) return true;
436467
// Forward slashes in Windows paths are normalized to backslashes.
437468
if (style == Style.windows && codeUnit == chars.slash) return true;
438-
439-
// Multiple separators are normalized to single separators.
440-
if (previous != null && style.isSeparator(previous)) return true;
441-
442-
// Single dots and double dots are normalized to directory traversals.
443-
//
444-
// This can return false positives for ".../", but that's unlikely
445-
// enough that it's probably not going to cause performance issues.
446-
if (previous == chars.period &&
447-
(previousPrevious == null ||
448-
previousPrevious == chars.period ||
449-
style.isSeparator(previousPrevious))) {
469+
state = stateSeparator;
470+
} else if (codeUnit == chars.period) {
471+
state += stateDotCount;
472+
} else {
473+
state = stateNotDots;
474+
if (style == Style.url &&
475+
(codeUnit == chars.question || codeUnit == chars.hash)) {
476+
// Normalize away `?` query parts and `#` fragment parts in URL
477+
// styled paths.
450478
return true;
451479
}
452480
}
453-
454-
previousPrevious = previous;
455-
previous = codeUnit;
456-
}
457-
458-
// Empty paths are normalized to ".".
459-
if (previous == null) return true;
460-
461-
// Trailing separators are removed.
462-
if (style.isSeparator(previous)) return true;
463-
464-
// Single dots and double dots are normalized to directory traversals.
465-
if (previous == chars.period &&
466-
(previousPrevious == null ||
467-
style.isSeparator(previousPrevious) ||
468-
previousPrevious == chars.period)) {
469-
return true;
470481
}
471482

472-
return false;
483+
// Otherwise only normalize if the path ends in a separator or in a `.` or `..` segment.
484+
return state >= stateSeparator && state < stateNotDots;
473485
}
474486

475487
/// Attempts to convert [path] to an equivalent relative path relative to
@@ -1020,7 +1032,9 @@ class Context {
10201032
/// Returns the path represented by [uri], which may be a [String] or a [Uri].
10211033
///
10221034
/// For POSIX and Windows styles, [uri] must be a `file:` URI. For the URL
1023-
/// style, this will just convert [uri] to a string.
1035+
/// style, this will just convert [uri] to a string, but if the input was
1036+
/// a string, it will be parsed and normalized as a [Uri] first.
1037+
///
10241038
///
10251039
/// // POSIX
10261040
/// context.fromUri('file:///path/to/foo')
@@ -1036,7 +1050,11 @@ class Context {
10361050
///
10371051
/// If [uri] is relative, a relative path will be returned.
10381052
///
1053+
/// // POSIX
10391054
/// path.fromUri('path/to/foo'); // -> 'path/to/foo'
1055+
///
1056+
/// // Windows
1057+
/// path.fromUri('/C:/foo'); // -> r'C:\foo'
10401058
String fromUri(Object? uri) => style.pathFromUri(_parseUri(uri!));
10411059

10421060
/// Returns the URI that represents [path].

pkgs/path/lib/src/internal_style.dart

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,21 @@ abstract class InternalStyle extends Style {
3232
/// "usr", an additional "/" is needed (making "file:///usr").
3333
bool needsSeparator(String path);
3434

35-
/// Returns the number of characters of the root part.
35+
/// The number of characters of the root part.
3636
///
3737
/// Returns 0 if the path is relative and 1 if the path is root-relative.
3838
///
3939
/// If [withDrive] is `true`, this should include the drive letter for `file:`
4040
/// URLs. Non-URL styles may ignore the parameter.
41+
///
42+
/// For URL style, the root includes any URI scheme and host/authority part.
43+
/// If either of those is included, a leading `/` of the path is not
44+
/// included unless it's followed by a drive letter. A slash after a
45+
/// drive letter is included. If a URL style path starts with a single `/`,
46+
/// it is included in the root.
47+
/// If a URL style path has a scheme, but no authority, and the path
48+
/// does not start with a `/`, the first path segment of the path is
49+
/// considered part of the root.
4150
int rootLength(String path, {bool withDrive = false});
4251

4352
/// Gets the root prefix of [path] if path is absolute. If [path] is relative,

pkgs/path/lib/src/parsed_path.dart

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
// for details. All rights reserved. Use of this source code is governed by a
33
// BSD-style license that can be found in the LICENSE file.
44

5+
import 'characters.dart' as chars;
56
import 'internal_style.dart';
67
import 'style.dart';
8+
import 'utils.dart' show endOfScheme, removeQueryFragment;
79

810
class ParsedPath {
911
/// The [InternalStyle] that was used to parse this path.
@@ -35,7 +37,7 @@ class ParsedPath {
3537
/// one.
3638
String extension([int level = 1]) => _splitExtension(level)[1];
3739

38-
/// `true` if this is an absolute path.
40+
/// Whether this is an absolute path.
3941
bool get isAbsolute => root != null;
4042

4143
factory ParsedPath.parse(String path, InternalStyle style) {
@@ -58,11 +60,17 @@ class ParsedPath {
5860
}
5961

6062
for (var i = start; i < path.length; i++) {
61-
if (style.isSeparator(path.codeUnitAt(i))) {
63+
final codeUnit = path.codeUnitAt(i);
64+
if (style.isSeparator(codeUnit)) {
6265
parts.add(path.substring(start, i));
6366
separators.add(path[i]);
6467
start = i + 1;
6568
}
69+
if (style == Style.url &&
70+
(codeUnit == chars.question || codeUnit == chars.hash)) {
71+
// Include `?` and `#` in final path segment.
72+
break;
73+
}
6674
}
6775

6876
// Add the final part, if any.
@@ -101,6 +109,13 @@ class ParsedPath {
101109
// Handle '.', '..', and empty parts.
102110
var leadingDoubles = 0;
103111
final newParts = <String>[];
112+
if (style == Style.url && parts.isNotEmpty) {
113+
parts.last = removeQueryFragment(parts.last);
114+
if (canonicalize && endOfScheme(parts.first, 0) > 0) {
115+
// Normalize scheme and authority.
116+
parts.first = parts.first.toLowerCase();
117+
}
118+
}
104119
for (var part in parts) {
105120
if (part == '.' || part == '') {
106121
// Do nothing. Ignore it.
@@ -131,14 +146,21 @@ class ParsedPath {
131146
parts = newParts;
132147
separators =
133148
List.filled(newParts.length + 1, style.separator, growable: true);
134-
if (!isAbsolute || newParts.isEmpty || !style.needsSeparator(root!)) {
149+
150+
final root = this.root;
151+
if (root == null || newParts.isEmpty || !style.needsSeparator(root)) {
135152
separators[0] = '';
136153
}
137154

138-
// Normalize the Windows root if needed.
139-
if (root != null && style == Style.windows) {
140-
if (canonicalize) root = root!.toLowerCase();
141-
root = root!.replaceAll('/', '\\');
155+
if (root != null) {
156+
if (style == Style.windows) {
157+
// Normalize the Windows root if needed.
158+
final canonRoot = canonicalize ? root.toLowerCase() : root;
159+
this.root = canonRoot.replaceAll('/', r'\');
160+
} else if (canonicalize && style == Style.url) {
161+
// Canonicalize the URL scheme and authority.
162+
this.root = root.toLowerCase();
163+
}
142164
}
143165
removeTrailingSeparators();
144166
}

0 commit comments

Comments
 (0)