From b89dadc8506a614c76e1c991ae8a89a2ae3fcdb5 Mon Sep 17 00:00:00 2001 From: "Lasse R.H. Nielsen" Date: Mon, 24 Mar 2025 16:08:04 +0100 Subject: [PATCH 1/4] Make URL strategy better at recognizing URLs. More precise scheme detection, more precise authority detection, more precise drive-letter detection. Generally recognizes that a path ends at `#` or `?`, and doesn't parse into that. Recognizes only valid schemes as schemes. Ends authority at `/`, `#` or `?` or end-of-path, not just at `/`. Allows drive letter ended by `#` or `?` or end-of-path, not just `/`. --- pkgs/path/lib/src/characters.dart | 9 ++- pkgs/path/lib/src/style/url.dart | 97 ++++++++++++++++++++++++------- pkgs/path/test/url_test.dart | 22 +++++++ 3 files changed, 107 insertions(+), 21 deletions(-) diff --git a/pkgs/path/lib/src/characters.dart b/pkgs/path/lib/src/characters.dart index a08354314..5df1688d3 100644 --- a/pkgs/path/lib/src/characters.dart +++ b/pkgs/path/lib/src/characters.dart @@ -2,7 +2,9 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. -/// This library contains character-code definitions. +// Character-code constants. 
+ +const hash = 0x23; const plus = 0x2b; const minus = 0x2d; const period = 0x2e; @@ -10,8 +12,13 @@ const slash = 0x2f; const zero = 0x30; const nine = 0x39; const colon = 0x3a; +const question = 0x3f; const upperA = 0x41; const upperZ = 0x5a; const lowerA = 0x61; const lowerZ = 0x7a; const backslash = 0x5c; + +bool isLetter(int char) => lowerA <= (char |= 0x20) && char <= lowerZ; + +bool isDigit(int char) => char ^ zero <= 9; diff --git a/pkgs/path/lib/src/style/url.dart b/pkgs/path/lib/src/style/url.dart index a2d3b0cef..4d734149f 100644 --- a/pkgs/path/lib/src/style/url.dart +++ b/pkgs/path/lib/src/style/url.dart @@ -43,32 +43,89 @@ class UrlStyle extends InternalStyle { return path.endsWith('://') && rootLength(path) == path.length; } + /// Checks if [path] starts with `"file:"`, case insensitively. + static bool _startsWithFileColon(String path) { + if (path.length < 5) return false; + const f = 0x66; + const i = 0x69; + const l = 0x6c; + const e = 0x65; + return path.codeUnitAt(4) == chars.colon && + (path.codeUnitAt(0) | 0x20) == f && + (path.codeUnitAt(1) | 0x20) == i && + (path.codeUnitAt(2) | 0x20) == l && + (path.codeUnitAt(3) | 0x20) == e; + } + @override int rootLength(String path, {bool withDrive = false}) { if (path.isEmpty) return 0; - if (isSeparator(path.codeUnitAt(0))) return 1; - - for (var i = 0; i < path.length; i++) { - final codeUnit = path.codeUnitAt(i); - if (isSeparator(codeUnit)) return 0; - if (codeUnit == chars.colon) { - if (i == 0) return 0; - - // The root part is up until the next '/', or the full path. Skip ':' - // (and '//' if it exists) and search for '/' after that. - if (path.startsWith('//', i + 1)) i += 3; - final index = path.indexOf('/', i); - if (index <= 0) return path.length; - - // file: URLs sometimes consider Windows drive letters part of the root. - // See https://url.spec.whatwg.org/#file-slash-state. 
- if (!withDrive || path.length < index + 3) return index; - if (!path.startsWith('file://')) return index; - return driveLetterEnd(path, index + 1) ?? index; + if (withDrive && _startsWithFileColon(path)) { + return _rootAuthorityLength(path, 5, withDrive: true); + } + final firstChar = path.codeUnitAt(0); + if (chars.isLetter(firstChar)) { + // Check if starting with scheme or drive letter. + for (var i = 1; i < path.length; i++) { + final codeUnit = path.codeUnitAt(i); + if (chars.isLetter(codeUnit) || + chars.isDigit(codeUnit) || + codeUnit == chars.plus || + codeUnit == chars.minus || + codeUnit == chars.period) { + continue; + } + if (codeUnit == chars.colon) { + return _rootAuthorityLength(path, i + 1, withDrive: false); + } + break; } + return 0; } + return _rootAuthorityLength(path, 0, withDrive: false); + } - return 0; + /// Checks for authority part at start or after scheme. + /// + /// If found, includes this in the root length. + /// + /// Includes an authority starting at `//` until the next `/`, `?` or `#`, + /// or the end of the path. + int _rootAuthorityLength(String path, int index, {required bool withDrive}) { + if (path.startsWith('//', index)) { + index += 2; + while (true) { + if (index == path.length) return index; + final codeUnit = path.codeUnitAt(index); + if (codeUnit == chars.question || codeUnit == chars.hash) return index; + index++; + if (isSeparator(codeUnit)) break; + } + } + if (withDrive) return _withDrive(path, index); + return index; + } + + /// Checks for `[a-z]:/`, or `[a-z]:` when followed by `?` or `#` or nothing. + /// + /// If found, includes this in the root length. 
+ int _withDrive(String path, int index) { + final afterDrive = index + 2; + if (path.length < afterDrive || + !chars.isLetter(path.codeUnitAt(index)) || + path.codeUnitAt(index + 1) != chars.colon) { + return index; + } + if (path.length == afterDrive) return afterDrive; + final nextChar = path.codeUnitAt(afterDrive); + if (nextChar == chars.slash) { + // Include following slash in root. + return afterDrive + 1; + } + if (nextChar == chars.question || nextChar == chars.hash) { + return afterDrive; + } + return index; } @override diff --git a/pkgs/path/test/url_test.dart b/pkgs/path/test/url_test.dart index df4e58232..b3b459bc9 100644 --- a/pkgs/path/test/url_test.dart +++ b/pkgs/path/test/url_test.dart @@ -167,6 +167,19 @@ void main() { expect(context.isRelative(r'package:foo/bar.dart'), false); expect(context.isRelative('foo/bar:baz/qux'), true); expect(context.isRelative(r'\\a'), true); + expect(context.isRelative('/c:/a'), false); + expect(context.isRelative('file:///c:/a'), false); + expect(context.isRelative('/c:/'), false); + expect(context.isRelative('file:///c:/'), false); + expect(context.isRelative('a2:a'), false); + expect(context.isRelative('a+:a'), false); + expect(context.isRelative('a-:a'), false); + expect(context.isRelative('a.:a'), false); + expect(context.isRelative('2:a'), true); + expect(context.isRelative('+:a'), true); + expect(context.isRelative('-:a'), true); + expect(context.isRelative('.:a'), true); + expect(context.isRelative(':a/'), true); }); test('isRootRelative', () { @@ -192,6 +205,11 @@ void main() { expect(context.isRootRelative(r'package:foo/bar.dart'), false); expect(context.isRootRelative('foo/bar:baz/qux'), false); expect(context.isRootRelative(r'\\a'), false); + expect(context.isRootRelative('/c:/a'), true); + expect(context.isRootRelative('file:///c:/a'), false); + expect(context.isRootRelative('/c:/'), true); + expect(context.isRootRelative('file:///c:/'), false); + expect(context.isRootRelative('//c:/'), false); }); 
group('join', () { @@ -232,6 +250,10 @@ void main() { context.join('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p'), 'a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p'); + + for (final absolute in ['a:/', '/a', '//a']) { + expect(context.join('a', absolute), absolute); + } }); test('does not add separator if a part ends in one', () { From 254ba485ca326d9012592e666ccd55b6aba05793 Mon Sep 17 00:00:00 2001 From: "Lasse R.H. Nielsen" Date: Mon, 24 Mar 2025 16:44:58 +0100 Subject: [PATCH 2/4] WIP --- pkgs/path/CHANGELOG.md | 7 ++ pkgs/path/lib/src/characters.dart | 9 +- pkgs/path/lib/src/context.dart | 114 ++++++++++++---------- pkgs/path/lib/src/internal_style.dart | 11 ++- pkgs/path/lib/src/parsed_path.dart | 34 +++++-- pkgs/path/lib/src/style/url.dart | 130 +++++++++++--------------- pkgs/path/lib/src/utils.dart | 128 +++++++++++++++++++------ pkgs/path/test/url_test.dart | 124 +++++++++++++++++++++--- pkgs/path/test/windows_test.dart | 12 +-- 9 files changed, 389 insertions(+), 180 deletions(-) diff --git a/pkgs/path/CHANGELOG.md b/pkgs/path/CHANGELOG.md index 0cdc59f2b..0216c30e5 100644 --- a/pkgs/path/CHANGELOG.md +++ b/pkgs/path/CHANGELOG.md @@ -1,5 +1,12 @@ ## 1.9.2-wip +- Make `Url` style better at recognizing schemes and authorities. + Only consider a path as being a schemed URL if it starts with a + valid scheme. + Recognize `#` and `?` as ending an authority or path. Remove + queries and fragments when normalizing, include them + in/as the last segment when splitting. + ## 1.9.1 - Require Dart 3.4 diff --git a/pkgs/path/lib/src/characters.dart b/pkgs/path/lib/src/characters.dart index 5df1688d3..b592b8870 100644 --- a/pkgs/path/lib/src/characters.dart +++ b/pkgs/path/lib/src/characters.dart @@ -5,6 +5,7 @@ // Character-code constants. 
const hash = 0x23; +const percent = 0x25; const plus = 0x2b; const minus = 0x2d; const period = 0x2e; @@ -16,9 +17,9 @@ const question = 0x3f; const upperA = 0x41; const upperZ = 0x5a; const lowerA = 0x61; +const lowerE = 0x65; +const lowerF = 0x66; +const lowerI = 0x69; +const lowerL = 0x6c; const lowerZ = 0x7a; const backslash = 0x5c; - -bool isLetter(int char) => lowerA <= (char |= 0x20) && char <= lowerZ; - -bool isDigit(int char) => char ^ zero <= 9; diff --git a/pkgs/path/lib/src/context.dart b/pkgs/path/lib/src/context.dart index 452333212..461d77d4d 100644 --- a/pkgs/path/lib/src/context.dart +++ b/pkgs/path/lib/src/context.dart @@ -372,18 +372,22 @@ class Context { /// Canonicalizes [path]. /// /// This is guaranteed to return the same path for two different input paths - /// if and only if both input paths point to the same location. Unlike + /// only if both input paths point to the same location. Unlike /// [normalize], it returns absolute paths when possible and canonicalizes - /// ASCII case on Windows. + /// ASCII case on Windows, and scheme and authority case for URLs (but does + /// not normalize or canonicalize `%`-escapes.) /// /// Note that this does not resolve symlinks. /// /// If you want a map that uses path keys, it's probably more efficient to use /// a Map with [equals] and [hash] specified as the callbacks to use for keys /// than it is to canonicalize every key. + /// String canonicalize(String path) { path = absolute(path); - if (style != Style.windows && !_needsNormalization(path)) return path; + // Windows and URL styles need to case-canonicalize, even if it doesn't + // need to normalize anything. + if (style == Style.posix && !_needsNormalization(path)) return path; final parsed = _parse(path); parsed.normalize(canonicalize: true); @@ -395,7 +399,7 @@ class Context { /// /// Note that this is *not* guaranteed to return the same result for two /// equivalent input paths. For that, see [canonicalize]. 
Or, if you're using - /// paths as map keys use [equals] and [hash] as the key callbacks. + /// paths as map keys, use [equals] and [hash] as the key callbacks. /// /// context.normalize('path/./to/..//file.text'); // -> 'path/file.txt' String normalize(String path) { @@ -408,68 +412,76 @@ class Context { /// Returns whether [path] needs to be normalized. bool _needsNormalization(String path) { - var start = 0; - final codeUnits = path.codeUnits; - int? previousPrevious; - int? previous; + // Empty paths are normalized to ".". + if (path.isEmpty) return true; + + /// At start, no previous separator. + const stateStart = 0; + + /// Previous character was a separator. + const stateSeparator = 1; + + /// Added to state for each `.` seen. + const stateDotCount = 2; + + /// Path segment that contains anything other than nothing, `.` or `..`. + /// + /// Includes any value at or above this one. + const stateNotDots = 6; + + /// Current state of the last few characters. + /// + /// Seeing a separator resets to [stateSeparator]. + /// Seeing a `.` adds [stateDotCount]. + /// Seeing any non-separator or more than two dots will + /// bring the value to or above [stateNotDots]. + /// (The separator may be optional at the start, seeing one is fine, + /// and seeing dots will start counting.) + /// (That is, `/` has value 1, `/.` value 3, `/..` value 5, and anything + /// else is 6 or above, except at the very start where empty path, `.` + /// and `..` have values 0, 2 and 4.) + var state = stateStart; // Skip past the root before we start looking for snippets that need // normalization. We want to normalize "//", but not when it's part of // "http://". - final root = style.rootLength(path); - if (root != 0) { - start = root; - previous = chars.slash; - + final start = style.rootLength(path); + if (start != 0) { + if (style.isSeparator(path.codeUnitAt(start - 1))) { + state = stateSeparator; + } // On Windows, the root still needs to be normalized if it contains a // forward slash. 
if (style == Style.windows) { - for (var i = 0; i < root; i++) { - if (codeUnits[i] == chars.slash) return true; + for (var i = 0; i < start; i++) { + if (path.codeUnitAt(i) == chars.slash) return true; } } } - for (var i = start; i < codeUnits.length; i++) { - final codeUnit = codeUnits[i]; + for (var i = start; i < path.length; i++) { + final codeUnit = path.codeUnitAt(i); if (style.isSeparator(codeUnit)) { + // If ending empty, `.` or `..` path segment. + if (state >= stateSeparator && state < stateNotDots) return true; // Forward slashes in Windows paths are normalized to backslashes. if (style == Style.windows && codeUnit == chars.slash) return true; - - // Multiple separators are normalized to single separators. - if (previous != null && style.isSeparator(previous)) return true; - - // Single dots and double dots are normalized to directory traversals. - // - // This can return false positives for ".../", but that's unlikely - // enough that it's probably not going to cause performance issues. - if (previous == chars.period && - (previousPrevious == null || - previousPrevious == chars.period || - style.isSeparator(previousPrevious))) { + state = stateSeparator; + } else if (codeUnit == chars.period) { + state += stateDotCount; + } else { + state = stateNotDots; + if (style == Style.url && + (codeUnit == chars.question || codeUnit == chars.hash)) { + // Normalize away `?` query parts and `#` fragment parts in URL + // styled paths. return true; } } - - previousPrevious = previous; - previous = codeUnit; } - // Empty paths are normalized to ".". - if (previous == null) return true; - - // Trailing separators are removed. - if (style.isSeparator(previous)) return true; - - // Single dots and double dots are normalized to directory traversals. 
- if (previous == chars.period && - (previousPrevious == null || - style.isSeparator(previousPrevious) || - previousPrevious == chars.period)) { - return true; - } - - return false; + // Otherwise only normalize if there are separators and single/double dots. + return state >= stateSeparator && state < stateNotDots; } /// Attempts to convert [path] to an equivalent relative path relative to @@ -1020,7 +1032,9 @@ class Context { /// Returns the path represented by [uri], which may be a [String] or a [Uri]. /// /// For POSIX and Windows styles, [uri] must be a `file:` URI. For the URL - /// style, this will just convert [uri] to a string. + /// style, this will just convert [uri] to a string, but if the input was + /// a string, it will be parsed and normalized as a [Uri] first. + /// /// /// // POSIX /// context.fromUri('file:///path/to/foo') @@ -1036,7 +1050,11 @@ class Context { /// /// If [uri] is relative, a relative path will be returned. /// + /// // POSIX /// path.fromUri('path/to/foo'); // -> 'path/to/foo' + /// + /// // Windows + /// path.fromUri('/C:/foo'); // -> r'C:\foo' String fromUri(Object? uri) => style.pathFromUri(_parseUri(uri!)); /// Returns the URI that represents [path]. diff --git a/pkgs/path/lib/src/internal_style.dart b/pkgs/path/lib/src/internal_style.dart index 71762c16e..39ba9cd55 100644 --- a/pkgs/path/lib/src/internal_style.dart +++ b/pkgs/path/lib/src/internal_style.dart @@ -32,12 +32,21 @@ abstract class InternalStyle extends Style { /// "usr", an additional "/" is needed (making "file:///usr"). bool needsSeparator(String path); - /// Returns the number of characters of the root part. + /// The number of characters of the root part. /// /// Returns 0 if the path is relative and 1 if the path is root-relative. /// /// If [withDrive] is `true`, this should include the drive letter for `file:` /// URLs. Non-URL styles may ignore the parameter. + /// + /// For URL style, the root includes any URI scheme and host/authority part. 
+ /// If either of those are included, a leading `/` of the path is not + /// included unless it's followed by a drive letter. A slash after a + /// drive letter is included. If a URL style path starts with a single `/`, + /// it is included in the root. + /// If a URL style path has a scheme, but not authority, and the path + /// does not start with a `/`, the first path segment of the path is + /// considered part of the root. int rootLength(String path, {bool withDrive = false}); /// Gets the root prefix of [path] if path is absolute. If [path] is relative, diff --git a/pkgs/path/lib/src/parsed_path.dart b/pkgs/path/lib/src/parsed_path.dart index 60fa84917..a765052b6 100644 --- a/pkgs/path/lib/src/parsed_path.dart +++ b/pkgs/path/lib/src/parsed_path.dart @@ -2,8 +2,10 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. +import 'characters.dart' as chars; import 'internal_style.dart'; import 'style.dart'; +import 'utils.dart' show endOfScheme, removeQueryFragment; class ParsedPath { /// The [InternalStyle] that was used to parse this path. @@ -58,11 +60,17 @@ class ParsedPath { } for (var i = start; i < path.length; i++) { - if (style.isSeparator(path.codeUnitAt(i))) { + final codeUnit = path.codeUnitAt(i); + if (style.isSeparator(codeUnit)) { parts.add(path.substring(start, i)); separators.add(path[i]); start = i + 1; } + if (style == Style.url && + (codeUnit == chars.question || codeUnit == chars.hash)) { + // Include `?` and `#` in final path segment. + break; + } } // Add the final part, if any. @@ -101,6 +109,13 @@ class ParsedPath { // Handle '.', '..', and empty parts. var leadingDoubles = 0; final newParts = []; + if (style == Style.url && parts.isNotEmpty) { + parts.last = removeQueryFragment(parts.last); + if (canonicalize && endOfScheme(parts.first, 0) > 0) { + // Normalize scheme and authority. 
+ parts.first = parts.first.toLowerCase(); + } + } for (var part in parts) { if (part == '.' || part == '') { // Do nothing. Ignore it. @@ -131,14 +146,21 @@ class ParsedPath { parts = newParts; separators = List.filled(newParts.length + 1, style.separator, growable: true); - if (!isAbsolute || newParts.isEmpty || !style.needsSeparator(root!)) { + + final root = this.root; + if (root == null || newParts.isEmpty || !style.needsSeparator(root)) { separators[0] = ''; } - // Normalize the Windows root if needed. - if (root != null && style == Style.windows) { - if (canonicalize) root = root!.toLowerCase(); - root = root!.replaceAll('/', '\\'); + if (root != null) { + if (style == Style.windows) { + // Normalize the Windows root if needed. + final canonRoot = canonicalize ? root.toLowerCase() : root; + this.root = canonRoot.replaceAll('/', r'\'); + } else if (canonicalize && style == Style.url) { + // Canonicalize the URL scheme and authority. + this.root = root.toLowerCase(); + } } removeTrailingSeparators(); } diff --git a/pkgs/path/lib/src/style/url.dart b/pkgs/path/lib/src/style/url.dart index 4d734149f..0e427a909 100644 --- a/pkgs/path/lib/src/style/url.dart +++ b/pkgs/path/lib/src/style/url.dart @@ -43,94 +43,72 @@ class UrlStyle extends InternalStyle { return path.endsWith('://') && rootLength(path) == path.length; } - /// Checks if [path] starts with `"file:"`, case insensitively. - static bool _startsWithFileColon(String path) { - if (path.length < 5) return false; - const f = 0x66; - const i = 0x69; - const l = 0x6c; - const e = 0x65; - return path.codeUnitAt(4) == chars.colon && - (path.codeUnitAt(0) | 0x20) == f && - (path.codeUnitAt(1) | 0x20) == i && - (path.codeUnitAt(2) | 0x20) == l && - (path.codeUnitAt(3) | 0x20) == e; - } - @override int rootLength(String path, {bool withDrive = false}) { if (path.isEmpty) return 0; - if (withDrive && _startsWithFileColon(path)) { - return _rootAuthorityLength(path, 5, withDrive: true); + // Find scheme. 
Recognize `file:` scheme specifically, + // since it is required for `withDrive` to apply. + final int afterScheme; + if (withDrive && startsWithFileColon(path)) { + // Check for authority, then drive letter. + const fileSchemeLength = 'file:'.length; + afterScheme = fileSchemeLength; + } else { + withDrive = false; + afterScheme = endOfScheme(path, 0); } - final firstChar = path.codeUnitAt(0); - if (chars.isLetter(firstChar)) { - // Check if starting with scheme or drive letter. - for (var i = 1; i < path.length; i++) { - final codeUnit = path.codeUnitAt(i); - if (chars.isLetter(codeUnit) || - chars.isDigit(codeUnit) || - codeUnit == chars.plus || - codeUnit == chars.minus || - codeUnit == chars.period) { - continue; - } - if (codeUnit == chars.colon) { - return _rootAuthorityLength(path, i + 1, withDrive: false); - } - break; - } - return 0; - } - return _rootAuthorityLength(path, 0, withDrive: false); - } - - /// Checks for authority part at start or after scheme. - /// - /// If found, includes this in the root length. - /// - /// Includes an authority starting at `//` until the next `/`, `?` or `#`, - /// or the end of the path. - int _rootAuthorityLength(String path, int index, {required bool withDrive}) { - if (path.startsWith('//', index)) { - index += 2; - while (true) { - if (index == path.length) return index; - final codeUnit = path.codeUnitAt(index); - if (codeUnit == chars.question || codeUnit == chars.hash) return index; - index++; - if (isSeparator(codeUnit)) break; + // If there is a scheme, include authority if any. + // If no scheme, a leading `//` is considered part of a path, unlike + // how a URI would parse it. (For backwards compatibility) + final afterAuthority = + afterScheme > 0 ? authorityEnd(path, afterScheme) : 0; + + // If no scheme and no authority, include leading `/` in root. + // If scheme or authority, do not include a first `/` of the path in root. 
+ // If scheme and no authority, include first segment of path in root, + // but do not look for drive letters (even if scheme is `file:`). + // If `file:` scheme and authority (`//` or `//...`), and `withDrive`, + // look for drive letter at start of path (after first slash). + + if (afterAuthority == path.length) return afterAuthority; + final nextChar = path.codeUnitAt(afterAuthority); + final int afterPathRoot; + if (isSeparator(nextChar)) { + afterPathRoot = afterAuthority + 1; + if (withDrive && afterAuthority > afterScheme) { + final afterDriveLetter = driveLetterEnd(path, afterPathRoot); + if (afterDriveLetter > afterPathRoot) return afterDriveLetter; } + if (afterAuthority == 0) return afterPathRoot; + return afterAuthority; } - if (withDrive) return _withDrive(path, index); - return index; - } - - /// Checks for `[a-z]:/`, or `[a-z]:` when followed by `?` or `#` or nothing. - /// - /// If found, includes this in the root length. - int _withDrive(String path, int index) { - final afterDrive = index + 2; - if (path.length < afterDrive || - !chars.isLetter(path.codeUnitAt(index)) || - path.codeUnitAt(index + 1) != chars.colon) { - return index; + if (afterAuthority > afterScheme) { + // Has authority, not followed by `/`, so empty path. + // Character after authority must be `#` or `?`, or end of path. + return afterAuthority; } - if (path.length == afterDrive) return afterDrive; - final nextChar = path.codeUnitAt(afterDrive); - if (nextChar == chars.slash) { - // Include following slash in root. - return afterDrive + 1; - } - if (nextChar == chars.question || nextChar == chars.hash) { - return afterDrive; + if (afterScheme > 0) { + // If scheme, no authority, no leading `/` in path, include next segment + // in root (and not `/`). 
+ var char = nextChar; + var i = afterAuthority; + while ( + char != chars.hash && char != chars.question && char != chars.slash) { + i++; + if (i == path.length) break; + char = path.codeUnitAt(i); + } + return i; // Never checks for drive letter after non-authority. } - return index; + // No scheme, no authority, path does not start with `/`. + return 0; } @override bool isRootRelative(String path) => - path.isNotEmpty && isSeparator(path.codeUnitAt(0)); + path.isNotEmpty && + isSeparator(path.codeUnitAt(0)) && + (path.length < 2 || !isSeparator(path.codeUnitAt(1))); @override String? getRelativeRoot(String path) => isRootRelative(path) ? '/' : null; diff --git a/pkgs/path/lib/src/utils.dart b/pkgs/path/lib/src/utils.dart index 7c01312db..43f7dc2a4 100644 --- a/pkgs/path/lib/src/utils.dart +++ b/pkgs/path/lib/src/utils.dart @@ -4,45 +4,119 @@ import 'characters.dart' as chars; -/// Returns whether [char] is the code for an ASCII letter (uppercase or -/// lowercase). +/// Whether [char] is the code for an ASCII letter (uppercase or lowercase). bool isAlphabetic(int char) => - (char >= chars.upperA && char <= chars.upperZ) || - (char >= chars.lowerA && char <= chars.lowerZ); + chars.lowerA <= (char |= 0x20) && char <= chars.lowerZ; -/// Returns whether [char] is the code for an ASCII digit. -bool isNumeric(int char) => char >= chars.zero && char <= chars.nine; +/// Whether [char] is the code for an ASCII digit. +bool isNumeric(int char) => char ^ chars.zero <= 9; -/// Returns whether [path] has a URL-formatted Windows drive letter beginning at +/// Whether [path] has a URL-formatted Windows drive letter beginning at /// [index]. bool isDriveLetter(String path, int index) => - driveLetterEnd(path, index) != null; + driveLetterEnd(path, index) != index; -/// Returns the index of the first character after the drive letter or a -/// URL-formatted path, or `null` if [index] is not the start of a drive letter. 
-/// A valid drive letter must be followed by a colon and then either a `/` or -/// the end of string. +/// Index after drive letter starting at [index], or [index] if none. +/// +/// The [path] is a URI-formed path. +/// A valid drive letter must be followed by a colon and then either a `/`, +/// a `#`, a `?` or the end of the path. /// /// ``` /// d:/abc => 3 /// d:/ => 3 /// d: => 2 -/// d => null +/// d:# => 2 +/// d:? => 2 +/// d => 0 +/// d:a => 0 /// ``` -int? driveLetterEnd(String path, int index) { - if (path.length < index + 2) return null; - if (!isAlphabetic(path.codeUnitAt(index))) return null; - if (path.codeUnitAt(index + 1) != chars.colon) { - // If not a raw colon, check for escaped colon - if (path.length < index + 4) return null; - if (path.substring(index + 1, index + 4).toLowerCase() != '%3a') { - return null; +/// Same with every `:` replaced by `%3a` or `%3A`, and then the +/// returned numbers would be increased by 2. +int driveLetterEnd(String path, int index) { + if (path.length < index + 2) return index; + if (!isAlphabetic(path.codeUnitAt(index))) return index; + final colonChar = path.codeUnitAt(index + 1); + final int indexAfter; + if (colonChar == chars.colon) { + indexAfter = index + 2; // After ':' + } else if (colonChar == chars.percent && + path.length >= index + 4 && + path.codeUnitAt(index + 2) == (chars.zero + 3) && + (path.codeUnitAt(index + 3) | 0x20) == chars.lowerA) { + indexAfter = index + 4; // After '%3A'. + } else { + return index; + } + if (path.length == indexAfter) return indexAfter; + final nextChar = path.codeUnitAt(indexAfter); + if (nextChar == chars.slash) return indexAfter + 1; + if (nextChar == chars.hash || nextChar == chars.question) return indexAfter; + return index; +} + +/// Position after a leading URL scheme in [path], if any. +/// +/// If [path] starts with a valid URL scheme at position [index], then returns +/// the index after the following colon, otherwise returns [index]. 
+int endOfScheme(String path, int index) { + if (index >= path.length) return index; + final firstChar = path.codeUnitAt(index); + if (!isAlphabetic(firstChar)) return index; + for (var i = index + 1; i < path.length; i++) { + final codeUnit = path.codeUnitAt(i); + if (!isAlphabetic(codeUnit) && + !isNumeric(codeUnit) && + codeUnit != chars.plus && + codeUnit != chars.minus && + codeUnit != chars.period) { + if (codeUnit == chars.colon) { + return i + 1; + } + break; + } + } + return index; +} + +/// Checks if [path] starts with `"file:"`, case insensitively. +bool startsWithFileColon(String path) { + if (path.length < 5) return false; + return path.length >= 5 && + path.codeUnitAt(4) == chars.colon && + (path.codeUnitAt(0) | 0x20) == chars.lowerF && + (path.codeUnitAt(1) | 0x20) == chars.lowerI && + (path.codeUnitAt(2) | 0x20) == chars.lowerL && + (path.codeUnitAt(3) | 0x20) == chars.lowerE; +} + +/// Position after a URI authority part at [index], if any. +/// +/// The [path] must be a URI formatted text. +/// +/// If an authority part is found, meaning that [path] starts with `//` at +/// [index], the result is the position of the first +/// non-authority character, which must be one of `/`, `?` or `#`, +/// or the end of the `path`. +/// Otherwise the result is [index]. +int authorityEnd(String path, int index) { + if (!path.startsWith('//', index)) return index; + index += 2; + while (index < path.length) { + final codeUnit = path.codeUnitAt(index); + if (codeUnit == chars.question || codeUnit == chars.hash) break; + if (codeUnit == chars.slash) break; + index++; + } + return index; +} + +String removeQueryFragment(String pathSegment) { + for (var i = 0; i < pathSegment.length; i++) { + final codeUnit = pathSegment.codeUnitAt(i); + if (codeUnit == chars.question || codeUnit == chars.hash) { + return pathSegment.substring(0, i); } - // Offset the index to account for the extra 2 characters from the - // colon encoding. 
- index += 2; } - if (path.length == index + 2) return index + 2; - if (path.codeUnitAt(index + 2) != chars.slash) return null; - return index + 3; + return pathSegment; } diff --git a/pkgs/path/test/url_test.dart b/pkgs/path/test/url_test.dart index b3b459bc9..1d2a71679 100644 --- a/pkgs/path/test/url_test.dart +++ b/pkgs/path/test/url_test.dart @@ -30,17 +30,50 @@ void main() { expect(context.rootPrefix(''), ''); expect(context.rootPrefix('a'), ''); expect(context.rootPrefix('a/b'), ''); + expect(context.rootPrefix('/'), '/'); + expect(context.rootPrefix('//'), '/'); + expect(context.rootPrefix('///'), '/'); + expect(context.rootPrefix('/a'), '/'); + expect(context.rootPrefix('?/a'), ''); + expect(context.rootPrefix('#/a'), ''); expect(context.rootPrefix('https://dart.dev/a/c'), 'https://dart.dev'); + expect(context.rootPrefix('https://dart.dev?a/c'), 'https://dart.dev'); + expect(context.rootPrefix('https://dart.dev#a/c'), 'https://dart.dev'); expect(context.rootPrefix('file:///a/c'), 'file://'); expect(context.rootPrefix('/a/c'), '/'); expect(context.rootPrefix('https://dart.dev/'), 'https://dart.dev'); expect(context.rootPrefix('file:///'), 'file://'); - expect(context.rootPrefix('https://dart.dev'), 'https://dart.dev'); expect(context.rootPrefix('file://'), 'file://'); - expect(context.rootPrefix('/'), '/'); + expect(context.rootPrefix('file:/'), 'file:'); + expect(context.rootPrefix('file:'), 'file:'); + expect(context.rootPrefix('file:foo'), 'file:foo'); + expect(context.rootPrefix('file:foo/'), 'file:foo'); + expect(context.rootPrefix('https://dart.dev'), 'https://dart.dev'); expect(context.rootPrefix('foo/bar://'), ''); expect(context.rootPrefix('package:foo/bar.dart'), 'package:foo'); - expect(context.rootPrefix('foo/bar:baz/qux'), ''); + expect(context.rootPrefix('a+-.09:baz/qux'), 'a+-.09:baz'); + expect(context.rootPrefix('A+-.09:/baz/qux'), 'A+-.09:'); + expect(context.rootPrefix('+a:baz/qux'), ''); + expect(context.rootPrefix('-a:baz/qux'), ''); + 
expect(context.rootPrefix('.a:baz/qux'), ''); + expect(context.rootPrefix('0a:baz/qux'), ''); + expect(context.rootPrefix('9a:baz/qux'), ''); + + // Do not include fragments or queries. + expect(context.rootPrefix('file:#a'), 'file:'); + expect(context.rootPrefix('file:?a'), 'file:'); + expect(context.rootPrefix('file:foo#a'), 'file:foo'); + expect(context.rootPrefix('file:foo?a'), 'file:foo'); + expect(context.rootPrefix('file:#///'), 'file:'); + expect(context.rootPrefix('file:?///'), 'file:'); + expect(context.rootPrefix('file:/#a'), 'file:'); + expect(context.rootPrefix('file:/?a'), 'file:'); + expect(context.rootPrefix('file://#a'), 'file://'); + expect(context.rootPrefix('file://?a'), 'file://'); + expect(context.rootPrefix('file://example.com#a'), 'file://example.com'); + expect(context.rootPrefix('file://example.com?a'), 'file://example.com'); + expect(context.rootPrefix('foo#bar:baz/qux'), ''); + expect(context.rootPrefix('foo?bar:baz/qux'), ''); }); test('dirname', () { @@ -414,7 +447,6 @@ void main() { expect(context.split('foo/'), equals(['foo'])); expect(context.split('https://dart.dev//'), equals(['https://dart.dev'])); expect(context.split('file:////'), equals(['file://'])); - expect(context.split('//'), equals(['/'])); }); test('includes the root for absolute paths', () { @@ -428,9 +460,24 @@ void main() { expect(context.split('file:///'), equals(['file://'])); expect(context.split('file://'), equals(['file://'])); expect(context.split('/'), equals(['/'])); + expect(context.split('//'), equals(['/'])); + expect(context.split('//a'), equals(['/', 'a'])); + expect(context.split('//a/'), equals(['/', 'a'])); + expect(context.split('//a/b'), equals(['/', 'a', 'b'])); }); }); + test('includes all queries and fragments in last segment', () { + expect(context.split('https://dart.dev/foo/bar/baz#42/x'), + equals(['https://dart.dev', 'foo', 'bar', 'baz#42/x'])); + expect(context.split('file:///foo/bar/baz?42/x'), + equals(['file://', 'foo', 'bar', 
'baz?42/x'])); + expect(context.split('https://dart.dev/foo/bar/baz/#42/x'), + equals(['https://dart.dev', 'foo', 'bar', 'baz', '#42/x'])); + expect(context.split('file:///foo/bar/baz/?42/x'), + equals(['file://', 'foo', 'bar', 'baz', '?42/x'])); + }); + group('normalize', () { test('simple cases', () { expect(context.normalize(''), '.'); @@ -448,6 +495,10 @@ void main() { expect(context.normalize(r'\\'), r'\\'); expect(context.normalize('a/./\xc5\u0bf8-;\u{1f085}\u{00}/c/d/../'), 'a/\xc5\u0bf8-;\u{1f085}\u{00}/c'); + expect(context.normalize(r'a#b'), r'a'); + expect(context.normalize(r'a/b#c/d'), r'a/b'); + expect(context.normalize(r'a?b'), r'a'); + expect(context.normalize(r'a/b?c/d#e/f'), r'a/b'); }); test('collapses redundant separators', () { @@ -504,6 +555,21 @@ void main() { expect(context.normalize('a/bc/../d'), 'a/d'); }); + test('eliminates queries and fragments', () { + expect(context.normalize('r/a/../b?c/.././/d'), 'r/b'); + expect(context.normalize('r/a/../b#c/.././/d'), 'r/b'); + expect(context.normalize('scheme:r/a/../b?c/.././/d'), 'scheme:r/b'); + expect(context.normalize('scheme:r/a/../b#c/.././/d'), 'scheme:r/b'); + expect(context.normalize('scheme://auth/r/a/../b?c/.././/d'), + 'scheme://auth/r/b'); + expect(context.normalize('scheme://auth/r/a/../b#c/.././/d'), + 'scheme://auth/r/b'); + expect( + context.normalize('file:///c:/r/a/../b?c/.././/d'), 'file:///c:/r/b'); + expect( + context.normalize('file:///c:/r/a/../b#c/.././/d'), 'file:///c:/r/b'); + }); + test('does not walk before root on absolute paths', () { expect(context.normalize('..'), '..'); expect(context.normalize('../'), '..'); @@ -536,14 +602,48 @@ void main() { expect(context.normalize('a/b///'), 'a/b'); }); - test('when canonicalizing', () { - expect(context.canonicalize('.'), 'https://dart.dev/root/path'); - expect(context.canonicalize('foo/bar'), - 'https://dart.dev/root/path/foo/bar'); - expect(context.canonicalize('FoO'), 'https://dart.dev/root/path/FoO'); - 
expect(context.canonicalize('/foo'), 'https://dart.dev/foo'); - expect(context.canonicalize('http://google.com/foo'), - 'http://google.com/foo'); + group('when canonicalizing', () { + test('adds scheme', () { + expect(context.canonicalize('.'), 'https://dart.dev/root/path'); + expect(context.canonicalize('foo/bar'), + 'https://dart.dev/root/path/foo/bar'); + expect(context.canonicalize('FoO'), 'https://dart.dev/root/path/FoO'); + expect(context.canonicalize('/foo'), 'https://dart.dev/foo'); + expect(context.canonicalize('http://google.com/foo'), + 'http://google.com/foo'); + }); + + test('eliminates queries and fragments', () { + // Adds scheme and path if relative. + expect(context.canonicalize('r/a/../b?c/.././/d'), + 'https://dart.dev/root/path/r/b'); + expect(context.canonicalize('r/a/../b#c/.././/d'), + 'https://dart.dev/root/path/r/b'); + // Adds scheme if root relative. + expect(context.canonicalize('/r/a/../b?c/.././/d'), + 'https://dart.dev/r/b'); + expect(context.canonicalize('/r/a/../b#c/.././/d'), + 'https://dart.dev/r/b'); + expect(context.canonicalize('scheme:r/a/../b?c/.././/d'), 'scheme:r/b'); + expect(context.canonicalize('scheme:r/a/../b#c/.././/d'), 'scheme:r/b'); + expect(context.canonicalize('scheme://auth/r/a/../b?c/.././/d'), + 'scheme://auth/r/b'); + expect(context.canonicalize('scheme://auth/r/a/../b#c/.././/d'), + 'scheme://auth/r/b'); + expect(context.canonicalize('file:///c:/r/a/../b?c/.././/d'), + 'file:///c:/r/b'); + expect(context.canonicalize('file:///c:/r/a/../b#c/.././/d'), + 'file:///c:/r/b'); + }); + + test('case-canonicalizes scheme and authority', () { + expect(context.canonicalize('HTTPS://EXAMPLE.COM/FILE.EXT'), + 'https://example.com/FILE.EXT'); + expect( + context.canonicalize('FILE:///C:/FILE.EXT'), 'file:///c:/FILE.EXT'); + expect(context.canonicalize('PACKAGE:FOO//FILE.EXT'), + 'package:foo/FILE.EXT'); + }); }); }); diff --git a/pkgs/path/test/windows_test.dart b/pkgs/path/test/windows_test.dart index 
180f56004..3808354bd 100644 --- a/pkgs/path/test/windows_test.dart +++ b/pkgs/path/test/windows_test.dart @@ -880,9 +880,9 @@ void main() { }); test('driveLetterEnd', () { - expect(driveLetterEnd('', 0), null); - expect(driveLetterEnd('foo.dart', 0), null); - expect(driveLetterEnd('@', 0), null); + expect(driveLetterEnd('', 0), 0); + expect(driveLetterEnd('foo.dart', 0), 0); + expect(driveLetterEnd('@', 0), 0); expect(driveLetterEnd('c:', 0), 2); @@ -899,8 +899,8 @@ void main() { expect(driveLetterEnd('c%3A/a', 0), 5); // non-drive letter - expect(driveLetterEnd('ab:/c', 0), null); - expect(driveLetterEnd('ab%3a/c', 0), null); - expect(driveLetterEnd('ab%3A/c', 0), null); + expect(driveLetterEnd('ab:/c', 0), 0); + expect(driveLetterEnd('ab%3a/c', 0), 0); + expect(driveLetterEnd('ab%3A/c', 0), 0); }); } From 93ca6701c61e8794f52bb24ccce79130cf73c062 Mon Sep 17 00:00:00 2001 From: "Lasse R.H. Nielsen" Date: Tue, 25 Mar 2025 15:27:21 +0100 Subject: [PATCH 3/4] Format. --- pkgs/path/lib/src/internal_style.dart | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/path/lib/src/internal_style.dart b/pkgs/path/lib/src/internal_style.dart index 39ba9cd55..c1589c27d 100644 --- a/pkgs/path/lib/src/internal_style.dart +++ b/pkgs/path/lib/src/internal_style.dart @@ -45,7 +45,7 @@ abstract class InternalStyle extends Style { /// drive letter is included. If a URL style path starts with a single `/`, /// it is included in the root. /// If a URL style path has a scheme, but not authority, and the path - /// does not start with a `/`, the first path segment of the path is + /// does not start with a `/`, the first path segment of the path is /// considered part of the root. int rootLength(String path, {bool withDrive = false}); From cb48ed06bd67c235bf50f70e8809011b5196240c Mon Sep 17 00:00:00 2001 From: "Lasse R.H. Nielsen" Date: Mon, 31 Mar 2025 12:09:01 +0200 Subject: [PATCH 4/4] Address comments. 
--- pkgs/path/lib/src/context.dart | 34 +++++++++++++++--------------- pkgs/path/lib/src/parsed_path.dart | 2 +- pkgs/path/lib/src/utils.dart | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pkgs/path/lib/src/context.dart b/pkgs/path/lib/src/context.dart index 461d77d4d..fef8bb64c 100644 --- a/pkgs/path/lib/src/context.dart +++ b/pkgs/path/lib/src/context.dart @@ -415,31 +415,31 @@ class Context { // Empty paths are normalized to ".". if (path.isEmpty) return true; - /// At start, no previous separator. + // At start, no previous separator. const stateStart = 0; - /// Previous character was a separator. + // Previous character was a separator. const stateSeparator = 1; - /// Added to state for each `.` seen. + // Added to state for each `.` seen. const stateDotCount = 2; - /// Path segment that contains anything other than nothing, `.` or `..`. - /// - /// Includes any value at or above this one. + // Path segment that contains anything other than nothing, `.` or `..`. + // + // Includes any value at or above this one. const stateNotDots = 6; - /// Current state of the last few characters. - /// - /// Seeing a separator resets to [stateSeparator]. - /// Seeing a `.` adds [stateDotCount]. - /// Seeing any non-separator or more than two dots will - /// bring the value above [stateNotDots]. - /// (The separator may be optional at the start, seeing one is fine, - /// and seeing dots will start counting.) - /// (That is, `/` has value 1, `/.` value 3, ``/..` value 5, and anything - /// else is 6 or above, except at the very start where empty path, `.` - /// and `..` have values 0, 2 and 4.) + // Current state of the last few characters. + // + // Seeing a separator resets to [stateSeparator]. + // Seeing a `.` adds [stateDotCount]. + // Seeing any non-separator or more than two dots will + // bring the value above [stateNotDots]. + // (The separator may be optional at the start, seeing one is fine, + // and seeing dots will start counting.) 
+ // (That is, `/` has value 1, `/.` value 3, `/..` value 5, and anything + // else is 6 or above, except at the very start where empty path, `.` + // and `..` have values 0, 2 and 4.) var state = stateStart; // Skip past the root before we start looking for snippets that need diff --git a/pkgs/path/lib/src/parsed_path.dart b/pkgs/path/lib/src/parsed_path.dart index a765052b6..b8f93feb0 100644 --- a/pkgs/path/lib/src/parsed_path.dart +++ b/pkgs/path/lib/src/parsed_path.dart @@ -37,7 +37,7 @@ class ParsedPath { /// one. String extension([int level = 1]) => _splitExtension(level)[1]; - /// `true` if this is an absolute path. + /// Whether this is an absolute path. bool get isAbsolute => root != null; factory ParsedPath.parse(String path, InternalStyle style) { diff --git a/pkgs/path/lib/src/utils.dart b/pkgs/path/lib/src/utils.dart index 43f7dc2a4..92d5951fd 100644 --- a/pkgs/path/lib/src/utils.dart +++ b/pkgs/path/lib/src/utils.dart @@ -16,7 +16,7 @@ bool isNumeric(int char) => char ^ chars.zero <= 9; bool isDriveLetter(String path, int index) => driveLetterEnd(path, index) != index; -/// Index after drive letter starting at [index], or [index] if none.path +/// Index after drive letter starting at [index], or [index] if none. /// /// The [path] is a URI-formed path. /// A valid drive letter must be followed by a colon and then either a `/`,