Skip to content

Make URL strategy better at recognizing URLs. #873

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pkgs/path/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
## 1.9.2-wip

- Make `Url` style better at recognizing schemes and authorities.
Only consider a path as being a schemed URL if it starts with a
valid scheme.
Recognize `#` and `?` as ending an authority or path. Remove
queries and fragments when normalizing, and include them
in/as the last segment when splitting.

## 1.9.1

- Require Dart 3.4
Expand Down
10 changes: 9 additions & 1 deletion pkgs/path/lib/src/characters.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,24 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

/// This library contains character-code definitions.
// Character-code constants.

const hash = 0x23; // '#'
const percent = 0x25; // '%'
const plus = 0x2b; // '+'
const minus = 0x2d; // '-'
const period = 0x2e; // '.'
const slash = 0x2f; // '/'
const zero = 0x30; // '0'
const nine = 0x39; // '9'
const colon = 0x3a; // ':'
const question = 0x3f; // '?'
const upperA = 0x41; // 'A'
const upperZ = 0x5a; // 'Z'
const lowerA = 0x61; // 'a'
const lowerE = 0x65; // 'e'
const lowerF = 0x66; // 'f'
const lowerI = 0x69; // 'i'
const lowerL = 0x6c; // 'l'
const lowerZ = 0x7a; // 'z'
const backslash = 0x5c; // '\'
114 changes: 66 additions & 48 deletions pkgs/path/lib/src/context.dart
Original file line number Diff line number Diff line change
Expand Up @@ -372,18 +372,22 @@ class Context {
/// Canonicalizes [path].
///
/// This is guaranteed to return the same path for two different input paths
/// if and only if both input paths point to the same location. Unlike
/// only if both input paths point to the same location. Unlike
/// [normalize], it returns absolute paths when possible and canonicalizes
/// ASCII case on Windows.
/// ASCII case on Windows, and scheme and authority case for URLs (but does
/// not normalize or canonicalize `%`-escapes.)
///
/// Note that this does not resolve symlinks.
///
/// If you want a map that uses path keys, it's probably more efficient to use
/// a Map with [equals] and [hash] specified as the callbacks to use for keys
/// than it is to canonicalize every key.
///
String canonicalize(String path) {
path = absolute(path);
if (style != Style.windows && !_needsNormalization(path)) return path;
// Windows and URL styles need to case-canonicalize, even if it doesn't
// need to normalize anything.
if (style == Style.posix && !_needsNormalization(path)) return path;

final parsed = _parse(path);
parsed.normalize(canonicalize: true);
Expand All @@ -395,7 +399,7 @@ class Context {
///
/// Note that this is *not* guaranteed to return the same result for two
/// equivalent input paths. For that, see [canonicalize]. Or, if you're using
/// paths as map keys use [equals] and [hash] as the key callbacks.
/// paths as map keys, use [equals] and [hash] as the key callbacks.
///
/// context.normalize('path/./to/..//file.txt'); // -> 'path/file.txt'
String normalize(String path) {
Expand All @@ -408,68 +412,76 @@ class Context {

/// Returns whether [path] needs to be normalized.
bool _needsNormalization(String path) {
var start = 0;
final codeUnits = path.codeUnits;
int? previousPrevious;
int? previous;
// Empty paths are normalized to ".".
if (path.isEmpty) return true;

// At start, no previous separator.
const stateStart = 0;

// Previous character was a separator.
const stateSeparator = 1;

// Added to state for each `.` seen.
const stateDotCount = 2;

// Path segment that contains anything other than nothing, `.` or `..`.
//
// Includes any value at or above this one.
const stateNotDots = 6;

// Current state of the last few characters.
//
// Seeing a separator resets to [stateSeparator].
// Seeing a `.` adds [stateDotCount].
// Seeing any non-separator or more than two dots will
// bring the value above [stateNotDots].
// (The separator may be optional at the start, seeing one is fine,
// and seeing dots will start counting.)
// (That is, `/` has value 1, `/.` value 3, `/..` value 5, and anything
// else is 6 or above, except at the very start where empty path, `.`
// and `..` have values 0, 2 and 4.)
var state = stateStart;

// Skip past the root before we start looking for snippets that need
// normalization. We want to normalize "//", but not when it's part of
// "http://".
final root = style.rootLength(path);
if (root != 0) {
start = root;
previous = chars.slash;

final start = style.rootLength(path);
if (start != 0) {
if (style.isSeparator(path.codeUnitAt(start - 1))) {
state = stateSeparator;
}
// On Windows, the root still needs to be normalized if it contains a
// forward slash.
if (style == Style.windows) {
for (var i = 0; i < root; i++) {
if (codeUnits[i] == chars.slash) return true;
for (var i = 0; i < start; i++) {
if (path.codeUnitAt(i) == chars.slash) return true;
}
}
}

for (var i = start; i < codeUnits.length; i++) {
final codeUnit = codeUnits[i];
for (var i = start; i < path.length; i++) {
final codeUnit = path.codeUnitAt(i);
if (style.isSeparator(codeUnit)) {
// If ending empty, `.` or `..` path segment.
if (state >= stateSeparator && state < stateNotDots) return true;
// Forward slashes in Windows paths are normalized to backslashes.
if (style == Style.windows && codeUnit == chars.slash) return true;

// Multiple separators are normalized to single separators.
if (previous != null && style.isSeparator(previous)) return true;

// Single dots and double dots are normalized to directory traversals.
//
// This can return false positives for ".../", but that's unlikely
// enough that it's probably not going to cause performance issues.
if (previous == chars.period &&
(previousPrevious == null ||
previousPrevious == chars.period ||
style.isSeparator(previousPrevious))) {
state = stateSeparator;
} else if (codeUnit == chars.period) {
state += stateDotCount;
} else {
state = stateNotDots;
if (style == Style.url &&
(codeUnit == chars.question || codeUnit == chars.hash)) {
// Normalize away `?` query parts and `#` fragment parts in URL
// styled paths.
return true;
}
}

previousPrevious = previous;
previous = codeUnit;
}

// Empty paths are normalized to ".".
if (previous == null) return true;

// Trailing separators are removed.
if (style.isSeparator(previous)) return true;

// Single dots and double dots are normalized to directory traversals.
if (previous == chars.period &&
(previousPrevious == null ||
style.isSeparator(previousPrevious) ||
previousPrevious == chars.period)) {
return true;
}

return false;
// Otherwise only normalize if there are separators and single/double dots.
return state >= stateSeparator && state < stateNotDots;
}

/// Attempts to convert [path] to an equivalent relative path relative to
Expand Down Expand Up @@ -1020,7 +1032,9 @@ class Context {
/// Returns the path represented by [uri], which may be a [String] or a [Uri].
///
/// For POSIX and Windows styles, [uri] must be a `file:` URI. For the URL
/// style, this will just convert [uri] to a string.
/// style, this will just convert [uri] to a string, but if the input was
/// a string, it will be parsed and normalized as a [Uri] first.
///
///
/// // POSIX
/// context.fromUri('file:///path/to/foo')
Expand All @@ -1036,7 +1050,11 @@ class Context {
///
/// If [uri] is relative, a relative path will be returned.
///
/// // POSIX
/// path.fromUri('path/to/foo'); // -> 'path/to/foo'
///
/// // Windows
/// path.fromUri('/C:/foo'); // -> r'C:\foo'
String fromUri(Object? uri) => style.pathFromUri(_parseUri(uri!));

/// Returns the URI that represents [path].
Expand Down
11 changes: 10 additions & 1 deletion pkgs/path/lib/src/internal_style.dart
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,21 @@ abstract class InternalStyle extends Style {
/// "usr", an additional "/" is needed (making "file:///usr").
bool needsSeparator(String path);

/// Returns the number of characters of the root part.
/// The number of characters of the root part.
///
/// Returns 0 if the path is relative and 1 if the path is root-relative.
///
/// If [withDrive] is `true`, this should include the drive letter for `file:`
/// URLs. Non-URL styles may ignore the parameter.
///
/// For URL style, the root includes any URI scheme and host/authority part.
/// If either of those is included, a leading `/` of the path is not
/// included unless it's followed by a drive letter. A slash after a
/// drive letter is included. If a URL style path starts with a single `/`,
/// it is included in the root.
/// If a URL style path has a scheme but no authority, and the path
/// does not start with a `/`, the first path segment of the path is
/// considered part of the root.
int rootLength(String path, {bool withDrive = false});

/// Gets the root prefix of [path] if path is absolute. If [path] is relative,
Expand Down
36 changes: 29 additions & 7 deletions pkgs/path/lib/src/parsed_path.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'characters.dart' as chars;
import 'internal_style.dart';
import 'style.dart';
import 'utils.dart' show endOfScheme, removeQueryFragment;

class ParsedPath {
/// The [InternalStyle] that was used to parse this path.
Expand Down Expand Up @@ -35,7 +37,7 @@ class ParsedPath {
/// one.
String extension([int level = 1]) => _splitExtension(level)[1];

/// `true` if this is an absolute path.
/// Whether this is an absolute path.
bool get isAbsolute => root != null;

factory ParsedPath.parse(String path, InternalStyle style) {
Expand All @@ -58,11 +60,17 @@ class ParsedPath {
}

for (var i = start; i < path.length; i++) {
if (style.isSeparator(path.codeUnitAt(i))) {
final codeUnit = path.codeUnitAt(i);
if (style.isSeparator(codeUnit)) {
parts.add(path.substring(start, i));
separators.add(path[i]);
start = i + 1;
}
if (style == Style.url &&
(codeUnit == chars.question || codeUnit == chars.hash)) {
// Include `?` and `#` in final path segment.
break;
}
}

// Add the final part, if any.
Expand Down Expand Up @@ -101,6 +109,13 @@ class ParsedPath {
// Handle '.', '..', and empty parts.
var leadingDoubles = 0;
final newParts = <String>[];
if (style == Style.url && parts.isNotEmpty) {
parts.last = removeQueryFragment(parts.last);
if (canonicalize && endOfScheme(parts.first, 0) > 0) {
// Normalize scheme and authority.
parts.first = parts.first.toLowerCase();
}
}
for (var part in parts) {
if (part == '.' || part == '') {
// Do nothing. Ignore it.
Expand Down Expand Up @@ -131,14 +146,21 @@ class ParsedPath {
parts = newParts;
separators =
List.filled(newParts.length + 1, style.separator, growable: true);
if (!isAbsolute || newParts.isEmpty || !style.needsSeparator(root!)) {

final root = this.root;
if (root == null || newParts.isEmpty || !style.needsSeparator(root)) {
separators[0] = '';
}

// Normalize the Windows root if needed.
if (root != null && style == Style.windows) {
if (canonicalize) root = root!.toLowerCase();
root = root!.replaceAll('/', '\\');
if (root != null) {
if (style == Style.windows) {
// Normalize the Windows root if needed.
final canonRoot = canonicalize ? root.toLowerCase() : root;
this.root = canonRoot.replaceAll('/', r'\');
} else if (canonicalize && style == Style.url) {
// Canonicalize the URL scheme and authority.
this.root = root.toLowerCase();
}
}
removeTrailingSeparators();
}
Expand Down
Loading