Skip to content

Commit a5d5eb5

Browse files
committed
(142446243) Compatibility behaviors for Swift URL (swiftlang#1113)
1 parent b50ea3d commit a5d5eb5

File tree

4 files changed

+100
-37
lines changed

4 files changed

+100
-37
lines changed

Sources/FoundationEssentials/URL/URL.swift

Lines changed: 65 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,10 @@ public struct URL: Equatable, Sendable, Hashable {
763763
internal var _parseInfo: URLParseInfo!
764764
private var _baseParseInfo: URLParseInfo?
765765

766+
private static func parse(urlString: String, encodingInvalidCharacters: Bool = true) -> URLParseInfo? {
767+
return Parser.parse(urlString: urlString, encodingInvalidCharacters: encodingInvalidCharacters, compatibility: .allowEmptyScheme)
768+
}
769+
766770
internal init(parseInfo: URLParseInfo, relativeTo url: URL? = nil) {
767771
_parseInfo = parseInfo
768772
if parseInfo.scheme == nil {
@@ -773,6 +777,31 @@ public struct URL: Equatable, Sendable, Hashable {
773777
#endif // FOUNDATION_FRAMEWORK
774778
}
775779

780+
/// The public initializers don't allow the empty string, and we must maintain that behavior
781+
/// for compatibility. However, there are cases internally where we need to create a URL with
782+
/// an empty string, such as when calling `.deletingLastPathComponent()` on a single path
783+
/// component. This previously worked since `URL` just wrapped an `NSURL`, which
784+
/// allows the empty string.
785+
internal init?(stringOrEmpty: String, relativeTo url: URL? = nil) {
786+
#if FOUNDATION_FRAMEWORK
787+
guard foundation_swift_url_enabled() else {
788+
guard let inner = NSURL(string: stringOrEmpty, relativeTo: url) else { return nil }
789+
_url = URL._converted(from: inner)
790+
return
791+
}
792+
#endif // FOUNDATION_FRAMEWORK
793+
guard let parseInfo = URL.parse(urlString: stringOrEmpty) else {
794+
return nil
795+
}
796+
_parseInfo = parseInfo
797+
if parseInfo.scheme == nil {
798+
_baseParseInfo = url?.absoluteURL._parseInfo
799+
}
800+
#if FOUNDATION_FRAMEWORK
801+
_url = URL._nsURL(from: _parseInfo, baseParseInfo: _baseParseInfo)
802+
#endif // FOUNDATION_FRAMEWORK
803+
}
804+
776805
/// Initialize with string.
777806
///
778807
/// Returns `nil` if a `URL` cannot be formed with the string (for example, if the string contains characters that are illegal in a URL, or is an empty string).
@@ -785,7 +814,7 @@ public struct URL: Equatable, Sendable, Hashable {
785814
return
786815
}
787816
#endif // FOUNDATION_FRAMEWORK
788-
guard let parseInfo = Parser.parse(urlString: string, encodingInvalidCharacters: true) else {
817+
guard let parseInfo = URL.parse(urlString: string) else {
789818
return nil
790819
}
791820
_parseInfo = parseInfo
@@ -798,14 +827,15 @@ public struct URL: Equatable, Sendable, Hashable {
798827
///
799828
/// Returns `nil` if a `URL` cannot be formed with the string (for example, if the string contains characters that are illegal in a URL, or is an empty string).
800829
public init?(string: __shared String, relativeTo url: __shared URL?) {
830+
guard !string.isEmpty else { return nil }
801831
#if FOUNDATION_FRAMEWORK
802832
guard foundation_swift_url_enabled() else {
803-
guard !string.isEmpty, let inner = NSURL(string: string, relativeTo: url) else { return nil }
833+
guard let inner = NSURL(string: string, relativeTo: url) else { return nil }
804834
_url = URL._converted(from: inner)
805835
return
806836
}
807837
#endif // FOUNDATION_FRAMEWORK
808-
guard let parseInfo = Parser.parse(urlString: string, encodingInvalidCharacters: true) else {
838+
guard let parseInfo = URL.parse(urlString: string) else {
809839
return nil
810840
}
811841
_parseInfo = parseInfo
@@ -824,14 +854,15 @@ public struct URL: Equatable, Sendable, Hashable {
824854
/// If the URL string is still invalid after encoding, `nil` is returned.
825855
@available(macOS 14.0, iOS 17.0, watchOS 10.0, tvOS 17.0, *)
826856
public init?(string: __shared String, encodingInvalidCharacters: Bool) {
857+
guard !string.isEmpty else { return nil }
827858
#if FOUNDATION_FRAMEWORK
828859
guard foundation_swift_url_enabled() else {
829-
guard !string.isEmpty, let inner = NSURL(string: string, encodingInvalidCharacters: encodingInvalidCharacters) else { return nil }
860+
guard let inner = NSURL(string: string, encodingInvalidCharacters: encodingInvalidCharacters) else { return nil }
830861
_url = URL._converted(from: inner)
831862
return
832863
}
833864
#endif // FOUNDATION_FRAMEWORK
834-
guard let parseInfo = Parser.parse(urlString: string, encodingInvalidCharacters: encodingInvalidCharacters) else {
865+
guard let parseInfo = URL.parse(urlString: string, encodingInvalidCharacters: encodingInvalidCharacters) else {
835866
return nil
836867
}
837868
_parseInfo = parseInfo
@@ -858,7 +889,7 @@ public struct URL: Equatable, Sendable, Hashable {
858889
}
859890
#endif
860891
let directoryHint: DirectoryHint = isDirectory ? .isDirectory : .notDirectory
861-
self.init(filePath: path, directoryHint: directoryHint, relativeTo: base)
892+
self.init(filePath: path.isEmpty ? "." : path, directoryHint: directoryHint, relativeTo: base)
862893
}
863894

864895
/// Initializes a newly created file URL referencing the local file or directory at path, relative to a base URL.
@@ -877,7 +908,7 @@ public struct URL: Equatable, Sendable, Hashable {
877908
return
878909
}
879910
#endif
880-
self.init(filePath: path, directoryHint: .checkFileSystem, relativeTo: base)
911+
self.init(filePath: path.isEmpty ? "." : path, directoryHint: .checkFileSystem, relativeTo: base)
881912
}
882913

883914
/// Initializes a newly created file URL referencing the local file or directory at path.
@@ -898,7 +929,7 @@ public struct URL: Equatable, Sendable, Hashable {
898929
}
899930
#endif
900931
let directoryHint: DirectoryHint = isDirectory ? .isDirectory : .notDirectory
901-
self.init(filePath: path, directoryHint: directoryHint)
932+
self.init(filePath: path.isEmpty ? "." : path, directoryHint: directoryHint)
902933
}
903934

904935
/// Initializes a newly created file URL referencing the local file or directory at path.
@@ -917,7 +948,7 @@ public struct URL: Equatable, Sendable, Hashable {
917948
return
918949
}
919950
#endif
920-
self.init(filePath: path, directoryHint: .checkFileSystem)
951+
self.init(filePath: path.isEmpty ? "." : path, directoryHint: .checkFileSystem)
921952
}
922953

923954
// NSURL(fileURLWithPath:) can return nil incorrectly for some malformed paths
@@ -941,24 +972,24 @@ public struct URL: Equatable, Sendable, Hashable {
941972
///
942973
/// If the data representation is not a legal URL string as ASCII bytes, the URL object may not behave as expected. If the URL cannot be formed then this will return nil.
943974
@available(macOS 10.11, iOS 9.0, watchOS 2.0, tvOS 9.0, *)
944-
public init?(dataRepresentation: __shared Data, relativeTo url: __shared URL?, isAbsolute: Bool = false) {
975+
public init?(dataRepresentation: __shared Data, relativeTo base: __shared URL?, isAbsolute: Bool = false) {
945976
guard !dataRepresentation.isEmpty else { return nil }
946977
#if FOUNDATION_FRAMEWORK
947978
guard foundation_swift_url_enabled() else {
948979
if isAbsolute {
949-
_url = URL._converted(from: NSURL(absoluteURLWithDataRepresentation: dataRepresentation, relativeTo: url))
980+
_url = URL._converted(from: NSURL(absoluteURLWithDataRepresentation: dataRepresentation, relativeTo: base))
950981
} else {
951-
_url = URL._converted(from: NSURL(dataRepresentation: dataRepresentation, relativeTo: url))
982+
_url = URL._converted(from: NSURL(dataRepresentation: dataRepresentation, relativeTo: base))
952983
}
953984
return
954985
}
955986
#endif
956987
var url: URL?
957988
if let string = String(data: dataRepresentation, encoding: .utf8) {
958-
url = URL(string: string, relativeTo: url)
989+
url = URL(stringOrEmpty: string, relativeTo: base)
959990
}
960991
if url == nil, let string = String(data: dataRepresentation, encoding: .isoLatin1) {
961-
url = URL(string: string, relativeTo: url)
992+
url = URL(stringOrEmpty: string, relativeTo: base)
962993
}
963994
guard let url else {
964995
return nil
@@ -983,7 +1014,7 @@ public struct URL: Equatable, Sendable, Hashable {
9831014
return
9841015
}
9851016
#endif
986-
guard let parseInfo = Parser.parse(urlString: _url.relativeString, encodingInvalidCharacters: true) else {
1017+
guard let parseInfo = URL.parse(urlString: _url.relativeString) else {
9871018
return nil
9881019
}
9891020
_parseInfo = parseInfo
@@ -1004,7 +1035,7 @@ public struct URL: Equatable, Sendable, Hashable {
10041035
}
10051036
#endif
10061037
bookmarkDataIsStale = stale.boolValue
1007-
let parseInfo = Parser.parse(urlString: _url.relativeString, encodingInvalidCharacters: true)!
1038+
let parseInfo = URL.parse(urlString: _url.relativeString)!
10081039
_parseInfo = parseInfo
10091040
if parseInfo.scheme == nil {
10101041
_baseParseInfo = url?.absoluteURL._parseInfo
@@ -1229,6 +1260,14 @@ public struct URL: Equatable, Sendable, Hashable {
12291260
return nil
12301261
}
12311262

1263+
// According to RFC 3986, a host always exists if there is an authority
1264+
// component; it just might be empty. However, the old implementation
1265+
// of URL.host() returned nil for URLs like "https:///", and apps rely
1266+
// on this behavior, so keep it for bincompat.
1267+
if encodedHost.isEmpty, user() == nil, password() == nil, port == nil {
1268+
return nil
1269+
}
1270+
12321271
func requestedHost() -> String? {
12331272
let didPercentEncodeHost = hasAuthority ? _parseInfo.didPercentEncodeHost : _baseParseInfo?.didPercentEncodeHost ?? false
12341273
if percentEncoded {
@@ -2053,7 +2092,7 @@ public struct URL: Equatable, Sendable, Hashable {
20532092
return
20542093
}
20552094
#endif
2056-
if let parseInfo = Parser.parse(urlString: _url.relativeString, encodingInvalidCharacters: true) {
2095+
if let parseInfo = URL.parse(urlString: _url.relativeString) {
20572096
_parseInfo = parseInfo
20582097
} else {
20592098
// Go to compatibility jail (allow `URL` as a dummy string container for `NSURL` instead of crashing)
@@ -2211,7 +2250,7 @@ extension URL {
22112250
#if !NO_FILESYSTEM
22122251
baseURL = baseURL ?? .currentDirectoryOrNil()
22132252
#endif
2214-
self.init(string: "", relativeTo: baseURL)!
2253+
self.init(string: "./", relativeTo: baseURL)!
22152254
return
22162255
}
22172256

@@ -2474,6 +2513,14 @@ extension URL {
24742513
#endif // NO_FILESYSTEM
24752514
}
24762515
#endif // FOUNDATION_FRAMEWORK
2516+
2517+
// The old .appending(component:) implementation did not actually percent-encode
2518+
// "/" for file URLs as the documentation suggests. Many apps accidentally use
2519+
// .appending(component: "path/with/slashes") instead of using .appending(path:),
2520+
// so changing this behavior would cause breakage.
2521+
if isFileURL {
2522+
return appending(path: component, directoryHint: directoryHint, encodingSlashes: false)
2523+
}
24772524
return appending(path: component, directoryHint: directoryHint, encodingSlashes: true)
24782525
}
24792526

Sources/FoundationEssentials/URL/URLComponents.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,7 @@ public struct URLComponents: Hashable, Equatable, Sendable {
676676
return CFURLCreateWithString(kCFAllocatorDefault, string as CFString, nil) as URL?
677677
}
678678
#endif
679-
return URL(string: string, relativeTo: nil)
679+
return URL(stringOrEmpty: string, relativeTo: nil)
680680
}
681681

682682
/// Returns a URL created from the URLComponents relative to a base URL.
@@ -690,7 +690,7 @@ public struct URLComponents: Hashable, Equatable, Sendable {
690690
return CFURLCreateWithString(kCFAllocatorDefault, string as CFString, base as CFURL) as URL?
691691
}
692692
#endif
693-
return URL(string: string, relativeTo: base)
693+
return URL(stringOrEmpty: string, relativeTo: base)
694694
}
695695

696696
/// Returns a URL string created from the URLComponents.

Sources/FoundationEssentials/URL/URLParser.swift

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,17 @@ internal enum URLParserKind {
137137
case RFC3986
138138
}
139139

140+
internal struct URLParserCompatibility: OptionSet {
141+
let rawValue: UInt8
142+
static let allowEmptyScheme = URLParserCompatibility(rawValue: 1 << 0)
143+
}
144+
140145
internal protocol URLParserProtocol {
141146
static var kind: URLParserKind { get }
142147

143148
static func parse(urlString: String, encodingInvalidCharacters: Bool) -> URLParseInfo?
149+
static func parse(urlString: String, encodingInvalidCharacters: Bool, compatibility: URLParserCompatibility) -> URLParseInfo?
150+
144151
static func validate(_ string: (some StringProtocol)?, component: URLComponents.Component) -> Bool
145152
static func validate(_ string: (some StringProtocol)?, component: URLComponents.Component, percentEncodingAllowed: Bool) -> Bool
146153

@@ -401,15 +408,18 @@ internal struct RFC3986Parser: URLParserProtocol {
401408
}
402409

403410
/// Fast path used during initial URL buffer parsing.
404-
private static func validate(schemeBuffer: Slice<UnsafeBufferPointer<UInt8>>) -> Bool {
405-
guard let first = schemeBuffer.first,
406-
first >= UInt8(ascii: "A"),
411+
private static func validate(schemeBuffer: Slice<UnsafeBufferPointer<UInt8>>, compatibility: URLParserCompatibility = .init()) -> Bool {
412+
guard let first = schemeBuffer.first else {
413+
return compatibility.contains(.allowEmptyScheme)
414+
}
415+
guard first >= UInt8(ascii: "A"),
407416
validate(buffer: schemeBuffer, component: .scheme, percentEncodingAllowed: false) else {
408417
return false
409418
}
410419
return true
411420
}
412421

422+
/// Only used by URLComponents, so there is no need to consider `URLParserCompatibility.allowEmptyScheme` here.
413423
private static func validate(scheme: some StringProtocol) -> Bool {
414424
// A valid scheme must start with an ALPHA character.
415425
// If first >= "A" and is in schemeAllowed, then first is ALPHA.
@@ -593,10 +603,14 @@ internal struct RFC3986Parser: URLParserProtocol {
593603
/// Parses a URL string into `URLParseInfo`, with the option to add (or skip) encoding of invalid characters.
594604
/// If `encodingInvalidCharacters` is `true`, this function handles encoding of invalid components.
595605
static func parse(urlString: String, encodingInvalidCharacters: Bool) -> URLParseInfo? {
606+
return parse(urlString: urlString, encodingInvalidCharacters: encodingInvalidCharacters, compatibility: .init())
607+
}
608+
609+
static func parse(urlString: String, encodingInvalidCharacters: Bool, compatibility: URLParserCompatibility) -> URLParseInfo? {
596610
#if os(Windows)
597611
let urlString = urlString.replacing(UInt8(ascii: "\\"), with: UInt8(ascii: "/"))
598612
#endif
599-
guard let parseInfo = parse(urlString: urlString) else {
613+
guard let parseInfo = parse(urlString: urlString, compatibility: compatibility) else {
600614
return nil
601615
}
602616

@@ -690,10 +704,10 @@ internal struct RFC3986Parser: URLParserProtocol {
690704

691705
/// Parses a URL string into its component parts and stores these ranges in a `URLParseInfo`.
692706
/// This function calls `parse(buffer:)`, then converts the buffer ranges into string ranges.
693-
private static func parse(urlString: String) -> URLParseInfo? {
707+
private static func parse(urlString: String, compatibility: URLParserCompatibility = .init()) -> URLParseInfo? {
694708
var string = urlString
695709
let bufferParseInfo = string.withUTF8 {
696-
parse(buffer: $0)
710+
parse(buffer: $0, compatibility: compatibility)
697711
}
698712
guard let bufferParseInfo else {
699713
return nil
@@ -726,7 +740,7 @@ internal struct RFC3986Parser: URLParserProtocol {
726740

727741
/// Parses a URL string into its component parts and stores these ranges in a `URLBufferParseInfo`.
728742
/// This function only parses based on delimiters and does not do any encoding.
729-
private static func parse(buffer: UnsafeBufferPointer<UInt8>) -> URLBufferParseInfo? {
743+
private static func parse(buffer: UnsafeBufferPointer<UInt8>, compatibility: URLParserCompatibility = .init()) -> URLBufferParseInfo? {
730744
// A URI is either:
731745
// 1. scheme ":" hier-part [ "?" query ] [ "#" fragment ]
732746
// 2. relative-ref
@@ -746,12 +760,12 @@ internal struct RFC3986Parser: URLParserProtocol {
746760
let v = buffer[currentIndex]
747761
if v == UInt8(ascii: ":") {
748762
// Scheme must be at least 1 character (unless `.allowEmptyScheme` is set), otherwise this is a relative-ref.
749-
if currentIndex != buffer.startIndex {
763+
if currentIndex != buffer.startIndex || compatibility.contains(.allowEmptyScheme) {
750764
parseInfo.schemeRange = buffer.startIndex..<currentIndex
751765
currentIndex = buffer.index(after: currentIndex)
752766
if currentIndex == buffer.endIndex {
753767
guard let schemeRange = parseInfo.schemeRange,
754-
validate(schemeBuffer: buffer[schemeRange]) else {
768+
validate(schemeBuffer: buffer[schemeRange], compatibility: compatibility) else {
755769
return nil
756770
}
757771
// The string only contained a scheme, but the path always exists.
@@ -777,7 +791,7 @@ internal struct RFC3986Parser: URLParserProtocol {
777791
}
778792

779793
if let schemeRange = parseInfo.schemeRange {
780-
guard validate(schemeBuffer: buffer[schemeRange]) else {
794+
guard validate(schemeBuffer: buffer[schemeRange], compatibility: compatibility) else {
781795
return nil
782796
}
783797
}

Tests/FoundationEssentialsTests/URLTests.swift

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ final class URLTests : XCTestCase {
179179
}
180180
#endif
181181

182-
let url = URL(string: test.key, relativeTo: base)
182+
let url = URL(stringOrEmpty: test.key, relativeTo: base)
183183
XCTAssertNotNil(url, "Got nil url for string: \(test.key)")
184184
XCTAssertEqual(url?.absoluteString, test.value, "Failed test for string: \(test.key)")
185185
}
@@ -242,7 +242,7 @@ final class URLTests : XCTestCase {
242242
"http:g" : "g", // For strict parsers
243243
]
244244
for test in tests {
245-
let url = URL(string: test.key, relativeTo: base)!
245+
let url = URL(stringOrEmpty: test.key, relativeTo: base)!
246246
XCTAssertEqual(url.path(), test.value)
247247
if (url.hasDirectoryPath && url.path().count > 1) {
248248
// The trailing slash is stripped in .path for file system compatibility
@@ -607,11 +607,13 @@ final class URLTests : XCTestCase {
607607
XCTAssertEqual(appended.absoluteString, "file:///var/mobile/relative/no:slash")
608608
XCTAssertEqual(appended.relativePath, "relative/no:slash")
609609

610-
// `appending(component:)` should explicitly treat `component` as a single
611-
// path component, meaning "/" should be encoded to "%2F" before appending
610+
// .appending(component:) should explicitly treat slashComponent as a single
611+
// path component, meaning "/" should be encoded to "%2F" before appending.
612+
// However, the old behavior didn't do this for file URLs, so we maintain the
613+
// old behavior to prevent breakage.
612614
appended = url.appending(component: slashComponent, directoryHint: .notDirectory)
613-
checkBehavior(appended.absoluteString, new: "file:///var/mobile/relative/%2Fwith:slash", old: "file:///var/mobile/relative/with:slash")
614-
checkBehavior(appended.relativePath, new: "relative/%2Fwith:slash", old: "relative/with:slash")
615+
XCTAssertEqual(appended.absoluteString, "file:///var/mobile/relative/with:slash")
616+
XCTAssertEqual(appended.relativePath, "relative/with:slash")
615617

616618
appended = url.appendingPathComponent(component, isDirectory: false)
617619
XCTAssertEqual(appended.absoluteString, "file:///var/mobile/relative/no:slash")
@@ -687,7 +689,7 @@ final class URLTests : XCTestCase {
687689
checkBehavior(relative.path, new: "/", old: "/..")
688690

689691
relative = URL(filePath: "", relativeTo: absolute)
690-
checkBehavior(relative.relativePath, new: "", old: ".")
692+
XCTAssertEqual(relative.relativePath, ".")
691693
XCTAssertTrue(relative.hasDirectoryPath)
692694
XCTAssertEqual(relative.path, "/absolute")
693695

0 commit comments

Comments
 (0)