diff --git a/Sources/FoundationEssentials/URL/URL.swift b/Sources/FoundationEssentials/URL/URL.swift index 44c367854..ea6d4f127 100644 --- a/Sources/FoundationEssentials/URL/URL.swift +++ b/Sources/FoundationEssentials/URL/URL.swift @@ -616,6 +616,10 @@ internal func foundation_swift_url_enabled() -> Bool { internal func foundation_swift_url_enabled() -> Bool { return true } #endif +#if canImport(os) +internal import os +#endif + /// A URL is a type that can potentially contain the location of a resource on a remote server, the path of a local file on disk, or even an arbitrary piece of encoded data. /// /// You can construct URLs and access their parts. For URLs that represent local files, you can also manipulate properties of those files directly, such as changing the file's last modification date. Finally, you can pass URLs to other APIs to retrieve the contents of those URLs. For example, you can use the URLSession classes to access the contents of remote resources, as described in URL Session Programming Guide. @@ -624,6 +628,12 @@ internal func foundation_swift_url_enabled() -> Bool { return true } @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *) public struct URL: Equatable, Sendable, Hashable { +#if canImport(os) + internal static let logger: Logger = { + Logger(subsystem: "com.apple.foundation", category: "url") + }() +#endif + #if FOUNDATION_FRAMEWORK private var _url: NSURL @@ -763,6 +773,10 @@ public struct URL: Equatable, Sendable, Hashable { internal var _parseInfo: URLParseInfo! private var _baseParseInfo: URLParseInfo? + private static func parse(urlString: String, encodingInvalidCharacters: Bool = true) -> URLParseInfo? { + return Parser.parse(urlString: urlString, encodingInvalidCharacters: encodingInvalidCharacters, compatibility: .allowEmptyScheme) + } + internal init(parseInfo: URLParseInfo, relativeTo url: URL? 
= nil) { _parseInfo = parseInfo if parseInfo.scheme == nil { @@ -773,6 +787,31 @@ public struct URL: Equatable, Sendable, Hashable { #endif // FOUNDATION_FRAMEWORK } + /// The public initializers don't allow the empty string, and we must maintain that behavior + /// for compatibility. However, there are cases internally where we need to create a URL with + /// an empty string, such as when calling `.deletingLastPathComponent()` on a URL with a single + /// path component. This previously worked since `URL` just wrapped an `NSURL`, which + /// allows the empty string. + internal init?(stringOrEmpty: String, relativeTo url: URL? = nil) { + #if FOUNDATION_FRAMEWORK + guard foundation_swift_url_enabled() else { + guard let inner = NSURL(string: stringOrEmpty, relativeTo: url) else { return nil } + _url = URL._converted(from: inner) + return + } + #endif // FOUNDATION_FRAMEWORK + guard let parseInfo = URL.parse(urlString: stringOrEmpty) else { + return nil + } + _parseInfo = parseInfo + if parseInfo.scheme == nil { + _baseParseInfo = url?.absoluteURL._parseInfo + } + #if FOUNDATION_FRAMEWORK + _url = URL._nsURL(from: _parseInfo, baseParseInfo: _baseParseInfo) + #endif // FOUNDATION_FRAMEWORK + } + /// Initialize with string. /// /// Returns `nil` if a `URL` cannot be formed with the string (for example, if the string contains characters that are illegal in a URL, or is an empty string). @@ -785,7 +824,7 @@ public struct URL: Equatable, Sendable, Hashable { return } #endif // FOUNDATION_FRAMEWORK - guard let parseInfo = Parser.parse(urlString: string, encodingInvalidCharacters: true) else { + guard let parseInfo = URL.parse(urlString: string) else { return nil } _parseInfo = parseInfo @@ -798,14 +837,15 @@ public struct URL: Equatable, Sendable, Hashable { /// /// Returns `nil` if a `URL` cannot be formed with the string (for example, if the string contains characters that are illegal in a URL, or is an empty string). public init?(string: __shared String, relativeTo url: __shared URL?) 
{ + guard !string.isEmpty else { return nil } #if FOUNDATION_FRAMEWORK guard foundation_swift_url_enabled() else { - guard !string.isEmpty, let inner = NSURL(string: string, relativeTo: url) else { return nil } + guard let inner = NSURL(string: string, relativeTo: url) else { return nil } _url = URL._converted(from: inner) return } #endif // FOUNDATION_FRAMEWORK - guard let parseInfo = Parser.parse(urlString: string, encodingInvalidCharacters: true) else { + guard let parseInfo = URL.parse(urlString: string) else { return nil } _parseInfo = parseInfo @@ -824,14 +864,15 @@ public struct URL: Equatable, Sendable, Hashable { /// If the URL string is still invalid after encoding, `nil` is returned. @available(macOS 14.0, iOS 17.0, watchOS 10.0, tvOS 17.0, *) public init?(string: __shared String, encodingInvalidCharacters: Bool) { + guard !string.isEmpty else { return nil } #if FOUNDATION_FRAMEWORK guard foundation_swift_url_enabled() else { - guard !string.isEmpty, let inner = NSURL(string: string, encodingInvalidCharacters: encodingInvalidCharacters) else { return nil } + guard let inner = NSURL(string: string, encodingInvalidCharacters: encodingInvalidCharacters) else { return nil } _url = URL._converted(from: inner) return } #endif // FOUNDATION_FRAMEWORK - guard let parseInfo = Parser.parse(urlString: string, encodingInvalidCharacters: encodingInvalidCharacters) else { + guard let parseInfo = URL.parse(urlString: string, encodingInvalidCharacters: encodingInvalidCharacters) else { return nil } _parseInfo = parseInfo @@ -858,7 +899,7 @@ public struct URL: Equatable, Sendable, Hashable { } #endif let directoryHint: DirectoryHint = isDirectory ? .isDirectory : .notDirectory - self.init(filePath: path, directoryHint: directoryHint, relativeTo: base) + self.init(filePath: path.isEmpty ? "." : path, directoryHint: directoryHint, relativeTo: base) } /// Initializes a newly created file URL referencing the local file or directory at path, relative to a base URL. 
@@ -877,7 +918,7 @@ public struct URL: Equatable, Sendable, Hashable { return } #endif - self.init(filePath: path, directoryHint: .checkFileSystem, relativeTo: base) + self.init(filePath: path.isEmpty ? "." : path, directoryHint: .checkFileSystem, relativeTo: base) } /// Initializes a newly created file URL referencing the local file or directory at path. @@ -898,7 +939,7 @@ public struct URL: Equatable, Sendable, Hashable { } #endif let directoryHint: DirectoryHint = isDirectory ? .isDirectory : .notDirectory - self.init(filePath: path, directoryHint: directoryHint) + self.init(filePath: path.isEmpty ? "." : path, directoryHint: directoryHint) } /// Initializes a newly created file URL referencing the local file or directory at path. @@ -917,7 +958,7 @@ public struct URL: Equatable, Sendable, Hashable { return } #endif - self.init(filePath: path, directoryHint: .checkFileSystem) + self.init(filePath: path.isEmpty ? "." : path, directoryHint: .checkFileSystem) } // NSURL(fileURLWithPath:) can return nil incorrectly for some malformed paths @@ -941,24 +982,24 @@ public struct URL: Equatable, Sendable, Hashable { /// /// If the data representation is not a legal URL string as ASCII bytes, the URL object may not behave as expected. If the URL cannot be formed then this will return nil. 
@available(macOS 10.11, iOS 9.0, watchOS 2.0, tvOS 9.0, *) - public init?(dataRepresentation: __shared Data, relativeTo url: __shared URL?, isAbsolute: Bool = false) { + public init?(dataRepresentation: __shared Data, relativeTo base: __shared URL?, isAbsolute: Bool = false) { guard !dataRepresentation.isEmpty else { return nil } #if FOUNDATION_FRAMEWORK guard foundation_swift_url_enabled() else { if isAbsolute { - _url = URL._converted(from: NSURL(absoluteURLWithDataRepresentation: dataRepresentation, relativeTo: url)) + _url = URL._converted(from: NSURL(absoluteURLWithDataRepresentation: dataRepresentation, relativeTo: base)) } else { - _url = URL._converted(from: NSURL(dataRepresentation: dataRepresentation, relativeTo: url)) + _url = URL._converted(from: NSURL(dataRepresentation: dataRepresentation, relativeTo: base)) } return } #endif var url: URL? if let string = String(data: dataRepresentation, encoding: .utf8) { - url = URL(string: string, relativeTo: url) + url = URL(stringOrEmpty: string, relativeTo: base) } if url == nil, let string = String(data: dataRepresentation, encoding: .isoLatin1) { - url = URL(string: string, relativeTo: url) + url = URL(stringOrEmpty: string, relativeTo: base) } guard let url else { return nil @@ -983,7 +1024,7 @@ public struct URL: Equatable, Sendable, Hashable { return } #endif - guard let parseInfo = Parser.parse(urlString: _url.relativeString, encodingInvalidCharacters: true) else { + guard let parseInfo = URL.parse(urlString: _url.relativeString) else { return nil } _parseInfo = parseInfo @@ -1004,7 +1045,7 @@ public struct URL: Equatable, Sendable, Hashable { } #endif bookmarkDataIsStale = stale.boolValue - let parseInfo = Parser.parse(urlString: _url.relativeString, encodingInvalidCharacters: true)! + let parseInfo = URL.parse(urlString: _url.relativeString)! 
_parseInfo = parseInfo if parseInfo.scheme == nil { _baseParseInfo = url?.absoluteURL._parseInfo @@ -1229,15 +1270,13 @@ public struct URL: Equatable, Sendable, Hashable { return nil } - #if FOUNDATION_FRAMEWORK - // Linked-on-or-after check for apps which expect .host() to return nil - // for URLs like "https:///". The new .host() implementation returns - // an empty string because according to RFC 3986, a host always exists - // if there is an authority component, it just might be empty. - if Self.compatibility2 && encodedHost.isEmpty { + // According to RFC 3986, a host always exists if there is an authority + // component, it just might be empty. However, the old implementation + // of URL.host() returned nil for URLs like "https:///", and apps rely + // on this behavior, so keep it for bincompat. + if encodedHost.isEmpty, user() == nil, password() == nil, port == nil { return nil } - #endif func requestedHost() -> String? { let didPercentEncodeHost = hasAuthority ? _parseInfo.didPercentEncodeHost : _baseParseInfo?.didPercentEncodeHost ?? false @@ -2063,7 +2102,7 @@ public struct URL: Equatable, Sendable, Hashable { return } #endif - if let parseInfo = Parser.parse(urlString: _url.relativeString, encodingInvalidCharacters: true) { + if let parseInfo = URL.parse(urlString: _url.relativeString) { _parseInfo = parseInfo } else { // Go to compatibility jail (allow `URL` as a dummy string container for `NSURL` instead of crashing) @@ -2221,7 +2260,7 @@ extension URL { #if !NO_FILESYSTEM baseURL = baseURL ?? .currentDirectoryOrNil() #endif - self.init(string: "", relativeTo: baseURL)! + self.init(string: "./", relativeTo: baseURL)! return } @@ -2233,13 +2272,27 @@ extension URL { #endif #if FOUNDATION_FRAMEWORK - // Linked-on-or-after check for apps which incorrectly pass a full - // "file:" URL string. In the old implementation, this could work + // Linked-on-or-after check for apps which incorrectly pass a full URL + // string with a scheme. 
In the old implementation, this could work // rarely if the app immediately called .appendingPathComponent(_:), - // which used to accidentally interpret a relative string starting - // with "file:" as an absolute file URL string. - if Self.compatibility3 && filePath.starts(with: "file:") { - filePath = String(filePath.dropFirst(5)) + // which used to accidentally interpret a relative path starting with + // "scheme:" as an absolute "scheme:" URL string. + if Self.compatibility1 { + if filePath.utf8.starts(with: "file:".utf8) { + #if canImport(os) + URL.logger.fault("API MISUSE: URL(filePath:) called with a \"file:\" scheme. Input must only contain a path. Dropping \"file:\" scheme.") + #endif + filePath = String(filePath.dropFirst(5))._compressingSlashes() + } else if filePath.utf8.starts(with: "http:".utf8) || filePath.utf8.starts(with: "https:".utf8) { + #if canImport(os) + URL.logger.fault("API MISUSE: URL(filePath:) called with an HTTP URL string. Using URL(string:) instead.") + #endif + guard let httpURL = URL(string: filePath) else { + fatalError("API MISUSE: URL(filePath:) called with an HTTP URL string. URL(string:) returned nil.") + } + self = httpURL + return + } } #endif @@ -2495,6 +2548,14 @@ extension URL { #endif // NO_FILESYSTEM } #endif // FOUNDATION_FRAMEWORK + + // The old .appending(component:) implementation did not actually percent-encode + // "/" for file URLs as the documentation suggests. Many apps accidentally use + // .appending(component: "path/with/slashes") instead of using .appending(path:), + // so changing this behavior would cause breakage. 
+ if isFileURL { + return appending(path: component, directoryHint: directoryHint, encodingSlashes: false) + } return appending(path: component, directoryHint: directoryHint, encodingSlashes: true) } diff --git a/Sources/FoundationEssentials/URL/URLComponents.swift b/Sources/FoundationEssentials/URL/URLComponents.swift index 6eb3a6680..e0fc9d137 100644 --- a/Sources/FoundationEssentials/URL/URLComponents.swift +++ b/Sources/FoundationEssentials/URL/URLComponents.swift @@ -676,7 +676,7 @@ public struct URLComponents: Hashable, Equatable, Sendable { return CFURLCreateWithString(kCFAllocatorDefault, string as CFString, nil) as URL? } #endif - return URL(string: string, relativeTo: nil) + return URL(stringOrEmpty: string, relativeTo: nil) } /// Returns a URL created from the URLComponents relative to a base URL. @@ -690,7 +690,7 @@ public struct URLComponents: Hashable, Equatable, Sendable { return CFURLCreateWithString(kCFAllocatorDefault, string as CFString, base as CFURL) as URL? } #endif - return URL(string: string, relativeTo: base) + return URL(stringOrEmpty: string, relativeTo: base) } /// Returns a URL string created from the URLComponents. diff --git a/Sources/FoundationEssentials/URL/URLParser.swift b/Sources/FoundationEssentials/URL/URLParser.swift index 25a3a2e70..9938b0f43 100644 --- a/Sources/FoundationEssentials/URL/URLParser.swift +++ b/Sources/FoundationEssentials/URL/URLParser.swift @@ -137,10 +137,17 @@ internal enum URLParserKind { case RFC3986 } +internal struct URLParserCompatibility: OptionSet { + let rawValue: UInt8 + static let allowEmptyScheme = URLParserCompatibility(rawValue: 1 << 0) +} + internal protocol URLParserProtocol { static var kind: URLParserKind { get } static func parse(urlString: String, encodingInvalidCharacters: Bool) -> URLParseInfo? + static func parse(urlString: String, encodingInvalidCharacters: Bool, compatibility: URLParserCompatibility) -> URLParseInfo? 
+ static func validate(_ string: (some StringProtocol)?, component: URLComponents.Component) -> Bool static func validate(_ string: (some StringProtocol)?, component: URLComponents.Component, percentEncodingAllowed: Bool) -> Bool @@ -401,15 +408,18 @@ internal struct RFC3986Parser: URLParserProtocol { } /// Fast path used during initial URL buffer parsing. - private static func validate(schemeBuffer: Slice<UnsafeBufferPointer<UInt8>>) -> Bool { - guard let first = schemeBuffer.first, - first >= UInt8(ascii: "A"), + private static func validate(schemeBuffer: Slice<UnsafeBufferPointer<UInt8>>, compatibility: URLParserCompatibility = .init()) -> Bool { + guard let first = schemeBuffer.first else { + return compatibility.contains(.allowEmptyScheme) + } + guard first >= UInt8(ascii: "A"), validate(buffer: schemeBuffer, component: .scheme, percentEncodingAllowed: false) else { return false } return true } + /// Only used by URLComponents, don't need to consider `URLParserCompatibility.allowEmptyScheme` private static func validate(scheme: some StringProtocol) -> Bool { // A valid scheme must start with an ALPHA character. // If first >= "A" and is in schemeAllowed, then first is ALPHA. @@ -593,10 +603,14 @@ internal struct RFC3986Parser: URLParserProtocol { /// Parses a URL string into `URLParseInfo`, with the option to add (or skip) encoding of invalid characters. /// If `encodingInvalidCharacters` is `true`, this function handles encoding of invalid components. static func parse(urlString: String, encodingInvalidCharacters: Bool) -> URLParseInfo? { + return parse(urlString: urlString, encodingInvalidCharacters: encodingInvalidCharacters, compatibility: .init()) + } + + static func parse(urlString: String, encodingInvalidCharacters: Bool, compatibility: URLParserCompatibility) -> URLParseInfo? 
{ #if os(Windows) let urlString = urlString.replacing(UInt8(ascii: "\\"), with: UInt8(ascii: "/")) #endif - guard let parseInfo = parse(urlString: urlString) else { + guard let parseInfo = parse(urlString: urlString, compatibility: compatibility) else { return nil } @@ -690,10 +704,10 @@ internal struct RFC3986Parser: URLParserProtocol { /// Parses a URL string into its component parts and stores these ranges in a `URLParseInfo`. /// This function calls `parse(buffer:)`, then converts the buffer ranges into string ranges. - private static func parse(urlString: String) -> URLParseInfo? { + private static func parse(urlString: String, compatibility: URLParserCompatibility = .init()) -> URLParseInfo? { var string = urlString let bufferParseInfo = string.withUTF8 { - parse(buffer: $0) + parse(buffer: $0, compatibility: compatibility) } guard let bufferParseInfo else { return nil @@ -726,7 +740,7 @@ internal struct RFC3986Parser: URLParserProtocol { /// Parses a URL string into its component parts and stores these ranges in a `URLBufferParseInfo`. /// This function only parses based on delimiters and does not do any encoding. - private static func parse(buffer: UnsafeBufferPointer<UInt8>) -> URLBufferParseInfo? { + private static func parse(buffer: UnsafeBufferPointer<UInt8>, compatibility: URLParserCompatibility = .init()) -> URLBufferParseInfo? { // A URI is either: // 1. scheme ":" hier-part [ "?" query ] [ "#" fragment ] // 2. relative-ref @@ -746,12 +760,12 @@ internal struct RFC3986Parser: URLParserProtocol { let v = buffer[currentIndex] if v == UInt8(ascii: ":") { // Scheme must be at least 1 character, otherwise this is a relative-ref. - if currentIndex != buffer.startIndex { + if currentIndex != buffer.startIndex || compatibility.contains(.allowEmptyScheme) { parseInfo.schemeRange = buffer.startIndex.. 
1) { // The trailing slash is stripped in .path for file system compatibility @@ -589,11 +589,13 @@ final class URLTests : XCTestCase { XCTAssertEqual(appended.absoluteString, "file:///var/mobile/relative/no:slash") XCTAssertEqual(appended.relativePath, "relative/no:slash") - // `appending(component:)` should explicitly treat `component` as a single - // path component, meaning "/" should be encoded to "%2F" before appending + // .appending(component:) should explicitly treat slashComponent as a single + // path component, meaning "/" should be encoded to "%2F" before appending. + // However, the old behavior didn't do this for file URLs, so we maintain the + // old behavior to prevent breakage. appended = url.appending(component: slashComponent, directoryHint: .notDirectory) - checkBehavior(appended.absoluteString, new: "file:///var/mobile/relative/%2Fwith:slash", old: "file:///var/mobile/relative/with:slash") - checkBehavior(appended.relativePath, new: "relative/%2Fwith:slash", old: "relative/with:slash") + XCTAssertEqual(appended.absoluteString, "file:///var/mobile/relative/with:slash") + XCTAssertEqual(appended.relativePath, "relative/with:slash") appended = url.appendingPathComponent(component, isDirectory: false) XCTAssertEqual(appended.absoluteString, "file:///var/mobile/relative/no:slash") @@ -669,7 +671,7 @@ final class URLTests : XCTestCase { checkBehavior(relative.path, new: "/", old: "/..") relative = URL(filePath: "", relativeTo: absolute) - checkBehavior(relative.relativePath, new: "", old: ".") + XCTAssertEqual(relative.relativePath, ".") XCTAssertTrue(relative.hasDirectoryPath) XCTAssertEqual(relative.path, "/absolute")