|
| 1 | +//===----------------------------------------------------------------------===// |
| 2 | +// |
| 3 | +// This source file is part of the Swift.org open source project |
| 4 | +// |
| 5 | +// Copyright (c) 2025 Apple Inc. and the Swift project authors |
| 6 | +// Licensed under Apache License v2.0 with Runtime Library Exception |
| 7 | +// |
| 8 | +// See https://swift.org/LICENSE.txt for license information |
| 9 | +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| 10 | +// |
| 11 | +//===----------------------------------------------------------------------===// |
| 12 | + |
| 13 | +#if canImport(FoundationEssentials) |
| 14 | +import FoundationEssentials |
| 15 | +#endif |
| 16 | +internal import _FoundationICU |
| 17 | + |
private extension String.Encoding {
    /// The converter name handed to `ucnv_open` for this encoding.
    ///
    /// Evaluates to `nil` for every encoding that has no entry in the table
    /// below.
    var _icuConverterName: String? {
        // TODO: Replace this with forthcoming(?) public property such as https://github.com/swiftlang/swift-foundation/pull/1243
        switch self {
        // Unicode transformation formats.
        case .utf8:
            return "UTF-8"
        case .unicode:
            return "UTF-16"
        case .utf16BigEndian:
            return "UTF-16BE"
        case .utf16LittleEndian:
            return "UTF-16LE"
        case .utf32:
            return "UTF-32"
        case .utf32BigEndian:
            return "UTF-32BE"
        case .utf32LittleEndian:
            return "UTF-32LE"

        // ASCII, ISO 8859 and Mac OS Roman.
        case .ascii:
            return "US-ASCII"
        case .isoLatin1:
            return "ISO-8859-1"
        case .isoLatin2:
            return "ISO-8859-2"
        case .macOSRoman:
            return "macintosh"

        // Windows code pages.
        case .windowsCP1250:
            return "windows-1250"
        case .windowsCP1251:
            return "windows-1251"
        case .windowsCP1252:
            return "windows-1252"
        case .windowsCP1253:
            return "windows-1253"
        case .windowsCP1254:
            return "windows-1254"

        // Japanese encodings.
        case .japaneseEUC:
            return "EUC-JP"
        case .shiftJIS:
            return "Shift_JIS"
        case .iso2022JP:
            return "ISO-2022-JP"

        // Anything else has no converter name in this table.
        default:
            return nil
        }
    }
}
| 45 | + |
extension ICU {
    /// Owns a single ICU `UConverter` opened for one `String.Encoding`.
    ///
    /// The raw converter pointer is stored inside a `LockedState`, and every
    /// use of it in this file goes through `withLock`; that is the basis for
    /// the `@unchecked Sendable` conformance.
    final class StringConverter: @unchecked Sendable {
        /// The `UConverter*` obtained from `ucnv_open`, guarded by a lock.
        private let _converter: LockedState<OpaquePointer>

        /// The encoding this converter was opened for.
        let encoding: String.Encoding

        /// Opens an ICU converter for `encoding`.
        ///
        /// Fails (yields `nil`) when the encoding has no ICU converter name,
        /// or when `ucnv_open` returns no converter or reports a
        /// non-success status.
        init?(encoding: String.Encoding) {
            guard let icuName = encoding._icuConverterName else {
                return nil
            }

            var openStatus: UErrorCode = U_ZERO_ERROR
            let opened = ucnv_open(icuName, &openStatus)
            guard let handle = opened, openStatus.isSuccess else {
                return nil
            }

            self.encoding = encoding
            self._converter = LockedState(initialState: handle)
        }

        /// Closes the underlying ICU converter.
        deinit {
            _converter.withLock { handle in
                ucnv_close(handle)
            }
        }
    }
}
| 69 | + |
extension ICU.StringConverter {
    /// Decodes `data` from this converter's encoding into a `String`.
    ///
    /// - Parameter data: The encoded bytes to convert.
    /// - Returns: The decoded string, or `nil` when the input is too large to
    ///   be described with ICU's 32-bit lengths or when
    ///   `_withResizingUCharBuffer` yields no result.
    func decode(data: Data) -> String? {
        return _converter.withLock { (converter) -> String? in
            defer {
                // The converter is cached and reused across calls, so clear
                // its to-Unicode state before releasing the lock.
                ucnv_resetToUnicode(converter)
            }

            // ICU takes `int32_t` lengths. Fail instead of trapping when the
            // byte count or the initial capacity estimate does not fit.
            guard let srcLength = CInt(exactly: data.count) else {
                return nil
            }
            let minCharSize = CInt(ucnv_getMinCharSize(converter))
            let (scaled, scaledOverflow) = srcLength.multipliedReportingOverflow(by: minCharSize)
            let (initCapacity, capacityOverflow) = scaled.addingReportingOverflow(1)
            guard !scaledOverflow, !capacityOverflow else {
                return nil
            }

            return _withResizingUCharBuffer(initialSize: initCapacity) { (dest, capacity, status) in
                return data.withUnsafeBytes { src in
                    ucnv_toUChars(
                        converter,
                        dest,
                        capacity,
                        src.baseAddress,
                        srcLength,
                        &status
                    )
                }
            }
        }
    }

    /// Encodes `string` into this converter's encoding.
    ///
    /// - Parameters:
    ///   - string: The string to convert.
    ///   - lossy: When `true`, the converter is configured with
    ///     `UCNV_FROM_U_CALLBACK_SUBSTITUTE` and a one-unit substitution
    ///     string; when `false`, it is configured with
    ///     `UCNV_FROM_U_CALLBACK_STOP`.
    /// - Returns: The encoded bytes, or `nil` when configuring the converter
    ///   fails, when the conversion reports a non-success status, or when the
    ///   input is too large to be described with ICU's 32-bit lengths.
    func encode(string: String, allowLossyConversion lossy: Bool) -> Data? {
        return _converter.withLock { (converter) -> Data? in
            defer {
                // The converter is cached and reused across calls, so clear
                // its from-Unicode state before releasing the lock.
                ucnv_resetFromUnicode(converter)
            }

            // Configure the error behaviour first, so that a failure here
            // happens before any buffer has been allocated.
            var status: UErrorCode = U_ZERO_ERROR
            if lossy {
                // NOTE(review): 0xFF is used for `.ascii`. ICU documents that
                // `ucnv_setSubstString` converts the substitution string into
                // the target charset, and U+00FF is presumably not
                // representable in US-ASCII; confirm this path succeeds.
                var lossyChar: UChar = encoding == .ascii ? 0xFF : 0x3F
                ucnv_setSubstString(
                    converter,
                    &lossyChar,
                    1,
                    &status
                )
                guard status.isSuccess else { return nil }

                ucnv_setFromUCallBack(
                    converter,
                    UCNV_FROM_U_CALLBACK_SUBSTITUTE,
                    nil, // newContext
                    nil, // oldAction
                    nil, // oldContext
                    &status
                )
                guard status.isSuccess else { return nil }
            } else {
                ucnv_setFromUCallBack(
                    converter,
                    UCNV_FROM_U_CALLBACK_STOP,
                    nil, // newContext
                    nil, // oldAction
                    nil, // oldContext
                    &status
                )
                guard status.isSuccess else { return nil }
            }

            let utf16Rep = string.utf16
            let unitCount = utf16Rep.count

            // Size the destination the way ICU's
            // `UCNV_GET_MAX_BYTES_FOR_STRING` does: (length + 10) * maxCharSize.
            // `ucnv_getMaxCharSize` alone does not account for a BOM or for
            // escape sequences, so `length * maxCharSize + 1` can be too
            // small for converters such as "UTF-16" or "ISO-2022-JP".
            let maxCharSize = Int(ucnv_getMaxCharSize(converter))
            let (paddedCount, paddingOverflow) = unitCount.addingReportingOverflow(10)
            let (byteCount, sizeOverflow) = paddedCount.multipliedReportingOverflow(by: maxCharSize)
            guard !paddingOverflow, !sizeOverflow,
                  let srcLength = CInt(exactly: unitCount),
                  let capacity = CInt(exactly: byteCount) else {
                // ICU takes `int32_t` lengths; fail instead of trapping.
                return nil
            }

            let uchars = UnsafeMutableBufferPointer<UChar>.allocate(capacity: unitCount)
            _ = uchars.initialize(fromContentsOf: utf16Rep)
            defer {
                uchars.deallocate()
            }

            let dest = UnsafeMutableRawPointer.allocate(
                byteCount: byteCount,
                alignment: MemoryLayout<CChar>.alignment
            )

            let actualLength = ucnv_fromUChars(
                converter,
                dest,
                capacity,
                uchars.baseAddress,
                srcLength,
                &status
            )
            guard status.isSuccess else {
                // Ownership of `dest` was never handed to a `Data`, so it
                // must be released here to avoid leaking it.
                dest.deallocate()
                return nil
            }

            // `Data` takes ownership of `dest` and frees it through the
            // custom deallocator.
            return Data(
                bytesNoCopy: dest,
                count: Int(actualLength),
                deallocator: .custom({ pointer, _ in pointer.deallocate() })
            )
        }
    }
}
| 163 | + |
extension ICU.StringConverter {
    /// Converters created so far, keyed by encoding and guarded by a lock.
    nonisolated(unsafe) static private var _converters: LockedState<[String.Encoding: ICU.StringConverter]> = .init(initialState: [:])

    /// Returns the converter for `encoding`, creating and caching it on
    /// first use.
    ///
    /// - Parameter encoding: The encoding a converter is needed for.
    /// - Returns: The cached or newly created converter, or `nil` when
    ///   `ICU.StringConverter(encoding:)` fails. Failures are not cached.
    static func converter(for encoding: String.Encoding) -> ICU.StringConverter? {
        return _converters.withLock { (cache) -> ICU.StringConverter? in
            if let cached = cache[encoding] {
                return cached
            }
            guard let created = ICU.StringConverter(encoding: encoding) else {
                return nil
            }
            cache[encoding] = created
            return created
        }
    }
}
| 180 | + |
| 181 | + |
/// ICU-backed replacement for `_icuMakeStringFromBytes(_:encoding:)`.
///
/// - Parameters:
///   - bytes: The encoded bytes. They are wrapped without copying and are
///     only read for the duration of this call.
///   - encoding: The encoding `bytes` is expressed in.
/// - Returns: The decoded string, or `nil` when no converter is available
///   for `encoding` or when decoding fails.
@_dynamicReplacement(for: _icuMakeStringFromBytes(_:encoding:))
func _icuMakeStringFromBytes_impl(_ bytes: UnsafeBufferPointer<UInt8>, encoding: String.Encoding) -> String? {
    guard let converter = ICU.StringConverter.converter(for: encoding) else {
        return nil
    }

    let data: Data
    if let pointer = bytes.baseAddress {
        // `.none` leaves ownership of the memory with the caller.
        data = Data(
            bytesNoCopy: UnsafeMutableRawPointer(mutating: pointer),
            count: bytes.count,
            deallocator: .none
        )
    } else {
        // A buffer without a base address is necessarily empty. Send it
        // through the same decode path as an empty buffer that does have a
        // base address, so both kinds of empty input behave the same.
        data = Data()
    }
    return converter.decode(data: data)
}
| 195 | + |
/// ICU-backed replacement for
/// `_icuStringEncodingConvert(string:using:allowLossyConversion:)`.
///
/// - Parameters:
///   - string: The string to encode.
///   - encoding: The target encoding.
///   - allowLossyConversion: Forwarded to the converter's
///     `encode(string:allowLossyConversion:)`.
/// - Returns: The encoded bytes, or `nil` when no converter is available for
///   `encoding` or when encoding fails.
@_dynamicReplacement(for: _icuStringEncodingConvert(string:using:allowLossyConversion:))
func _icuStringEncodingConvert_impl(string: String, using encoding: String.Encoding, allowLossyConversion: Bool) -> Data? {
    // Optional chaining yields nil when there is no converter for `encoding`.
    let icuConverter = ICU.StringConverter.converter(for: encoding)
    return icuConverter?.encode(string: string, allowLossyConversion: allowLossyConversion)
}
0 commit comments