Skip to content

Commit 338b2ed

Browse files
committed
Enable string conversion in EUC-JP.
Background: EUC-JP is not supported by OSS CoreFoundation, while it is supported by the macOS Foundation framework. See #1016. This commit resolves the issue by calling the ICU API when necessary.
1 parent 0f62460 commit 338b2ed

File tree

4 files changed

+352
-4
lines changed

4 files changed

+352
-4
lines changed

Sources/FoundationEssentials/String/String+IO.swift

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ dynamic public func _cfMakeStringFromBytes(_ bytes: UnsafeBufferPointer<UInt8>,
2424
// Provide swift-corelibs-foundation with an entry point to convert some bytes into a String
2525
return nil
2626
}
27+
28+
dynamic package func _icuMakeStringFromBytes(_ bytes: UnsafeBufferPointer<UInt8>, encoding: String.Encoding) -> String? {
    // Default body: no conversion is attempted here, so the result is always nil.
    // Concrete implementation is provided by FoundationInternationalization.
    nil
}
2732
#endif
2833

2934
@available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
@@ -202,8 +207,14 @@ extension String {
202207
return nil
203208
}
204209
#else
205-
if let string = (bytes.withContiguousStorageIfAvailable({ _cfMakeStringFromBytes($0, encoding: encoding.rawValue) }) ??
206-
Array(bytes).withUnsafeBufferPointer({ _cfMakeStringFromBytes($0, encoding: encoding.rawValue) })) {
210+
func makeString(from bytes: UnsafeBufferPointer<UInt8>) -> String? {
211+
return (
212+
_cfMakeStringFromBytes(bytes, encoding: encoding.rawValue) ??
213+
_icuMakeStringFromBytes(bytes, encoding: encoding)
214+
)
215+
}
216+
if let string = (bytes.withContiguousStorageIfAvailable({ makeString(from: $0) }) ??
217+
Array(bytes).withUnsafeBufferPointer({ makeString(from: $0) })) {
207218
self = string
208219
} else {
209220
return nil

Sources/FoundationEssentials/String/StringProtocol+Essentials.swift

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ dynamic public func _cfStringEncodingConvert(string: String, using encoding: UIn
9191
// Dynamically replaced by swift-corelibs-foundation to implement encodings that we do not have Swift replacements for, yet
9292
return nil
9393
}
94+
95+
dynamic package func _icuStringEncodingConvert(string: String, using encoding: String.Encoding, allowLossyConversion: Bool) -> Data? {
    // Default body: no conversion is attempted here, so the result is always nil.
    // Concrete implementation is provided by FoundationInternationalization.
    nil
}
9499
#endif
95100

96101
@available(FoundationPreview 0.4, *)
@@ -255,8 +260,12 @@ extension String {
255260
// Other encodings, defer to the CoreFoundation implementation
256261
return _ns.data(using: encoding.rawValue, allowLossyConversion: allowLossyConversion)
257262
#else
258-
// Attempt an up-call into swift-corelibs-foundation, which can defer to the CoreFoundation implementation
259-
return _cfStringEncodingConvert(string: self, using: encoding.rawValue, allowLossyConversion: allowLossyConversion)
263+
return (
264+
// Attempt an up-call into swift-corelibs-foundation, which can defer to the CoreFoundation implementation
265+
_cfStringEncodingConvert(string: self, using: encoding.rawValue, allowLossyConversion: allowLossyConversion) ??
266+
// Or attempt an up-call into ICU via FoundationInternationalization
267+
_icuStringEncodingConvert(string: self, using: encoding, allowLossyConversion: allowLossyConversion)
268+
)
260269
#endif
261270
}
262271
}
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#if canImport(FoundationEssentials)
14+
import FoundationEssentials
15+
#endif
16+
internal import _FoundationICU
17+
18+
private extension String.Encoding {
    /// The converter name handed to ICU's `ucnv_open` for this encoding, or
    /// `nil` when the encoding has no entry in the table below.
    var _icuConverterName: String? {
        // TODO: Replace this with forthcoming(?) public property such as https://github.com/swiftlang/swift-foundation/pull/1243
        switch self {
        // Unicode transformation formats
        case .utf8:
            return "UTF-8"
        case .unicode:
            return "UTF-16"
        case .utf16BigEndian:
            return "UTF-16BE"
        case .utf16LittleEndian:
            return "UTF-16LE"
        case .utf32:
            return "UTF-32"
        case .utf32BigEndian:
            return "UTF-32BE"
        case .utf32LittleEndian:
            return "UTF-32LE"

        // ASCII, ISO 8859 and Mac Roman
        case .ascii:
            return "US-ASCII"
        case .isoLatin1:
            return "ISO-8859-1"
        case .isoLatin2:
            return "ISO-8859-2"
        case .macOSRoman:
            return "macintosh"

        // Windows code pages
        case .windowsCP1250:
            return "windows-1250"
        case .windowsCP1251:
            return "windows-1251"
        case .windowsCP1252:
            return "windows-1252"
        case .windowsCP1253:
            return "windows-1253"
        case .windowsCP1254:
            return "windows-1254"

        // Japanese encodings
        case .japaneseEUC:
            return "EUC-JP"
        case .shiftJIS:
            return "Shift_JIS"
        case .iso2022JP:
            return "ISO-2022-JP"

        default:
            return nil
        }
    }
}
45+
46+
extension ICU {
    /// Wraps a single ICU converter handle and funnels every use of it
    /// through a lock.
    final class StringConverter: @unchecked Sendable {
        /// The `UConverter*` opened in `init`; only touched inside `withLock`.
        private let _converter: LockedState<OpaquePointer>

        /// The encoding this converter was opened for.
        let encoding: String.Encoding

        /// Opens an ICU converter for `encoding`.
        ///
        /// Initialization fails when the encoding has no ICU converter name,
        /// when `ucnv_open` yields no handle, or when it reports a failing status.
        init?(encoding: String.Encoding) {
            guard let icuName = encoding._icuConverterName else { return nil }

            var openStatus: UErrorCode = U_ZERO_ERROR
            let handle = ucnv_open(icuName, &openStatus)
            guard let handle, openStatus.isSuccess else { return nil }

            self.encoding = encoding
            self._converter = LockedState(initialState: handle)
        }

        /// Closes the ICU converter handle.
        deinit {
            _converter.withLock { handle in
                ucnv_close(handle)
            }
        }
    }
}
69+
70+
extension ICU.StringConverter {
71+
func decode(data: Data) -> String? {
72+
return _converter.withLock { converter in
73+
defer {
74+
ucnv_resetToUnicode(converter)
75+
}
76+
77+
let srcLength = CInt(data.count)
78+
let initCapacity = srcLength * CInt(ucnv_getMinCharSize(converter)) + 1
79+
return _withResizingUCharBuffer(initialSize: initCapacity) { (dest, capacity, status) in
80+
return data.withUnsafeBytes { src in
81+
ucnv_toUChars(
82+
converter,
83+
dest,
84+
capacity,
85+
src.baseAddress,
86+
srcLength,
87+
&status
88+
)
89+
}
90+
}
91+
}
92+
}
93+
94+
func encode(string: String, allowLossyConversion lossy: Bool) -> Data? {
95+
return _converter.withLock { (converter) -> Data? in
96+
defer {
97+
ucnv_resetFromUnicode(converter)
98+
}
99+
100+
let utf16Rep = string.utf16
101+
let uchars = UnsafeMutableBufferPointer<UChar>.allocate(capacity: utf16Rep.count)
102+
_ = uchars.initialize(fromContentsOf: utf16Rep)
103+
defer {
104+
uchars.deallocate()
105+
}
106+
107+
let srcLength = uchars.count
108+
let capacity = srcLength * Int(ucnv_getMaxCharSize(converter)) + 1
109+
let dest = UnsafeMutableRawPointer.allocate(
110+
byteCount: capacity,
111+
alignment: MemoryLayout<CChar>.alignment
112+
)
113+
114+
var status: UErrorCode = U_ZERO_ERROR
115+
if lossy {
116+
var lossyChar: UChar = encoding == .ascii ? 0xFF : 0x3F
117+
ucnv_setSubstString(
118+
converter,
119+
&lossyChar,
120+
1,
121+
&status
122+
)
123+
guard status.isSuccess else { return nil }
124+
125+
ucnv_setFromUCallBack(
126+
converter,
127+
UCNV_FROM_U_CALLBACK_SUBSTITUTE,
128+
nil, // newContext
129+
nil, // oldAction
130+
nil, // oldContext
131+
&status
132+
)
133+
guard status.isSuccess else { return nil }
134+
} else {
135+
ucnv_setFromUCallBack(
136+
converter,
137+
UCNV_FROM_U_CALLBACK_STOP,
138+
nil, // newContext
139+
nil, // oldAction
140+
nil, // oldContext
141+
&status
142+
)
143+
guard status.isSuccess else { return nil }
144+
}
145+
146+
let actualLength = ucnv_fromUChars(
147+
converter,
148+
dest,
149+
CInt(capacity),
150+
uchars.baseAddress,
151+
CInt(srcLength),
152+
&status
153+
)
154+
guard status.isSuccess else { return nil }
155+
return Data(
156+
bytesNoCopy: dest,
157+
count: Int(actualLength),
158+
deallocator: .custom({ pointer, _ in pointer.deallocate() })
159+
)
160+
}
161+
}
162+
}
163+
164+
extension ICU.StringConverter {
    /// Shared cache of converters keyed by encoding. The binding itself is
    /// never reassigned; all mutation goes through the lock.
    static private let _converters = LockedState<[String.Encoding: ICU.StringConverter]>(initialState: [:])

    /// Returns the cached converter for `encoding`, creating and caching one
    /// on first use.
    ///
    /// - Parameter encoding: The encoding to look up.
    /// - Returns: The converter, or `nil` when `ICU.StringConverter(encoding:)`
    ///   fails. Failures are not cached, so a later call tries again.
    static func converter(for encoding: String.Encoding) -> ICU.StringConverter? {
        return _converters.withLock { cache in
            if let cached = cache[encoding] {
                return cached
            }
            guard let created = ICU.StringConverter(encoding: encoding) else {
                return nil
            }
            cache[encoding] = created
            return created
        }
    }
}
180+
181+
182+
/// ICU-backed replacement for `_icuMakeStringFromBytes(_:encoding:)`.
///
/// - Parameters:
///   - bytes: The encoded bytes. They are wrapped without copying and are
///     only used for the duration of this call.
///   - encoding: The encoding of `bytes`.
/// - Returns: The result of `ICU.StringConverter.decode(data:)`, or `nil` when
///   no converter is available for `encoding`.
@_dynamicReplacement(for: _icuMakeStringFromBytes(_:encoding:))
func _icuMakeStringFromBytes_impl(_ bytes: UnsafeBufferPointer<UInt8>, encoding: String.Encoding) -> String? {
    guard let converter = ICU.StringConverter.converter(for: encoding) else {
        return nil
    }
    guard let pointer = bytes.baseAddress else {
        // A buffer without a base address has a count of zero. Decode it as
        // empty input so the result matches that of an empty buffer that
        // happens to carry a base address, instead of failing outright.
        return converter.decode(data: Data())
    }
    // `.none`: the caller keeps ownership of the bytes; nothing is freed here.
    let data = Data(
        bytesNoCopy: UnsafeMutableRawPointer(mutating: pointer),
        count: bytes.count,
        deallocator: .none
    )
    return converter.decode(data: data)
}
195+
196+
/// ICU-backed replacement for
/// `_icuStringEncodingConvert(string:using:allowLossyConversion:)`.
///
/// Yields `nil` when no converter is available for `encoding`; otherwise
/// forwards to that converter's `encode(string:allowLossyConversion:)`.
@_dynamicReplacement(for: _icuStringEncodingConvert(string:using:allowLossyConversion:))
func _icuStringEncodingConvert_impl(string: String, using encoding: String.Encoding, allowLossyConversion: Bool) -> Data? {
    ICU.StringConverter
        .converter(for: encoding)?
        .encode(string: string, allowLossyConversion: allowLossyConversion)
}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#if FOUNDATION_FRAMEWORK
14+
@testable import Foundation
15+
#else
16+
@testable import FoundationEssentials
17+
@testable import FoundationInternationalization
18+
#endif // FOUNDATION_FRAMEWORK
19+
20+
#if canImport(TestSupport)
21+
import TestSupport
22+
#endif
23+
24+
final class StringConverterTests: XCTestCase {
    /// Asserts both directions of a conversion: `string` must encode to exactly
    /// `data` in `encoding`, and `data` must decode back to `string`.
    private func _test_roundTripConversion(
        string: String,
        data: Data,
        encoding: String._Encoding,
        file: StaticString = #filePath,
        line: UInt = #line
    ) {
        let encoded = string.data(using: encoding)
        XCTAssertEqual(encoded, data, "Failed to convert string to data.", file: file, line: line)

        let decoded = String(data: data, encoding: encoding)
        XCTAssertEqual(string, decoded, "Failed to convert data to string.", file: file, line: line)
    }

    /// Regression test for https://github.com/swiftlang/swift-foundation/issues/1016.
    func test_japaneseEUC() {
        // ASCII
        _test_roundTripConversion(
            string: "ABC",
            data: Data([0x41, 0x42, 0x43]),
            encoding: .japaneseEUC
        )

        // Plane 1 Row 1
        _test_roundTripConversion(
            string: "、。◇",
            data: Data([0xA1, 0xA2, 0xA1, 0xA3, 0xA1, 0xFE]),
            encoding: .japaneseEUC
        )

        // Plane 1 Row 4 (Hiragana)
        _test_roundTripConversion(
            string: "ひらがな",
            data: Data([0xA4, 0xD2, 0xA4, 0xE9, 0xA4, 0xAC, 0xA4, 0xCA]),
            encoding: .japaneseEUC
        )

        // Plane 1 Row 5 (Katakana)
        _test_roundTripConversion(
            string: "ヴヵヶ",
            data: Data([0xA5, 0xF4, 0xA5, 0xF5, 0xA5, 0xF6]),
            encoding: .japaneseEUC
        )

        // Plane 1 Row 6 (Greek Alphabets)
        _test_roundTripConversion(
            string: "Σπ",
            data: Data([0xA6, 0xB2, 0xA6, 0xD0]),
            encoding: .japaneseEUC
        )

        // Basic Kanji
        _test_roundTripConversion(
            string: "日本",
            data: Data([0xC6, 0xFC, 0xCB, 0xDC]),
            encoding: .japaneseEUC
        )

        // Amendment by JIS83/JIS90
        _test_roundTripConversion(
            string: "扉⇔穴",
            data: Data([0xC8, 0xE2, 0xA2, 0xCE, 0xB7, 0xEA]),
            encoding: .japaneseEUC
        )

        // Unsupported characters: strict conversion fails, lossy conversion
        // replaces the emoji with "?".
        let sushi = "Sushi🍣"
        XCTAssertNil(sushi.data(using: String._Encoding.japaneseEUC))

        let lossyResult = sushi.data(using: String._Encoding.japaneseEUC, allowLossyConversion: true)
        XCTAssertEqual(lossyResult, "Sushi?".data(using: .utf8))
    }
}
126+

0 commit comments

Comments
 (0)