Skip to content

Commit 2afb6b5

Browse files
authored
Merge pull request #82077 from glessard/issue81931
[stdlib] fix utf8Span accessors for small strings
2 parents c146993 + 694e94b commit 2afb6b5

File tree

2 files changed

+136
-18
lines changed

2 files changed

+136
-18
lines changed

stdlib/public/core/UTF8Span.swift

Lines changed: 94 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -201,36 +201,112 @@ extension String {
201201
}
202202

203203
@available(SwiftStdlib 6.2, *)
204-
public var utf8Span: UTF8Span {
204+
private var _span: Span<UTF8.CodeUnit> {
205205
@lifetime(borrow self)
206206
borrowing get {
207-
let isKnownASCII = _guts.isASCII
208-
let utf8 = self.utf8
209-
let span = utf8.span
210-
let result = unsafe UTF8Span(
211-
unchecked: span,
212-
isKnownASCII: isKnownASCII)
213-
return unsafe _overrideLifetime(result, borrowing: self)
207+
#if _runtime(_ObjC)
208+
// handle non-UTF8 Objective-C bridging cases here
209+
if !_guts.isFastUTF8, _guts._object.hasObjCBridgeableObject {
210+
let storage = _guts._getOrAllocateAssociatedStorage()
211+
let (start, count) = unsafe (storage.start, storage.count)
212+
let span = unsafe Span(_unsafeStart: start, count: count)
213+
return unsafe _overrideLifetime(span, borrowing: self)
214+
}
215+
#endif
216+
let count = _guts.count
217+
if _guts.isSmall {
218+
let a = Builtin.addressOfBorrow(self)
219+
let address = unsafe UnsafePointer<UTF8.CodeUnit>(a)
220+
let span = unsafe Span(_unsafeStart: address, count: count)
221+
return unsafe _overrideLifetime(span, borrowing: self)
222+
}
223+
let isFastUTF8 = _guts.isFastUTF8
224+
_precondition(isFastUTF8, "String must be contiguous UTF8")
225+
let buffer = unsafe _guts._object.fastUTF8
226+
let span = unsafe Span(_unsafeElements: buffer)
227+
return unsafe _overrideLifetime(span, borrowing: self)
214228
}
215229
}
216-
}
217230

218-
extension Substring {
231+
/// A UTF8Span over the code units that make up this string.
232+
///
233+
/// - Note: In the case of bridged UTF16 String instances (on Apple
234+
/// platforms), this property transcodes the code units the first time
235+
/// it is called. The transcoded buffer is cached, and subsequent calls
236+
/// to `utf8Span` can reuse the buffer.
237+
///
238+
/// - Returns: A `UTF8Span` over the code units of this String.
239+
///
240+
/// - Complexity: O(1) for native UTF8 Strings,
241+
/// amortized O(1) for bridged UTF16 Strings.
219242
@available(SwiftStdlib 6.2, *)
220243
public var utf8Span: UTF8Span {
221244
@lifetime(borrow self)
222245
borrowing get {
223-
let isKnownASCII = base._guts.isASCII
224-
let utf8 = self.utf8
225-
let span = utf8.span
226-
let result = unsafe UTF8Span(
227-
unchecked: span,
228-
isKnownASCII: isKnownASCII)
229-
return unsafe _overrideLifetime(result, borrowing: self)
246+
unsafe UTF8Span(unchecked: _span, isKnownASCII: _guts.isASCII)
230247
}
231248
}
232249
}
233250

251+
extension Substring {
234252

253+
@available(SwiftStdlib 6.2, *)
254+
private var _span: Span<UTF8.CodeUnit> {
255+
@lifetime(borrow self)
256+
borrowing get {
257+
#if _runtime(_ObjC)
258+
// handle non-UTF8 Objective-C bridging cases here
259+
if !_wholeGuts.isFastUTF8, _wholeGuts._object.hasObjCBridgeableObject {
260+
let base: String.UTF8View = _slice._base.utf8
261+
let first = base._foreignDistance(from: base.startIndex, to: startIndex)
262+
let count = base._foreignDistance(from: startIndex, to: endIndex)
263+
let span = base.span._extracting(first..<(first &+ count))
264+
return unsafe _overrideLifetime(span, borrowing: self)
265+
}
266+
#endif
267+
let first = _slice._startIndex._encodedOffset
268+
let end = _slice._endIndex._encodedOffset
269+
if _wholeGuts.isSmall {
270+
let a = Builtin.addressOfBorrow(self)
271+
let offset = first &+ (2 &* MemoryLayout<String.Index>.stride)
272+
let start = unsafe UnsafePointer<UTF8.CodeUnit>(a).advanced(by: offset)
273+
let span = unsafe Span(_unsafeStart: start, count: end &- first)
274+
return unsafe _overrideLifetime(span, borrowing: self)
275+
}
276+
let isFastUTF8 = _wholeGuts.isFastUTF8
277+
_precondition(isFastUTF8, "Substring must be contiguous UTF8")
278+
var span = unsafe Span(_unsafeElements: _wholeGuts._object.fastUTF8)
279+
span = span._extracting(first..<end)
280+
return unsafe _overrideLifetime(span, borrowing: self)
281+
}
282+
}
235283

236-
284+
/// A UTF8Span over the code units that make up this substring.
285+
///
286+
/// - Note: In the case of bridged UTF16 String instances (on Apple
287+
/// platforms), this property needs to transcode the code units every time
288+
/// it is called.
289+
/// For example, if `string` has the bridged UTF16 representation,
290+
/// for word in string.split(separator: " ") {
291+
/// useSpan(word.utf8Span)
292+
/// }
293+
/// is accidentally quadratic because of this repeated transcoding. A workaround is to
294+
/// explicitly convert the string into its native UTF8 representation:
295+
/// var nativeString = consume string
296+
/// nativeString.makeContiguousUTF8()
297+
/// for word in nativeString.split(separator: " ") {
298+
/// useSpan(word.utf8Span)
299+
/// }
300+
/// This second option has linear time complexity, as expected.
301+
///
302+
/// - Returns: A `UTF8Span` over the code units of this Substring.
303+
///
304+
/// - Complexity: O(1) for native UTF8 Strings, O(n) for bridged UTF16 Strings.
305+
@available(SwiftStdlib 6.2, *)
306+
public var utf8Span: UTF8Span {
307+
@lifetime(borrow self)
308+
borrowing get {
309+
unsafe UTF8Span(unchecked: _span, isKnownASCII: base._guts.isASCII)
310+
}
311+
}
312+
}

test/stdlib/Span/StringUTF8SpanProperty.swift

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,45 @@ suite.test("Span from Large Native String's Substring")
8585
expectEqual(span[i], u[i])
8686
}
8787
}
88+
89+
suite.test("Span from String.utf8Span")
90+
.require(.stdlib_6_2).code {
91+
guard #available(SwiftStdlib 6.2, *) else { return }
92+
93+
let s = String(200)
94+
let utf8span = s.utf8Span
95+
let span1 = utf8span.span
96+
let utf8view = s.utf8
97+
let span2 = utf8view.span
98+
expectEqual(span1.count, span2.count)
99+
for (i,j) in zip(span1.indices, span2.indices) {
100+
expectEqual(span1[i], span2[j])
101+
}
102+
}
103+
104+
suite.test("UTF8Span from Span")
105+
.require(.stdlib_6_2).code {
106+
guard #available(SwiftStdlib 6.2, *) else { return }
107+
108+
let s = String(200).utf8
109+
let span1 = s.span
110+
guard let utf8 = expectNotNil(try? UTF8Span(validating: span1)) else { return }
111+
112+
let span2 = utf8.span
113+
expectTrue(span1.isIdentical(to: span2))
114+
}
115+
116+
suite.test("Span from Substring.utf8Span")
117+
.require(.stdlib_6_2).code {
118+
guard #available(SwiftStdlib 6.2, *) else { return }
119+
120+
let s = String(22000).dropFirst().dropLast()
121+
let utf8span = s.utf8Span
122+
let span1 = utf8span.span
123+
let utf8view = s.utf8
124+
let span2 = utf8view.span
125+
expectEqual(span1.count, span2.count)
126+
for (i,j) in zip(span1.indices, span2.indices) {
127+
expectEqual(span1[i], span2[j])
128+
}
129+
}

0 commit comments

Comments
 (0)