Skip to content

Commit c732438

Browse files
committed
[stdlib] fix small string usage for Substring.utf8Span
1 parent 8a2ff96 commit c732438

File tree

1 file changed

+54
-11
lines changed

1 file changed

+54
-11
lines changed

stdlib/public/core/UTF8Span.swift

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -249,21 +249,64 @@ extension String {
249249
}
250250

251251
extension Substring {
252+
253+
@available(SwiftStdlib 6.2, *)
254+
private var _span: Span<UTF8.CodeUnit> {
255+
@lifetime(borrow self)
256+
borrowing get {
257+
#if _runtime(_ObjC)
258+
// handle non-UTF8 Objective-C bridging cases here
259+
if !_wholeGuts.isFastUTF8, _wholeGuts._object.hasObjCBridgeableObject {
260+
let base: String.UTF8View = _slice._base.utf8
261+
let first = base._foreignDistance(from: base.startIndex, to: startIndex)
262+
let count = base._foreignDistance(from: startIndex, to: endIndex)
263+
let span = base.span._extracting(first..<(first &+ count))
264+
return unsafe _overrideLifetime(span, borrowing: self)
265+
}
266+
#endif
267+
let first = _slice._startIndex._encodedOffset
268+
let end = _slice._endIndex._encodedOffset
269+
if _wholeGuts.isSmall {
270+
let a = Builtin.addressOfBorrow(self)
271+
let offset = first &+ (2 &* MemoryLayout<String.Index>.stride)
272+
let start = unsafe UnsafePointer<UTF8.CodeUnit>(a).advanced(by: offset)
273+
let span = unsafe Span(_unsafeStart: start, count: end &- first)
274+
return unsafe _overrideLifetime(span, borrowing: self)
275+
}
276+
let isFastUTF8 = _wholeGuts.isFastUTF8
277+
_precondition(isFastUTF8, "Substring must be contiguous UTF8")
278+
var span = unsafe Span(_unsafeElements: _wholeGuts._object.fastUTF8)
279+
span = span._extracting(first..<end)
280+
return unsafe _overrideLifetime(span, borrowing: self)
281+
}
282+
}
283+
284+
/// A UTF8Span over the code units that make up this substring.
285+
///
286+
/// - Note: In the case of bridged UTF16 String instances (on Apple
287+
/// platforms), this property needs to transcode the code units every time
288+
/// it is called.
289+
/// For example, if `string` has the bridged UTF16 representation,
290+
/// for word in string.split(separator: " ") {
291+
/// useSpan(word.utf8Span)
292+
/// }
293+
/// is accidentally quadratic because of this issue. A workaround is to
294+
/// explicitly convert the string into its native UTF8 representation:
295+
/// var nativeString = consume string
296+
/// nativeString.makeContiguousUTF8()
297+
/// for word in nativeString.split(separator: " ") {
298+
/// useSpan(word.utf8Span)
299+
/// }
300+
/// This second option has linear time complexity, as expected.
301+
///
302+
/// - Returns: A `UTF8Span` over the code units of this substring.
303+
///
304+
/// - Complexity: O(1) for native UTF8 Strings, O(n) for bridged UTF16 Strings.
252305
@available(SwiftStdlib 6.2, *)
253306
public var utf8Span: UTF8Span {
254307
@lifetime(borrow self)
255308
borrowing get {
256-
let isKnownASCII = base._guts.isASCII
257-
let utf8 = self.utf8
258-
let span = utf8.span
259-
let result = unsafe UTF8Span(
260-
unchecked: span,
261-
isKnownASCII: isKnownASCII)
262-
return unsafe _overrideLifetime(result, borrowing: self)
309+
unsafe UTF8Span(unchecked: _span, isKnownASCII: base._guts.isASCII)
263310
}
264311
}
265312
}
266-
267-
268-
269-

0 commit comments

Comments
 (0)