Skip to content

Commit 0cf3d34

Browse files
committed
Optimize repeating: initializers on concrete SIMD types
Previously these were not transparent, and so were not generally inlined in debug builds; inlining them is basically always a win because LLVM knows how to optimize them to single instructions + some debugging bookkeeping (which we should also figure out how to eliminate). Add FileCheck tests that ensure that we get optimal -O codegen and near-optimal -Onone codegen.
1 parent 4a101c4 commit 0cf3d34

7 files changed

+408
-4
lines changed

stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,38 @@ vectorscalarCounts = storagescalarCounts + [3]
2929
@available(SwiftStdlib 5.3, *)
3030
% end
3131
extension SIMD${n} where Scalar == ${Scalar} {
32-
@_alwaysEmitIntoClient
32+
@_alwaysEmitIntoClient @_transparent
3333
internal init(_ _builtin: Builtin.${Builtin}) {
3434
_storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
3535
}
3636

37+
/// Creates a vector with `scalar` in every lane.
///
/// Built directly from two builtin vector operations (insert into lane 0,
/// then a shuffle with an all-zero mask) so that, once inlined, the backend
/// sees a plain splat pattern.
///
/// - Parameter scalar: The value to place in each lane.
@_alwaysEmitIntoClient @_transparent
38+
public init(repeating scalar: ${Scalar}) {
39+
// Start from an all-zero builtin vector and write `scalar` at index 0.
let asVector = Builtin.insertelement_${Builtin}_FPIEEE${bits}_Int32(
40+
Builtin.zeroInitializer(), scalar._value, Builtin.zeroInitializer()
41+
)
42+
// An all-zero shuffle mask selects element 0 of `asVector` for every
// output lane, i.e. it broadcasts `scalar`.
let repeated = Builtin.shufflevector_${Builtin}_Vec${storageN}xInt32(
43+
asVector, Builtin.zeroInitializer(), Builtin.zeroInitializer()
44+
)
45+
%if n != 3:
46+
self.init(repeated)
47+
%else:
48+
// For the 3-element vector, overwrite storage index 3 with zero before
// wrapping it (presumably the padding lane of a 4-wide storage vector;
// confirm against the storage type).
self.init(Builtin.insertelement_${Builtin}_FPIEEE${bits}_Int32(
49+
repeated, Builtin.zeroInitializer(), Int32(3)._value
50+
))
51+
%end
52+
}
53+
54+
% if n >= 4:
55+
/// Creates a vector by concatenating two half-width vectors.
///
/// Only generated when `n >= 4` (see the enclosing gyb conditional).
///
/// - Parameters:
///   - lowHalf: The half-width vector passed as the first tuple element.
///   - highHalf: The half-width vector passed as the second tuple element.
@_alwaysEmitIntoClient @_transparent
56+
public init(
57+
lowHalf: SIMD${n//2}<${Scalar}>,
58+
highHalf: SIMD${n//2}<${Scalar}>
59+
) {
60+
// Reinterprets the pair's bits as `Self`. NOTE(review): this relies on the
// tuple of two halves having the same size and layout as the full vector;
// confirm for every generated `n`.
self = unsafe unsafeBitCast((lowHalf, highHalf), to: Self.self)
61+
}
62+
63+
% end
3764
/// A vector mask with the result of a pointwise equality comparison.
3865
@_alwaysEmitIntoClient
3966
public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {

stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,38 @@ vectorscalarCounts = storagescalarCounts + [3]
2626
% Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
2727
% MaskExt = "Builtin.sext_Vec" + str(storageN) + "xInt1_" + Builtin
2828
extension SIMD${n} where Scalar == ${Scalar} {
29-
@_alwaysEmitIntoClient
29+
@_alwaysEmitIntoClient @_transparent
3030
internal init(_ _builtin: Builtin.${Builtin}) {
3131
_storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
3232
}
33-
33+
34+
/// Creates a vector with `scalar` in every lane.
///
/// Built directly from two builtin vector operations (insert into lane 0,
/// then a shuffle with an all-zero mask) so that, once inlined, the backend
/// sees a plain splat pattern.
///
/// - Parameter scalar: The value to place in each lane.
@_alwaysEmitIntoClient @_transparent
35+
public init(repeating scalar: ${Scalar}) {
36+
// Start from an all-zero builtin vector and write `scalar` at index 0.
let asVector = Builtin.insertelement_${Builtin}_Int${int.bits}_Int32(
37+
Builtin.zeroInitializer(), scalar._value, Builtin.zeroInitializer()
38+
)
39+
// An all-zero shuffle mask selects element 0 of `asVector` for every
// output lane, i.e. it broadcasts `scalar`.
let repeated = Builtin.shufflevector_${Builtin}_Vec${storageN}xInt32(
40+
asVector, Builtin.zeroInitializer(), Builtin.zeroInitializer()
41+
)
42+
% if n != 3:
43+
self.init(repeated)
44+
% else:
45+
// For the 3-element vector, overwrite storage index 3 with zero before
// wrapping it (presumably the padding lane of a 4-wide storage vector;
// confirm against the storage type).
self.init(Builtin.insertelement_${Builtin}_Int${int.bits}_Int32(
46+
repeated, Builtin.zeroInitializer(), Int32(3)._value
47+
))
48+
% end
49+
}
50+
51+
% if n >= 4:
52+
/// Creates a vector by concatenating two half-width vectors.
///
/// Only generated when `n >= 4` (see the enclosing gyb conditional).
///
/// - Parameters:
///   - lowHalf: The half-width vector passed as the first tuple element.
///   - highHalf: The half-width vector passed as the second tuple element.
@_alwaysEmitIntoClient @_transparent
53+
public init(
54+
lowHalf: SIMD${n//2}<${Scalar}>,
55+
highHalf: SIMD${n//2}<${Scalar}>
56+
) {
57+
// Reinterprets the pair's bits as `Self`. NOTE(review): this relies on the
// tuple of two halves having the same size and layout as the full vector;
// confirm for every generated `n`.
self = unsafe unsafeBitCast((lowHalf, highHalf), to: Self.self)
58+
}
59+
60+
% end
3461
/// A vector mask with the result of a pointwise equality comparison.
3562
@_alwaysEmitIntoClient
3663
public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {

stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,29 @@ vectorscalarCounts = storagescalarCounts + [3]
2626
% Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
2727
% MaskExt = "Builtin.sext_Vec" + str(storageN) + "xInt1_" + Builtin
2828
extension SIMDMask where Storage == ${Vector} {
29-
@_alwaysEmitIntoClient
29+
@_alwaysEmitIntoClient @_transparent
3030
internal init(_ _builtin: Builtin.${Builtin}) {
3131
_storage = ${Vector}(_builtin)
3232
}
3333

34+
/// Creates a mask with every lane set to `scalar`.
///
/// - Parameter scalar: `true` fills each storage lane with `-1`; `false`
///   fills each storage lane with `0`.
@_alwaysEmitIntoClient @_transparent
35+
public init(repeating scalar: Bool) {
36+
// Delegates to the storage vector's `init(repeating:)` with -1 or 0.
_storage = ${Vector}(repeating: scalar ? -1 : 0)
37+
}
38+
39+
% if n >= 4:
40+
/// Creates a mask by concatenating two half-width masks.
///
/// Only generated when `n >= 4` (see the enclosing gyb conditional).
///
/// - Parameters:
///   - lowHalf: The mask whose storage is passed as the `lowHalf` argument.
///   - highHalf: The mask whose storage is passed as the `highHalf` argument.
@_alwaysEmitIntoClient @_transparent
41+
public init(
42+
lowHalf: SIMDMask<SIMD${n//2}<${Scalar}>>,
43+
highHalf: SIMDMask<SIMD${n//2}<${Scalar}>>
44+
) {
45+
// Forward to the storage vector's own `init(lowHalf:highHalf:)`, using the
// two masks' underlying storage vectors.
_storage = ${Vector}(
46+
lowHalf: lowHalf._storage,
47+
highHalf: highHalf._storage
48+
)
49+
}
50+
51+
% end
3452
@_alwaysEmitIntoClient
3553
internal static var allTrue: Self {
3654
let zero = ${Vector}()
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
//===--- SIMDFloatInitializers.swift.gyb -------------------*- swift -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
// RUN: %empty-directory(%t)
13+
// RUN: %gyb %s -o %t/SIMDFloatInitializers.swift
14+
// RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
15+
// RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S -O | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
16+
17+
import Swift
18+
19+
%for bits in [16,32,64]:
20+
% scalar = {16:'Float16',32:'Float',64:'Double'}[bits]
21+
% for totalBits in [64,128]:
22+
% n = totalBits // bits
23+
% if n != 1:
24+
% neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
25+
% if bits == 16:
26+
#if arch(arm64)
27+
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
28+
% end
29+
// Wraps `init(repeating:)` in a named function, one per generated vector
// shape. The expectations after it require, on arm64 at -O, a `dup` from
// lane 0 immediately followed by `ret`; at -Onone a `dup` must still appear
// before the `ret`.
func repeating${n}x${bits}(_ scalar: ${scalar}) -> SIMD${n}<${scalar}> {
30+
SIMD${n}(repeating: scalar)
31+
}
32+
% if bits == 16:
33+
#endif
34+
// CHECK-arm64: repeating${n}x${bits}{{[[:alnum:]]+}}:
35+
% else:
36+
// CHECK: repeating${n}x${bits}{{[[:alnum:]]+}}:
37+
% end
38+
// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, v0[0]
39+
// CHECKO-arm64-NEXT: ret
40+
// CHECKOnone-arm64: dup.${neonSuffix}
41+
// CHECKOnone-arm64: ret
42+
43+
% end
44+
% end
45+
%end
46+
47+
#if arch(arm64)
48+
// Joins two SIMD4<Float16> halves into a SIMD8<Float16>. The expectations
// that follow require, on arm64 at -O, a single `mov.d` lane insert and then
// `ret`.
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
49+
func concat4x16(_ a: SIMD4<Float16>, _ b: SIMD4<Float16>) -> SIMD8<Float16> {
50+
SIMD8(lowHalf: a, highHalf: b)
51+
}
52+
// CHECK-arm64: _$s21SIMDFloatInitializers10concat4x16ys5SIMD8Vys7Float16VGs5SIMD4VyAFG_AJtF:
53+
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
54+
// CHECKO-arm64-NEXT: ret
55+
56+
// Joins two SIMD8<Float16> halves into a SIMD16<Float16>. The expectation
// that follows requires the arm64 -O body to be nothing but `ret`.
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
57+
func concat8x16(_ a: SIMD8<Float16>, _ b: SIMD8<Float16>) -> SIMD16<Float16> {
58+
SIMD16(lowHalf: a, highHalf: b)
59+
}
60+
// CHECK-arm64: _$s21SIMDFloatInitializers10concat8x16ys6SIMD16Vys7Float16VGs5SIMD8VyAFG_AJtF:
61+
// CHECKO-arm64-NEXT: ret
62+
#endif
63+
64+
// Joins two SIMD2<Float> halves into a SIMD4<Float>. The expectations that
// follow require, on arm64 at -O, a single `mov.d` lane insert and then `ret`.
func concat2x32(_ a: SIMD2<Float>, _ b: SIMD2<Float>) -> SIMD4<Float> {
65+
SIMD4(lowHalf: a, highHalf: b)
66+
}
67+
// CHECK: _$s21SIMDFloatInitializers10concat2x32ys5SIMD4VySfGs5SIMD2VySfG_AHtF:
68+
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
69+
// CHECKO-arm64-NEXT: ret
70+
71+
// Joins two SIMD4<Float> halves into a SIMD8<Float>. The expectation that
// follows requires the arm64 -O body to be nothing but `ret`.
func concat4x32(_ a: SIMD4<Float>, _ b: SIMD4<Float>) -> SIMD8<Float> {
72+
SIMD8(lowHalf: a, highHalf: b)
73+
}
74+
// CHECK: _$s21SIMDFloatInitializers10concat4x32ys5SIMD8VySfGs5SIMD4VySfG_AHtF:
75+
// CHECKO-arm64-NEXT: ret
76+
77+
// Joins two SIMD2<Double> halves into a SIMD4<Double>. The expectation that
// follows requires the arm64 -O body to be nothing but `ret`.
func concat2x64(_ a: SIMD2<Double>, _ b: SIMD2<Double>) -> SIMD4<Double> {
78+
SIMD4(lowHalf: a, highHalf: b)
79+
}
80+
// CHECK: _$s21SIMDFloatInitializers10concat2x64ys5SIMD4VySdGs5SIMD2VySdG_AHtF:
81+
// CHECKO-arm64-NEXT: ret
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//===--- SIMDMaskInitializers.swift.gyb -------------------*- swift -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
// RUN: %empty-directory(%t)
13+
// RUN: %gyb %s -o %t/SIMDMaskInitializers.swift
14+
// RUN: %target-swift-frontend -primary-file %t/SIMDMaskInitializers.swift -S | %FileCheck %t/SIMDMaskInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
15+
// RUN: %target-swift-frontend -primary-file %t/SIMDMaskInitializers.swift -S -O | %FileCheck %t/SIMDMaskInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
16+
17+
import Swift
18+
19+
%for bits in [8,16,32,64]:
20+
% for totalBits in [64,128]:
21+
% n = totalBits // bits
22+
% if n != 1:
23+
% neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
24+
// Wraps `SIMDMask.init(repeating:)` in a named function, one per generated
// mask shape. The expectations after it require, on arm64 at -O, an `sbfx`
// that sign-extends bit 0 of the Bool, a `dup` of that register, then `ret`;
// at -Onone a `dup` must still appear before the `ret`.
func repeating${n}_mask${bits}(_ scalar: Bool) -> SIMDMask<SIMD${n}<Int${bits}>> {
25+
SIMDMask(repeating: scalar)
26+
}
27+
// CHECK: repeating${n}_mask${bits}{{[[:alnum:]]+}}:
28+
// CHECKO-arm64-NEXT: sbfx [[REG:[wx][0-9]]], {{[wx]}}0, #0, #1
29+
// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, [[REG]]
30+
// CHECKO-arm64-NEXT: ret
31+
// CHECKOnone-arm64: dup.${neonSuffix}
32+
// CHECKOnone-arm64: ret
33+
34+
% end
35+
% end
36+
%end
37+
38+
// Joins two 8-lane Int8 masks into a 16-lane mask. The expectations that
// follow require, at -O, `mov.d` then `ret` on arm64 and a `punpcklqdq` on
// x86_64.
func concat8x8(_ a: SIMDMask<SIMD8<Int8>>, _ b: SIMDMask<SIMD8<Int8>>) -> SIMDMask<SIMD16<Int8>> {
39+
SIMDMask(lowHalf: a, highHalf: b)
40+
}
41+
// CHECK: _$s20SIMDMaskInitializers9concat8x8ys0A0Vys6SIMD16Vys4Int8VGGADys5SIMD8VyAHGG_ANtF:
42+
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
43+
// CHECKO-arm64-NEXT: ret
44+
// CHECKO-x86_64: punpcklqdq
45+
46+
// Joins two 16-lane Int8 masks into a 32-lane mask. The expectation that
// follows requires the arm64 -O body to be nothing but `ret`.
func concat16x8(_ a: SIMDMask<SIMD16<Int8>>, _ b: SIMDMask<SIMD16<Int8>>) -> SIMDMask<SIMD32<Int8>> {
47+
SIMDMask(lowHalf: a, highHalf: b)
48+
}
49+
// CHECK: _$s20SIMDMaskInitializers10concat16x8ys0A0Vys6SIMD32Vys4Int8VGGADys6SIMD16VyAHGG_ANtF:
50+
// CHECKO-arm64-NEXT: ret
51+
52+
// Joins two 4-lane Int16 masks into an 8-lane mask. The expectations that
// follow require, at -O, `mov.d` then `ret` on arm64 and a `punpcklqdq` on
// x86_64.
func concat4x16(_ a: SIMDMask<SIMD4<Int16>>, _ b: SIMDMask<SIMD4<Int16>>) -> SIMDMask<SIMD8<Int16>> {
53+
SIMDMask(lowHalf: a, highHalf: b)
54+
}
55+
// CHECK: _$s20SIMDMaskInitializers10concat4x16ys0A0Vys5SIMD8Vys5Int16VGGADys5SIMD4VyAHGG_ANtF:
56+
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
57+
// CHECKO-arm64-NEXT: ret
58+
// CHECKO-x86_64: punpcklqdq
59+
60+
// Joins two 8-lane Int16 masks into a 16-lane mask. The expectation that
// follows requires the arm64 -O body to be nothing but `ret`.
func concat8x16(_ a: SIMDMask<SIMD8<Int16>>, _ b: SIMDMask<SIMD8<Int16>>) -> SIMDMask<SIMD16<Int16>> {
61+
SIMDMask(lowHalf: a, highHalf: b)
62+
}
63+
// CHECK: _$s20SIMDMaskInitializers10concat8x16ys0A0Vys6SIMD16Vys5Int16VGGADys5SIMD8VyAHGG_ANtF:
64+
// CHECKO-arm64-NEXT: ret
65+
66+
// Joins two 2-lane Int32 masks into a 4-lane mask. The expectations that
// follow require, at -O, `mov.d` then `ret` on arm64 and a `punpcklqdq` on
// x86_64.
func concat2x32(_ a: SIMDMask<SIMD2<Int32>>, _ b: SIMDMask<SIMD2<Int32>>) -> SIMDMask<SIMD4<Int32>> {
67+
SIMDMask(lowHalf: a, highHalf: b)
68+
}
69+
// CHECK: _$s20SIMDMaskInitializers10concat2x32ys0A0Vys5SIMD4Vys5Int32VGGADys5SIMD2VyAHGG_ANtF:
70+
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
71+
// CHECKO-arm64-NEXT: ret
72+
// CHECKO-x86_64: punpcklqdq
73+
74+
// Joins two 4-lane Int32 masks into an 8-lane mask. The expectation that
// follows requires the arm64 -O body to be nothing but `ret`.
func concat4x32(_ a: SIMDMask<SIMD4<Int32>>, _ b: SIMDMask<SIMD4<Int32>>) -> SIMDMask<SIMD8<Int32>> {
75+
SIMDMask(lowHalf: a, highHalf: b)
76+
}
77+
// CHECK: _$s20SIMDMaskInitializers10concat4x32ys0A0Vys5SIMD8Vys5Int32VGGADys5SIMD4VyAHGG_ANtF:
78+
// CHECKO-arm64-NEXT: ret
79+
80+
// Joins two 2-lane Int64 masks into a 4-lane mask. The expectation that
// follows requires the arm64 -O body to be nothing but `ret`.
func concat2x64(_ a: SIMDMask<SIMD2<Int64>>, _ b: SIMDMask<SIMD2<Int64>>) -> SIMDMask<SIMD4<Int64>> {
81+
SIMDMask(lowHalf: a, highHalf: b)
82+
}
83+
// CHECK: _$s20SIMDMaskInitializers10concat2x64ys0A0Vys5SIMD4Vys5Int64VGGADys5SIMD2VyAHGG_ANtF:
84+
// CHECKO-arm64-NEXT: ret
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//===--- SIMDSignedInitializers.swift.gyb ---------------------*- swift -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
// RUN: %empty-directory(%t)
13+
// RUN: %gyb %s -o %t/SIMDSignedInitializers.swift
14+
// RUN: %target-swift-frontend -primary-file %t/SIMDSignedInitializers.swift -S | %FileCheck %t/SIMDSignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
15+
// RUN: %target-swift-frontend -primary-file %t/SIMDSignedInitializers.swift -S -O | %FileCheck %t/SIMDSignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
16+
17+
import Swift
18+
19+
%for bits in [8,16,32,64]:
20+
% for totalBits in [64,128]:
21+
% n = totalBits // bits
22+
% if n != 1:
23+
% neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
24+
25+
// Wraps `init(repeating:)` in a named function, one per generated integer
// vector shape. The expectations after it require, on arm64 at -O, a `dup`
// from general-purpose register 0 immediately followed by `ret`; at -Onone a
// `dup` must still appear before the `ret`.
func repeating${n}_int${bits}(_ scalar: Int${bits}) -> SIMD${n}<Int${bits}> {
26+
SIMD${n}(repeating: scalar)
27+
}
28+
// CHECK: repeating${n}_int${bits}{{[[:alnum:]]+}}:
29+
// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, {{[wx]}}0
30+
// CHECKO-arm64-NEXT: ret
31+
// CHECKOnone-arm64: dup.${neonSuffix}
32+
// CHECKOnone-arm64: ret
33+
% end
34+
% end
35+
%end
36+
37+
// Joins two SIMD8<Int8> halves into a SIMD16<Int8>. The expectations that
// follow require, at -O, `mov.d` then `ret` on arm64 and a `punpcklqdq` on
// x86_64.
func concat8x8(_ a: SIMD8<Int8>, _ b: SIMD8<Int8>) -> SIMD16<Int8> {
38+
SIMD16(lowHalf: a, highHalf: b)
39+
}
40+
// CHECK: _$s22SIMDSignedInitializers9concat8x8ys6SIMD16Vys4Int8VGs5SIMD8VyAFG_AJtF:
41+
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
42+
// CHECKO-arm64-NEXT: ret
43+
// CHECKO-x86_64: punpcklqdq
44+
45+
// Joins two SIMD16<Int8> halves into a SIMD32<Int8>. The expectation that
// follows requires the arm64 -O body to be nothing but `ret`.
func concat16x8(_ a: SIMD16<Int8>, _ b: SIMD16<Int8>) -> SIMD32<Int8> {
46+
SIMD32(lowHalf: a, highHalf: b)
47+
}
48+
// CHECK: _$s22SIMDSignedInitializers10concat16x8ys6SIMD32Vys4Int8VGs6SIMD16VyAFG_AJtF:
49+
// CHECKO-arm64-NEXT: ret
50+
51+
// Joins two SIMD4<Int16> halves into a SIMD8<Int16>. The expectations that
// follow require, at -O, `mov.d` then `ret` on arm64 and a `punpcklqdq` on
// x86_64.
func concat4x16(_ a: SIMD4<Int16>, _ b: SIMD4<Int16>) -> SIMD8<Int16> {
52+
SIMD8(lowHalf: a, highHalf: b)
53+
}
54+
// CHECK: _$s22SIMDSignedInitializers10concat4x16ys5SIMD8Vys5Int16VGs5SIMD4VyAFG_AJtF:
55+
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
56+
// CHECKO-arm64-NEXT: ret
57+
// CHECKO-x86_64: punpcklqdq
58+
59+
// Joins two SIMD8<Int16> halves into a SIMD16<Int16>. The expectation that
// follows requires the arm64 -O body to be nothing but `ret`.
func concat8x16(_ a: SIMD8<Int16>, _ b: SIMD8<Int16>) -> SIMD16<Int16> {
60+
SIMD16(lowHalf: a, highHalf: b)
61+
}
62+
// CHECK: _$s22SIMDSignedInitializers10concat8x16ys6SIMD16Vys5Int16VGs5SIMD8VyAFG_AJtF:
63+
// CHECKO-arm64-NEXT: ret
64+
65+
// Joins two SIMD2<Int32> halves into a SIMD4<Int32>. The expectations that
// follow require, at -O, `mov.d` then `ret` on arm64 and a `punpcklqdq` on
// x86_64.
func concat2x32(_ a: SIMD2<Int32>, _ b: SIMD2<Int32>) -> SIMD4<Int32> {
66+
SIMD4(lowHalf: a, highHalf: b)
67+
}
68+
// CHECK: _$s22SIMDSignedInitializers10concat2x32ys5SIMD4Vys5Int32VGs5SIMD2VyAFG_AJtF:
69+
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
70+
// CHECKO-arm64-NEXT: ret
71+
// CHECKO-x86_64: punpcklqdq
72+
73+
// Joins two SIMD4<Int32> halves into a SIMD8<Int32>. The expectation that
// follows requires the arm64 -O body to be nothing but `ret`.
func concat4x32(_ a: SIMD4<Int32>, _ b: SIMD4<Int32>) -> SIMD8<Int32> {
74+
SIMD8(lowHalf: a, highHalf: b)
75+
}
76+
// CHECK: _$s22SIMDSignedInitializers10concat4x32ys5SIMD8Vys5Int32VGs5SIMD4VyAFG_AJtF:
77+
// CHECKO-arm64-NEXT: ret
78+
79+
// Joins two SIMD2<Int64> halves into a SIMD4<Int64>. The expectation that
// follows requires the arm64 -O body to be nothing but `ret`.
func concat2x64(_ a: SIMD2<Int64>, _ b: SIMD2<Int64>) -> SIMD4<Int64> {
80+
SIMD4(lowHalf: a, highHalf: b)
81+
}
82+
// CHECK: _$s22SIMDSignedInitializers10concat2x64ys5SIMD4Vys5Int64VGs5SIMD2VyAFG_AJtF:
83+
// CHECKO-arm64-NEXT: ret
84+

0 commit comments

Comments
 (0)