Skip to content

Commit 8a92c45

Browse files
committed
[Clang] Add integer mul reduction builtin
Similar to the existing bitwise reduction builtins, this lowers to an llvm.vector.reduce.mul intrinsic call. For other reductions, we've tried to share builtins for float/integer vectors, but the fmul reduction intrinsic also takes a starting value argument and can do either unordered or serialized reductions, but not reduction trees as specified for the builtins. However we address fmul support, this shouldn't affect the integer case. Differential Revision: https://reviews.llvm.org/D117829
1 parent 12cb540 commit 8a92c45

File tree

6 files changed

+46
-1
lines changed

6 files changed

+46
-1
lines changed

clang/docs/LanguageExtensions.rst

+1
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,7 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
647647
is a NaN, return the other argument. If both arguments are
648648
NaNs, fmax() returns a NaN.
649649
ET __builtin_reduce_add(VT a) \+ integer and floating point types
650+
ET __builtin_reduce_mul(VT a) * integer and floating point types
650651
ET __builtin_reduce_and(VT a) & integer types
651652
ET __builtin_reduce_or(VT a) \| integer types
652653
ET __builtin_reduce_xor(VT a) ^ integer types

clang/include/clang/Basic/Builtins.def

+1
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,7 @@ BUILTIN(__builtin_reduce_xor, "v.", "nct")
664664
BUILTIN(__builtin_reduce_or, "v.", "nct")
665665
BUILTIN(__builtin_reduce_and, "v.", "nct")
666666
BUILTIN(__builtin_reduce_add, "v.", "nct")
667+
BUILTIN(__builtin_reduce_mul, "v.", "nct")
667668

668669
BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
669670
BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")

clang/lib/CodeGen/CGBuiltin.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -3146,6 +3146,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
31463146
case Builtin::BI__builtin_reduce_add:
31473147
return RValue::get(emitUnaryBuiltin(
31483148
*this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3149+
case Builtin::BI__builtin_reduce_mul:
3150+
return RValue::get(emitUnaryBuiltin(
3151+
*this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
31493152
case Builtin::BI__builtin_reduce_xor:
31503153
return RValue::get(emitUnaryBuiltin(
31513154
*this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));

clang/lib/Sema/SemaChecking.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -2596,8 +2596,9 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
25962596
}
25972597

25982598
// These builtins support vectors of integers only.
2599-
// TODO: ADD should support floating-point types.
2599+
// TODO: ADD/MUL should support floating-point types.
26002600
case Builtin::BI__builtin_reduce_add:
2601+
case Builtin::BI__builtin_reduce_mul:
26012602
case Builtin::BI__builtin_reduce_xor:
26022603
case Builtin::BI__builtin_reduce_or:
26032604
case Builtin::BI__builtin_reduce_and: {

clang/test/CodeGen/builtins-reduction-math.c

+22
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,28 @@ void test_builtin_reduce_add(si8 vi1, u4 vu1) {
8080
unsigned long long r5 = __builtin_reduce_add(cvu1);
8181
}
8282

83+
void test_builtin_reduce_mul(si8 vi1, u4 vu1) {
84+
// CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
85+
// CHECK-NEXT: call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[VI1]])
86+
short r2 = __builtin_reduce_mul(vi1);
87+
88+
// CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
89+
// CHECK-NEXT: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VU1]])
90+
unsigned r3 = __builtin_reduce_mul(vu1);
91+
92+
// CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16
93+
// CHECK-NEXT: [[RDX1:%.+]] = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[CVI1]])
94+
// CHECK-NEXT: sext i16 [[RDX1]] to i32
95+
const si8 cvi1 = vi1;
96+
int r4 = __builtin_reduce_mul(cvi1);
97+
98+
// CHECK: [[CVU1:%.+]] = load <4 x i32>, <4 x i32>* %cvu1, align 16
99+
// CHECK-NEXT: [[RDX2:%.+]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[CVU1]])
100+
// CHECK-NEXT: zext i32 [[RDX2]] to i64
101+
const u4 cvu1 = vu1;
102+
unsigned long long r5 = __builtin_reduce_mul(cvu1);
103+
}
104+
83105
void test_builtin_reduce_xor(si8 vi1, u4 vu1) {
84106

85107
// CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16

clang/test/Sema/builtins-reduction-math.c

+17
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,23 @@ void test_builtin_reduce_add(int i, float4 v, int3 iv) {
5353
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
5454
}
5555

56+
void test_builtin_reduce_mul(int i, float4 v, int3 iv) {
57+
struct Foo s = __builtin_reduce_mul(iv);
58+
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
59+
60+
i = __builtin_reduce_mul();
61+
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
62+
63+
i = __builtin_reduce_mul(iv, iv);
64+
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
65+
66+
i = __builtin_reduce_mul(i);
67+
// expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
68+
69+
i = __builtin_reduce_mul(v);
70+
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
71+
}
72+
5673
void test_builtin_reduce_xor(int i, float4 v, int3 iv) {
5774
struct Foo s = __builtin_reduce_xor(iv);
5875
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}

0 commit comments

Comments
 (0)