Skip to content

Commit a23291b

Browse files
committed
[Clang] Add integer add reduction builtin
Similar to the existing bitwise reduction builtins, this lowers to an llvm.vector.reduce.add intrinsic call. For other reductions, we've tried to share builtins between float and integer vectors, but the fadd reduction intrinsics also take a starting-value argument and can perform either unordered or serialized reductions, but not the reduction trees specified for the builtins. However we end up addressing fadd support, it shouldn't affect the integer case. (Split off from D117829.) Differential Revision: https://reviews.llvm.org/D124741
1 parent 29dff0d commit a23291b

File tree

5 files changed

+45
-0
lines changed

5 files changed

+45
-0
lines changed

clang/include/clang/Basic/Builtins.def

+1
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,7 @@ BUILTIN(__builtin_reduce_min, "v.", "nct")
663663
BUILTIN(__builtin_reduce_xor, "v.", "nct")
664664
BUILTIN(__builtin_reduce_or, "v.", "nct")
665665
BUILTIN(__builtin_reduce_and, "v.", "nct")
666+
BUILTIN(__builtin_reduce_add, "v.", "nct")
666667

667668
BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
668669
BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")

clang/lib/CodeGen/CGBuiltin.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -3273,6 +3273,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
32733273
*this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
32743274
}
32753275

3276+
case Builtin::BI__builtin_reduce_add:
3277+
return RValue::get(emitUnaryBuiltin(
3278+
*this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
32763279
case Builtin::BI__builtin_reduce_xor:
32773280
return RValue::get(emitUnaryBuiltin(
32783281
*this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));

clang/lib/Sema/SemaChecking.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -2331,6 +2331,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
23312331
}
23322332

23332333
// These builtins support vectors of integers only.
2334+
// TODO: ADD should support floating-point types.
2335+
case Builtin::BI__builtin_reduce_add:
23342336
case Builtin::BI__builtin_reduce_xor:
23352337
case Builtin::BI__builtin_reduce_or:
23362338
case Builtin::BI__builtin_reduce_and: {

clang/test/CodeGen/builtins-reduction-math.c

+22
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,28 @@ void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) {
5858
unsigned long long r5 = __builtin_reduce_min(cvi1);
5959
}
6060

61+
void test_builtin_reduce_add(si8 vi1, u4 vu1) {
62+
// CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
63+
// CHECK-NEXT: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VI1]])
64+
short r2 = __builtin_reduce_add(vi1);
65+
66+
// CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
67+
// CHECK-NEXT: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VU1]])
68+
unsigned r3 = __builtin_reduce_add(vu1);
69+
70+
// CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16
71+
// CHECK-NEXT: [[RDX1:%.+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[CVI1]])
72+
// CHECK-NEXT: sext i16 [[RDX1]] to i32
73+
const si8 cvi1 = vi1;
74+
int r4 = __builtin_reduce_add(cvi1);
75+
76+
// CHECK: [[CVU1:%.+]] = load <4 x i32>, <4 x i32>* %cvu1, align 16
77+
// CHECK-NEXT: [[RDX2:%.+]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[CVU1]])
78+
// CHECK-NEXT: zext i32 [[RDX2]] to i64
79+
const u4 cvu1 = vu1;
80+
unsigned long long r5 = __builtin_reduce_add(cvu1);
81+
}
82+
6183
void test_builtin_reduce_xor(si8 vi1, u4 vu1) {
6284

6385
// CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16

clang/test/Sema/builtins-reduction-math.c

+17
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,23 @@ void test_builtin_reduce_min(int i, float4 v, int3 iv) {
3636
// expected-error@-1 {{1st argument must be a vector type (was 'int')}}
3737
}
3838

39+
void test_builtin_reduce_add(int i, float4 v, int3 iv) {
40+
struct Foo s = __builtin_reduce_add(iv);
41+
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
42+
43+
i = __builtin_reduce_add();
44+
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
45+
46+
i = __builtin_reduce_add(iv, iv);
47+
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
48+
49+
i = __builtin_reduce_add(i);
50+
// expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
51+
52+
i = __builtin_reduce_add(v);
53+
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
54+
}
55+
3956
void test_builtin_reduce_xor(int i, float4 v, int3 iv) {
4057
struct Foo s = __builtin_reduce_xor(iv);
4158
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}

0 commit comments

Comments
 (0)