Skip to content

Commit d72983f

Browse files
committed
[Clang] Add float type support to __builtin_reduce_add and __builtin_reduce_mul
1 parent b59a0a6 commit d72983f

File tree

8 files changed

+120
-14
lines changed

8 files changed

+120
-14
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

+2-1
Original file line numberDiff line numberDiff line change
@@ -12355,7 +12355,8 @@ def err_builtin_invalid_arg_type: Error <
1235512355
"a vector of integers|"
1235612356
"an unsigned integer|"
1235712357
"an 'int'|"
12358-
"a vector of floating points}1 (was %2)">;
12358+
"a vector of floating points|"
12359+
"a vector of integers or floating points}1 (was %2)">;
1235912360

1236012361
def err_builtin_matrix_disabled: Error<
1236112362
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88
#include "../ExprConstShared.h"
99
#include "Boolean.h"
10+
#include "ByteCode/Floating.h"
1011
#include "Compiler.h"
1112
#include "EvalEmitter.h"
1213
#include "Interp.h"
@@ -1754,6 +1755,17 @@ static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
17541755
PrimType ElemT = *S.getContext().classify(ElemType);
17551756
unsigned NumElems = Arg.getNumElems();
17561757

1758+
// Vectors with a real floating-point element type are not folded here: bail
// out before the integer-only reduction that follows.
if (ElemType->isRealFloatingType()) {
1759+
if (ID != Builtin::BI__builtin_reduce_add &&
1760+
ID != Builtin::BI__builtin_reduce_mul)
1761+
llvm_unreachable("Only reduce_add and reduce_mul are supported for "
1762+
"floating-point types.");
1763+
// Floating-point arithmetic is not valid for constant expression
1764+
// initialization. Returning false defers checks to integral constant
1765+
// expression validation, preventing a bad deref of Floating as an integer.
1766+
return false;
1767+
}
1768+
17571769
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
17581770
T Result = Arg.atIndex(0).deref<T>();
17591771
unsigned BitWidth = Result.bitWidth();

clang/lib/CodeGen/CGBuiltin.cpp

+27-2
Original file line numberDiff line numberDiff line change
@@ -4274,12 +4274,37 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
42744274
*this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
42754275
}
42764276

4277-
case Builtin::BI__builtin_reduce_add:
4277+
case Builtin::BI__builtin_reduce_add: {
4278+
// Note: vector_reduce_fadd takes two arguments: a
4279+
// scalar start value and a vector. Calling it through the shared helper
4280+
// would therefore require emitBuiltinWithOneOverloadedType<2>.
4281+
// To keep the builtin's Sema behavior the same regardless of element type,
4282+
// we populate the vector_reduce_fadd scalar start value with 0.
// NOTE(review): ConstantFP::get(EltTy, 0) yields +0.0, which is not a strict
// additive identity under IEEE-754 (a vector of only -0.0 lanes would sum to
// +0.0). -0.0 may be the intended start value -- confirm.
4283+
if (E->getArg(0)->getType()->hasFloatingRepresentation()) {
4284+
Value *X = EmitScalarExpr(E->getArg(0));
4285+
auto EltTy = X->getType()->getScalarType();
4286+
Value *Seed = ConstantFP::get(EltTy, 0);
4287+
return RValue::get(Builder.CreateIntrinsic(
4288+
/*ReturnType=*/EltTy, llvm::Intrinsic::vector_reduce_fadd,
4289+
ArrayRef<Value *>{Seed, X}, nullptr, "rdx.fadd"));
4290+
}
4291+
// Everything that is not floating-point must be an integer vector here.
assert(E->getArg(0)->getType()->hasIntegerRepresentation());
42784292
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
42794293
*this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
4294+
}
4295+
case Builtin::BI__builtin_reduce_mul: {
// Floating-point vectors are lowered to vector_reduce_fmul, emitted with an
// explicit scalar seed operand ahead of the vector operand; integer vectors
// go through the shared one-operand helper with vector_reduce_mul.
// NOTE(review): the seed below is 0. If the intrinsic multiplies the seed
// into the result, every floating-point reduction would come out as 0 (or
// NaN). The multiplicative identity 1.0 looks intended -- confirm against the
// LLVM LangRef and adjust the CodeGen test expectations to match.
4296+
if (E->getArg(0)->getType()->hasFloatingRepresentation()) {
4297+
Value *X = EmitScalarExpr(E->getArg(0));
4298+
auto EltTy = X->getType()->getScalarType();
4299+
Value *Seed = ConstantFP::get(EltTy, 0);
4300+
return RValue::get(Builder.CreateIntrinsic(
4301+
/*ReturnType=*/EltTy, llvm::Intrinsic::vector_reduce_fmul,
4302+
ArrayRef<Value *>{Seed, X}, nullptr, "rdx.fmul"));
4303+
}
4304+
// Everything that is not floating-point must be an integer vector here.
assert(E->getArg(0)->getType()->hasIntegerRepresentation());
42814305
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
42824306
*this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
4307+
}
42834308
case Builtin::BI__builtin_reduce_xor:
42844309
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
42854310
*this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));

clang/lib/Sema/SemaChecking.cpp

+23-3
Original file line numberDiff line numberDiff line change
@@ -2883,11 +2883,31 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
28832883
TheCall->setType(ElTy);
28842884
break;
28852885
}
2886+
// Reduction builtins that accept vectors of integers or floating points.
case Builtin::BI__builtin_reduce_add:
2887+
case Builtin::BI__builtin_reduce_mul: {
2888+
// Shared one-argument preparation; a true result means the call is rejected.
if (PrepareBuiltinReduceMathOneArgCall(TheCall))
2889+
return ExprError();
2890+
2891+
const Expr *Arg = TheCall->getArg(0);
2892+
const auto *TyA = Arg->getType()->getAs<VectorType>();
2893+
2894+
// Determine the element type from either a VectorType or a sizeless vector
// type; ElTy stays null when the argument is neither.
QualType ElTy;
2895+
if (TyA)
2896+
ElTy = TyA->getElementType();
2897+
else if (Arg->getType()->isSizelessVectorType())
2898+
ElTy = Arg->getType()->getSizelessVectorEltType(Context);
2899+
2900+
// No element type could be determined: diagnose the first argument.
if (ElTy.isNull()) {
2901+
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
2902+
<< 1 << /* vector of integers or floating points */ 10
2903+
<< Arg->getType();
2904+
return ExprError();
2905+
}
2906+
// The call expression's type is the vector's element type.
TheCall->setType(ElTy);
2907+
break;
2908+
}
28862909

28872910
// These builtins support vectors of integers only.
2888-
// TODO: ADD/MUL should support floating-point types.
2889-
case Builtin::BI__builtin_reduce_add:
2890-
case Builtin::BI__builtin_reduce_mul:
28912911
case Builtin::BI__builtin_reduce_xor:
28922912
case Builtin::BI__builtin_reduce_or:
28932913
case Builtin::BI__builtin_reduce_and: {

clang/test/AST/ByteCode/builtin-functions.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,14 @@ namespace RecuceAdd {
10561056
static_assert(__builtin_reduce_add((vector4uint){~0U, 0, 0, 1}) == 0);
10571057
static_assert(__builtin_reduce_add((vector4ulong){~0ULL, 0, 0, 1}) == 0);
10581058

1059+
static_assert(__builtin_reduce_add((vector4float){}) == 0.0);
1060+
// both-error@-1 {{static assertion expression is not an integral constant expression}}
1061+
static_assert(__builtin_reduce_add((vector4float){1.1, 2.2, 3.3, 4.4}) == 11.0);
1062+
// both-error@-1 {{static assertion expression is not an integral constant expression}}
1063+
static_assert(__builtin_reduce_add((vector4double){100.1, 200.2, 300.3, 400.4}) == 1001.0);
1064+
// both-error@-1 {{static assertion expression is not an integral constant expression}}
1065+
1066+
10591067

10601068
#ifdef __SIZEOF_INT128__
10611069
typedef __int128 v4i128 __attribute__((__vector_size__(128 * 2)));
@@ -1091,6 +1099,13 @@ namespace ReduceMul {
10911099
(~0U - 1));
10921100
#endif
10931101
static_assert(__builtin_reduce_mul((vector4ulong){~0ULL, 1, 1, 2}) == ~0ULL - 1);
1102+
1103+
static_assert(__builtin_reduce_mul((vector4float){}) == 0.0);
1104+
// both-error@-1 {{static assertion expression is not an integral constant expression}}
1105+
static_assert(__builtin_reduce_mul((vector4float){1.0, 2.0, 3.0, 1.0}) == 6.0);
1106+
// both-error@-1 {{static assertion expression is not an integral constant expression}}
1107+
static_assert(__builtin_reduce_mul((vector4double){3.0, 4.0, 1.0, 1.0}) == 12.0);
1108+
// both-error@-1 {{static assertion expression is not an integral constant expression}}
10941109
}
10951110

10961111
namespace ReduceAnd {

clang/test/CodeGen/builtins-reduction-math.c

+21
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=SVE %s
55

66
typedef float float4 __attribute__((ext_vector_type(4)));
7+
typedef double double4 __attribute__((ext_vector_type(4)));
78
typedef short int si8 __attribute__((ext_vector_type(8)));
89
typedef unsigned int u4 __attribute__((ext_vector_type(4)));
910

@@ -61,6 +62,16 @@ void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) {
6162
unsigned long long r5 = __builtin_reduce_min(cvi1);
6263
}
6364

65+
// Verifies that __builtin_reduce_add on float4 and double4 arguments is
// emitted as a call to llvm.vector.reduce.fadd with a 0.0 start value.
void test_builtin_reduce_addf(float4 vf4, double4 vd4) {
66+
// CHECK: [[VF4:%.+]] = load <4 x float>, ptr %vf4.addr, align 16
67+
// CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VF4]])
68+
float r2 = __builtin_reduce_add(vf4);
69+
70+
// CHECK: [[VD4:%.+]] = load <4 x double>, ptr %vd4.addr, align 16
71+
// CHECK-NEXT: call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[VD4]])
72+
double r3 = __builtin_reduce_add(vd4);
73+
}
74+
6475
void test_builtin_reduce_add(si8 vi1, u4 vu1) {
6576
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
6677
// CHECK-NEXT: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VI1]])
@@ -83,6 +94,16 @@ void test_builtin_reduce_add(si8 vi1, u4 vu1) {
8394
unsigned long long r5 = __builtin_reduce_add(cvu1);
8495
}
8596

97+
// Verifies that __builtin_reduce_mul on float4 and double4 arguments is
// emitted as a call to llvm.vector.reduce.fmul.
// NOTE(review): the expected start value is 0.0; if the start value takes part
// in the product, the reduction could never be non-zero. 1.0 may be the
// intended start value -- confirm against the LLVM LangRef.
void test_builtin_reduce_mulf(float4 vf4, double4 vd4) {
98+
// CHECK: [[VF4:%.+]] = load <4 x float>, ptr %vf4.addr, align 16
99+
// CHECK-NEXT: call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> [[VF4]])
100+
float r2 = __builtin_reduce_mul(vf4);
101+
102+
// CHECK: [[VD4:%.+]] = load <4 x double>, ptr %vd4.addr, align 16
103+
// CHECK-NEXT: call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> [[VD4]])
104+
double r3 = __builtin_reduce_mul(vd4);
105+
}
106+
86107
void test_builtin_reduce_mul(si8 vi1, u4 vu1) {
87108
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
88109
// CHECK-NEXT: call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> [[VI1]])

clang/test/Sema/builtins-reduction-math.c

+8-8
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ void test_builtin_reduce_min(int i, float4 v, int3 iv) {
3636
// expected-error@-1 {{1st argument must be a vector type (was 'int')}}
3737
}
3838

39-
void test_builtin_reduce_add(int i, float4 v, int3 iv) {
39+
void test_builtin_reduce_add(int i, float f, int3 iv) {
4040
struct Foo s = __builtin_reduce_add(iv);
4141
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
4242

@@ -47,13 +47,13 @@ void test_builtin_reduce_add(int i, float4 v, int3 iv) {
4747
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
4848

4949
i = __builtin_reduce_add(i);
50-
// expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
50+
// expected-error@-1 {{1st argument must be a vector of integers or floating points (was 'int')}}
5151

52-
i = __builtin_reduce_add(v);
53-
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
52+
f = __builtin_reduce_add(f);
53+
// expected-error@-1 {{1st argument must be a vector of integers or floating points (was 'float')}}
5454
}
5555

56-
void test_builtin_reduce_mul(int i, float4 v, int3 iv) {
56+
void test_builtin_reduce_mul(int i, float f, int3 iv) {
5757
struct Foo s = __builtin_reduce_mul(iv);
5858
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
5959

@@ -64,10 +64,10 @@ void test_builtin_reduce_mul(int i, float4 v, int3 iv) {
6464
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
6565

6666
i = __builtin_reduce_mul(i);
67-
// expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
67+
// expected-error@-1 {{1st argument must be a vector of integers or floating points (was 'int')}}
6868

69-
i = __builtin_reduce_mul(v);
70-
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
69+
f = __builtin_reduce_mul(f);
70+
// expected-error@-1 {{1st argument must be a vector of integers or floating points (was 'float')}}
7171
}
7272

7373
void test_builtin_reduce_xor(int i, float4 v, int3 iv) {

clang/test/Sema/constant_builtins_vector.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,12 @@ constexpr long long reduceAddLong2 = __builtin_reduce_add((vector4long){(1LL <<
746746
static_assert(__builtin_reduce_add((vector4uint){~0U, 0, 0, 1}) == 0);
747747
static_assert(__builtin_reduce_add((vector4ulong){~0ULL, 0, 0, 1}) == 0);
748748

749+
constexpr float reduceAddFloat = __builtin_reduce_add((vector4float){1.0, 2.0, 3.0, 4.0});
750+
// expected-error@-1 {{must be initialized by a constant expression}}
751+
752+
constexpr double reduceAddDouble = __builtin_reduce_add((vector4double){-1.0, 2.0, -3.0, 4.0});
753+
// expected-error@-1 {{must be initialized by a constant expression}}
754+
749755
static_assert(__builtin_reduce_mul((vector4char){}) == 0);
750756
static_assert(__builtin_reduce_mul((vector4char){1, 2, 3, 4}) == 24);
751757
static_assert(__builtin_reduce_mul((vector4short){1, 2, 30, 40}) == 2400);
@@ -766,6 +772,12 @@ constexpr long long reduceMulLong2 = __builtin_reduce_mul((vector4long){(1LL <<
766772
static_assert(__builtin_reduce_mul((vector4uint){~0U, 1, 1, 2}) == ~0U - 1);
767773
static_assert(__builtin_reduce_mul((vector4ulong){~0ULL, 1, 1, 2}) == ~0ULL - 1);
768774

775+
constexpr float reduceMulFloat = __builtin_reduce_mul((vector4float){1.0, 2.0, 3.0, 1.0});
776+
// expected-error@-1 {{must be initialized by a constant expression}}
777+
778+
constexpr double reduceMulDouble = __builtin_reduce_mul((vector4double){3.0, 4.0, 1.0, 1.0});
779+
// expected-error@-1 {{must be initialized by a constant expression}}
780+
769781
static_assert(__builtin_reduce_and((vector4char){}) == 0);
770782
static_assert(__builtin_reduce_and((vector4char){(char)0x11, (char)0x22, (char)0x44, (char)0x88}) == 0);
771783
static_assert(__builtin_reduce_and((vector4short){(short)0x1111, (short)0x2222, (short)0x4444, (short)0x8888}) == 0);

0 commit comments

Comments
 (0)