Skip to content

Commit 80ec43d

Browse files
authored
[CIR] Implement builtin reduce fadd/fmul/fmax/fmin (#171633)
New test files are created to mirror the structure of the corresponding original CodeGen (OGCG) tests.
1 parent c18d9ea commit 80ec43d

File tree

5 files changed

+360
-5
lines changed

5 files changed

+360
-5
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1552,26 +1552,52 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
15521552
case X86::BI__builtin_ia32_vpshrdw128:
15531553
case X86::BI__builtin_ia32_vpshrdw256:
15541554
case X86::BI__builtin_ia32_vpshrdw512:
1555+
cgm.errorNYI(expr->getSourceRange(),
1556+
std::string("unimplemented X86 builtin call: ") +
1557+
getContext().BuiltinInfo.getName(builtinID));
1558+
return {};
15551559
case X86::BI__builtin_ia32_reduce_fadd_pd512:
15561560
case X86::BI__builtin_ia32_reduce_fadd_ps512:
15571561
case X86::BI__builtin_ia32_reduce_fadd_ph512:
15581562
case X86::BI__builtin_ia32_reduce_fadd_ph256:
1559-
case X86::BI__builtin_ia32_reduce_fadd_ph128:
1563+
case X86::BI__builtin_ia32_reduce_fadd_ph128: {
1564+
assert(!cir::MissingFeatures::fastMathFlags());
1565+
return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
1566+
"vector.reduce.fadd", ops[0].getType(),
1567+
mlir::ValueRange{ops[0], ops[1]});
1568+
}
15601569
case X86::BI__builtin_ia32_reduce_fmul_pd512:
15611570
case X86::BI__builtin_ia32_reduce_fmul_ps512:
15621571
case X86::BI__builtin_ia32_reduce_fmul_ph512:
15631572
case X86::BI__builtin_ia32_reduce_fmul_ph256:
1564-
case X86::BI__builtin_ia32_reduce_fmul_ph128:
1573+
case X86::BI__builtin_ia32_reduce_fmul_ph128: {
1574+
assert(!cir::MissingFeatures::fastMathFlags());
1575+
return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
1576+
"vector.reduce.fmul", ops[0].getType(),
1577+
mlir::ValueRange{ops[0], ops[1]});
1578+
}
15651579
case X86::BI__builtin_ia32_reduce_fmax_pd512:
15661580
case X86::BI__builtin_ia32_reduce_fmax_ps512:
15671581
case X86::BI__builtin_ia32_reduce_fmax_ph512:
15681582
case X86::BI__builtin_ia32_reduce_fmax_ph256:
1569-
case X86::BI__builtin_ia32_reduce_fmax_ph128:
1583+
case X86::BI__builtin_ia32_reduce_fmax_ph128: {
1584+
assert(!cir::MissingFeatures::fastMathFlags());
1585+
cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
1586+
return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
1587+
"vector.reduce.fmax", vecTy.getElementType(),
1588+
mlir::ValueRange{ops[0]});
1589+
}
15701590
case X86::BI__builtin_ia32_reduce_fmin_pd512:
15711591
case X86::BI__builtin_ia32_reduce_fmin_ps512:
15721592
case X86::BI__builtin_ia32_reduce_fmin_ph512:
15731593
case X86::BI__builtin_ia32_reduce_fmin_ph256:
1574-
case X86::BI__builtin_ia32_reduce_fmin_ph128:
1594+
case X86::BI__builtin_ia32_reduce_fmin_ph128: {
1595+
assert(!cir::MissingFeatures::fastMathFlags());
1596+
cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
1597+
return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
1598+
"vector.reduce.fmin", vecTy.getElementType(),
1599+
mlir::ValueRange{ops[0]});
1600+
}
15751601
case X86::BI__builtin_ia32_rdrand16_step:
15761602
case X86::BI__builtin_ia32_rdrand32_step:
15771603
case X86::BI__builtin_ia32_rdrand64_step:
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck %s --check-prefixes=CIR
2+
// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=LLVM
3+
// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
4+
5+
#include <immintrin.h>
6+
7+
double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){
8+
9+
// CIR-LABEL: _mm512_reduce_add_pd
10+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
11+
12+
// CIR-LABEL: test_mm512_reduce_add_pd
13+
// CIR: cir.call @_mm512_reduce_add_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double
14+
15+
// LLVM-LABEL: test_mm512_reduce_add_pd
16+
// LLVM: call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
17+
18+
// OGCG-LABEL: test_mm512_reduce_add_pd
19+
// OGCG-NOT: reassoc
20+
// OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
21+
// OGCG-NOT: reassoc
22+
return _mm512_reduce_add_pd(__W) + ExtraAddOp;
23+
}
24+
25+
double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){
26+
// CIR-LABEL: _mm512_reduce_mul_pd
27+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
28+
29+
// CIR-LABEL: test_mm512_reduce_mul_pd
30+
// CIR: cir.call @_mm512_reduce_mul_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double
31+
32+
// LLVM-LABEL: test_mm512_reduce_mul_pd
33+
// LLVM: call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
34+
35+
// OGCG-LABEL: test_mm512_reduce_mul_pd
36+
// OGCG-NOT: reassoc
37+
// OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
38+
// OGCG-NOT: reassoc
39+
return _mm512_reduce_mul_pd(__W) * ExtraMulOp;
40+
}
41+
42+
43+
float test_mm512_reduce_add_ps(__m512 __W){
44+
// CIR-LABEL: _mm512_reduce_add_ps
45+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
46+
47+
// CIR-LABEL: test_mm512_reduce_add_ps
48+
// CIR: cir.call @_mm512_reduce_add_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float
49+
50+
// LLVM-LABEL: test_mm512_reduce_add_ps
51+
// LLVM: call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
52+
53+
// OGCG-LABEL: test_mm512_reduce_add_ps
54+
// OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
55+
return _mm512_reduce_add_ps(__W);
56+
}
57+
58+
float test_mm512_reduce_mul_ps(__m512 __W){
59+
// CIR-LABEL: _mm512_reduce_mul_ps
60+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
61+
62+
// CIR-LABEL: test_mm512_reduce_mul_ps
63+
// CIR: cir.call @_mm512_reduce_mul_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float
64+
65+
// LLVM-LABEL: test_mm512_reduce_mul_ps
66+
// LLVM: call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
67+
68+
// OGCG-LABEL: test_mm512_reduce_mul_ps
69+
// OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
70+
return _mm512_reduce_mul_ps(__W);
71+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck %s --check-prefixes=CIR
2+
// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=LLVM
3+
// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
4+
5+
#include <immintrin.h>
6+
7+
double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
8+
// CIR-LABEL: _mm512_reduce_max_pd
9+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double
10+
11+
// CIR-LABEL: test_mm512_reduce_max_pd
12+
// CIR: cir.call @_mm512_reduce_max_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double
13+
14+
// LLVM-LABEL: test_mm512_reduce_max_pd
15+
// LLVM: call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
16+
17+
// OGCG-LABEL: test_mm512_reduce_max_pd
18+
// OGCG-NOT: nnan
19+
// OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
20+
// OGCG-NOT: nnan
21+
return _mm512_reduce_max_pd(__W) + ExtraAddOp;
22+
}
23+
24+
double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
25+
// CIR-LABEL: _mm512_reduce_min_pd
26+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double
27+
28+
// CIR-LABEL: test_mm512_reduce_min_pd
29+
// CIR: cir.call @_mm512_reduce_min_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double
30+
31+
// LLVM-LABEL: test_mm512_reduce_min_pd
32+
// LLVM: call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
33+
34+
// OGCG-LABEL: test_mm512_reduce_min_pd
35+
// OGCG-NOT: nnan
36+
// OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
37+
// OGCG-NOT: nnan
38+
return _mm512_reduce_min_pd(__W) * ExtraMulOp;
39+
}
40+
41+
float test_mm512_reduce_max_ps(__m512 __W){
42+
// CIR-LABEL: _mm512_reduce_max_ps
43+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float
44+
45+
// CIR-LABEL: test_mm512_reduce_max_ps
46+
// CIR: cir.call @_mm512_reduce_max_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float
47+
48+
// LLVM-LABEL: test_mm512_reduce_max_ps
49+
// LLVM: call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
50+
51+
// OGCG-LABEL: test_mm512_reduce_max_ps
52+
// OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
53+
return _mm512_reduce_max_ps(__W);
54+
}
55+
56+
float test_mm512_reduce_min_ps(__m512 __W){
57+
// CIR-LABEL: _mm512_reduce_min_ps
58+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float
59+
60+
// CIR-LABEL: test_mm512_reduce_min_ps
61+
// CIR: cir.call @_mm512_reduce_min_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float
62+
63+
// LLVM-LABEL: test_mm512_reduce_min_ps
64+
// LLVM: call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
65+
66+
// OGCG-LABEL: test_mm512_reduce_min_ps
67+
// OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
68+
return _mm512_reduce_min_ps(__W);
69+
}

clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,64 @@ __m512h test_mm512_undefined_ph(void) {
6363
// OGCG-LABEL: test_mm512_undefined_ph
6464
// OGCG: ret <32 x half> zeroinitializer
6565
return _mm512_undefined_ph();
66-
}
66+
}
67+
68+
_Float16 test_mm512_reduce_add_ph(__m512h __W) {
69+
// CIR-LABEL: _mm512_reduce_add_ph
70+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
71+
72+
// CIR-LABEL: test_mm512_reduce_add_ph
73+
// CIR: cir.call @_mm512_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16
74+
75+
// LLVM-LABEL: test_mm512_reduce_add_ph
76+
// LLVM: call half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}})
77+
78+
// OGCG-LABEL: test_mm512_reduce_add_ph
79+
// OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}})
80+
return _mm512_reduce_add_ph(__W);
81+
}
82+
83+
_Float16 test_mm512_reduce_mul_ph(__m512h __W) {
84+
// CIR-LABEL: _mm512_reduce_mul_ph
85+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
86+
87+
// CIR-LABEL: test_mm512_reduce_mul_ph
88+
// CIR: cir.call @_mm512_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16
89+
90+
// LLVM-LABEL: test_mm512_reduce_mul_ph
91+
// LLVM: call half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}})
92+
93+
// OGCG-LABEL: test_mm512_reduce_mul_ph
94+
// OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}})
95+
return _mm512_reduce_mul_ph(__W);
96+
}
97+
98+
_Float16 test_mm512_reduce_max_ph(__m512h __W) {
99+
// CIR-LABEL: _mm512_reduce_max_ph
100+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<32 x !cir.f16>) -> !cir.f16
101+
102+
// CIR-LABEL: test_mm512_reduce_max_ph
103+
// CIR: cir.call @_mm512_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16
104+
105+
// LLVM-LABEL: test_mm512_reduce_max_ph
106+
// LLVM: call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}})
107+
108+
// OGCG-LABEL: test_mm512_reduce_max_ph
109+
// OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}})
110+
return _mm512_reduce_max_ph(__W);
111+
}
112+
113+
_Float16 test_mm512_reduce_min_ph(__m512h __W) {
114+
// CIR-LABEL: _mm512_reduce_min_ph
115+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<32 x !cir.f16>) -> !cir.f16
116+
117+
// CIR-LABEL: test_mm512_reduce_min_ph
118+
// CIR: cir.call @_mm512_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16
119+
120+
// LLVM-LABEL: test_mm512_reduce_min_ph
121+
// LLVM: call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}})
122+
123+
// OGCG-LABEL: test_mm512_reduce_min_ph
124+
// OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}})
125+
return _mm512_reduce_min_ph(__W);
126+
}
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -fclangir -emit-cir -o %t.cir -Wall -Werror
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -fclangir -emit-llvm -o %t.ll -Wall -Werror
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
7+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
8+
#include <immintrin.h>
9+
10+
_Float16 test_mm256_reduce_add_ph(__m256h __W) {
11+
// CIR-LABEL: _mm256_reduce_add_ph
12+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
13+
14+
// CIR-LABEL: test_mm256_reduce_add_ph
15+
// CIR: cir.call @_mm256_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16
16+
17+
// LLVM-LABEL: test_mm256_reduce_add_ph
18+
// LLVM: call half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> %{{.*}})
19+
20+
// OGCG-LABEL: test_mm256_reduce_add_ph
21+
// OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> %{{.*}})
22+
return _mm256_reduce_add_ph(__W);
23+
}
24+
25+
_Float16 test_mm256_reduce_mul_ph(__m256h __W) {
26+
// CIR-LABEL: _mm256_reduce_mul_ph
27+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
28+
29+
// CIR-LABEL: test_mm256_reduce_mul_ph
30+
// CIR: cir.call @_mm256_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16
31+
32+
// LLVM-LABEL: test_mm256_reduce_mul_ph
33+
// LLVM: call half @llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> %{{.*}})
34+
35+
// OGCG-LABEL: test_mm256_reduce_mul_ph
36+
// OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> %{{.*}})
37+
return _mm256_reduce_mul_ph(__W);
38+
}
39+
40+
_Float16 test_mm256_reduce_max_ph(__m256h __W) {
41+
// CIR-LABEL: _mm256_reduce_max_ph
42+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16
43+
44+
// CIR-LABEL: test_mm256_reduce_max_ph
45+
// CIR: cir.call @_mm256_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16
46+
47+
// LLVM-LABEL: test_mm256_reduce_max_ph
48+
// LLVM: call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}})
49+
50+
// OGCG-LABEL: test_mm256_reduce_max_ph
51+
// OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}})
52+
return _mm256_reduce_max_ph(__W);
53+
}
54+
55+
_Float16 test_mm256_reduce_min_ph(__m256h __W) {
56+
// CIR-LABEL: _mm256_reduce_min_ph
57+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16
58+
59+
// CIR-LABEL: test_mm256_reduce_min_ph
60+
// CIR: cir.call @_mm256_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16
61+
62+
// LLVM-LABEL: test_mm256_reduce_min_ph
63+
// LLVM: call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}})
64+
65+
// OGCG-LABEL: test_mm256_reduce_min_ph
66+
// OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}})
67+
return _mm256_reduce_min_ph(__W);
68+
}
69+
70+
_Float16 test_mm_reduce_add_ph(__m128h __W) {
71+
// CIR-LABEL: _mm_reduce_add_ph
72+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
73+
74+
// CIR-LABEL: test_mm_reduce_add_ph
75+
// CIR: cir.call @_mm_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16
76+
77+
// LLVM-LABEL: test_mm_reduce_add_ph
78+
// LLVM: call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> %{{.*}})
79+
80+
// OGCG-LABEL: test_mm_reduce_add_ph
81+
// OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> %{{.*}})
82+
return _mm_reduce_add_ph(__W);
83+
}
84+
85+
_Float16 test_mm_reduce_mul_ph(__m128h __W) {
86+
// CIR-LABEL: _mm_reduce_mul_ph
87+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
88+
89+
// CIR-LABEL: test_mm_reduce_mul_ph
90+
// CIR: cir.call @_mm_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16
91+
92+
// LLVM-LABEL: test_mm_reduce_mul_ph
93+
// LLVM: call half @llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 x half> %{{.*}})
94+
95+
// OGCG-LABEL: test_mm_reduce_mul_ph
96+
// OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 x half> %{{.*}})
97+
return _mm_reduce_mul_ph(__W);
98+
}
99+
100+
_Float16 test_mm_reduce_max_ph(__m128h __W) {
101+
// CIR-LABEL: _mm_reduce_max_ph
102+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<8 x !cir.f16>) -> !cir.f16
103+
104+
// CIR-LABEL: test_mm_reduce_max_ph
105+
// CIR: cir.call @_mm_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16
106+
107+
// LLVM-LABEL: test_mm_reduce_max_ph
108+
// LLVM: call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}})
109+
110+
// OGCG-LABEL: test_mm_reduce_max_ph
111+
// OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}})
112+
return _mm_reduce_max_ph(__W);
113+
}
114+
115+
_Float16 test_mm_reduce_min_ph(__m128h __W) {
116+
// CIR-LABEL: _mm_reduce_min_ph
117+
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<8 x !cir.f16>) -> !cir.f16
118+
119+
// CIR-LABEL: test_mm_reduce_min_ph
120+
// CIR: cir.call @_mm_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16
121+
122+
// LLVM-LABEL: test_mm_reduce_min_ph
123+
// LLVM: call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}})
124+
125+
// OGCG-LABEL: test_mm_reduce_min_ph
126+
// OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}})
127+
return _mm_reduce_min_ph(__W);
128+
}
129+

0 commit comments

Comments
 (0)