When we are working with different vector widths but the same constant data, we often see cases where the constant is repeated in the constant pool once per vector width. For example, this loop gets vectorized into a 4-wide body plus a 2-wide tail, and each width pulls in its own copy of the fabs sign mask:
void fabs_cvt(const double *src, int *dst) {
  for (int i = 0; i != 6; ++i) {
    *dst++ = __builtin_fabs(*src++);
  }
}
define void @fabs_cvt(ptr nocapture noundef readonly %src, ptr nocapture noundef writeonly %dst) {
entry:
  %incdec.ptr.3 = getelementptr inbounds double, ptr %src, i64 4
  %incdec.ptr1.3 = getelementptr inbounds i32, ptr %dst, i64 4
  %0 = load <4 x double>, ptr %src, align 8
  %1 = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %0)
  %2 = fptosi <4 x double> %1 to <4 x i32>
  store <4 x i32> %2, ptr %dst, align 4
  %3 = load <2 x double>, ptr %incdec.ptr.3, align 8
  %4 = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %3)
  %5 = fptosi <2 x double> %4 to <2 x i32>
  store <2 x i32> %5, ptr %incdec.ptr1.3, align 4
  ret void
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
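For context, fabs on these targets lowers to a bitwise AND that clears the sign bit, which is why every vector width ends up wanting the exact same 0x7fffffffffffffff constant. A minimal scalar sketch of that lowering (the helper name is hypothetical, just for illustration):

#include <stdint.h>
#include <string.h>

// Clear the sign bit: the same 0x7fffffffffffffff mask the
// vector constant pools below repeat once per width.
static double fabs_via_mask(double x) {
  uint64_t bits;
  memcpy(&bits, &x, sizeof bits); // type-pun without UB
  bits &= 0x7fffffffffffffffULL;  // drop bit 63 (the sign)
  memcpy(&x, &bits, sizeof x);
  return x;
}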
AVX1:
.LCPI0_0:
        .quad   0x7fffffffffffffff      # double NaN
        .quad   0x7fffffffffffffff      # double NaN
        .quad   0x7fffffffffffffff      # double NaN
        .quad   0x7fffffffffffffff      # double NaN
.LCPI0_1:
        .quad   0x7fffffffffffffff      # double NaN
        .quad   0x7fffffffffffffff      # double NaN
fabs_cvt(double const*, int*):          # @fabs_cvt(double const*, int*)
        vmovupd (%rdi), %ymm0
        vandpd  .LCPI0_0(%rip), %ymm0, %ymm0
        vcvttpd2dq      %ymm0, %xmm0
        vmovupd %xmm0, (%rsi)
        vmovupd 32(%rdi), %xmm0
        vandpd  .LCPI0_1(%rip), %xmm0, %xmm0
        vcvttpd2dq      %xmm0, %xmm0
        vmovlpd %xmm0, 16(%rsi)
        retq
AVX2:
.LCPI0_0:
        .quad   0x7fffffffffffffff      # double NaN
.LCPI0_1:
        .quad   0x7fffffffffffffff      # double NaN
        .quad   0x7fffffffffffffff      # double NaN
fabs_cvt(double const*, int*):          # @fabs_cvt(double const*, int*)
        vbroadcastsd    .LCPI0_0(%rip), %ymm0   # ymm0 = [NaN,NaN,NaN,NaN]
        vandpd  (%rdi), %ymm0, %ymm0
        vcvttpd2dq      %ymm0, %xmm0
        vmovupd %xmm0, (%rsi)
        vmovupd 32(%rdi), %xmm0
        vandpd  .LCPI0_1(%rip), %xmm0, %xmm0
        vcvttpd2dq      %xmm0, %xmm0
        vmovlpd %xmm0, 16(%rsi)
        vzeroupper
        retq
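Ideally the second constant would go away entirely: vbroadcastsd with a memory source only requires AVX (not AVX2), and the low 128 bits of the broadcast register already hold the <2 x double> mask. A hand-written sketch of the shape one might hope for on both targets (register choices hypothetical, not current LLVM output):

.LCPI0_0:
        .quad   0x7fffffffffffffff      # double NaN
fabs_cvt(double const*, int*):
        vbroadcastsd    .LCPI0_0(%rip), %ymm1   # ymm1 = [NaN,NaN,NaN,NaN]
        vandpd  (%rdi), %ymm1, %ymm0            # fabs of elements 0..3
        vcvttpd2dq      %ymm0, %xmm0
        vmovupd %xmm0, (%rsi)
        vandpd  32(%rdi), %xmm1, %xmm0          # xmm1 = low half of ymm1, same mask
        vcvttpd2dq      %xmm0, %xmm0
        vmovlpd %xmm0, 16(%rsi)
        vzeroupper
        retq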