Skip to content

[X86] Duplicate XMM/YMM constant data #70947

@RKSimon

Description

@RKSimon

When we are working with different vector widths but the same constant data, we often see cases where the constants are repeated at each vector width:

/* Writes the absolute value of each of the six doubles at src,
 * converted to int, to the six ints at dst. */
void fabs_cvt(const double *src, int *dst) {
    const double *const end = src + 6;  /* one past the last input element */
    while (src != end) {
        *dst = (int)__builtin_fabs(*src);  /* double -> int conversion, same as the implicit one */
        ++src;
        ++dst;
    }
}
; @fabs_cvt: takes fabs of six doubles read from %src, converts them to signed
; i32 and writes them to %dst. The work is expressed as one <4 x double>
; operation followed by one <2 x double> operation, so llvm.fabs is used at
; two different vector widths.
define void @fabs_cvt(ptr nocapture noundef readonly %src, ptr nocapture noundef writeonly %dst) {
entry:
  ; Addresses of element 4 in the source and in the destination (start of the
  ; 2-element part).
  %incdec.ptr.3 = getelementptr inbounds double, ptr %src, i64 4
  %incdec.ptr1.3 = getelementptr inbounds i32, ptr %dst, i64 4
  ; Elements 0..3: load, fabs, convert to signed i32, store.
  %0 = load <4 x double>, ptr %src, align 8
  %1 = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %0)
  %2 = fptosi <4 x double> %1 to <4 x i32>
  store <4 x i32> %2, ptr %dst, align 4
  ; Elements 4..5: the same sequence at half the vector width.
  %3 = load <2 x double>, ptr %incdec.ptr.3, align 8
  %4 = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %3)
  %5 = fptosi <2 x double> %4 to <2 x i32>
  store <2 x i32> %5, ptr %incdec.ptr1.3, align 4
  ret void
}
; fabs intrinsics for the two vector widths used above.
declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)

AVX1:

# Constant pool. Both entries hold the same 64-bit pattern 0x7fffffffffffffff
# (every bit set except bit 63), which the vandpd instructions below use as a
# mask. Read as a double the pattern is a NaN, hence the "double NaN"
# annotations.
# .LCPI0_0: 4 x .quad = 32 bytes, operand of the ymm-width vandpd.
.LCPI0_0:
  .quad 0x7fffffffffffffff # double NaN
  .quad 0x7fffffffffffffff # double NaN
  .quad 0x7fffffffffffffff # double NaN
  .quad 0x7fffffffffffffff # double NaN
# .LCPI0_1: 2 x .quad = 16 bytes, operand of the xmm-width vandpd. Its contents
# are identical to the first 16 bytes of .LCPI0_0.
.LCPI0_1:
  .quad 0x7fffffffffffffff # double NaN
  .quad 0x7fffffffffffffff # double NaN
# In:  rdi = src (const double *), rsi = dst (int *)
# Uses ymm0/xmm0 only; no stack accesses.
fabs_cvt(double const*, int*): # @fabs_cvt(double const*, int*)
  # ymm0 = src[0..3]
  vmovupd (%rdi), %ymm0
  # clear bit 63 (the sign bit) of each lane using the 32-byte mask
  vandpd .LCPI0_0(%rip), %ymm0, %ymm0
  # 4 x double -> 4 x int32, truncating
  vcvttpd2dq %ymm0, %xmm0
  # dst[0..3] = xmm0 (16 bytes)
  vmovupd %xmm0, (%rsi)
  # xmm0 = src[4..5] (byte offset 32)
  vmovupd 32(%rdi), %xmm0
  # same masking, but reading the separate 16-byte copy of the mask
  vandpd .LCPI0_1(%rip), %xmm0, %xmm0
  # 2 x double -> 2 x int32 in the low 64 bits of xmm0
  vcvttpd2dq %xmm0, %xmm0
  # dst[4..5] = low 64 bits of xmm0 (byte offset 16)
  vmovlpd %xmm0, 16(%rsi)
  retq

AVX2:

# Constant pool. Both entries hold the same 64-bit pattern 0x7fffffffffffffff
# (every bit set except bit 63), used below as a vandpd mask. Read as a double
# the pattern is a NaN, hence the "double NaN" annotations.
# .LCPI0_0: a single .quad (8 bytes), expanded to all four ymm lanes by
# vbroadcastsd.
.LCPI0_0:
  .quad 0x7fffffffffffffff # double NaN
# .LCPI0_1: 2 x .quad = 16 bytes, memory operand of the xmm-width vandpd. It
# repeats the value already stored in .LCPI0_0.
.LCPI0_1:
  .quad 0x7fffffffffffffff # double NaN
  .quad 0x7fffffffffffffff # double NaN
# In:  rdi = src (const double *), rsi = dst (int *)
# Uses ymm0/xmm0 only; no stack accesses.
fabs_cvt(double const*, int*): # @fabs_cvt(double const*, int*)
  # ymm0 = mask replicated into all four 64-bit lanes
  vbroadcastsd .LCPI0_0(%rip), %ymm0 # ymm0 = [NaN,NaN,NaN,NaN]
  # ymm0 = src[0..3] with bit 63 (the sign bit) cleared; the load is folded into the AND
  vandpd (%rdi), %ymm0, %ymm0
  # 4 x double -> 4 x int32, truncating
  vcvttpd2dq %ymm0, %xmm0
  # dst[0..3] = xmm0 (16 bytes)
  vmovupd %xmm0, (%rsi)
  # xmm0 = src[4..5] (byte offset 32)
  vmovupd 32(%rdi), %xmm0
  # same masking, but reading the separate 16-byte pool entry
  vandpd .LCPI0_1(%rip), %xmm0, %xmm0
  # 2 x double -> 2 x int32 in the low 64 bits of xmm0
  vcvttpd2dq %xmm0, %xmm0
  # dst[4..5] = low 64 bits of xmm0 (byte offset 16)
  vmovlpd %xmm0, 16(%rsi)
  # zero the upper halves of the ymm registers before returning
  vzeroupper
  retq

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions