diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 4cd32a0502c66..a03a92b5a97f7 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -862,8 +862,8 @@ class AMDGPUImageDimIntrinsicRsrcIndex)); - if (BaseOpcode->Sampler) - Ops.push_back(Op.getOperand(ArgOffset + Intr->SampIndex)); + SDValue Rsrc = Op.getOperand(ArgOffset + Intr->RsrcIndex); + EVT RsrcVT = Rsrc.getValueType(); + if (RsrcVT != MVT::v4i32 && RsrcVT != MVT::v8i32) + return Op; + Ops.push_back(Rsrc); + if (BaseOpcode->Sampler) { + SDValue Samp = Op.getOperand(ArgOffset + Intr->SampIndex); + if (Samp.getValueType() != MVT::v4i32) + return Op; + Ops.push_back(Samp); + } Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32)); if (IsGFX10Plus) Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32)); diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll index f6ac8fbd99c58..e2b743e5f514a 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,131 +1,131 @@ ; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32.v8i32( define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32.v8i32( define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32.v8i32( define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32.v8i32( define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32.v8i32( define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32.v8i32( define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32.v8i32( define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32.v8i32( define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32.v8i32( define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32.v8i32( define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32.v8i32( define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32.v8i32( define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32.v8i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32.v8i32( define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32.v8i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32( +;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32.v8i32( define float @image_atomic_add_2d(<8 x i32> inreg %rsrc, i32 inreg %s, i32 inreg %t, i32 inreg %data) #0 { main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) + %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32.v8i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) %r = bitcast i32 %orig to float ret float %r } -declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 - -declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #0 + +declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #0 attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll index 5ffdbb0f8c5b0..4df6fcd584ff6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll @@ -7,10 +7,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1(<8 x i32> inreg %rsrc, i3 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0:[0-9]+]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -20,7 +20,7 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1(<8 x i32> inreg %rsrc, i3 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0:[0-9]+]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 @@ -32,10 +32,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1(<8 x i32> inreg %rsrc, i3 ; MSAA-NEXT: ret [4 x float] [[I7]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x float] undef, float %i, 0 %i5 = insertvalue [4 x float] %i4, float %i1, 1 %i6 = insertvalue [4 x float] %i5, float %i2, 2 @@ -47,10 +47,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2(<8 x i32> inreg %rsrc, i3 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -60,7 +60,7 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2(<8 x i32> inreg %rsrc, i3 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 @@ -72,10 +72,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2(<8 x i32> inreg %rsrc, i3 ; MSAA-NEXT: ret [4 x float] [[I7]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x float] undef, float %i, 0 %i5 = insertvalue [4 x float] %i4, float %i1, 1 %i6 = insertvalue [4 x float] %i5, float %i2, 2 @@ -87,10 +87,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4(<8 x i32> inreg %rsrc, i3 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -100,7 +100,7 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4(<8 x i32> inreg %rsrc, i3 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 @@ -112,10 +112,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4(<8 x i32> inreg %rsrc, i3 ; MSAA-NEXT: ret [4 x float] [[I7]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x float] undef, float %i, 0 %i5 = insertvalue [4 x float] %i4, float %i1, 1 %i6 = insertvalue [4 x float] %i5, float %i2, 2 @@ -127,10 +127,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8(<8 x i32> inreg %rsrc, i3 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -140,7 +140,7 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8(<8 x i32> inreg %rsrc, i3 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 @@ -152,10 +152,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8(<8 x i32> inreg %rsrc, i3 ; MSAA-NEXT: ret [4 x float] [[I7]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x float] undef, float %i, 0 %i5 = insertvalue [4 x float] %i4, float %i1, 1 %i6 = insertvalue [4 x float] %i5, float %i2, 2 @@ -167,10 +167,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse(<8 x i32> inreg %rsrc, i ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -180,7 +180,7 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse(<8 x i32> inreg %rsrc, i ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 @@ -192,10 +192,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse(<8 x i32> inreg %rsrc, i ; MSAA-NEXT: ret [4 x float] [[I7]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x float] undef, float %i, 0 %i5 = insertvalue [4 x float] %i4, float %i1, 1 %i6 = insertvalue [4 x float] %i5, float %i2, 2 @@ -208,10 +208,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr(<8 x i32> inreg %rsrc, i32 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S0:%.*]], i32 [[T0:%.*]], i32 [[S1:%.*]], i32 [[T1:%.*]], i32 [[S2:%.*]], i32 [[T2:%.*]], i32 [[S3:%.*]], i32 [[T3:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S0]], i32 [[T0]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S1]], i32 [[T1]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S2]], i32 [[T2]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S3]], i32 [[T3]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S0]], i32 [[T0]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S1]], i32 [[T1]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S2]], i32 [[T2]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S3]], i32 [[T3]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -221,10 +221,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr(<8 x i32> inreg %rsrc, i32 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S0:%.*]], i32 [[T0:%.*]], i32 [[S1:%.*]], i32 [[T1:%.*]], i32 [[S2:%.*]], i32 [[T2:%.*]], i32 [[S3:%.*]], i32 [[T3:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S0]], i32 [[T0]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S1]], i32 [[T1]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S2]], i32 [[T2]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S3]], i32 [[T3]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S0]], i32 [[T0]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S1]], i32 [[T1]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S2]], i32 [[T2]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S3]], i32 [[T3]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -232,10 +232,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr(<8 x i32> inreg %rsrc, i32 ; MSAA-NEXT: ret [4 x float] [[I7]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s0, i32 %t0, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s1, i32 %t1, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s2, i32 %t2, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s3, i32 %t3, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s0, i32 %t0, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s1, i32 %t1, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s2, i32 %t2, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s3, i32 %t3, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x float] undef, float %i, 0 %i5 = insertvalue [4 x float] %i4, float %i1, 1 %i6 = insertvalue [4 x float] %i5, float %i2, 2 @@ -247,14 +247,14 @@ define amdgpu_ps [8 x float] @load_2dmsaa_v8f32(<8 x i32> inreg %rsrc, i32 %s, i ; NO-MSAA-LABEL: define amdgpu_ps [8 x float] @load_2dmsaa_v8f32( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I4:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I5:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I6:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I7:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I4:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I5:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I6:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I7:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I8:%.*]] = insertvalue [8 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I9:%.*]] = insertvalue [8 x float] [[I8]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I10:%.*]] = insertvalue [8 x float] [[I9]], float [[I2]], 2 @@ -268,7 +268,7 @@ define amdgpu_ps [8 x float] @load_2dmsaa_v8f32(<8 x i32> inreg %rsrc, i32 %s, i ; MSAA-LABEL: define amdgpu_ps [8 x float] @load_2dmsaa_v8f32( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 @@ -288,14 +288,14 @@ define amdgpu_ps [8 x float] @load_2dmsaa_v8f32(<8 x i32> inreg %rsrc, i32 %s, i ; MSAA-NEXT: ret [8 x float] [[I15]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) - %i4 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i5 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i6 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i7 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i4 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i5 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i6 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i7 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i8 = insertvalue [8 x float] undef, float %i, 0 %i9 = insertvalue [8 x float] %i8, float %i1, 1 %i10 = insertvalue [8 x float] %i9, float %i2, 2 @@ -311,20 +311,20 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved(<8 x i32> inreg %rsr ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I1:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I1]], float [[I2]], 1 -; NO-MSAA-NEXT: [[I4:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I4:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I3]], float [[I4]], 2 -; NO-MSAA-NEXT: [[I6:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I6:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I5]], float [[I6]], 3 ; NO-MSAA-NEXT: ret [4 x float] [[I7]] ; ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 ; MSAA-NEXT: [[I4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 @@ -336,13 +336,13 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved(<8 x i32> inreg %rsr ; MSAA-NEXT: ret [4 x float] [[I7]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) %i1 = insertvalue [4 x float] undef, float %i, 0 - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) %i3 = insertvalue [4 x float] %i1, float %i2, 1 - %i4 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i4 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) %i5 = insertvalue [4 x float] %i3, float %i4, 2 - %i6 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i6 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i7 = insertvalue [4 x float] %i5, float %i6, 3 ret [4 x float] %i7 } @@ -351,8 +351,8 @@ define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01(<8 x i32> inreg %rsrc, ; NO-MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1 ; NO-MSAA-NEXT: ret [2 x float] [[I3]] @@ -360,7 +360,7 @@ define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01(<8 x i32> inreg %rsrc, ; MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 ; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0 @@ -368,8 +368,8 @@ define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01(<8 x i32> inreg %rsrc, ; MSAA-NEXT: ret [2 x float] [[I3]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) %i2 = insertvalue [2 x float] undef, float %i, 0 %i3 = insertvalue [2 x float] %i2, float %i1, 1 ret [2 x float] %i3 @@ -379,8 +379,8 @@ define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23(<8 x i32> inreg %rsrc, ; NO-MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1 ; NO-MSAA-NEXT: ret [2 x float] [[I3]] @@ -388,7 +388,7 @@ define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23(<8 x i32> inreg %rsrc, ; MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 ; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0 @@ -396,8 +396,8 @@ define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23(<8 x i32> inreg %rsrc, ; MSAA-NEXT: ret [2 x float] [[I3]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i2 = insertvalue [2 x float] undef, float %i, 0 %i3 = insertvalue [2 x float] %i2, float %i1, 1 ret [2 x float] %i3 @@ -409,8 +409,8 @@ define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3(<8 x i32> inreg % ; NO-MSAA-LABEL: define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [2 x <2 x float>] undef, <2 x float> [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [2 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 ; NO-MSAA-NEXT: ret [2 x <2 x float>] [[I5]] @@ -418,15 +418,15 @@ define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3(<8 x i32> inreg % ; MSAA-LABEL: define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I4:%.*]] = insertvalue [2 x <2 x float>] undef, <2 x float> [[I]], 0 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [2 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 ; MSAA-NEXT: ret [2 x <2 x float>] [[I5]] ; main_body: - %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [2 x <2 x float>] undef, <2 x float> %i, 0 %i5 = insertvalue [2 x <2 x float>] %i4, <2 x float> %i1, 1 ret [2 x <2 x float>] %i5 @@ -439,10 +439,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3(<8 x i32> inreg % ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 @@ -452,8 +452,8 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3(<8 x i32> inreg % ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 @@ -477,10 +477,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3(<8 x i32> inreg % ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] ; main_body: - %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 @@ -495,10 +495,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5(<8 x i32> inreg % ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 @@ -508,8 +508,8 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5(<8 x i32> inreg % ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 @@ -533,10 +533,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5(<8 x i32> inreg % ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] ; main_body: - %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 @@ -551,10 +551,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6(<8 x i32> inreg % ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 @@ -564,8 +564,8 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6(<8 x i32> inreg % ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 @@ -589,10 +589,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6(<8 x i32> inreg % ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] ; main_body: - %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 @@ -607,10 +607,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9(<8 x i32> inreg % ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 @@ -620,8 +620,8 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9(<8 x i32> inreg % ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 @@ -645,10 +645,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9(<8 x i32> inreg % ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] ; main_body: - %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 @@ -663,10 +663,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10(<8 x i32> inreg ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 @@ -676,8 +676,8 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10(<8 x i32> inreg ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 @@ -701,10 +701,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10(<8 x i32> inreg ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] ; main_body: - %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 @@ -719,10 +719,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12(<8 x i32> inreg ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 @@ -732,8 +732,8 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12(<8 x i32> inreg ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 @@ -757,10 +757,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12(<8 x i32> inreg ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] ; main_body: - %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 @@ -772,8 +772,8 @@ define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01(<8 x i32> inreg %rsrc, i ; NO-MSAA-LABEL: define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x half] undef, half [[I]], 0 ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x half] [[I2]], half [[I1]], 1 ; NO-MSAA-NEXT: ret [2 x half] [[I3]] @@ -781,7 +781,7 @@ define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01(<8 x i32> inreg %rsrc, i ; MSAA-LABEL: define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x half> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x half> [[TMP0]], i64 1 ; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x half] undef, half [[I]], 0 @@ -789,8 +789,8 @@ define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01(<8 x i32> inreg %rsrc, i ; MSAA-NEXT: ret [2 x half] [[I3]] ; main_body: - %i = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) %i2 = insertvalue [2 x half] undef, half %i, 0 %i3 = insertvalue [2 x half] %i2, half %i1, 1 ret [2 x half] %i3 @@ -800,10 +800,10 @@ define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1(<8 x i32> inreg %rsr ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -813,7 +813,7 @@ define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1(<8 x i32> inreg %rsr ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 @@ -825,10 +825,10 @@ define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1(<8 x i32> inreg %rsr ; MSAA-NEXT: ret [4 x float] [[I7]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x float] undef, float %i, 0 %i5 = insertvalue [4 x float] %i4, float %i1, 1 %i6 = insertvalue [4 x float] %i5, float %i2, 2 @@ -843,10 +843,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3(<8 x i32> in ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 @@ -856,8 +856,8 @@ define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3(<8 x i32> in ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 @@ -881,10 +881,10 @@ define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3(<8 x i32> in ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] ; main_body: - %i = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 @@ -901,10 +901,10 @@ define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7(<8 x i32> inreg % ; NO-MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2 @@ -914,9 +914,9 @@ define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7(<8 x i32> inreg % ; MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[TMP4:%.*]] = insertelement <3 x float> undef, float [[TMP3]], i64 0 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 @@ -948,10 +948,10 @@ define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7(<8 x i32> inreg % ; MSAA-NEXT: ret [4 x <3 x float>] [[I7]] ; main_body: - %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0 %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1 %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2 @@ -968,10 +968,10 @@ define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1(<8 x i32> ; NO-MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 5, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 6, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 7, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 5, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 6, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 7, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2 @@ -981,9 +981,9 @@ define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1(<8 x i32> ; MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[TMP4:%.*]] = insertelement <3 x float> undef, float [[TMP3]], i64 0 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 @@ -1015,10 +1015,10 @@ define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1(<8 x i32> ; MSAA-NEXT: ret [4 x <3 x float>] [[I7]] ; main_body: - %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 4, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 5, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 6, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 7, <8 x i32> %rsrc, i32 0, i32 0) + %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 4, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 5, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 6, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 7, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0 %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1 %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2 @@ -1030,11 +1030,11 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections(<8 x i32> inreg %rsrc, ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], float [[VDATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: call void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float [[VDATA]], i32 1, i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: call void @llvm.amdgcn.image.store.2dmsaa.f32.i32.v8i32(float [[VDATA]], i32 1, i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -1044,11 +1044,11 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections(<8 x i32> inreg %rsrc, ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], float [[VDATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 -; MSAA-NEXT: call void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float [[VDATA]], i32 1, i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0) -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: call void @llvm.amdgcn.image.store.2dmsaa.f32.i32.v8i32(float [[VDATA]], i32 1, i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 @@ -1058,11 +1058,11 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections(<8 x i32> inreg %rsrc, ; MSAA-NEXT: ret [4 x float] [[I7]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) call void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float %vdata, i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x float] undef, float %i, 0 %i5 = insertvalue [4 x float] %i4, float %i1, 1 %i6 = insertvalue [4 x float] %i5, float %i2, 2 @@ -1074,10 +1074,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(<8 x i32> inreg %rsrc, i3 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -1085,20 +1085,20 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(<8 x i32> inreg %rsrc, i3 ; NO-MSAA-NEXT: [[I8:%.*]] = trunc i32 [[COND]] to i1 ; NO-MSAA-NEXT: br i1 [[I8]], label [[IF_EQUAL:%.*]], label [[IF_UNEQUAL:%.*]] ; NO-MSAA: if_equal: -; NO-MSAA-NEXT: [[I9:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I10:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I11:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I12:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I9:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I10:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I11:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I12:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I13:%.*]] = insertvalue [4 x float] undef, float [[I9]], 0 ; NO-MSAA-NEXT: [[I14:%.*]] = insertvalue [4 x float] [[I13]], float [[I10]], 1 ; NO-MSAA-NEXT: [[I15:%.*]] = insertvalue [4 x float] [[I14]], float [[I11]], 2 ; NO-MSAA-NEXT: [[I16:%.*]] = insertvalue [4 x float] [[I15]], float [[I12]], 3 ; NO-MSAA-NEXT: br label [[MERGE:%.*]] ; NO-MSAA: if_unequal: -; NO-MSAA-NEXT: [[I17:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I18:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I19:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I20:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I17:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I18:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I19:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I20:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I21:%.*]] = insertvalue [4 x float] undef, float [[I17]], 0 ; NO-MSAA-NEXT: [[I22:%.*]] = insertvalue [4 x float] [[I21]], float [[I18]], 1 ; NO-MSAA-NEXT: [[I23:%.*]] = insertvalue [4 x float] [[I22]], float [[I19]], 2 @@ -1111,7 +1111,7 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(<8 x i32> inreg %rsrc, i3 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks( ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 @@ -1123,7 +1123,7 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(<8 x i32> inreg %rsrc, i3 ; MSAA-NEXT: [[I8:%.*]] = trunc i32 [[COND]] to i1 ; MSAA-NEXT: br i1 [[I8]], label [[IF_EQUAL:%.*]], label [[IF_UNEQUAL:%.*]] ; MSAA: if_equal: -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I9:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 ; MSAA-NEXT: [[I10:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 ; MSAA-NEXT: [[I11:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 @@ -1134,7 +1134,7 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(<8 x i32> inreg %rsrc, i3 ; MSAA-NEXT: [[I16:%.*]] = insertvalue [4 x float] [[I15]], float [[I12]], 3 ; MSAA-NEXT: br label [[MERGE:%.*]] ; MSAA: if_unequal: -; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) +; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) ; MSAA-NEXT: [[I17:%.*]] = extractelement <4 x float> [[TMP2]], i64 0 ; MSAA-NEXT: [[I18:%.*]] = extractelement <4 x float> [[TMP2]], i64 1 ; MSAA-NEXT: [[I19:%.*]] = extractelement <4 x float> [[TMP2]], i64 2 @@ -1149,10 +1149,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(<8 x i32> inreg %rsrc, i3 ; MSAA-NEXT: ret [4 x float] [[I25]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i4 = insertvalue [4 x float] undef, float %i, 0 %i5 = insertvalue [4 x float] %i4, float %i1, 1 %i6 = insertvalue [4 x float] %i5, float %i2, 2 @@ -1160,20 +1160,20 @@ main_body: %i8 = trunc i32 %cond to i1 br i1 %i8, label %if_equal, label %if_unequal if_equal: - %i9 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i10 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i11 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i12 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i9 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i10 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i11 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i12 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i13 = insertvalue [4 x float] undef, float %i9, 0 %i14 = insertvalue [4 x float] %i13, float %i10, 1 %i15 = insertvalue [4 x float] %i14, float %i11, 2 %i16 = insertvalue [4 x float] %i15, float %i12, 3 br label %merge if_unequal: - %i17 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - %i18 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) - %i19 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) - %i20 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) + %i17 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %i18 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) + %i19 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) + %i20 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) %i21 = insertvalue [4 x float] undef, float %i17, 0 %i22 = insertvalue [4 x float] %i21, float %i18, 1 %i23 = insertvalue [4 x float] %i22, float %i19, 2 @@ -1188,10 +1188,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc(<8 x i32> ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc( ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC1:%.*]], <8 x i32> inreg [[RSRC2:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; NO-MSAA-NEXT: main_body: -; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC1]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC1]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC2]], i32 0, i32 0) -; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC2]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC1]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC1]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC2]], i32 0, i32 0) +; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC2]], i32 0, i32 0) ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 @@ -1201,10 +1201,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc(<8 x i32> ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc( ; MSAA-SAME: <8 x i32> inreg [[RSRC1:%.*]], <8 x i32> inreg [[RSRC2:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { ; MSAA-NEXT: main_body: -; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC1]], i32 0, i32 0) +; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC1]], i32 0, i32 0) ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 -; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC2]], i32 0, i32 0) +; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC2]], i32 0, i32 0) ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 @@ -1214,10 +1214,10 @@ define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc(<8 x i32> ; MSAA-NEXT: ret [4 x float] [[I7]] ; main_body: - %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc1, i32 0, i32 0) - %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc1, i32 0, i32 0) - %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc2, i32 0, i32 0) - %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc2, i32 0, i32 0) + %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc1, i32 0, i32 0) + %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc1, i32 0, i32 0) + %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc2, i32 0, i32 0) + %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc2, i32 0, i32 0) %i4 = insertvalue [4 x float] undef, float %i, 0 %i5 = insertvalue [4 x float] %i4, float %i1, 1 %i6 = insertvalue [4 x float] %i5, float %i2, 2 @@ -1225,16 +1225,16 @@ main_body: ret [4 x float] %i7 } -declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32) +declare void @llvm.amdgcn.image.store.2dmsaa.f32.i32.v8i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32) attributes #0 = { nounwind readonly willreturn } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll index 7e9d12241ee36..ea6422e5ed8c2 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll @@ -4085,10 +4085,10 @@ declare <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8), i define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } @@ -4096,12 +4096,12 @@ define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 ; Check that the intrinsic remains unchanged in the presence of TFE or LWE define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_tfe( -; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 1, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 1, i32 0) ; CHECK-NEXT: [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0) + %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0) %data.vec = extractvalue {<4 x float>,i32} %data, 0 %elt0 = extractelement <4 x float> %data.vec, i32 0 ret float %elt0 @@ -4110,12 +4110,12 @@ define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, ; Check that the intrinsic remains unchanged in the presence of TFE or LWE define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_lwe( -; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 2, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 2, i32 0) ; CHECK-NEXT: [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0) + %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0) %data.vec = extractvalue {<4 x float>,i32} %data, 0 %elt0 = extractelement <4 x float> %data.vec, i32 0 ret float %elt0 @@ -4123,194 +4123,194 @@ define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1darray.f32.f32(i32 1, float [[S:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1darray.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 2, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 4, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 8, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 8, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[SHUF:%.*]] = insertelement <2 x float> poison, float [[DATA]], i64 0 ; CHECK-NEXT: ret <2 x float> [[SHUF]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[SHUF:%.*]] = insertelement <3 x float> poison, float [[DATA]], i64 0 ; CHECK-NEXT: ret <3 x float> [[SHUF]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[SHUF]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[SHUF]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <3 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <3 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } -declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cl @@ -4318,15 +4318,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, floa define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 1 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.d @@ -4334,15 +4334,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32, float, f define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32(i32 4, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 2 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.d.cl @@ -4350,15 +4350,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32, float, f define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 3 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.l @@ -4366,15 +4366,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.1d.f32.f32(i32 4, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <2 x float> %data, i32 1 ret float %elt0 } -declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.b @@ -4382,15 +4382,15 @@ declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32, float, float, define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 1 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.b.cl @@ -4398,15 +4398,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, flo define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32(i32 12, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32.v8i32.v4i32(i32 12, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.lz @@ -4414,15 +4414,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32(i32 10, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32.v8i32.v4i32(i32 10, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } -declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cd @@ -4430,15 +4430,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32(i32 14, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32.v8i32.v4i32(i32 14, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <3 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } -declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cd.cl @@ -4446,78 +4446,78 @@ declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, fl define amdgpu_ps half @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret half [[DATA]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x half> %data, i32 3 ret half %elt0 } define amdgpu_ps half @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 4, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret half [[DATA]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x half> %data, i32 2 ret half %elt0 } define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 2, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 2, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret half [[DATA]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x half> %data, i32 1 ret half %elt0 } define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32(i32 7, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32.v8i32.v4i32(i32 7, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[RES:%.*]] = shufflevector <3 x half> [[DATA]], <3 x half> poison, <4 x i32> ; CHECK-NEXT: ret <4 x half> [[RES]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> ret <4 x half> %res } define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32(i32 3, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32.v8i32.v4i32(i32 3, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x half> [[DATA]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: ret <4 x half> [[RES]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> ret <4 x half> %res } define amdgpu_ps <4 x half> @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x half> poison, half [[DATA]], i64 0 ; CHECK-NEXT: ret <4 x half> [[RES]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> ret <4 x half> %res } define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret half [[DATA]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x half> %data, i32 0 ret half %elt0 } -declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c @@ -4525,15 +4525,15 @@ declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32, float, define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.1d.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cl @@ -4541,15 +4541,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.d @@ -4557,15 +4557,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, floa define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.d.cl @@ -4573,15 +4573,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, f define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.l @@ -4589,15 +4589,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.b @@ -4605,15 +4605,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.b.cl @@ -4621,15 +4621,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, f define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.lz @@ -4637,15 +4637,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cd @@ -4653,15 +4653,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, floa define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cd.cl @@ -4669,15 +4669,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.o @@ -4685,15 +4685,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, floa define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cl.o @@ -4701,15 +4701,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.d.o @@ -4717,15 +4717,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.d.cl.o @@ -4733,15 +4733,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, flo define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.l.o @@ -4749,15 +4749,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.b.o @@ -4765,15 +4765,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.b.cl.o @@ -4781,15 +4781,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, flo define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.lz.o @@ -4797,15 +4797,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cd.o @@ -4813,15 +4813,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cd.cl.o @@ -4829,15 +4829,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, fl define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.o @@ -4845,15 +4845,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cl.o @@ -4861,15 +4861,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.d.o @@ -4877,15 +4877,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, floa define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.d.cl.o @@ -4893,15 +4893,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, f define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.l.o @@ -4909,15 +4909,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32 define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.b.o @@ -4925,15 +4925,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.b.cl.o @@ -4941,15 +4941,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, f define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.lz.o @@ -4957,15 +4957,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32 define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cd.o @@ -4973,15 +4973,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, floa define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cd.cl.o @@ -4989,15 +4989,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4 @@ -5007,16 +5007,16 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i3 define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.cl @@ -5024,16 +5024,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, < define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.l @@ -5041,16 +5041,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.b @@ -5058,16 +5058,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.b.cl @@ -5075,16 +5075,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32, floa define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.lz @@ -5092,16 +5092,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32, floa define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.o @@ -5109,16 +5109,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.cl.o @@ -5126,16 +5126,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, f define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.l.o @@ -5143,16 +5143,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.b.o @@ -5160,16 +5160,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.b.cl.o @@ -5177,16 +5177,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, fl define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.lz.o @@ -5194,16 +5194,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.o @@ -5211,16 +5211,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.cl.o @@ -5228,16 +5228,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.l.o @@ -5245,16 +5245,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, flo define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.b.o @@ -5262,16 +5262,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, floa define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.b.cl.o @@ -5279,16 +5279,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.lz.o @@ -5296,16 +5296,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i3 define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.getlod @@ -5313,15 +5313,15 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, flo define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getlod.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getlod.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.load @@ -5329,15 +5329,15 @@ declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32> define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 { ; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SAMPLE:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SAMPLE:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.load.mip @@ -5345,15 +5345,15 @@ declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 { ; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.mip.1d.f32.i32(i32 1, i32 [[S:%.*]], i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.mip.1d.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.getresinfo @@ -5361,15 +5361,15 @@ declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 { ; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 1, i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32.v8i32(i32 1, i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1 ; -------------------------------------------------------------------- ; TFE / LWE @@ -5377,30 +5377,30 @@ declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i3 define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32( -; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 1) +; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 1) ; CHECK-NEXT: [[RGBA:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[RGBA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1) + %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1) %rgba = extractvalue { <4 x float>, i32 } %data, 0 %elt0 = extractelement <4 x float> %rgba, i32 0 ret float %elt0 } -declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32, i32, <8 x i32>, i32, i32) #1 +declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1 define amdgpu_hs float @tfe_check_assert() #0 { ; CHECK-LABEL: @tfe_check_assert( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) #2 + %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) #2 %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 +declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll index af12367b9759c..66184405f3092 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll @@ -4084,10 +4084,10 @@ declare <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8), i define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } @@ -4095,12 +4095,12 @@ define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 ; Check that the intrinsic remains unchanged in the presence of TFE or LWE define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_tfe( -; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 1, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 1, i32 0) ; CHECK-NEXT: [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0) + %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0) %data.vec = extractvalue {<4 x float>,i32} %data, 0 %elt0 = extractelement <4 x float> %data.vec, i32 0 ret float %elt0 @@ -4109,12 +4109,12 @@ define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, ; Check that the intrinsic remains unchanged in the presence of TFE or LWE define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_lwe( -; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 2, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 2, i32 0) ; CHECK-NEXT: [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0) + %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0) %data.vec = extractvalue {<4 x float>,i32} %data, 0 %elt0 = extractelement <4 x float> %data.vec, i32 0 ret float %elt0 @@ -4122,194 +4122,194 @@ define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1darray.f32.f32(i32 1, float [[S:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1darray.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 2, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 4, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 8, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 8, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[SHUF:%.*]] = insertelement <2 x float> poison, float [[DATA]], i64 0 ; CHECK-NEXT: ret <2 x float> [[SHUF]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[SHUF:%.*]] = insertelement <3 x float> poison, float [[DATA]], i64 0 ; CHECK-NEXT: ret <3 x float> [[SHUF]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[SHUF]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[SHUF]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <3 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <3 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } -declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cl @@ -4317,15 +4317,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, floa define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 1 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.d @@ -4333,15 +4333,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32, float, f define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32(i32 4, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 2 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.d.cl @@ -4349,15 +4349,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32, float, f define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 3 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.l @@ -4365,15 +4365,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.1d.f32.f32(i32 4, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <2 x float> %data, i32 1 ret float %elt0 } -declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.b @@ -4381,15 +4381,15 @@ declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32, float, float, define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 1 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.b.cl @@ -4397,15 +4397,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, flo define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32(i32 12, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32.v8i32.v4i32(i32 12, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.lz @@ -4413,15 +4413,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32(i32 10, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32.v8i32.v4i32(i32 10, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } -declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cd @@ -4429,15 +4429,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32(i32 14, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32.v8i32.v4i32(i32 14, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret <3 x float> [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } -declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cd.cl @@ -4445,78 +4445,78 @@ declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, fl define amdgpu_ps half @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret half [[DATA]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x half> %data, i32 3 ret half %elt0 } define amdgpu_ps half @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 4, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret half [[DATA]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x half> %data, i32 2 ret half %elt0 } define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 2, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 2, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret half [[DATA]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x half> %data, i32 1 ret half %elt0 } define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32(i32 7, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32.v8i32.v4i32(i32 7, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[RES:%.*]] = shufflevector <3 x half> [[DATA]], <3 x half> poison, <4 x i32> ; CHECK-NEXT: ret <4 x half> [[RES]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> ret <4 x half> %res } define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32(i32 3, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32.v8i32.v4i32(i32 3, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x half> [[DATA]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: ret <4 x half> [[RES]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> ret <4 x half> %res } define amdgpu_ps <4 x half> @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x half> poison, half [[DATA]], i64 0 ; CHECK-NEXT: ret <4 x half> [[RES]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> ret <4 x half> %res } define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret half [[DATA]] ; - %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x half> %data, i32 0 ret half %elt0 } -declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c @@ -4524,15 +4524,15 @@ declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32, float, define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.1d.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cl @@ -4540,15 +4540,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.d @@ -4556,15 +4556,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, floa define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.d.cl @@ -4572,15 +4572,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, f define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.l @@ -4588,15 +4588,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.b @@ -4604,15 +4604,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.b.cl @@ -4620,15 +4620,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, f define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.lz @@ -4636,15 +4636,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cd @@ -4652,15 +4652,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, floa define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cd.cl @@ -4668,15 +4668,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.o @@ -4684,15 +4684,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, floa define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cl.o @@ -4700,15 +4700,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.d.o @@ -4716,15 +4716,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.d.cl.o @@ -4732,15 +4732,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, flo define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.l.o @@ -4748,15 +4748,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.b.o @@ -4764,15 +4764,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.b.cl.o @@ -4780,15 +4780,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, flo define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.lz.o @@ -4796,15 +4796,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cd.o @@ -4812,15 +4812,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.cd.cl.o @@ -4828,15 +4828,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, fl define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.o @@ -4844,15 +4844,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cl.o @@ -4860,15 +4860,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.d.o @@ -4876,15 +4876,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, floa define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.d.cl.o @@ -4892,15 +4892,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, f define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.l.o @@ -4908,15 +4908,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32 define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.b.o @@ -4924,15 +4924,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.b.cl.o @@ -4940,15 +4940,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, f define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.lz.o @@ -4956,15 +4956,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32 define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cd.o @@ -4972,15 +4972,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, floa define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample.c.cd.cl.o @@ -4988,15 +4988,15 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4 @@ -5006,16 +5006,16 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i3 define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.cl @@ -5023,16 +5023,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, < define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.l @@ -5040,16 +5040,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.b @@ -5057,16 +5057,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.b.cl @@ -5074,16 +5074,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32, floa define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.lz @@ -5091,16 +5091,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32, floa define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.o @@ -5108,16 +5108,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.cl.o @@ -5125,16 +5125,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, f define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.l.o @@ -5142,16 +5142,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.b.o @@ -5159,16 +5159,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.b.cl.o @@ -5176,16 +5176,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, fl define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.lz.o @@ -5193,16 +5193,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.o @@ -5210,16 +5210,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.cl.o @@ -5227,16 +5227,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.l.o @@ -5244,16 +5244,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, flo define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.b.o @@ -5261,16 +5261,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, floa define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.b.cl.o @@ -5278,16 +5278,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.gather4.c.lz.o @@ -5295,16 +5295,16 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i3 define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.getlod @@ -5312,15 +5312,15 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, flo define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getlod.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getlod.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.load @@ -5328,15 +5328,15 @@ declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32> define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 { ; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SAMPLE:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SAMPLE:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.load.mip @@ -5344,15 +5344,15 @@ declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 { ; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.mip.1d.f32.i32(i32 1, i32 [[S:%.*]], i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.mip.1d.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.getresinfo @@ -5360,15 +5360,15 @@ declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 { ; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 1, i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32.v8i32(i32 1, i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0) + %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1 ; -------------------------------------------------------------------- ; TFE / LWE @@ -5376,30 +5376,30 @@ declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i3 define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32( -; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 1) +; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 1) ; CHECK-NEXT: [[RGBA:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0 ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[RGBA]], i64 0 ; CHECK-NEXT: ret float [[ELT0]] ; - %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1) + %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1) %rgba = extractvalue { <4 x float>, i32 } %data, 0 %elt0 = extractelement <4 x float> %rgba, i32 0 ret float %elt0 } -declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32, i32, <8 x i32>, i32, i32) #1 +declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1 define amdgpu_hs float @tfe_check_assert() #0 { ; CHECK-LABEL: @tfe_check_assert( -; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) +; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) ; CHECK-NEXT: ret float [[DATA]] ; - %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) #2 + %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) #2 %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } -declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 +declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll index 0cb9b4b84a556..4df170dc8f85c 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll @@ -5,22 +5,22 @@ ; llvm.amdgcn.image.sample a16 is disabled on pre-gfx9 ; -------------------------------------------------------------------- -declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 define amdgpu_kernel void @image_sample_a16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -29,13 +29,13 @@ define amdgpu_kernel void @image_sample_a16_2d(ptr addrspace(1) %out, <8 x i32> ; CHECK-LABEL: @image_sample_a16_2d( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -45,14 +45,14 @@ define amdgpu_kernel void @image_sample_a16_3d(ptr addrspace(1) %out, <8 x i32> ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float ; CHECK-NEXT: [[R32:%.*]] = fpext half [[R:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[R32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 15, float [[S32]], float [[T32]], float [[R32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %r32 = fpext half %r to float - %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -63,14 +63,14 @@ define amdgpu_kernel void @image_sample_a16_cube(ptr addrspace(1) %out, <8 x i32 ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float ; CHECK-NEXT: [[FACE32:%.*]] = fpext half [[FACE:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[FACE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 15, float [[S32]], float [[T32]], float [[FACE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %face32 = fpext half %face to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -79,13 +79,13 @@ define amdgpu_kernel void @image_sample_a16_1darray(ptr addrspace(1) %out, <8 x ; CHECK-LABEL: @image_sample_a16_1darray( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float [[S32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 15, float [[S32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %slice32 = fpext half %slice to float - %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -95,14 +95,14 @@ define amdgpu_kernel void @image_sample_a16_2darray(ptr addrspace(1) %out, <8 x ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float ; CHECK-NEXT: [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 15, float [[S32]], float [[T32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %slice32 = fpext half %slice to float - %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index fabf8ab51764b..e4f8bb133e6ce 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -3159,96 +3159,96 @@ define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src0) ; llvm.amdgcn.image.sample a16 ; -------------------------------------------------------------------- -declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 define amdgpu_kernel void @image_sample_a16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { ; CHECK-LABEL: @image_sample_a16_3d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %r32 = fpext half %r to float - %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -3256,133 +3256,133 @@ define amdgpu_kernel void @image_sample_a16_3d(ptr addrspace(1) %out, <8 x i32> define amdgpu_kernel void @image_sample_a16_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { ; ; CHECK-LABEL: @image_sample_a16_cube( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %face32 = fpext half %face to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { ; CHECK-LABEL: @image_sample_a16_1darray( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %slice32 = fpext half %slice to float - %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_2darray( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %slice32 = fpext half %slice to float - %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { ; CHECK-LABEL: @image_sample_a16_c_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_b16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) { ; CHECK-LABEL: @image_sample_a16_b16_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %bias32 = fpext half %bias to float %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -3390,26 +3390,26 @@ define amdgpu_kernel void @image_sample_a16_b16_1d(ptr addrspace(1) %out, <8 x i define amdgpu_kernel void @image_sample_a16_b32_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) { ; CHECK-LABEL: @image_sample_a16_b32_1d( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_b16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_b16_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %bias32 = fpext half %bias to float %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -3418,26 +3418,26 @@ define amdgpu_kernel void @image_sample_a16_b32_2d(ptr addrspace(1) %out, <8 x i ; CHECK-LABEL: @image_sample_a16_b32_2d( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_b16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) { ; CHECK-LABEL: @image_sample_a16_c_b16_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %bias32 = fpext half %bias to float %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -3445,26 +3445,26 @@ define amdgpu_kernel void @image_sample_a16_c_b16_1d(ptr addrspace(1) %out, <8 x define amdgpu_kernel void @image_sample_a16_c_b32_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) { ; CHECK-LABEL: @image_sample_a16_c_b32_1d( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_b16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_b16_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %bias32 = fpext half %bias to float %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -3473,27 +3473,27 @@ define amdgpu_kernel void @image_sample_a16_c_b32_2d(ptr addrspace(1) %out, <8 x ; CHECK-LABEL: @image_sample_a16_c_b32_2d( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_b16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_b16_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %bias32 = fpext half %bias to float %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -3502,20 +3502,20 @@ define amdgpu_kernel void @image_sample_a16_b32_cl_1d(ptr addrspace(1) %out, <8 ; CHECK-LABEL: @image_sample_a16_b32_cl_1d( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_b16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_b16_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3523,7 +3523,7 @@ define amdgpu_kernel void @image_sample_a16_b16_cl_2d(ptr addrspace(1) %out, <8 %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -3533,28 +3533,28 @@ define amdgpu_kernel void @image_sample_a16_b32_cl_2d(ptr addrspace(1) %out, <8 ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float ; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_b16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_b16_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %bias32 = fpext half %bias to float %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -3563,20 +3563,20 @@ define amdgpu_kernel void @image_sample_a16_c_b32_cl_1d(ptr addrspace(1) %out, < ; CHECK-LABEL: @image_sample_a16_c_b32_cl_1d( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_b16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_b16_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3584,7 +3584,7 @@ define amdgpu_kernel void @image_sample_a16_c_b16_cl_2d(ptr addrspace(1) %out, < %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -3594,35 +3594,35 @@ define amdgpu_kernel void @image_sample_a16_c_b32_cl_2d(ptr addrspace(1) %out, < ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float ; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_d_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_d_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3632,14 +3632,14 @@ define amdgpu_kernel void @image_sample_a16_d_2d(ptr addrspace(1) %out, <8 x i32 %dtdv32 = fpext half %dtdv to float %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_d_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) { ; CHECK-LABEL: @image_sample_a16_d_3d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3652,28 +3652,28 @@ define amdgpu_kernel void @image_sample_a16_d_3d(ptr addrspace(1) %out, <8 x i32 %s32 = fpext half %s to float %t32 = fpext half %t to float %r32 = fpext half %r to float - %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_c_d_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_d_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3683,14 +3683,14 @@ define amdgpu_kernel void @image_sample_a16_c_d_2d(ptr addrspace(1) %out, <8 x i %dtdv32 = fpext half %dtdv to float %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_d_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3698,14 +3698,14 @@ define amdgpu_kernel void @image_sample_a16_d_cl_1d(ptr addrspace(1) %out, <8 x %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_d_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3716,14 +3716,14 @@ define amdgpu_kernel void @image_sample_a16_d_cl_2d(ptr addrspace(1) %out, <8 x %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_d_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3731,14 +3731,14 @@ define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(ptr addrspace(1) %out, <8 %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_d_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3749,28 +3749,28 @@ define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(ptr addrspace(1) %out, <8 %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_cd_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_cd_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3780,28 +3780,28 @@ define amdgpu_kernel void @image_sample_a16_cd_2d(ptr addrspace(1) %out, <8 x i3 %dtdv32 = fpext half %dtdv to float %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_c_cd_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_cd_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3811,14 +3811,14 @@ define amdgpu_kernel void @image_sample_a16_c_cd_2d(ptr addrspace(1) %out, <8 x %dtdv32 = fpext half %dtdv to float %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_cd_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3826,14 +3826,14 @@ define amdgpu_kernel void @image_sample_a16_cd_cl_1d(ptr addrspace(1) %out, <8 x %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_cd_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3844,14 +3844,14 @@ define amdgpu_kernel void @image_sample_a16_cd_cl_2d(ptr addrspace(1) %out, <8 x %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_cd_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3859,14 +3859,14 @@ define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(ptr addrspace(1) %out, <8 %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_cd_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3877,118 +3877,118 @@ define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(ptr addrspace(1) %out, <8 %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) { ; CHECK-LABEL: @image_sample_a16_l_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %lod32 = fpext half %lod to float - %res = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) { ; CHECK-LABEL: @image_sample_a16_l_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %lod32 = fpext half %lod to float - %res = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) { ; CHECK-LABEL: @image_sample_a16_c_l_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %lod32 = fpext half %lod to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) { ; CHECK-LABEL: @image_sample_a16_c_l_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %lod32 = fpext half %lod to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_lz_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_lz_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_lz_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_lz_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_lz_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { ; CHECK-LABEL: @image_sample_a16_c_lz_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_lz_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_lz_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V1( -; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16.v8i32.v4i32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store float [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -3999,14 +3999,14 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(ptr addrspace(1) %o %s32 = fpext half %s to float %t32 = fpext half %t to float %slice32 = fpext half %slice to float - %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32.v8i32.v4i32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store float %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V2( -; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16.v8i32.v4i32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -4017,14 +4017,14 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(ptr addrspace(1) %o %s32 = fpext half %s to float %t32 = fpext half %t to float %slice32 = fpext half %slice to float - %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32.v8i32.v4i32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <2 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) { ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const( -; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half 0xH3400, half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16.v8i32.v4i32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half 0xH3400, half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -4034,7 +4034,7 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const(ptr addrspace(1) %dtdv32 = fpext half %dtdv to float %s32 = fpext half %s to float %slice32 = fpext half %slice to float - %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 0.25, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32.v8i32.v4i32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 0.25, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <2 x float> %res, ptr addrspace(1) %out ret void } @@ -4043,7 +4043,7 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const_noopt(ptr addrsp ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const_noopt( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float -; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S32]], float 1.000000e+10, float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32.v8i32.v4i32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S32]], float 1.000000e+10, float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -4053,19 +4053,19 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const_noopt(ptr addrsp %dtdv32 = fpext half %dtdv to float %s32 = fpext half %s to float %slice32 = fpext half %slice to float - %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 1.0e+10, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32.v8i32.v4i32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 1.0e+10, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <2 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_load_a16_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { ; CHECK-LABEL: @image_load_a16_mip_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = zext i16 %s to i32 - %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -4073,37 +4073,37 @@ define amdgpu_kernel void @image_load_a16_mip_1d(ptr addrspace(1) %out, <8 x i32 define amdgpu_kernel void @image_load_a16_mip_1d_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { ; CHECK-LABEL: @image_load_a16_mip_1d_noopt( ; CHECK-NEXT: [[S32:%.*]] = sext i16 [[S:%.*]] to i32 -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S32]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 [[S32]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = sext i16 %s to i32 - %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_load_a16_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s, i16 %t) { ; CHECK-LABEL: @image_load_a16_mip_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 [[S:%.*]], i16 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16.v8i32(i32 15, i16 [[S:%.*]], i16 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = zext i16 %s to i32 %t32 = zext i16 %t to i32 - %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 %t32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 %s32, i32 %t32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_load_a16_mip_2d_const(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { ; CHECK-LABEL: @image_load_a16_mip_2d_const( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 [[S:%.*]], i16 -1, <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16.v8i32(i32 15, i16 [[S:%.*]], i16 -1, <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = zext i16 %s to i32 - %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 65535, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 %s32, i32 65535, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -4111,12 +4111,12 @@ define amdgpu_kernel void @image_load_a16_mip_2d_const(ptr addrspace(1) %out, <8 define amdgpu_kernel void @image_load_a16_mip_2d_const_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { ; CHECK-LABEL: @image_load_a16_mip_2d_const_noopt( ; CHECK-NEXT: [[S32:%.*]] = zext i16 [[S:%.*]] to i32 -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S32]], i32 65536, <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 [[S32]], i32 65536, <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = zext i16 %s to i32 - %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 65536, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 %s32, i32 65536, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -4127,20 +4127,20 @@ define amdgpu_kernel void @image_load_a16_mip_2d_const_noopt(ptr addrspace(1) %o define amdgpu_kernel void @image_sample_g16_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_d_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_d_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -4148,14 +4148,14 @@ define amdgpu_kernel void @image_sample_g16_d_2d(ptr addrspace(1) %out, <8 x i32 %dtdh32 = fpext half %dtdh to float %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_d_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { ; CHECK-LABEL: @image_sample_g16_d_3d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -4165,27 +4165,27 @@ define amdgpu_kernel void @image_sample_g16_d_3d(ptr addrspace(1) %out, <8 x i32 %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %drdv32 = fpext half %drdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_c_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_c_d_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_c_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_c_d_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -4193,27 +4193,27 @@ define amdgpu_kernel void @image_sample_g16_c_d_2d(ptr addrspace(1) %out, <8 x i %dtdh32 = fpext half %dtdh to float %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_d_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_d_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -4221,27 +4221,27 @@ define amdgpu_kernel void @image_sample_g16_d_cl_2d(ptr addrspace(1) %out, <8 x %dtdh32 = fpext half %dtdh to float %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_d_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_d_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -4249,27 +4249,27 @@ define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(ptr addrspace(1) %out, <8 %dtdh32 = fpext half %dtdh to float %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_cd_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_cd_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -4277,27 +4277,27 @@ define amdgpu_kernel void @image_sample_g16_cd_2d(ptr addrspace(1) %out, <8 x i3 %dtdh32 = fpext half %dtdh to float %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_c_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_c_cd_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_c_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_c_cd_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -4305,27 +4305,27 @@ define amdgpu_kernel void @image_sample_g16_c_cd_2d(ptr addrspace(1) %out, <8 x %dtdh32 = fpext half %dtdh to float %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_cd_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_cd_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -4333,27 +4333,27 @@ define amdgpu_kernel void @image_sample_g16_cd_cl_2d(ptr addrspace(1) %out, <8 x %dtdh32 = fpext half %dtdh to float %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_cd_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_cd_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -4361,14 +4361,14 @@ define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(ptr addrspace(1) %out, <8 %dtdh32 = fpext half %dtdh to float %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float - %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V1( -; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32.v8i32.v4i32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store float [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -4376,14 +4376,14 @@ define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(ptr addrspace(1) %o %dtdh32 = fpext half %dtdh to float %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float - %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32.v8i32.v4i32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store float %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V2( -; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32.v8i32.v4i32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 ; CHECK-NEXT: ret void ; @@ -4391,7 +4391,7 @@ define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(ptr addrspace(1) %o %dtdh32 = fpext half %dtdh to float %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float - %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32.v8i32.v4i32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <2 x float> %res, ptr addrspace(1) %out ret void } @@ -4402,63 +4402,63 @@ define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(ptr addrspace(1) %o define amdgpu_kernel void @image_sample_a16_1d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d_nnan( -; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float - %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d_nnan_ninf_nsz( -; CHECK-NEXT: [[RES:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float - %res = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_1d_fast(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d_fast( -; CHECK-NEXT: [[RES:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float - %res = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_2d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_2d_nnan( -; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float - %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_3d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { ; CHECK-LABEL: @image_sample_a16_3d_nnan( -; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %r32 = fpext half %r to float - %res = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -4466,41 +4466,41 @@ define amdgpu_kernel void @image_sample_a16_3d_nnan(ptr addrspace(1) %out, <8 x define amdgpu_kernel void @image_sample_a16_cube_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { ; ; CHECK-LABEL: @image_sample_a16_cube_nnan( -; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %face32 = fpext half %face to float - %res = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_1darray_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { ; CHECK-LABEL: @image_sample_a16_1darray_nnan( -; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %slice32 = fpext half %slice to float - %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } define amdgpu_kernel void @image_sample_a16_2darray_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_2darray_nnan( -; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %slice32 = fpext half %slice to float - %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -4509,26 +4509,26 @@ define amdgpu_kernel void @image_sample_a16_2darray_nnan(ptr addrspace(1) %out, ; llvm.amdgcn.image.sample l to lz ; -------------------------------------------------------------------- -declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32.v8i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32.v8i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 define amdgpu_kernel void @sample_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { ; CHECK-LABEL: @sample_l_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4536,12 +4536,12 @@ main_body: define amdgpu_kernel void @sample_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { ; CHECK-LABEL: @sample_l_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4549,12 +4549,12 @@ main_body: define amdgpu_kernel void @sample_c_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { ; CHECK-LABEL: @sample_c_l_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4562,12 +4562,12 @@ main_body: define amdgpu_kernel void @sample_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { ; CHECK-LABEL: @sample_c_l_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4575,12 +4575,12 @@ main_body: define amdgpu_kernel void @sample_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) { ; CHECK-LABEL: @sample_l_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4588,12 +4588,12 @@ main_body: define amdgpu_kernel void @sample_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { ; CHECK-LABEL: @sample_l_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4601,12 +4601,12 @@ main_body: define amdgpu_kernel void @sample_c_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) { ; CHECK-LABEL: @sample_c_l_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4614,12 +4614,12 @@ main_body: define amdgpu_kernel void @sample_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { ; CHECK-LABEL: @sample_c_l_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4627,12 +4627,12 @@ main_body: define amdgpu_kernel void @gather4_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { ; CHECK-LABEL: @gather4_l_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4640,12 +4640,12 @@ main_body: define amdgpu_kernel void @gather4_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { ; CHECK-LABEL: @gather4_c_l_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32.v8i32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4653,12 +4653,12 @@ main_body: define amdgpu_kernel void @gather4_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { ; CHECK-LABEL: @gather4_l_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4666,12 +4666,12 @@ main_body: define amdgpu_kernel void @gather4_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { ; CHECK-LABEL: @gather4_c_l_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4679,12 +4679,12 @@ main_body: define amdgpu_kernel void @gather4_c_l_o_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %slice, float %lod) { ; CHECK-LABEL: @gather4_c_l_o_2darray( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2darray.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2darray.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %slice, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32.v8i32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %slice, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4696,12 +4696,12 @@ main_body: define amdgpu_kernel void @load_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s) { ; CHECK-LABEL: @load_mip_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4709,12 +4709,12 @@ main_body: define amdgpu_kernel void @load_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; CHECK-LABEL: @load_mip_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4722,12 +4722,12 @@ main_body: define amdgpu_kernel void @load_mip_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @load_mip_3d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4735,12 +4735,12 @@ main_body: define amdgpu_kernel void @load_mip_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; CHECK-LABEL: @load_mip_1darray( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4748,12 +4748,12 @@ main_body: define amdgpu_kernel void @load_mip_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @load_mip_2darray( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4761,12 +4761,12 @@ main_body: define amdgpu_kernel void @load_mip_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @load_mip_cube( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4775,7 +4775,7 @@ main_body: define amdgpu_kernel void @store_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { ; CHECK-LABEL: @store_mip_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret void ; main_body: @@ -4786,7 +4786,7 @@ main_body: define amdgpu_kernel void @store_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { ; CHECK-LABEL: @store_mip_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret void ; main_body: @@ -4797,7 +4797,7 @@ main_body: define amdgpu_kernel void @store_mip_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @store_mip_3d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret void ; main_body: @@ -4808,7 +4808,7 @@ main_body: define amdgpu_kernel void @store_mip_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { ; CHECK-LABEL: @store_mip_1darray( ; CHECK-NEXT: main_body: -; CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret void ; main_body: @@ -4819,7 +4819,7 @@ main_body: define amdgpu_kernel void @store_mip_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @store_mip_2darray( ; CHECK-NEXT: main_body: -; CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret void ; main_body: @@ -4830,7 +4830,7 @@ main_body: define amdgpu_kernel void @store_mip_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @store_mip_cube( ; CHECK-NEXT: main_body: -; CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; CHECK-NEXT: ret void ; main_body: @@ -4838,49 +4838,49 @@ main_body: ret void } -declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 -declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 ; -------------------------------------------------------------------- ; llvm.amdgcn.image.sample bias zero ; -------------------------------------------------------------------- -declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32, i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16.v8i32.v4i32(i32, i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.v8i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 define amdgpu_kernel void @sample_b_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { ; CHECK-LABEL: @sample_b_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.v8i32.v4i32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4888,12 +4888,12 @@ main_body: define amdgpu_kernel void @sample_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; CHECK-LABEL: @sample_b_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.v8i32.v4i32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4901,12 +4901,12 @@ main_body: define amdgpu_kernel void @sample_c_b_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { ; CHECK-LABEL: @sample_c_b_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.v8i32.v4i32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4914,12 +4914,12 @@ main_body: define amdgpu_kernel void @sample_c_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @sample_c_b_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.v8i32.v4i32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4927,12 +4927,12 @@ main_body: define amdgpu_kernel void @sample_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) { ; CHECK-LABEL: @sample_b_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4940,12 +4940,12 @@ main_body: define amdgpu_kernel void @sample_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { ; CHECK-LABEL: @sample_b_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4953,12 +4953,12 @@ main_body: define amdgpu_kernel void @sample_c_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) { ; CHECK-LABEL: @sample_c_b_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4966,12 +4966,12 @@ main_body: define amdgpu_kernel void @sample_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @sample_c_b_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4979,12 +4979,12 @@ main_body: define amdgpu_kernel void @gather4_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; CHECK-LABEL: @gather4_b_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32 15, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.v8i32(i32 15, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4992,12 +4992,12 @@ main_body: define amdgpu_kernel void @gather4_c_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @gather4_c_b_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.v8i32(i32 15, float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5005,12 +5005,12 @@ main_body: define amdgpu_kernel void @gather4_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { ; CHECK-LABEL: @gather4_b_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.v8i32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5018,12 +5018,12 @@ main_body: define amdgpu_kernel void @gather4_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @gather4_c_b_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.v8i32(i32 15, i32 %offset, float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5031,12 +5031,12 @@ main_body: define amdgpu_kernel void @sample_c_b_o_a16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) { ; CHECK-LABEL: @sample_c_b_o_a16_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f16(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f16.v8i32.v4i32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16.v8i32.v4i32(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5045,12 +5045,12 @@ main_body: define amdgpu_kernel void @sample_b_1d_pos(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { ; CHECK-LABEL: @sample_b_1d_pos( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float 1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float 1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.v8i32.v4i32(i32 15, float 1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5059,12 +5059,12 @@ main_body: define amdgpu_kernel void @sample_b_1d_neg(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { ; CHECK-LABEL: @sample_b_1d_neg( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float -1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float -1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.v8i32.v4i32(i32 15, float -1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5073,13 +5073,13 @@ main_body: define amdgpu_kernel void @sample_b_1d_a16(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @sample_b_1d_a16( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16.v8i32.v4i32(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %s32 = fpext half %s to float - %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -0.0, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.v8i32.v4i32(i32 15, float -0.0, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5091,12 +5091,12 @@ main_body: define amdgpu_kernel void @offset_sample_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { ; CHECK-LABEL: @offset_sample_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5104,12 +5104,12 @@ main_body: define amdgpu_kernel void @offset_sample_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; CHECK-LABEL: @offset_sample_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5117,12 +5117,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { ; CHECK-LABEL: @offset_sample_c_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5130,12 +5130,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @offset_sample_c_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5143,12 +5143,12 @@ main_body: define amdgpu_kernel void @offset_sample_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_cl_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5156,12 +5156,12 @@ main_body: define amdgpu_kernel void @offset_sample_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_cl_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5169,12 +5169,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_c_cl_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5182,12 +5182,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_c_cl_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5195,12 +5195,12 @@ main_body: define amdgpu_kernel void @offset_sample_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { ; CHECK-LABEL: @offset_sample_b_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5208,12 +5208,12 @@ main_body: define amdgpu_kernel void @offset_sample_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { ; CHECK-LABEL: @offset_sample_b_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5221,12 +5221,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) { ; CHECK-LABEL: @offset_sample_c_b_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5234,12 +5234,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @offset_sample_c_b_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5247,12 +5247,12 @@ main_body: define amdgpu_kernel void @offset_sample_b_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_b_cl_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5260,12 +5260,12 @@ main_body: define amdgpu_kernel void @offset_sample_b_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_b_cl_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5273,12 +5273,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_b_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_c_b_cl_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5286,12 +5286,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_b_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_c_b_cl_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5299,12 +5299,12 @@ main_body: define amdgpu_kernel void @offset_sample_d_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { ; CHECK-LABEL: @offset_sample_d_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5312,12 +5312,12 @@ main_body: define amdgpu_kernel void @offset_sample_d_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { ; CHECK-LABEL: @offset_sample_d_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5325,12 +5325,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_d_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { ; CHECK-LABEL: @offset_sample_c_d_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5338,12 +5338,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_d_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { ; CHECK-LABEL: @offset_sample_c_d_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5351,12 +5351,12 @@ main_body: define amdgpu_kernel void @offset_sample_d_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_d_cl_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5364,12 +5364,12 @@ main_body: define amdgpu_kernel void @offset_sample_d_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_d_cl_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5377,12 +5377,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_d_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_c_d_cl_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5390,12 +5390,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_d_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_c_d_cl_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5403,12 +5403,12 @@ main_body: define amdgpu_kernel void @offset_sample_cd_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { ; CHECK-LABEL: @offset_sample_cd_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5416,12 +5416,12 @@ main_body: define amdgpu_kernel void @offset_sample_cd_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { ; CHECK-LABEL: @offset_sample_cd_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5429,12 +5429,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_cd_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { ; CHECK-LABEL: @offset_sample_c_cd_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5442,12 +5442,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_cd_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { ; CHECK-LABEL: @offset_sample_c_cd_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5455,12 +5455,12 @@ main_body: define amdgpu_kernel void @offset_sample_cd_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_cd_cl_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5468,12 +5468,12 @@ main_body: define amdgpu_kernel void @offset_sample_cd_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_cd_cl_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5481,12 +5481,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_cd_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_c_cd_cl_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5494,12 +5494,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_cd_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_c_cd_cl_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5507,12 +5507,12 @@ main_body: define amdgpu_kernel void @offset_sample_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { ; CHECK-LABEL: @offset_sample_l_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5520,12 +5520,12 @@ main_body: define amdgpu_kernel void @offset_sample_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { ; CHECK-LABEL: @offset_sample_l_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5533,12 +5533,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { ; CHECK-LABEL: @offset_sample_c_l_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5546,12 +5546,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { ; CHECK-LABEL: @offset_sample_c_l_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5559,12 +5559,12 @@ main_body: define amdgpu_kernel void @offset_sample_lz_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { ; CHECK-LABEL: @offset_sample_lz_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5572,12 +5572,12 @@ main_body: define amdgpu_kernel void @offset_sample_lz_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; CHECK-LABEL: @offset_sample_lz_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5585,12 +5585,12 @@ main_body: define amdgpu_kernel void @offset_sample_c_lz_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { ; CHECK-LABEL: @offset_sample_c_lz_o_1d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -5598,56 +5598,56 @@ main_body: define amdgpu_kernel void @offset_sample_c_lz_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @offset_sample_c_lz_o_2d( ; CHECK-NEXT: main_body: -; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) store <4 x float> %v, ptr addrspace(1) %out ret void } -declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 ; -------------------------------------------------------------------- ; llvm.amdgcn.is.shared diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll index 65c89618e20ba..e7a36e805a012 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll @@ -7,41 +7,41 @@ define amdgpu_ps void @image_store_1d_store_all_zeros(<8 x i32> inreg %rsrc, i32 %s) #0 { ; GCN-LABEL: @image_store_1d_store_all_zeros( -; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.f32.i32(float 0.000000e+00, i32 1, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.f32.i32.v8i32(float 0.000000e+00, i32 1, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GCN-NEXT: ret void ; ; GFX12-LABEL: @image_store_1d_store_all_zeros( -; GFX12-NEXT: call void @llvm.amdgcn.image.store.1d.f32.i32(float 0.000000e+00, i32 1, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX12-NEXT: call void @llvm.amdgcn.image.store.1d.f32.i32.v8i32(float 0.000000e+00, i32 1, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX12-NEXT: ret void ; ; GFXUNKNOWN-LABEL: @image_store_1d_store_all_zeros( -; GFXUNKNOWN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> zeroinitializer, i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFXUNKNOWN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> zeroinitializer, i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFXUNKNOWN-NEXT: ret void ; - call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> zeroinitializer, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> zeroinitializer, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret void } define amdgpu_ps void @image_store_1d_store_insert_zeros_at_end(<8 x i32> inreg %rsrc, float %vdata1, i32 %s) #0 { ; GCN-LABEL: @image_store_1d_store_insert_zeros_at_end( -; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.f32.i32(float [[VDATA1:%.*]], i32 1, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.f32.i32.v8i32(float [[VDATA1:%.*]], i32 1, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GCN-NEXT: ret void ; ; GFX12-LABEL: @image_store_1d_store_insert_zeros_at_end( ; GFX12-NEXT: [[NEWVDATA4:%.*]] = insertelement <4 x float> , float [[VDATA1:%.*]], i64 0 -; GFX12-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[NEWVDATA4]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX12-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[NEWVDATA4]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX12-NEXT: ret void ; ; GFXUNKNOWN-LABEL: @image_store_1d_store_insert_zeros_at_end( ; GFXUNKNOWN-NEXT: [[NEWVDATA4:%.*]] = insertelement <4 x float> , float [[VDATA1:%.*]], i64 0 -; GFXUNKNOWN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[NEWVDATA4]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFXUNKNOWN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[NEWVDATA4]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFXUNKNOWN-NEXT: ret void ; %newvdata1 = insertelement <4 x float> undef, float %vdata1, i32 0 %newvdata2 = insertelement <4 x float> %newvdata1, float 0.0, i32 1 %newvdata3 = insertelement <4 x float> %newvdata2, float 0.0, i32 2 %newvdata4 = insertelement <4 x float> %newvdata3, float 0.0, i32 3 - call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %newvdata4, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> %newvdata4, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -49,26 +49,26 @@ define amdgpu_ps void @image_store_mip_1d_store_insert_zeros_at_end(<8 x i32> in ; GCN-LABEL: @image_store_mip_1d_store_insert_zeros_at_end( ; GCN-NEXT: [[TMP1:%.*]] = insertelement <3 x float> , float [[VDATA1:%.*]], i64 1 ; GCN-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[VDATA2:%.*]], i64 2 -; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.v3f32.i32(<3 x float> [[TMP2]], i32 7, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.v3f32.i32.v8i32(<3 x float> [[TMP2]], i32 7, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GCN-NEXT: ret void ; ; GFX12-LABEL: @image_store_mip_1d_store_insert_zeros_at_end( ; GFX12-NEXT: [[TMP1:%.*]] = insertelement <3 x float> , float [[VDATA1:%.*]], i64 1 ; GFX12-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[VDATA2:%.*]], i64 2 -; GFX12-NEXT: call void @llvm.amdgcn.image.store.1d.v3f32.i32(<3 x float> [[TMP2]], i32 7, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX12-NEXT: call void @llvm.amdgcn.image.store.1d.v3f32.i32.v8i32(<3 x float> [[TMP2]], i32 7, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX12-NEXT: ret void ; ; GFXUNKNOWN-LABEL: @image_store_mip_1d_store_insert_zeros_at_end( ; GFXUNKNOWN-NEXT: [[TMP1:%.*]] = insertelement <4 x float> , float [[VDATA1:%.*]], i64 1 ; GFXUNKNOWN-NEXT: [[NEWVDATA4:%.*]] = insertelement <4 x float> [[TMP1]], float [[VDATA2:%.*]], i64 2 -; GFXUNKNOWN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[NEWVDATA4]], i32 7, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFXUNKNOWN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[NEWVDATA4]], i32 7, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFXUNKNOWN-NEXT: ret void ; %newvdata1 = insertelement <4 x float> undef, float 0.0, i32 0 %newvdata2 = insertelement <4 x float> %newvdata1, float %vdata1, i32 1 %newvdata3 = insertelement <4 x float> %newvdata2, float %vdata2, i32 2 %newvdata4 = insertelement <4 x float> %newvdata3, float 0.0, i32 3 - call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %newvdata4, i32 7, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> %newvdata4, i32 7, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -148,39 +148,39 @@ define amdgpu_ps void @struct_tbuffer_store_insert_undefs(<4 x i32> inreg %a, fl define amdgpu_ps void @image_store_1d_store_shufflevector_same(<8 x i32> inreg %rsrc, <4 x float> %vdata1, i32 %s) #0 { ; GCN-LABEL: @image_store_1d_store_shufflevector_same( ; GCN-NEXT: [[DATA:%.*]] = shufflevector <4 x float> [[VDATA1:%.*]], <4 x float> poison, <4 x i32> zeroinitializer -; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[DATA]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[DATA]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GCN-NEXT: ret void ; ; GFX12-LABEL: @image_store_1d_store_shufflevector_same( ; GFX12-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VDATA1:%.*]], i64 0 -; GFX12-NEXT: call void @llvm.amdgcn.image.store.1d.f32.i32(float [[TMP1]], i32 1, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX12-NEXT: call void @llvm.amdgcn.image.store.1d.f32.i32.v8i32(float [[TMP1]], i32 1, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX12-NEXT: ret void ; ; GFXUNKNOWN-LABEL: @image_store_1d_store_shufflevector_same( ; GFXUNKNOWN-NEXT: [[DATA:%.*]] = shufflevector <4 x float> [[VDATA1:%.*]], <4 x float> poison, <4 x i32> zeroinitializer -; GFXUNKNOWN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[DATA]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFXUNKNOWN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[DATA]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFXUNKNOWN-NEXT: ret void ; %data = shufflevector <4 x float> %vdata1, <4 x float> poison, <4 x i32> - call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> %data, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret void } define amdgpu_ps void @image_store_1d_store_shufflevector(<8 x i32> inreg %rsrc, float %vdata1, i32 %s) #0 { ; GCN-LABEL: @image_store_1d_store_shufflevector( -; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> , i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GCN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> , i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GCN-NEXT: ret void ; ; GFX12-LABEL: @image_store_1d_store_shufflevector( -; GFX12-NEXT: call void @llvm.amdgcn.image.store.1d.v3f32.i32(<3 x float> , i32 7, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX12-NEXT: call void @llvm.amdgcn.image.store.1d.v3f32.i32.v8i32(<3 x float> , i32 7, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX12-NEXT: ret void ; ; GFXUNKNOWN-LABEL: @image_store_1d_store_shufflevector( -; GFXUNKNOWN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> , i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFXUNKNOWN-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> , i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFXUNKNOWN-NEXT: ret void ; %data = shufflevector <4 x float> , <4 x float> poison, <4 x i32> - call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> %data, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -280,20 +280,20 @@ declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2 declare void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32) #0 declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0 -declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 attributes #0 = { nounwind } attributes #1 = { nounwind writeonly } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll index d8d3c25688367..30431ad724843 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll @@ -7,17 +7,17 @@ define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; GFX7-LABEL: @image_sample_2d_fptrunc_to_d16( ; GFX7-NEXT: main_body: -; GFX7-NEXT: [[TEX:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; GFX7-NEXT: [[TEX:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; GFX7-NEXT: [[TEX_HALF:%.*]] = fptrunc float [[TEX]] to half ; GFX7-NEXT: ret half [[TEX_HALF]] ; ; GFX81PLUS-LABEL: @image_sample_2d_fptrunc_to_d16( ; GFX81PLUS-NEXT: main_body: -; GFX81PLUS-NEXT: [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; GFX81PLUS-NEXT: [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; GFX81PLUS-NEXT: ret half [[TEX]] ; main_body: - %tex = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + %tex = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) %tex_half = fptrunc float %tex to half ret half %tex_half } @@ -25,7 +25,7 @@ main_body: define amdgpu_ps half @image_sample_2d_v2f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; GFX7-LABEL: @image_sample_2d_v2f32( ; GFX7-NEXT: main_body: -; GFX7-NEXT: [[TEX:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; GFX7-NEXT: [[TEX:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; GFX7-NEXT: [[TEX_2_HALF:%.*]] = fptrunc <2 x float> [[TEX]] to <2 x half> ; GFX7-NEXT: [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX_2_HALF]], i64 0 ; GFX7-NEXT: [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX_2_HALF]], i64 1 @@ -34,14 +34,14 @@ define amdgpu_ps half @image_sample_2d_v2f32(<8 x i32> inreg %rsrc, <4 x i32> in ; ; GFX81PLUS-LABEL: @image_sample_2d_v2f32( ; GFX81PLUS-NEXT: main_body: -; GFX81PLUS-NEXT: [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; GFX81PLUS-NEXT: [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; GFX81PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0 ; GFX81PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1 ; GFX81PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]] ; GFX81PLUS-NEXT: ret half [[ADDF_SUM_0]] ; main_body: - %tex = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 3, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + %tex = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) %tex_2_half = fptrunc <2 x float> %tex to <2 x half> %tex_half_0 = extractelement <2 x half> %tex_2_half, i64 0 %tex_half_1 = extractelement <2 x half> %tex_2_half, i64 1 @@ -52,7 +52,7 @@ main_body: define amdgpu_ps half @image_sample_2d_v3f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; GFX7-LABEL: @image_sample_2d_v3f32( ; GFX7-NEXT: main_body: -; GFX7-NEXT: [[TEX:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; GFX7-NEXT: [[TEX:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; GFX7-NEXT: [[TEX_3_HALF:%.*]] = fptrunc <3 x float> [[TEX]] to <3 x half> ; GFX7-NEXT: [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX_3_HALF]], i64 0 ; GFX7-NEXT: [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX_3_HALF]], i64 1 @@ -63,7 +63,7 @@ define amdgpu_ps half @image_sample_2d_v3f32(<8 x i32> inreg %rsrc, <4 x i32> in ; ; GFX81PLUS-LABEL: @image_sample_2d_v3f32( ; GFX81PLUS-NEXT: main_body: -; GFX81PLUS-NEXT: [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; GFX81PLUS-NEXT: [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; GFX81PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0 ; GFX81PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1 ; GFX81PLUS-NEXT: [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2 @@ -72,7 +72,7 @@ define amdgpu_ps half @image_sample_2d_v3f32(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX81PLUS-NEXT: ret half [[ADDF_SUM_1]] ; main_body: - %tex = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32(i32 7, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + %tex = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) %tex_3_half = fptrunc <3 x float> %tex to <3 x half> %tex_half_0 = extractelement <3 x half> %tex_3_half, i64 0 %tex_half_1 = extractelement <3 x half> %tex_3_half, i64 1 @@ -85,7 +85,7 @@ main_body: define amdgpu_ps half @image_sample_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; GFX7-LABEL: @image_sample_2d_v4f32( ; GFX7-NEXT: main_body: -; GFX7-NEXT: [[TEX:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; GFX7-NEXT: [[TEX:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; GFX7-NEXT: [[TEX_4_HALF:%.*]] = fptrunc <4 x float> [[TEX]] to <4 x half> ; GFX7-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX_4_HALF]], i64 0 ; GFX7-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX_4_HALF]], i64 1 @@ -98,7 +98,7 @@ define amdgpu_ps half @image_sample_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> in ; ; GFX81PLUS-LABEL: @image_sample_2d_v4f32( ; GFX81PLUS-NEXT: main_body: -; GFX81PLUS-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; GFX81PLUS-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; GFX81PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0 ; GFX81PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1 ; GFX81PLUS-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2 @@ -109,7 +109,7 @@ define amdgpu_ps half @image_sample_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX81PLUS-NEXT: ret half [[ADDF_SUM_2]] ; main_body: - %tex = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + %tex = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) %tex_4_half = fptrunc <4 x float> %tex to <4 x half> %tex_half_0 = extractelement <4 x half> %tex_4_half, i64 0 %tex_half_1 = extractelement <4 x half> %tex_4_half, i64 1 @@ -124,7 +124,7 @@ main_body: define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { ; GFX7-LABEL: @image_gather4_2d_v4f32( ; GFX7-NEXT: main_body: -; GFX7-NEXT: [[TEX:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; GFX7-NEXT: [[TEX:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; GFX7-NEXT: [[TEX_4_HALF:%.*]] = fptrunc <4 x float> [[TEX]] to <4 x half> ; GFX7-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX_4_HALF]], i64 0 ; GFX7-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX_4_HALF]], i64 1 @@ -137,7 +137,7 @@ define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> i ; ; GFX81PLUS-LABEL: @image_gather4_2d_v4f32( ; GFX81PLUS-NEXT: main_body: -; GFX81PLUS-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; GFX81PLUS-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; GFX81PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0 ; GFX81PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1 ; GFX81PLUS-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2 @@ -148,7 +148,7 @@ define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX81PLUS-NEXT: ret half [[ADDF_SUM_2]] ; main_body: - %tex = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + %tex = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16.v8i32.v4i32(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) %tex_4_half = fptrunc <4 x float> %tex to <4 x half> %tex_half_0 = extractelement <4 x half> %tex_4_half, i64 0 %tex_half_1 = extractelement <4 x half> %tex_4_half, i64 1 @@ -162,22 +162,22 @@ main_body: define amdgpu_ps half @load_1d(i16 %s, <8 x i32> inreg %rsrc) { ; GFX7-LABEL: @load_1d( -; GFX7-NEXT: [[S_FLOAT:%.*]] = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX7-NEXT: [[S_FLOAT:%.*]] = call float @llvm.amdgcn.image.load.1d.f32.i16.v8i32(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX7-NEXT: [[S_HALF:%.*]] = fptrunc float [[S_FLOAT]] to half ; GFX7-NEXT: ret half [[S_HALF]] ; ; GFX81PLUS-LABEL: @load_1d( -; GFX81PLUS-NEXT: [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.load.1d.f16.i16(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX81PLUS-NEXT: [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.load.1d.f16.i16.v8i32(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX81PLUS-NEXT: ret half [[S_FLOAT]] ; - %s_float = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 1, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %s_float = call float @llvm.amdgcn.image.load.1d.f32.i16.v8i32(i32 1, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) %s_half = fptrunc float %s_float to half ret half %s_half } define amdgpu_ps half @load_1d_v2(i16 %s, <8 x i32> inreg %rsrc) { ; GFX7-LABEL: @load_1d_v2( -; GFX7-NEXT: [[V2_FLOAT:%.*]] = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 3, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX7-NEXT: [[V2_FLOAT:%.*]] = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16.v8i32(i32 3, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX7-NEXT: [[V2_HALF:%.*]] = fptrunc <2 x float> [[V2_FLOAT]] to <2 x half> ; GFX7-NEXT: [[S0:%.*]] = extractelement <2 x half> [[V2_HALF]], i64 0 ; GFX7-NEXT: [[S1:%.*]] = extractelement <2 x half> [[V2_HALF]], i64 1 @@ -185,13 +185,13 @@ define amdgpu_ps half @load_1d_v2(i16 %s, <8 x i32> inreg %rsrc) { ; GFX7-NEXT: ret half [[ADDF_SUM_0]] ; ; GFX81PLUS-LABEL: @load_1d_v2( -; GFX81PLUS-NEXT: [[V2_FLOAT:%.*]] = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i16(i32 3, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX81PLUS-NEXT: [[V2_FLOAT:%.*]] = call <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i16.v8i32(i32 3, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX81PLUS-NEXT: [[S0:%.*]] = extractelement <2 x half> [[V2_FLOAT]], i64 0 ; GFX81PLUS-NEXT: [[S1:%.*]] = extractelement <2 x half> [[V2_FLOAT]], i64 1 ; GFX81PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[S0]], [[S1]] ; GFX81PLUS-NEXT: ret half [[ADDF_SUM_0]] ; - %v2_float = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 3, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %v2_float = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16.v8i32(i32 3, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) %v2_half = fptrunc <2 x float> %v2_float to <2 x half> %s0 = extractelement <2 x half> %v2_half, i64 0 %s1 = extractelement <2 x half> %v2_half, i64 1 @@ -201,7 +201,7 @@ define amdgpu_ps half @load_1d_v2(i16 %s, <8 x i32> inreg %rsrc) { define amdgpu_ps half @load_1d_v3(i16 %s, <8 x i32> inreg %rsrc) { ; GFX7-LABEL: @load_1d_v3( -; GFX7-NEXT: [[V3_FLOAT:%.*]] = call <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i16(i32 7, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX7-NEXT: [[V3_FLOAT:%.*]] = call <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i16.v8i32(i32 7, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX7-NEXT: [[V3_HALF:%.*]] = fptrunc <3 x float> [[V3_FLOAT]] to <3 x half> ; GFX7-NEXT: [[S0:%.*]] = extractelement <3 x half> [[V3_HALF]], i64 0 ; GFX7-NEXT: [[S1:%.*]] = extractelement <3 x half> [[V3_HALF]], i64 1 @@ -211,7 +211,7 @@ define amdgpu_ps half @load_1d_v3(i16 %s, <8 x i32> inreg %rsrc) { ; GFX7-NEXT: ret half [[ADDF_SUM_1]] ; ; GFX81PLUS-LABEL: @load_1d_v3( -; GFX81PLUS-NEXT: [[V3_FLOAT:%.*]] = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i16(i32 7, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX81PLUS-NEXT: [[V3_FLOAT:%.*]] = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i16.v8i32(i32 7, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX81PLUS-NEXT: [[S0:%.*]] = extractelement <3 x half> [[V3_FLOAT]], i64 0 ; GFX81PLUS-NEXT: [[S1:%.*]] = extractelement <3 x half> [[V3_FLOAT]], i64 1 ; GFX81PLUS-NEXT: [[S2:%.*]] = extractelement <3 x half> [[V3_FLOAT]], i64 2 @@ -219,7 +219,7 @@ define amdgpu_ps half @load_1d_v3(i16 %s, <8 x i32> inreg %rsrc) { ; GFX81PLUS-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[S2]], [[ADDF_SUM_0]] ; GFX81PLUS-NEXT: ret half [[ADDF_SUM_1]] ; - %v3_float = call <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i16(i32 7, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %v3_float = call <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i16.v8i32(i32 7, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) %v3_half = fptrunc <3 x float> %v3_float to <3 x half> %s0 = extractelement <3 x half> %v3_half, i64 0 %s1 = extractelement <3 x half> %v3_half, i64 1 @@ -231,7 +231,7 @@ define amdgpu_ps half @load_1d_v3(i16 %s, <8 x i32> inreg %rsrc) { define amdgpu_ps half @load_1d_v4(i16 %s, <8 x i32> inreg %rsrc) { ; GFX7-LABEL: @load_1d_v4( -; GFX7-NEXT: [[V4_FLOAT:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX7-NEXT: [[V4_FLOAT:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX7-NEXT: [[V4_HALF:%.*]] = fptrunc <4 x float> [[V4_FLOAT]] to <4 x half> ; GFX7-NEXT: [[S0:%.*]] = extractelement <4 x half> [[V4_HALF]], i64 0 ; GFX7-NEXT: [[S1:%.*]] = extractelement <4 x half> [[V4_HALF]], i64 1 @@ -243,7 +243,7 @@ define amdgpu_ps half @load_1d_v4(i16 %s, <8 x i32> inreg %rsrc) { ; GFX7-NEXT: ret half [[ADDF_SUM_2]] ; ; GFX81PLUS-LABEL: @load_1d_v4( -; GFX81PLUS-NEXT: [[V4_FLOAT:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX81PLUS-NEXT: [[V4_FLOAT:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16.v8i32(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX81PLUS-NEXT: [[S0:%.*]] = extractelement <4 x half> [[V4_FLOAT]], i64 0 ; GFX81PLUS-NEXT: [[S1:%.*]] = extractelement <4 x half> [[V4_FLOAT]], i64 1 ; GFX81PLUS-NEXT: [[S2:%.*]] = extractelement <4 x half> [[V4_FLOAT]], i64 2 @@ -253,7 +253,7 @@ define amdgpu_ps half @load_1d_v4(i16 %s, <8 x i32> inreg %rsrc) { ; GFX81PLUS-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]] ; GFX81PLUS-NEXT: ret half [[ADDF_SUM_2]] ; - %v4_float = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %v4_float = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) %v4_half = fptrunc <4 x float> %v4_float to <4 x half> %s0 = extractelement <4 x half> %v4_half, i64 0 %s1 = extractelement <4 x half> %v4_half, i64 1 @@ -268,17 +268,17 @@ define amdgpu_ps half @load_1d_v4(i16 %s, <8 x i32> inreg %rsrc) { define amdgpu_ps half @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) { ; GFX7-LABEL: @load_2dmsaa( ; GFX7-NEXT: main_body: -; GFX7-NEXT: [[S_FLOAT:%.*]] = call float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX7-NEXT: [[S_FLOAT:%.*]] = call float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX7-NEXT: [[S_HALF:%.*]] = fptrunc float [[S_FLOAT]] to half ; GFX7-NEXT: ret half [[S_HALF]] ; ; GFX81PLUS-LABEL: @load_2dmsaa( ; GFX81PLUS-NEXT: main_body: -; GFX81PLUS-NEXT: [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.msaa.load.x.2dmsaa.f16.i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX81PLUS-NEXT: [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.msaa.load.x.2dmsaa.f16.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX81PLUS-NEXT: ret half [[S_FLOAT]] ; main_body: - %s_float = call float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + %s_float = call float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) %s_half = fptrunc float %s_float to half ret half %s_half } @@ -286,7 +286,7 @@ main_body: define amdgpu_ps half @load_2dmsaa_v2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) { ; GFX7-LABEL: @load_2dmsaa_v2( ; GFX7-NEXT: main_body: -; GFX7-NEXT: [[V2_FLOAT:%.*]] = call <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32(i32 3, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX7-NEXT: [[V2_FLOAT:%.*]] = call <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX7-NEXT: [[V2_HALF:%.*]] = fptrunc <2 x float> [[V2_FLOAT]] to <2 x half> ; GFX7-NEXT: [[S0:%.*]] = extractelement <2 x half> [[V2_HALF]], i64 0 ; GFX7-NEXT: [[S1:%.*]] = extractelement <2 x half> [[V2_HALF]], i64 1 @@ -295,14 +295,14 @@ define amdgpu_ps half @load_2dmsaa_v2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 ; ; GFX81PLUS-LABEL: @load_2dmsaa_v2( ; GFX81PLUS-NEXT: main_body: -; GFX81PLUS-NEXT: [[V2_FLOAT:%.*]] = call <2 x half> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f16.i32(i32 3, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX81PLUS-NEXT: [[V2_FLOAT:%.*]] = call <2 x half> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f16.i32.v8i32(i32 3, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX81PLUS-NEXT: [[S0:%.*]] = extractelement <2 x half> [[V2_FLOAT]], i64 0 ; GFX81PLUS-NEXT: [[S1:%.*]] = extractelement <2 x half> [[V2_FLOAT]], i64 1 ; GFX81PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[S0]], [[S1]] ; GFX81PLUS-NEXT: ret half [[ADDF_SUM_0]] ; main_body: - %v2_float = call <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + %v2_float = call <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) %v2_half = fptrunc <2 x float> %v2_float to <2 x half> %s0 = extractelement <2 x half> %v2_half, i64 0 %s1 = extractelement <2 x half> %v2_half, i64 1 @@ -313,7 +313,7 @@ main_body: define amdgpu_ps half @load_2dmsaa_v3(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) { ; GFX7-LABEL: @load_2dmsaa_v3( ; GFX7-NEXT: main_body: -; GFX7-NEXT: [[V3_FLOAT:%.*]] = call <3 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v3f32.i32(i32 7, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX7-NEXT: [[V3_FLOAT:%.*]] = call <3 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX7-NEXT: [[V3_HALF:%.*]] = fptrunc <3 x float> [[V3_FLOAT]] to <3 x half> ; GFX7-NEXT: [[S0:%.*]] = extractelement <3 x half> [[V3_HALF]], i64 0 ; GFX7-NEXT: [[S1:%.*]] = extractelement <3 x half> [[V3_HALF]], i64 1 @@ -324,7 +324,7 @@ define amdgpu_ps half @load_2dmsaa_v3(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 ; ; GFX81PLUS-LABEL: @load_2dmsaa_v3( ; GFX81PLUS-NEXT: main_body: -; GFX81PLUS-NEXT: [[V3_FLOAT:%.*]] = call <3 x half> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v3f16.i32(i32 7, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX81PLUS-NEXT: [[V3_FLOAT:%.*]] = call <3 x half> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v3f16.i32.v8i32(i32 7, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX81PLUS-NEXT: [[S0:%.*]] = extractelement <3 x half> [[V3_FLOAT]], i64 0 ; GFX81PLUS-NEXT: [[S1:%.*]] = extractelement <3 x half> [[V3_FLOAT]], i64 1 ; GFX81PLUS-NEXT: [[S2:%.*]] = extractelement <3 x half> [[V3_FLOAT]], i64 2 @@ -333,7 +333,7 @@ define amdgpu_ps half @load_2dmsaa_v3(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 ; GFX81PLUS-NEXT: ret half [[ADDF_SUM_1]] ; main_body: - %v3_float = call <3 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + %v3_float = call <3 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) %v3_half = fptrunc <3 x float> %v3_float to <3 x half> %s0 = extractelement <3 x half> %v3_half, i64 0 %s1 = extractelement <3 x half> %v3_half, i64 1 @@ -346,7 +346,7 @@ main_body: define amdgpu_ps half @load_2dmsaa_v4(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) { ; GFX7-LABEL: @load_2dmsaa_v4( ; GFX7-NEXT: main_body: -; GFX7-NEXT: [[V4_FLOAT:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX7-NEXT: [[V4_FLOAT:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX7-NEXT: [[V4_HALF:%.*]] = fptrunc <4 x float> [[V4_FLOAT]] to <4 x half> ; GFX7-NEXT: [[S0:%.*]] = extractelement <4 x half> [[V4_HALF]], i64 0 ; GFX7-NEXT: [[S1:%.*]] = extractelement <4 x half> [[V4_HALF]], i64 1 @@ -359,7 +359,7 @@ define amdgpu_ps half @load_2dmsaa_v4(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 ; ; GFX81PLUS-LABEL: @load_2dmsaa_v4( ; GFX81PLUS-NEXT: main_body: -; GFX81PLUS-NEXT: [[V4_FLOAT:%.*]] = call <4 x half> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f16.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; GFX81PLUS-NEXT: [[V4_FLOAT:%.*]] = call <4 x half> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f16.i32.v8i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) ; GFX81PLUS-NEXT: [[S0:%.*]] = extractelement <4 x half> [[V4_FLOAT]], i64 0 ; GFX81PLUS-NEXT: [[S1:%.*]] = extractelement <4 x half> [[V4_FLOAT]], i64 1 ; GFX81PLUS-NEXT: [[S2:%.*]] = extractelement <4 x half> [[V4_FLOAT]], i64 2 @@ -370,7 +370,7 @@ define amdgpu_ps half @load_2dmsaa_v4(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 ; GFX81PLUS-NEXT: ret half [[ADDF_SUM_2]] ; main_body: - %v4_float = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + %v4_float = call <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) %v4_half = fptrunc <4 x float> %v4_float to <4 x half> %s0 = extractelement <4 x half> %v4_half, i64 0 %s1 = extractelement <4 x half> %v4_half, i64 1 diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll index 13d13bca56de6..cb99632c287b3 100644 --- a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll +++ b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll @@ -36,27 +36,27 @@ define void @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 ret void } -declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) define void @invalid_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask, i1 %bool, i32 %arg) { ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %dmask - ; CHECK-NEXT: %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + ; CHECK-NEXT: %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i1 %bool - ; CHECK-NEXT: %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0) - %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0) + ; CHECK-NEXT: %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0) + %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %arg - ; CHECK-NEXT: %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0) - %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0) + ; CHECK-NEXT: %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0) + %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %arg - ; CHECK-NEXT: %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg) - %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg) + ; CHECK-NEXT: %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg) + %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg) ret void } @@ -336,84 +336,84 @@ define amdgpu_kernel void @update_dpp_test(ptr addrspace(1) %out, i32 %in1, i32 ret void } -declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) define amdgpu_ps void @load_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %var) { ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %var - ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) - %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %var - ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0) - %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0) + ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0) + %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %var - ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var) - %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var) + ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var) + %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var) ret void } -declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) +declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) define amdgpu_ps void @load_1d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %val) { ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %val - ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) - %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) + ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) + %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32.v8i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %val - ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) - %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %val - ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val) - %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val) + ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val) + %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val) ret void } -declare {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) +declare {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) define amdgpu_ps void @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s, i32 %var, i1 %bool) { ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %var - ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) - %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) + ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) + %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32.v8i32.v4i32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i1 %bool - ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0) - %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0) + ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0) + %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32.v8i32.v4i32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %var - ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0) - %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0) + ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0) + %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32.v8i32.v4i32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %var - ; CHECK-NEXT: %val3 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var) - %val3 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var) + ; CHECK-NEXT: %val3 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var) + %val3 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32.v8i32.v4i32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var) ret void } -declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32, i16, <8 x i32>, i32, i32) define amdgpu_ps void @load_1d_a16(<8 x i32> inreg %rsrc, <2 x i16> %coords, i16 %s, i32 %var) { ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %var - ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) - %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %var - ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0) - %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0) + ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0) + %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %var - ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var) - %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var) + ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var) + %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16.v8i32(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var) ret void } @@ -426,31 +426,31 @@ define amdgpu_ps void @raw_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 %data, ret void } -declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) +declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) define amdgpu_ps void @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %val) { ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %val - ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) - %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32.v8i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32.v8i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %val - ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) - %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32.v8i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32.v8i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) ret void } -declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 +declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #0 define amdgpu_ps void @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s, i32 %val) { ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %val - ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) - %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32.v8i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32.v8i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) ; CHECK: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %val - ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) - %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32.v8i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32.v8i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) ret void }