diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 421fc41ad3b62..b9d06c090a52a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -872,6 +872,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, FDIVActions.customFor({S16}); } + if (ST.hasPackedFP32Ops()) { + FPOpActions.legalFor({V2S32}); + FPOpActions.clampMaxNumElementsStrict(0, S32, 2); + } + auto &MinNumMaxNum = getActionDefinitionsBuilder({ G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE}); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index da6701fbcea1d..5549652aae90b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2720,6 +2720,13 @@ def : GCNPat< (V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src) >; +let SubtargetPredicate = HasPackedFP32Ops in { +def : GCNPat< + (fcanonicalize (v2f32 (VOP3PMods v2f32:$src, i32:$src_mods))), + (V_PK_MUL_F32 0, CONST.FP32_ONE, $src_mods, $src) +>; +} + // TODO: Handle fneg like other types. def : GCNPat< (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))), diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll index 2af6adb4d16c0..ee2e5c7a4ed70 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll @@ -1,9 +1,12 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900 %s -; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-GISEL %s ; GCN-LABEL: {{^}}fadd_v2_vv: ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] +; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] define amdgpu_kernel void @fadd_v2_vv(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -15,7 +18,7 @@ define amdgpu_kernel void @fadd_v2_vv(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fadd_v2_vs: ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v2_vs(ptr addrspace(1) %a, <2 x float> %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -27,7 +30,7 @@ define amdgpu_kernel void @fadd_v2_vs(ptr addrspace(1) %a, <2 x float> %x) { ; GCN-LABEL: {{^}}fadd_v4_vs: ; GFX900-COUNT-4: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x float> %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i32 %id @@ -39,7 +42,7 @@ define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x float> %x) { ; GCN-LABEL: {{^}}fadd_v32_vs: ; GFX900-COUNT-32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-COUNT-16: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED-COUNT-16: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id @@ -49,10 +52,13 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) { ret void } +; FIXME: GISel does not use op_sel for splat constants. + ; GCN-LABEL: {{^}}fadd_v2_v_imm: -; GFX90A: s_mov_b32 s[[K:[0-9]+]], 0x42c80000 +; PACKED: s_mov_b32 s[[K:[0-9]+]], 0x42c80000 ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, 0x42c80000, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -64,7 +70,8 @@ define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fadd_v2_v_v_splat: ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v0 -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1] op_sel_hi:[1,0]{{$}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1] op_sel_hi:[1,0]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1]{{$}} define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -79,7 +86,8 @@ define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fadd_v2_v_lit_splat: ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 1.0 op_sel_hi:[1,0]{{$}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 1.0 op_sel_hi:[1,0]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -92,8 +100,8 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fadd_v2_v_lit_hi0: ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} -; GFX90A-DAG: s_mov_b64 [[K:s\[[0-9:]+\]]], 0x3f800000 -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], [[K]] +; PACKED-DAG: s_mov_b64 [[K:s\[[0-9:]+\]]], 0x3f800000 +; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], [[K]] define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -106,9 +114,9 @@ define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fadd_v2_v_lit_lo0: ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} -; GFX90A-DAG: s_mov_b32 s[[LO:[0-9]+]], 0 -; GFX90A-DAG: s_mov_b32 s[[HI:[0-9]+]], 1.0 -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[LO]]:[[HI]]]{{$}} +; PACKED-DAG: s_mov_b32 s[[LO:[0-9]+]], 0 +; PACKED-DAG: s_mov_b32 s[[HI:[0-9]+]], 1.0 +; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[LO]]:[[HI]]]{{$}} define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -121,9 +129,9 @@ define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fadd_v2_v_unfoldable_lit: ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}} -; GFX90A-DAG: s_mov_b32 s{{[0-9]+}}, 1.0 -; GFX90A-DAG: s_mov_b32 s{{[0-9]+}}, 2.0 -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 1.0 +; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 2.0 +; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -133,9 +141,12 @@ define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) { ret void } +; FIXME: Fold fneg into v_pk_add_f32 with Global ISel. + ; GCN-LABEL: {{^}}fadd_v2_v_fneg: ; GFX900-COUNT-2: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -149,9 +160,10 @@ define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) { } ; GCN-LABEL: {{^}}fadd_v2_v_fneg_lo: -; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1]{{$}} +; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -165,9 +177,10 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) { } ; GCN-LABEL: {{^}}fadd_v2_v_fneg_hi: -; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_hi:[0,1]{{$}} +; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_hi:[0,1]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -181,9 +194,10 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) { } ; GCN-LABEL: {{^}}fadd_v2_v_fneg_lo2: -; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] neg_lo:[0,1]{{$}} +; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] neg_lo:[0,1]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, float %x, float %y) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -197,9 +211,10 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, float %x, flo } ; GCN-LABEL: {{^}}fadd_v2_v_fneg_hi2: -; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0] neg_hi:[0,1]{{$}} +; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0] neg_hi:[0,1]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr addrspace(1) %a, float %x, float %y) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -214,7 +229,7 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr addrspace(1) %a, float %x, flo ; GCN-LABEL: {{^}}fmul_v2_vv: ; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX90A: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] +; PACKED: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] define amdgpu_kernel void @fmul_v2_vv(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -226,7 +241,7 @@ define amdgpu_kernel void @fmul_v2_vv(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fmul_v2_vs: ; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fmul_v2_vs(ptr addrspace(1) %a, <2 x float> %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -238,7 +253,7 @@ define amdgpu_kernel void @fmul_v2_vs(ptr addrspace(1) %a, <2 x float> %x) { ; GCN-LABEL: {{^}}fmul_v4_vs: ; GFX900-COUNT-4: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-COUNT-2: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED-COUNT-2: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x float> %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i32 %id @@ -250,7 +265,7 @@ define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x float> %x) { ; GCN-LABEL: {{^}}fmul_v32_vs: ; GFX900-COUNT-32: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-COUNT-16: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED-COUNT-16: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id @@ -261,9 +276,10 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) { } ; GCN-LABEL: {{^}}fmul_v2_v_imm: -; GFX90A: s_mov_b32 s[[K:[0-9]+]], 0x42c80000 +; PACKED: s_mov_b32 s[[K:[0-9]+]], 0x42c80000 ; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, 0x42c80000, v{{[0-9]+}} -; GFX90A: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}} +; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}} +; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -275,7 +291,8 @@ define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fmul_v2_v_v_splat: ; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v0 -; GFX90A: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1] op_sel_hi:[1,0]{{$}} +; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1] op_sel_hi:[1,0]{{$}} +; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1]{{$}} define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -290,7 +307,8 @@ define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fmul_v2_v_lit_splat: ; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} -; GFX90A: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0 op_sel_hi:[1,0]{{$}} +; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0 op_sel_hi:[1,0]{{$}} +; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -303,9 +321,9 @@ define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fmul_v2_v_unfoldable_lit: ; GFX900-DAG: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} ; GFX900-DAG: v_mul_f32_e32 v{{[0-9]+}}, 0x40400000, v{{[0-9]+}} -; GFX90A-DAG: s_mov_b32 s{{[0-9]+}}, 4.0 -; GFX90A-DAG: s_mov_b32 s{{[0-9]+}}, 0x40400000 -; GFX90A: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 4.0 +; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 0x40400000 +; PACKED: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -317,7 +335,8 @@ define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fmul_v2_v_fneg: ; GFX900-COUNT-2: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -s{{[0-9]+}} -; GFX90A: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}} +; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}} +; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -332,7 +351,7 @@ define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) { ; GCN-LABEL: {{^}}fma_v2_vv: ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX90A: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] +; PACKED: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] define amdgpu_kernel void @fma_v2_vv(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -344,7 +363,7 @@ define amdgpu_kernel void @fma_v2_vv(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fma_v2_vs: ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GFX90A: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fma_v2_vs(ptr addrspace(1) %a, <2 x float> %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -356,7 +375,7 @@ define amdgpu_kernel void @fma_v2_vs(ptr addrspace(1) %a, <2 x float> %x) { ; GCN-LABEL: {{^}}fma_v4_vs: ; GFX900-COUNT-4: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GFX90A-COUNT-2: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED-COUNT-2: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fma_v4_vs(ptr addrspace(1) %a, <4 x float> %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i32 %id @@ -368,7 +387,7 @@ define amdgpu_kernel void @fma_v4_vs(ptr addrspace(1) %a, <4 x float> %x) { ; GCN-LABEL: {{^}}fma_v32_vs: ; GFX900-COUNT-32: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GFX90A-COUNT-16: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} +; PACKED-COUNT-16: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id @@ -379,10 +398,12 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) { } ; GCN-LABEL: {{^}}fma_v2_v_imm: -; GCN-DAG: s_mov_b32 s[[K1:[0-9]+]], 0x42c80000 -; GCN-DAG: v_mov_b32_e32 v[[K2:[0-9]+]], 0x43480000 -; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s[[K1]], v[[K2]] -; GFX90A: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K1]]:{{[0-9:]+}}], v[[[K2]]:{{[0-9:]+}}] op_sel_hi:[1,0,0]{{$}} +; GCN-DAG: s_mov_b32 s[[K1:[0-9]+]], 0x42c80000 +; GFX900-DAG: v_mov_b32_e32 v[[K2:[0-9]+]], 0x43480000 +; PACKED-SDAG-DAG: v_mov_b32_e32 v[[K2:[0-9]+]], 0x43480000 +; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s[[K1]], v[[K2]] +; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K1]]:{{[0-9:]+}}], v[[[K2]]:{{[0-9:]+}}] op_sel_hi:[1,0,0]{{$}} +; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K1]]:{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -394,7 +415,8 @@ define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fma_v2_v_v_splat: ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, v0, v0 -; GFX90A: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1], v[0:1] op_sel_hi:[1,0,0]{{$}} +; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1], v[0:1] op_sel_hi:[1,0,0]{{$}} +; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1], v[0:1]{{$}} define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -409,7 +431,8 @@ define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fma_v2_v_lit_splat: ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4.0, 1.0 -; GFX90A: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0, 1.0 op_sel_hi:[1,0,0]{{$}} +; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0, 1.0 op_sel_hi:[1,0,0]{{$}} +; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -420,13 +443,13 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) { } ; GCN-LABEL: {{^}}fma_v2_v_unfoldable_lit: -; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0x40400000 -; GFX900-DAG: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4.0, 1.0 -; GFX900-DAG: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, 2.0 -; GFX90A-DAG: s_mov_b32 s{{[0-9]+}}, 4.0 -; GFX90A-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0 -; GFX90A-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 -; GFX90A: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} +; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0x40400000 +; GFX900-DAG: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4.0, 1.0 +; GFX900-DAG: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, 2.0 +; PACKED-SDAG-DAG: s_mov_b32 s{{[0-9]+}}, 4.0 +; PACKED-SDAG-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0 +; PACKED-SDAG-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 +; PACKED: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -438,7 +461,8 @@ define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) { ; GCN-LABEL: {{^}}fma_v2_v_fneg: ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}} -; GFX90A: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0,0] neg_lo:[0,1,1] neg_hi:[0,1,1]{{$}} +; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0,0] neg_lo:[0,1,1] neg_hi:[0,1,1]{{$}} +; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id @@ -453,7 +477,8 @@ define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) { ; GCN-LABEL: {{^}}add_vector_neg_bitcast_scalar_lo: ; GFX900-COUNT-2: v_sub_f32_e32 -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1] +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) { bb: %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 4 @@ -470,7 +495,8 @@ bb: ; GCN-LABEL: {{^}}fma_vector_vector_neg_scalar_lo_scalar_hi: ; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}} -; GFX90A: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] neg_lo:[0,0,1] neg_hi:[0,0,1] +; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}} +; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) { bb: %lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1 @@ -493,7 +519,8 @@ bb: ; GCN-LABEL: {{^}}shuffle_add_f32: ; GFX900-COUNT-2: v_add_f32_e32 -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0]{{$}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @shuffle_add_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 { bb: %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8 @@ -507,7 +534,8 @@ bb: ; GCN-LABEL: {{^}}shuffle_neg_add_f32: ; GFX900-COUNT-2: v_sub_f32_e32 -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @shuffle_neg_add_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 { bb: %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8 @@ -522,10 +550,12 @@ bb: } ; GCN-LABEL: {{^}}fadd_fadd_fsub: -; GFX900: v_add_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0 -; GFX900: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], 0 op_sel_hi:[1,0] -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0 op_sel_hi:[1,0] +; GFX900: v_add_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0 +; GFX900: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], 0 op_sel_hi:[1,0]{{$}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0 op_sel_hi:[1,0]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} +; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg) { bb: %i12 = fadd <2 x float> zeroinitializer, %arg @@ -538,8 +568,9 @@ bb: } ; GCN-LABEL: {{^}}fadd_shuffle_v4: -; GFX900-COUNT-4: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel_hi:[1,0] +; GFX900-COUNT-4: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; PACKED-SDAG-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel_hi:[1,0]{{$}} +; PACKED-GISEL-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}} define amdgpu_kernel void @fadd_shuffle_v4(ptr addrspace(1) %arg) { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -552,8 +583,10 @@ bb: } ; GCN-LABEL: {{^}}fneg_v2f32_vec: -; GFX900-COUNT-2: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0 neg_lo:[1,1] neg_hi:[1,1]{{$}} +; GFX900-COUNT-2: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} +; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0 neg_lo:[1,1] neg_hi:[1,1]{{$}} +; PACKED-GISEL-COUNT-2: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} +; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}] op_sel_hi:[0,1]{{$}} define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id