AMDGPU: only select V_BFE_U32 for `x & (-1 >> (32 - y))` when y fits in 5 bits

The pattern now takes its width operand through the new uint5Bits PatLeaf,
which accepts the operand only when computeKnownBits reports at most 5 active
bits for it.

Test changes in extract-lowbits.ll:
  * bzhi32_c0 and bzhi32_c4_commutative leave the width unconstrained and now
    expect the plain sub / shift / and sequence (separate SI and VI checks).
  * bzhi32_c0_clamp is new: it masks the width with 31 first and still expects
    v_bfe_u32.

The ValueTracking.h include moves from AMDGPUISelDAGToDAG.cpp to
AMDGPUISelDAGToDAG.h.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 21fffba14287e..e3a330d45aaa5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -22,7 +22,6 @@
 #include "SIISelLowering.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 11c4cdd560c2f..5ae0b179d7d0e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -17,6 +17,7 @@
 #include "GCNSubtarget.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIModeRegisterDefaults.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Target/TargetMachine.h"
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 5f4cca0645b0e..3f211e7cbdde5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3550,9 +3550,13 @@ def : AMDGPUPat <
   (V_BFE_U32_e64 $src, (i32 0), $width)
 >;
 
+def uint5Bits : PatLeaf<(i32 VGPR_32:$width), [{
+  return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxActiveBits() <= 5;
+}]>;
+
 // x & (-1 >> (bitwidth - y))
 def : AMDGPUPat <
-  (DivergentBinFrag<and> i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+  (DivergentBinFrag<and> i32:$src, (srl_oneuse -1, (sub 32, uint5Bits:$width))),
   (V_BFE_U32_e64 $src, (i32 0), $width)
 >;
 
diff --git a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
index 7f1f7133d6991..5e637ba071d97 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
@@ -99,12 +99,36 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; ---------------------------------------------------------------------------- ;
 
 define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
-; GCN-LABEL: bzhi32_c0:
+; SI-LABEL: bzhi32_c0:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
+; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
+; SI-NEXT:    v_and_b32_e32 v0, v1, v0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: bzhi32_c0:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
+; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
+; VI-NEXT:    v_and_b32_e32 v0, v1, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %numhighbits = sub i32 32, %numlowbits
+  %mask = lshr i32 -1, %numhighbits
+  %masked = and i32 %mask, %val
+  ret i32 %masked
+}
+
+define i32 @bzhi32_c0_clamp(i32 %val, i32 %numlowbits) nounwind {
+; GCN-LABEL: bzhi32_c0_clamp:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v1, 31, v1
 ; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
-  %numhighbits = sub i32 32, %numlowbits
+  %low5bits = and i32 %numlowbits, 31
+  %numhighbits = sub i32 32, %low5bits
   %mask = lshr i32 -1, %numhighbits
   %masked = and i32 %mask, %val
   ret i32 %masked
@@ -134,11 +158,21 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 }
 
 define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
-; GCN-LABEL: bzhi32_c4_commutative:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; SI-LABEL: bzhi32_c4_commutative:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
+; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
+; SI-NEXT:    v_and_b32_e32 v0, v0, v1
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: bzhi32_c4_commutative:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
+; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
+; VI-NEXT:    v_and_b32_e32 v0, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
   %masked = and i32 %val, %mask ; swapped order