From 4308d79d7771805e43de6682061c24bb05aab07b Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Mon, 6 May 2024 14:24:13 +0800 Subject: [PATCH 1/3] [LoongArch] Optimize codegen for ISD::ROTL --- .../LoongArch/LoongArchISelLowering.cpp | 35 +++++++++++++------ .../Target/LoongArch/LoongArchInstrInfo.td | 7 ++-- llvm/test/CodeGen/LoongArch/rotl-rotr.ll | 33 +++++++---------- 3 files changed, 40 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 46a6703f29d50..5c61db760f856 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -79,6 +79,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); setOperationAction(ISD::ROTL, GRLenVT, Expand); + setOperationAction(ISD::ROTR, GRLenVT, Legal); setOperationAction(ISD::CTPOP, GRLenVT, Expand); setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, @@ -1671,10 +1672,6 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { return LoongArchISD::SRA_W; case ISD::SRL: return LoongArchISD::SRL_W; - case ISD::ROTR: - return LoongArchISD::ROTR_W; - case ISD::ROTL: - return LoongArchISD::ROTL_W; case ISD::CTTZ: return LoongArchISD::CTZ_W; case ISD::CTLZ: @@ -1715,6 +1712,26 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); } +static SDValue customLegalizeToRotateWOp(SDNode *N, SelectionDAG &DAG, + unsigned ExtOpc = ISD::ANY_EXTEND) { + SDLoc DL(N); + SDValue NewOp0, NewOp1, NewRes; + + NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); + NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); + + if (N->getOpcode() == ISD::ROTL) { + SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64); + NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1); + } + + NewRes = DAG.getNode(LoongArchISD::ROTR_W, DL, MVT::i64, NewOp0, NewOp1); + + // ReplaceNodeResults requires we maintain the same type for the return + // value. + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); +} + // Helper function that emits error message for intrinsics with/without chain // and return a UNDEF or and the chain as the results. static void emitErrorAndReplaceIntrinsicResults( @@ -1841,7 +1858,6 @@ void LoongArchTargetLowering::ReplaceNodeResults( case ISD::SHL: case ISD::SRA: case ISD::SRL: - case ISD::ROTR: assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); if (N->getOperand(1).getOpcode() != ISD::Constant) { @@ -1850,11 +1866,10 @@ void LoongArchTargetLowering::ReplaceNodeResults( } break; case ISD::ROTL: - ConstantSDNode *CN; - if ((CN = dyn_cast(N->getOperand(1)))) { - Results.push_back(customLegalizeToWOp(N, DAG, 2)); - break; - } + case ISD::ROTR: + assert(VT == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + Results.push_back(customLegalizeToRotateWOp(N, DAG)); break; case ISD::FP_TO_SINT: { assert(VT == MVT::i32 && Subtarget.is64Bit() && diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index a7f6eb9a79ebc..a4272210d0eaa 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -85,7 +85,6 @@ def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>; -def loongarch_rotl_w : SDNode<"LoongArchISD::ROTL_W", SDT_LoongArchIntBinOpW>; def loongarch_crc_w_b_w : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>; def loongarch_crc_w_h_w @@ -1116,12 +1115,10 @@ def : PatGprGpr; def : PatGprGpr; def : PatGprGpr; def : PatGprGpr; +def : PatGprGpr_32; def : PatGprImm; def : PatGprImm_32; -def : Pat<(loongarch_rotl_w GPR:$rj, uimm5:$imm), - (ROTRI_W GPR:$rj, (ImmSubFrom32 uimm5:$imm))>; -def : Pat<(sext_inreg (loongarch_rotl_w GPR:$rj, uimm5:$imm), i32), - (ROTRI_W GPR:$rj, (ImmSubFrom32 uimm5:$imm))>; +def : PatGprImm_32; // TODO: Select "_W[U]" instructions for i32xi32 if only lower 32 bits of the // product are used. def : PatGprGpr; diff --git a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll index 8646771e5d48a..9552930be76ae 100644 --- a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll +++ b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll @@ -14,10 +14,9 @@ define signext i32 @rotl_32(i32 signext %x, i32 signext %y) nounwind { ; ; LA64-LABEL: rotl_32: ; LA64: # %bb.0: -; LA64-NEXT: sll.w $a2, $a0, $a1 -; LA64-NEXT: sub.d $a1, $zero, $a1 -; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: or $a0, $a2, $a0 +; LA64-NEXT: ori $a2, $zero, 32 +; LA64-NEXT: sub.d $a1, $a2, $a1 +; LA64-NEXT: rotr.w $a0, $a0, $a1 ; LA64-NEXT: ret %z = sub i32 32, %y %b = shl i32 %x, %y @@ -152,10 +151,9 @@ define signext i32 @rotl_32_mask(i32 signext %x, i32 signext %y) nounwind { ; ; LA64-LABEL: rotl_32_mask: ; LA64: # %bb.0: -; LA64-NEXT: sll.w $a2, $a0, $a1 -; LA64-NEXT: sub.d $a1, $zero, $a1 -; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: or $a0, $a2, $a0 +; LA64-NEXT: ori $a2, $zero, 32 +; LA64-NEXT: sub.d $a1, $a2, $a1 +; LA64-NEXT: rotr.w $a0, $a0, $a1 ; LA64-NEXT: ret %z = sub i32 0, %y %and = and i32 %z, 31 @@ -174,10 +172,9 @@ define signext i32 @rotl_32_mask_and_63_and_31(i32 signext %x, i32 signext %y) n ; ; LA64-LABEL: rotl_32_mask_and_63_and_31: ; LA64: # %bb.0: -; LA64-NEXT: sll.w $a2, $a0, $a1 -; LA64-NEXT: sub.d $a1, $zero, $a1 -; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: or $a0, $a2, $a0 +; LA64-NEXT: ori $a2, $zero, 32 +; LA64-NEXT: sub.d $a1, $a2, $a1 +; LA64-NEXT: rotr.w $a0, $a0, $a1 ; LA64-NEXT: ret %a = and i32 %y, 63 %b = shl i32 %x, %a @@ -197,10 +194,9 @@ define signext i32 @rotl_32_mask_or_64_or_32(i32 signext %x, i32 signext %y) nou ; ; LA64-LABEL: rotl_32_mask_or_64_or_32: ; LA64: # %bb.0: -; LA64-NEXT: sll.w $a2, $a0, $a1 -; LA64-NEXT: sub.d $a1, $zero, $a1 -; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: or $a0, $a2, $a0 +; LA64-NEXT: ori $a2, $zero, 32 +; LA64-NEXT: sub.d $a1, $a2, $a1 +; LA64-NEXT: rotr.w $a0, $a0, $a1 ; LA64-NEXT: ret %a = or i32 %y, 64 %b = shl i32 %x, %a @@ -591,10 +587,7 @@ define signext i32 @rotr_i32_fshr(i32 signext %a) nounwind { ; ; LA64-LABEL: rotr_i32_fshr: ; LA64: # %bb.0: -; LA64-NEXT: slli.d $a1, $a0, 20 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 12 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: rotri.w $a0, $a0, 12 ; LA64-NEXT: ret %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 12) ret i32 %or From cf550b1735ffdcbf939e41cfee172ff4b7ecc61b Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 7 May 2024 09:51:07 +0800 Subject: [PATCH 2/3] Address SixWeining's comment --- llvm/test/CodeGen/LoongArch/rotl-rotr.ll | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll index 9552930be76ae..b9fbd962e6bbf 100644 --- a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll +++ b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll @@ -2,8 +2,6 @@ ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 -;; TODO: Add optimization to ISD::ROTL - define signext i32 @rotl_32(i32 signext %x, i32 signext %y) nounwind { ; LA32-LABEL: rotl_32: ; LA32: # %bb.0: From d39c5461624899a2499e35ec990adce0bae9c73b Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 7 May 2024 13:05:37 +0800 Subject: [PATCH 3/3] Address wangleiat's comments --- .../LoongArch/LoongArchISelLowering.cpp | 30 +++++-------------- .../Target/LoongArch/LoongArchInstrInfo.td | 2 +- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 5c61db760f856..21d520656091c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -79,7 +79,6 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); setOperationAction(ISD::ROTL, GRLenVT, Expand); - setOperationAction(ISD::ROTR, GRLenVT, Legal); setOperationAction(ISD::CTPOP, GRLenVT, Expand); setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, @@ -1672,6 +1671,9 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { return LoongArchISD::SRA_W; case ISD::SRL: return LoongArchISD::SRL_W; + case ISD::ROTL: + case ISD::ROTR: + return LoongArchISD::ROTR_W; case ISD::CTTZ: return LoongArchISD::CTZ_W; case ISD::CTLZ: @@ -1701,6 +1703,10 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, case 2: { NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); + if (N->getOpcode() == ISD::ROTL) { + SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64); + NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1); + } NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); break; } @@ -1712,26 +1718,6 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); } -static SDValue customLegalizeToRotateWOp(SDNode *N, SelectionDAG &DAG, - unsigned ExtOpc = ISD::ANY_EXTEND) { - SDLoc DL(N); - SDValue NewOp0, NewOp1, NewRes; - - NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); - NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); - - if (N->getOpcode() == ISD::ROTL) { - SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64); - NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1); - } - - NewRes = DAG.getNode(LoongArchISD::ROTR_W, DL, MVT::i64, NewOp0, NewOp1); - - // ReplaceNodeResults requires we maintain the same type for the return - // value. - return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); -} - // Helper function that emits error message for intrinsics with/without chain // and return a UNDEF or and the chain as the results. static void emitErrorAndReplaceIntrinsicResults( @@ -1869,7 +1855,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( case ISD::ROTR: assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); - Results.push_back(customLegalizeToRotateWOp(N, DAG)); + Results.push_back(customLegalizeToWOp(N, DAG, 2)); break; case ISD::FP_TO_SINT: { assert(VT == MVT::i32 && Subtarget.is64Bit() && diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index a4272210d0eaa..f56f8f7e1179c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -1118,7 +1118,7 @@ def : PatGprGpr; def : PatGprGpr_32; def : PatGprImm; def : PatGprImm_32; -def : PatGprImm_32; +def : PatGprImm; // TODO: Select "_W[U]" instructions for i32xi32 if only lower 32 bits of the // product are used. def : PatGprGpr;