[RISCV][LoopIdiomVectorize] Support VP intrinsics in LoopIdiomVectorize #94082

mshockwave · 2024-05-31T23:54:29Z

Teach LoopIdiomVectorize to use VP intrinsics to replace the byte compare loops. Right now only RISC-V uses LoopIdiomVectorize of this style.

This PR stacks on top of #94081

llvmbot · 2024-05-31T23:55:01Z

@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-risc-v

Author: Min-Yih Hsu (mshockwave)

Changes

Teach LoopIdiomTransform to generate VP intrinsics to replace the byte compare loops. Right now RISC-V is the only user of this style.

This PR stacks on top of #94081

Patch is 234.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94082.diff

16 Files Affected:

(added) llvm/include/llvm/Transforms/Vectorize/LoopIdiomTransform.h (+36)
(modified) llvm/lib/Passes/PassBuilder.cpp (+1)
(modified) llvm/lib/Passes/PassRegistry.def (+1)
(modified) llvm/lib/Target/AArch64/AArch64.h (-1)
(removed) llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h (-25)
(removed) llvm/lib/Target/AArch64/AArch64PassRegistry.def (-20)
(modified) llvm/lib/Target/AArch64/AArch64TargetMachine.cpp (+2-6)
(modified) llvm/lib/Target/AArch64/AArch64TargetMachine.h (-1)
(modified) llvm/lib/Target/AArch64/CMakeLists.txt (+1-1)
(modified) llvm/lib/Target/RISCV/RISCVTargetMachine.cpp (+10)
(modified) llvm/lib/Target/RISCV/RISCVTargetMachine.h (+2)
(modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+2)
(modified) llvm/lib/Transforms/Vectorize/CMakeLists.txt (+1)
(renamed) llvm/lib/Transforms/Vectorize/LoopIdiomTransform.cpp (+336-253)
(modified) llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll (+201-204)
(added) llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll (+1751)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopIdiomTransform.h b/llvm/include/llvm/Transforms/Vectorize/LoopIdiomTransform.h
new file mode 100644
index 0000000000000..866bf7e72e406
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopIdiomTransform.h
@@ -0,0 +1,36 @@
+//===----------LoopIdiomTransform.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_VECTORIZE_LOOPIDIOMTRANSFORM_H
+#define LLVM_LIB_TRANSFORMS_VECTORIZE_LOOPIDIOMTRANSFORM_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+enum class LoopIdiomTransformStyle { Masked, Predicated };
+
+class LoopIdiomTransformPass : public PassInfoMixin<LoopIdiomTransformPass> {
+  LoopIdiomTransformStyle VectorizeStyle = LoopIdiomTransformStyle::Masked;
+
+  // The VF used in vectorizing the byte compare pattern.
+  unsigned ByteCompareVF = 16;
+
+public:
+  LoopIdiomTransformPass() = default;
+  explicit LoopIdiomTransformPass(LoopIdiomTransformStyle S)
+      : VectorizeStyle(S) {}
+
+  LoopIdiomTransformPass(LoopIdiomTransformStyle S, unsigned BCVF)
+      : VectorizeStyle(S), ByteCompareVF(BCVF) {}
+
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+} // namespace llvm
+#endif // LLVM_LIB_TRANSFORMS_VECTORIZE_LOOPIDIOMTRANSFORM_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 734ca4d5deec9..bf11146a05e5a 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -295,6 +295,7 @@
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
 #include "llvm/Transforms/Utils/UnifyLoopExits.h"
 #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
+#include "llvm/Transforms/Vectorize/LoopIdiomTransform.h"
 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
 #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
 #include "llvm/Transforms/Vectorize/VectorCombine.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 50682ca4970f1..714058f91bfc6 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -621,6 +621,7 @@ LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
 LOOP_PASS("loop-bound-split", LoopBoundSplitPass())
 LOOP_PASS("loop-deletion", LoopDeletionPass())
 LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
+LOOP_PASS("loop-idiom-transform", LoopIdiomTransformPass())
 LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
 LOOP_PASS("loop-predication", LoopPredicationPass())
 LOOP_PASS("loop-reduce", LoopStrengthReducePass())
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index b70fbe42fe5fc..19e0d1e2f5960 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -90,7 +90,6 @@ void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
 void initializeAArch64ExpandPseudoPass(PassRegistry &);
 void initializeAArch64GlobalsTaggingPass(PassRegistry &);
 void initializeAArch64LoadStoreOptPass(PassRegistry&);
-void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
 void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
 void initializeAArch64MIPeepholeOptPass(PassRegistry &);
 void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h
deleted file mode 100644
index cc68425bb68b5..0000000000000
--- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===- AArch64LoopIdiomTransform.h --------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64LOOPIDIOMTRANSFORM_H
-#define LLVM_LIB_TARGET_AARCH64_AARCH64LOOPIDIOMTRANSFORM_H
-
-#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
-
-namespace llvm {
-
-struct AArch64LoopIdiomTransformPass
-    : PassInfoMixin<AArch64LoopIdiomTransformPass> {
-  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
-                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
-};
-
-} // namespace llvm
-
-#endif // LLVM_LIB_TARGET_AARCH64_AARCH64LOOPIDIOMTRANSFORM_H
diff --git a/llvm/lib/Target/AArch64/AArch64PassRegistry.def b/llvm/lib/Target/AArch64/AArch64PassRegistry.def
deleted file mode 100644
index ca944579f93a9..0000000000000
--- a/llvm/lib/Target/AArch64/AArch64PassRegistry.def
+++ /dev/null
@@ -1,20 +0,0 @@
-//===- AArch64PassRegistry.def - Registry of AArch64 passes -----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is used as the registry of passes that are part of the
-// AArch64 backend.
-//
-//===----------------------------------------------------------------------===//
-
-// NOTE: NO INCLUDE GUARD DESIRED!
-
-#ifndef LOOP_PASS
-#define LOOP_PASS(NAME, CREATE_PASS)
-#endif
-LOOP_PASS("aarch64-lit", AArch64LoopIdiomTransformPass())
-#undef LOOP_PASS
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 945ab5cf1f303..a6e26501541f3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -11,7 +11,6 @@
 
 #include "AArch64TargetMachine.h"
 #include "AArch64.h"
-#include "AArch64LoopIdiomTransform.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64MachineScheduler.h"
 #include "AArch64MacroFusion.h"
@@ -52,6 +51,7 @@
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/CFGuard.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize/LoopIdiomTransform.h"
 #include <memory>
 #include <optional>
 #include <string>
@@ -234,7 +234,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
   initializeAArch64DeadRegisterDefinitionsPass(*PR);
   initializeAArch64ExpandPseudoPass(*PR);
   initializeAArch64LoadStoreOptPass(*PR);
-  initializeAArch64LoopIdiomTransformLegacyPassPass(*PR);
   initializeAArch64MIPeepholeOptPass(*PR);
   initializeAArch64SIMDInstrOptPass(*PR);
   initializeAArch64O0PreLegalizerCombinerPass(*PR);
@@ -553,12 +552,9 @@ class AArch64PassConfig : public TargetPassConfig {
 void AArch64TargetMachine::registerPassBuilderCallbacks(
     PassBuilder &PB, bool PopulateClassToPassNames) {
 
-#define GET_PASS_REGISTRY "AArch64PassRegistry.def"
-#include "llvm/Passes/TargetPassRegistry.inc"
-
   PB.registerLateLoopOptimizationsEPCallback(
       [=](LoopPassManager &LPM, OptimizationLevel Level) {
-        LPM.addPass(AArch64LoopIdiomTransformPass());
+        LPM.addPass(LoopIdiomTransformPass());
       });
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 8fb68b06f1378..e396d9204716a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -14,7 +14,6 @@
 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETMACHINE_H
 
 #include "AArch64InstrInfo.h"
-#include "AArch64LoopIdiomTransform.h"
 #include "AArch64Subtarget.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 8e76f6c9279e7..639bc0707dff2 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -65,7 +65,6 @@ add_llvm_target(AArch64CodeGen
   AArch64ISelLowering.cpp
   AArch64InstrInfo.cpp
   AArch64LoadStoreOptimizer.cpp
-  AArch64LoopIdiomTransform.cpp
   AArch64LowerHomogeneousPrologEpilog.cpp
   AArch64MachineFunctionInfo.cpp
   AArch64MachineScheduler.cpp
@@ -112,6 +111,7 @@ add_llvm_target(AArch64CodeGen
   Target
   TargetParser
   TransformUtils
+  Vectorize
 
   ADD_TO_COMPONENT
   AArch64
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index d9f8222669cab..f380a69b5e7e0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -33,10 +33,12 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/Passes/PassBuilder.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize/LoopIdiomTransform.h"
 #include <optional>
 using namespace llvm;
 
@@ -576,6 +578,14 @@ void RISCVPassConfig::addPostRegAlloc() {
     addPass(createRISCVRedundantCopyEliminationPass());
 }
 
+void RISCVTargetMachine::registerPassBuilderCallbacks(
+    PassBuilder &PB, bool PopulateClassToPassNames) {
+  PB.registerLateLoopOptimizationsEPCallback([=](LoopPassManager &LPM,
+                                                 OptimizationLevel Level) {
+    LPM.addPass(LoopIdiomTransformPass(LoopIdiomTransformStyle::Predicated));
+  });
+}
+
 yaml::MachineFunctionInfo *
 RISCVTargetMachine::createDefaultFuncInfoYAML() const {
   return new yaml::RISCVMachineFunctionInfo();
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index 68dfb3c81f2fe..7111d5ec80e47 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -59,6 +59,8 @@ class RISCVTargetMachine : public LLVMTargetMachine {
                                 PerFunctionMIParsingState &PFS,
                                 SMDiagnostic &Error,
                                 SMRange &SourceRange) const override;
+  void registerPassBuilderCallbacks(PassBuilder &PB,
+                                    bool PopulateClassToPassNames) override;
 };
 } // namespace llvm
 
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index a4d1390875095..073779e07b513 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -397,6 +397,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   bool shouldFoldTerminatingConditionAfterLSR() const {
     return true;
   }
+
+  std::optional<unsigned> getMinPageSize() const { return 4096; }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 9674094024b9e..3ca5c404d020f 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_llvm_component_library(LLVMVectorize
   LoadStoreVectorizer.cpp
+  LoopIdiomTransform.cpp
   LoopVectorizationLegality.cpp
   LoopVectorize.cpp
   SLPVectorizer.cpp
diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Transforms/Vectorize/LoopIdiomTransform.cpp
similarity index 60%
rename from llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
rename to llvm/lib/Transforms/Vectorize/LoopIdiomTransform.cpp
index a9bd8d877fb2e..c034797a97fc3 100644
--- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopIdiomTransform.cpp
@@ -1,4 +1,4 @@
-//===- AArch64LoopIdiomTransform.cpp - Loop idiom recognition -------------===//
+//===-------- LoopIdiomTransform.cpp - Loop idiom recognition -------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -35,7 +35,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AArch64LoopIdiomTransform.h"
+#include "llvm/Transforms/Vectorize/LoopIdiomTransform.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -44,48 +45,64 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
 using namespace PatternMatch;
 
-#define DEBUG_TYPE "aarch64-loop-idiom-transform"
+#define DEBUG_TYPE "loop-idiom-transform"
 
-static cl::opt<bool>
-    DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
-               cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
-
-static cl::opt<bool> DisableByteCmp(
-    "disable-aarch64-lit-bytecmp", cl::Hidden, cl::init(false),
-    cl::desc("Proceed with AArch64 Loop Idiom Transform Pass, but do "
-             "not convert byte-compare loop(s)."));
+static cl::opt<bool> DisableAll("disable-loop-idiom-transform-all", cl::Hidden,
+                                cl::init(false),
+                                cl::desc("Disable Loop Idiom Transform Pass."));
 
-static cl::opt<bool> VerifyLoops(
-    "aarch64-lit-verify", cl::Hidden, cl::init(false),
-    cl::desc("Verify loops generated AArch64 Loop Idiom Transform Pass."));
+static cl::opt<LoopIdiomTransformStyle>
+    LITVecStyle("loop-idiom-transform-style", cl::Hidden,
+                cl::desc("The vectorization style for loop idiom transform."),
+                cl::values(clEnumValN(LoopIdiomTransformStyle::Masked, "masked",
+                                      "Use masked vector intrinsics"),
+                           clEnumValN(LoopIdiomTransformStyle::Predicated,
+                                      "predicated", "Use VP intrinsics")),
+                cl::init(LoopIdiomTransformStyle::Masked));
 
-namespace llvm {
+static cl::opt<bool>
+    DisableByteCmp("disable-loop-idiom-transform-bytecmp", cl::Hidden,
+                   cl::init(false),
+                   cl::desc("Proceed with Loop Idiom Transform Pass, but do "
+                            "not convert byte-compare loop(s)."));
 
-void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
-Pass *createAArch64LoopIdiomTransformPass();
+static cl::opt<unsigned>
+    ByteCmpVF("loop-idiom-transform-bytecmp-vf", cl::Hidden,
+              cl::desc("The vectorization factor for byte-compare patterns."),
+              cl::init(16));
 
-} // end namespace llvm
+static cl::opt<bool>
+    VerifyLoops("verify-loop-idiom-transform", cl::Hidden, cl::init(false),
+                cl::desc("Verify loops generated Loop Idiom Transform Pass."));
 
 namespace {
-
-class AArch64LoopIdiomTransform {
+class LoopIdiomTransform {
+  LoopIdiomTransformStyle VectorizeStyle;
+  unsigned ByteCompareVF;
   Loop *CurLoop = nullptr;
   DominatorTree *DT;
   LoopInfo *LI;
   const TargetTransformInfo *TTI;
   const DataLayout *DL;
 
+  // Blocks that will be used for inserting vectorized code.
+  BasicBlock *EndBlock = nullptr;
+  BasicBlock *VectorLoopPreheaderBlock = nullptr;
+  BasicBlock *VectorLoopStartBlock = nullptr;
+  BasicBlock *VectorLoopMismatchBlock = nullptr;
+  BasicBlock *VectorLoopIncBlock = nullptr;
+
 public:
-  explicit AArch64LoopIdiomTransform(DominatorTree *DT, LoopInfo *LI,
-                                     const TargetTransformInfo *TTI,
-                                     const DataLayout *DL)
-      : DT(DT), LI(LI), TTI(TTI), DL(DL) {}
+  LoopIdiomTransform(LoopIdiomTransformStyle S, unsigned VF, DominatorTree *DT,
+                     LoopInfo *LI, const TargetTransformInfo *TTI,
+                     const DataLayout *DL)
+      : VectorizeStyle(S), ByteCompareVF(VF), DT(DT), LI(LI), TTI(TTI), DL(DL) {
+  }
 
   bool run(Loop *L);
 
@@ -98,83 +115,44 @@ class AArch64LoopIdiomTransform {
                       SmallVectorImpl<BasicBlock *> &ExitBlocks);
 
   bool recognizeByteCompare();
+
   Value *expandFindMismatch(IRBuilder<> &Builder, DomTreeUpdater &DTU,
                             GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
                             Instruction *Index, Value *Start, Value *MaxLen);
+
+  Value *createMaskedFindMismatch(IRBuilder<> &Builder, GetElementPtrInst *GEPA,
+                                  GetElementPtrInst *GEPB, Value *ExtStart,
+                                  Value *ExtEnd);
+  Value *createPredicatedFindMismatch(IRBuilder<> &Builder,
+                                      GetElementPtrInst *GEPA,
+                                      GetElementPtrInst *GEPB, Value *ExtStart,
+                                      Value *ExtEnd);
+
   void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
                             PHINode *IndPhi, Value *MaxLen, Instruction *Index,
                             Value *Start, bool IncIdx, BasicBlock *FoundBB,
                             BasicBlock *EndBB);
   /// @}
 };
+} // anonymous namespace
 
-class AArch64LoopIdiomTransformLegacyPass : public LoopPass {
-public:
-  static char ID;
-
-  explicit AArch64LoopIdiomTransformLegacyPass() : LoopPass(ID) {
-    initializeAArch64LoopIdiomTransformLegacyPassPass(
-        *PassRegistry::getPassRegistry());
-  }
-
-  StringRef getPassName() const override {
-    return "Transform AArch64-specific loop idioms";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<LoopInfoWrapperPass>();
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<TargetTransformInfoWrapperPass>();
-  }
-
-  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-};
-
-bool AArch64LoopIdiomTransformLegacyPass::runOnLoop(Loop *L,
-                                                    LPPassManager &LPM) {
-
-  if (skipLoop(L))
-    return false;
-
-  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
-      *L->getHeader()->getParent());
-  return AArch64LoopIdiomTransform(
-             DT, LI, &TTI, &L->getHeader()->getModule()->getDataLayout())
-      .run(L);
-}
-
-} // end anonymous namespace
-
-char AArch64LoopIdiomTransformLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(
-    AArch64LoopIdiomTransformLegacyPass, "aarch64-lit",
-    "Transform specific loop idioms into optimized vector forms", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(
-    AArch64LoopIdiomTransformLegacyPass, "aarch64-lit",
-    "Transform specific loop idioms into optimized vector forms", false, false)
-
-Pass *llvm::createAArch64LoopIdiomTransformPass() {
-  return new AArch64LoopIdiomTransformLegacyPass();
-}
-
-PreservedAnalyses
-AArch64LoopIdiomTransformPass::run(Loop &L, LoopAnalysisManager &AM,
-                                   LoopStandardAnalysisResults &AR,
-                                   LPMUpdater &) {
+PreservedAnalyses LoopIdiomTransformPass::run(Loop &L, LoopAnalysisManager &AM,
+                                              LoopStandardAnalysisResults &AR,
+                                              LPMUpdater &) {
   if (DisableAll)
     return PreservedAnalyses::all();
 
   const auto *DL = &L.getHeader()->getModule()->getDataLayout();
 
-  AArch64LoopIdiomTransform LIT(&AR.DT, &AR.LI, &AR.TTI, DL);
+  LoopIdiomTransformStyle VecStyle = VectorizeStyle;
+  if (LITVecStyle.getNumOccurrences())
+    VecStyle = LITVecStyle;
+
+  unsigned BCVF = ByteCompareVF;
+  if (ByteCmpVF.getNumOccurrences())
+    BCVF = ByteCmpVF;
+
+  LoopIdiomTransform LIT(VecStyle, BCVF, &AR.DT, &AR.LI, &AR.TTI,...
[truncated]

github-actions · 2024-05-31T23:57:26Z

✅ With the latest revision this PR passed the C/C++ code formatter.

…iomVectorize (#94081) To facilitate sharing LoopIdiomTransform between AArch64 and RISC-V, this first patch moves AArch64LoopIdiomTransform from lib/Target/AArch64 to lib/Transforms/Vectorize and renames it to LoopIdiomVectorize. The following patch (#94082) will teach LoopIdiomVectorize how to generate VP intrinsics (in addition to the current masked vector style) in favor of RVV.

…iomVectorize (llvm#94081) To facilitate sharing LoopIdiomTransform between AArch64 and RISC-V, this first patch moves AArch64LoopIdiomTransform from lib/Target/AArch64 to lib/Transforms/Vectorize and renames it to LoopIdiomVectorize. The following patch (llvm#94082) will teach LoopIdiomVectorize how to generate VP intrinsics (in addition to the current masked vector style) in favor of RVV. Signed-off-by: Hafidz Muzakky <[email protected]>

mshockwave · 2024-06-13T22:10:21Z

This PR has been rebased on the latest predecessor patches.

mshockwave · 2024-06-13T22:11:50Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

@@ -397,6 +397,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
  bool shouldFoldTerminatingConditionAfterLSR() const {
    return true;
  }
+
+  std::optional<unsigned> getMinPageSize() const { return 4096; }


Note: LoopIdiomVectorize is the only user of this TTI hook.

mshockwave · 2024-06-21T18:09:54Z

This PR has been rebased to the latest stack of patches.

topperc · 2024-06-27T05:09:46Z

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

+
+  auto *VectorLoadType = ScalableVectorType::get(LoadType, ByteCompareVF);
+  auto *VF = ConstantInt::get(
+      I32Type, VectorLoadType->getElementCount().getKnownMinValue());


Can we just pass ByteCompareVF here? Why do we need to extract it from VectorLoadType?

topperc · 2024-06-27T05:10:03Z

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

+  auto *VF = ConstantInt::get(
+      I32Type, VectorLoadType->getElementCount().getKnownMinValue());
+  auto *IsScalable = ConstantInt::getBool(
+      Builder.getContext(), VectorLoadType->getElementCount().isScalable());


Can we just pass true here instead of extract from the type?

topperc · 2024-06-27T05:11:41Z

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

+  Value *GepOffset = VectorIndexPhi;
+
+  Value *VectorLhsGep = Builder.CreateGEP(LoadType, PtrA, GepOffset);
+  if (GEPA->isInBounds())


Can we pass GEPA->isInBounds() to the CreateGEP call above?

topperc · 2024-06-27T05:12:00Z

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

+      {VectorLhsGep, AllTrueMask, VL}, nullptr, "lhs.load");
+
+  Value *VectorRhsGep = Builder.CreateGEP(LoadType, PtrB, GepOffset);
+  if (GEPB->isInBounds())


Can we pass GEPB->isInBounds() to the CreateGEP call above?

topperc · 2024-06-27T05:15:40Z

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

+      Intrinsic::vp_cttz_elts, {ResType, VectorMatchCmp->getType()},
+      {VectorMatchCmp, /*ZeroIsPoison=*/Builder.getInt1(true), AllTrueMask,
+       VL});
+  // RISC-V refines/lowers the poison returned by vp.cttz.elts to -1.


From the perspective of IR, the intrinsic never returns -1. It will return VL if all bits are 0.

My bad, forgot to fix this.

topperc · 2024-06-27T05:17:51Z

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

+      "mismatch.cmp");
+  Value *CTZ = Builder.CreateIntrinsic(
+      Intrinsic::vp_cttz_elts, {ResType, VectorMatchCmp->getType()},
+      {VectorMatchCmp, /*ZeroIsPoison=*/Builder.getInt1(true), AllTrueMask,


Zero isn't poison here. The vp.icmp can return an all 0 mask.

It's fixed now.

preames

Minor drive by comments only.

preames · 2024-07-01T19:08:32Z

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

+  Value *InitialPred = Builder.CreateIntrinsic(
+      Intrinsic::get_active_lane_mask, {PredVTy, I64Type}, {ExtStart, ExtEnd});
+
+  Value *VecLen = Builder.CreateIntrinsic(Intrinsic::vscale, {I64Type}, {});


See IRBuilder's `CreateElementCount`.

This part of the patch is an extraction of the existing code into a function. We shouldn't make any changes here in this patch.

preames · 2024-07-01T19:12:07Z

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

+
+  auto *VectorLoadType = ScalableVectorType::get(LoadType, ByteCompareVF);
+  auto *VF = ConstantInt::get(I32Type, ByteCompareVF);
+  auto *IsScalable = ConstantInt::getBool(Builder.getContext(), true);


getTrue on either ConstantInt or IRBuilder

preames · 2024-07-01T19:17:14Z

llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll

@@ -0,0 +1,1751 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes=loop-idiom-vectorize -mtriple=riscv64-unknown-linux-gnu -loop-idiom-vectorize-style=predicated -mattr=+v -S < %s | FileCheck %s


Can you add a version which uses the non-predicated style? I'd like to know both work on RISCV.

Just added.

topperc · 2024-07-01T19:56:18Z

llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll

+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[MISMATCH_VECTOR_INDEX]]
+; CHECK-NEXT:    [[RHS_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr [[TMP21]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]])
+; CHECK-NEXT:    [[MISMATCH_CMP:%.*]] = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> [[LHS_LOAD]], <vscale x 16 x i8> [[RHS_LOAD]], metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]])
+; CHECK-NEXT:    [[FIRST:%.*]] = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> [[MISMATCH_CMP]], i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP19]])


Can we add a codegen test too? This is going to generate some complicated code that I'm not sure how to fix right now. The mismatch between the intrinsic returning EVL and RISC-V returning -1 is going to be hard to optimize.

SelectionDAG can't do it because it can't see the usage in the mismatch_vec_loop_inc block and that it only occurs when the result is not EVL.

A new codegen test is added in test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll

This is going to generate some complicated code that I'm not sure how to fix right now. The mismatch between the intrinsic returning EVL and RISC-V returning -1 is going to be hard to optimize.

SelectionDAG can't do it because it can't see the usage in the mismatch_vec_loop_inc block and that it only occurs when the result is not EVL.

Yeah, I think I tried to fix it a while ago but ended up nowhere because there were too many corner cases that made it hard to generalize.

The previous attempts only tried to handle the case where the setcc was the only user. The issue raised above is different because there are two users.

topperc · 2024-07-02T18:48:58Z

llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll

+
+; Testing VFIRST patterns related to llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll
+
+define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %n) {


RISC-V ABI should have signext attribute on all i32 arguments.

topperc · 2024-07-02T18:55:53Z

llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll

+  %mismatch_result = phi i32 [ %n, %mismatch_loop_inc ], [ %mismatch_index, %mismatch_loop ], [ %n, %mismatch_vec_loop_inc ], [ %29, %mismatch_vec_loop_found ]
+  br i1 true, label %byte.compare, label %while.cond
+
+while.cond:                                       ; preds = %mismatch_end, %while.body


Not for this patch, but this code looks a lot like the scalar mismatch_loop. Would it be possible to use the original loop in place of mismatch_loop and just insert the vector loop and checks on top of it? I think that's conceptually similar to how the normal vectorizer works.

topperc

LGTM

Teach LoopIdiomVectorize to use VP intrinsics to replace the byte compare loops. Right now only RISC-V uses LoopIdiomVectorize of this style.

…Vectorize

llvm-ci · 2024-07-03T01:54:58Z

LLVM Buildbot has detected a new failure on builder bolt-x86_64-ubuntu-shared running on bolt-worker while building llvm at step 5 "build-bolt".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/151/builds/731

Here is the relevant piece of the build log for the reference:

Step 5 (build-bolt) failure: build (failure)
...
14.770 [6/7/21] Building CXX object lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVTargetMachine.cpp.o
14.771 [6/6/22] Building CXX object lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/GISel/RISCVInstructionSelector.cpp.o
15.787 [6/5/23] Building CXX object lib/Target/AArch64/CMakeFiles/LLVMAArch64CodeGen.dir/AArch64TargetMachine.cpp.o
15.865 [5/5/24] Linking CXX shared library lib/libLLVMAArch64CodeGen.so.19.0git
15.872 [4/5/25] Creating library symlink lib/libLLVMAArch64CodeGen.so
16.005 [4/4/26] Building CXX object lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVFoldMasks.cpp.o
16.759 [4/3/27] Building CXX object lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/GISel/RISCVPostLegalizerCombiner.cpp.o
19.365 [4/2/28] Building CXX object lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVISelDAGToDAG.cpp.o
25.503 [4/1/29] Building CXX object lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVISelLowering.cpp.o
25.554 [3/1/30] Linking CXX shared library lib/libLLVMRISCVCodeGen.so.19.0git
FAILED: lib/libLLVMRISCVCodeGen.so.19.0git 
: && /usr/bin/c++ -fPIC -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -fno-lifetime-dse -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wno-uninitialized -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG  -Wl,-z,defs -Wl,-z,nodelete -fuse-ld=lld -Wl,--color-diagnostics   -Wl,--gc-sections -shared -Wl,-soname,libLLVMRISCVCodeGen.so.19.0git -o lib/libLLVMRISCVCodeGen.so.19.0git lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVAsmPrinter.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVCodeGenPrepare.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVDeadRegisterDefinitions.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVMakeCompressible.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVExpandAtomicPseudoInsts.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVExpandPseudoInsts.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVFoldMasks.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVFrameLowering.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVGatherScatterLowering.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVInsertVSETVLI.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVInsertReadWriteCSR.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVInsertWriteVXRM.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVInstrInfo.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVISelDAGToDAG.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVISelLowering.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVMachineFunctionInfo.cpp.o 
lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVMergeBaseOffset.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVOptWInstrs.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVPostRAExpandPseudoInsts.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVRedundantCopyElimination.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVMoveMerger.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVPushPopOptimizer.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVRegisterInfo.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVSubtarget.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVTargetMachine.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVTargetObjectFile.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVTargetTransformInfo.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/GISel/RISCVCallLowering.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/GISel/RISCVInstructionSelector.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/GISel/RISCVLegalizerInfo.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/GISel/RISCVPostLegalizerCombiner.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/GISel/RISCVO0PreLegalizerCombiner.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/GISel/RISCVPreLegalizerCombiner.cpp.o lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/GISel/RISCVRegisterBankInfo.cpp.o  -Wl,-rpath,"\$ORIGIN/../lib:/home/worker/bolt-worker2/bolt-x86_64-ubuntu-shared/build/lib:"  lib/libLLVMAsmPrinter.so.19.0git  lib/libLLVMGlobalISel.so.19.0git  lib/libLLVMipo.so.19.0git  lib/libLLVMRISCVDesc.so.19.0git  lib/libLLVMRISCVInfo.so.19.0git  lib/libLLVMSelectionDAG.so.19.0git  lib/libLLVMCodeGen.so.19.0git  lib/libLLVMCodeGenTypes.so.19.0git  lib/libLLVMTarget.so.19.0git  lib/libLLVMScalarOpts.so.19.0git  lib/libLLVMTransformUtils.so.19.0git  lib/libLLVMAnalysis.so.19.0git  lib/libLLVMCore.so.19.0git  lib/libLLVMMC.so.19.0git  
lib/libLLVMTargetParser.so.19.0git  lib/libLLVMSupport.so.19.0git  -Wl,-rpath-link,/home/worker/bolt-worker2/bolt-x86_64-ubuntu-shared/build/lib && :
ld.lld: error: undefined symbol: llvm::LoopIdiomVectorizePass::run(llvm::Loop&, llvm::AnalysisManager<llvm::Loop, llvm::LoopStandardAnalysisResults&>&, llvm::LoopStandardAnalysisResults&, llvm::LPMUpdater&)
>>> referenced by RISCVTargetMachine.cpp
>>>               lib/Target/RISCV/CMakeFiles/LLVMRISCVCodeGen.dir/RISCVTargetMachine.cpp.o:(llvm::detail::PassModel<llvm::Loop, llvm::LoopIdiomVectorizePass, llvm::AnalysisManager<llvm::Loop, llvm::LoopStandardAnalysisResults&>, llvm::LoopStandardAnalysisResults&, llvm::LPMUpdater&>::run(llvm::Loop&, llvm::AnalysisManager<llvm::Loop, llvm::LoopStandardAnalysisResults&>&, llvm::LoopStandardAnalysisResults&, llvm::LPMUpdater&))
collect2: error: ld returned 1 exit status
ninja: build stopped: subcommand failed.

Caused by #94082.

…ze (llvm#94082) Teach LoopIdiomVectorize to use VP intrinsics to replace the byte compare loops. Right now only RISC-V uses LoopIdiomVectorize of this style.

Caused by llvm#94082.

…ze (llvm#94082) Teach LoopIdiomVectorize to use VP intrinsics to replace the byte compare loops. Right now only RISC-V uses LoopIdiomVectorize of this style.

Caused by llvm#94082.

mshockwave requested review from preames, alexey-bataev, topperc and david-arm May 31, 2024 23:54

llvmbot added backend:AArch64 backend:RISC-V vectorizers llvm:transforms labels May 31, 2024

mshockwave mentioned this pull request May 31, 2024

[AArch64][LoopIdiom] Generalize AArch64LoopIdiomTransform into LoopIdiomVectorize #94081

Merged

mshockwave mentioned this pull request May 31, 2024

[RISCV] Introduce the RISCVLoopIdiomRecognizePass #92441

Closed

mshockwave force-pushed the patch/riscv-lit branch from 764aac0 to 47050e6 Compare June 13, 2024 22:08

mshockwave changed the title ~~[RISCV][LoopIdiom] Support VP intrinsics in LoopIdiomTransform~~ [RISCV][LoopIdiomVectorize] Support VP intrinsics in LoopIdiomVectorize Jun 13, 2024

mshockwave commented Jun 13, 2024

View reviewed changes

mshockwave force-pushed the patch/riscv-lit branch from d90f0d8 to f507db4 Compare June 21, 2024 18:08

topperc reviewed Jun 27, 2024

View reviewed changes

preames reviewed Jul 1, 2024

View reviewed changes

topperc reviewed Jul 1, 2024

View reviewed changes

topperc reviewed Jul 2, 2024

View reviewed changes

topperc approved these changes Jul 2, 2024

View reviewed changes

mshockwave added 4 commits July 2, 2024 18:23

[RISCV][LoopIdiomVectorize] Support VP intrinsics in LoopIdiomVectorize

1e15813

Teach LoopIdiomVectorize to use VP intrinsics to replace the byte compare loops. Right now only RISC-V uses LoopIdiomVectorize of this style.

fixup! [RISCV][LoopIdiomVectorize] Support VP intrinsics in LoopIdiom…

c3700ca

…Vectorize

Address review comments

3babd98

Add more tests and address review comments

49d491d

mshockwave added 2 commits July 2, 2024 18:23

Fix signext argument attribute in codegen test

f4fc8b8

Fix the TargetMachine::registerPassBuilder hook

d4fb4c0

mshockwave force-pushed the patch/riscv-lit branch from f6250ba to d4fb4c0 Compare July 3, 2024 01:44

mshockwave merged commit 8b55d34 into llvm:main Jul 3, 2024
4 of 6 checks passed

mshockwave deleted the patch/riscv-lit branch July 3, 2024 01:48

mshockwave added a commit that referenced this pull request Jul 3, 2024

[RISCV] Add the missing dependency on Vectorize

2360c41

Caused by #94082.

lravenclaw pushed a commit to lravenclaw/llvm-project that referenced this pull request Jul 3, 2024

[RISCV] Add the missing dependency on Vectorize

cee59bc

Caused by llvm#94082.

kbluck pushed a commit to kbluck/llvm-project that referenced this pull request Jul 6, 2024

[RISCV] Add the missing dependency on Vectorize

0f89a52

Caused by llvm#94082.

		@@ -0,0 +1,1751 @@
		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
		; RUN: opt -passes=loop-idiom-vectorize -mtriple=riscv64-unknown-linux-gnu -loop-idiom-vectorize-style=predicated -mattr=+v -S < %s | FileCheck %s


		; Testing VFIRST patterns related to llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll

		define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %n) {

[RISCV][LoopIdiomVectorize] Support VP intrinsics in LoopIdiomVectorize #94082

[RISCV][LoopIdiomVectorize] Support VP intrinsics in LoopIdiomVectorize #94082

Uh oh!

Conversation

mshockwave commented May 31, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 31, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented May 31, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

mshockwave commented Jun 13, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

mshockwave commented Jun 21, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

preames left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

topperc Jul 1, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

topperc Jul 1, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

topperc Jul 1, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

topperc Jul 2, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

topperc left a comment

Choose a reason for hiding this comment

mshockwave commented May 31, 2024 •

edited

Loading

llvmbot commented May 31, 2024 •

edited

Loading

github-actions bot commented May 31, 2024 •

edited

Loading

topperc Jul 1, 2024 •

edited

Loading

topperc Jul 1, 2024 •

edited

Loading

topperc Jul 1, 2024 •

edited

Loading

topperc Jul 2, 2024 •

edited

Loading