Skip to content

[X86] canonicalizeShuffleWithOp - add handling for X86ISD::VPERMV nodes #127625

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 18, 2025

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Feb 18, 2025

Handle different src/mask operand ordering

@llvmbot
Copy link
Member

llvmbot commented Feb 18, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Handle different src/mask operand ordering


Full diff: https://github.com/llvm/llvm-project/pull/127625.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+17-10)
  • (modified) llvm/test/CodeGen/X86/vector-partial-undef.ll (+1-1)
  • (modified) llvm/test/CodeGen/X86/vector-shuffle-combining.ll (+1-1)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 386d56dcda9de..696bb14292dd0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41629,23 +41629,28 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
   case X86ISD::PSHUFD:
   case X86ISD::PSHUFHW:
   case X86ISD::PSHUFLW:
+  case X86ISD::VPERMV:
   case X86ISD::VPERMI:
   case X86ISD::VPERMILPI: {
-    if (N.getOperand(0).getValueType() == ShuffleVT &&
-        N->isOnlyUserOf(N.getOperand(0).getNode())) {
-      SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+    unsigned SrcIdx = Opc == X86ISD::VPERMV ? 1 : 0;
+    if (N.getOperand(SrcIdx).getValueType() == ShuffleVT &&
+        N->isOnlyUserOf(N.getOperand(SrcIdx).getNode())) {
+      SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(SrcIdx));
       unsigned SrcOpcode = N0.getOpcode();
       EVT OpVT = N0.getValueType();
       if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
         SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
         SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
-        bool FoldShuf = Opc != X86ISD::VPERMI;
+        bool FoldShuf = Opc != X86ISD::VPERMI && Opc != X86ISD::VPERMV;
         if (IsMergeableWithShuffle(Op00, FoldShuf) ||
             IsMergeableWithShuffle(Op01, FoldShuf)) {
           SDValue LHS, RHS;
           Op00 = DAG.getBitcast(ShuffleVT, Op00);
           Op01 = DAG.getBitcast(ShuffleVT, Op01);
-          if (N.getNumOperands() == 2) {
+          if (Opc == X86ISD::VPERMV) {
+            LHS = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Op00);
+            RHS = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Op01);
+          } else if (N.getNumOperands() == 2) {
             LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1));
             RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, N.getOperand(1));
           } else {
@@ -41661,11 +41666,13 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
       if (SrcOpcode == ISD::SINT_TO_FP && IsSafeToMoveShuffle(N0, SrcOpcode) &&
           OpVT.getScalarSizeInBits() ==
               N0.getOperand(0).getScalarValueSizeInBits()) {
-        SDValue Op00 = DAG.getBitcast(ShuffleVT, N0.getOperand(0));
-        SDValue Res =
-            N.getNumOperands() == 2
-                ? DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1))
-                : DAG.getNode(Opc, DL, ShuffleVT, Op00);
+        SDValue Res = DAG.getBitcast(ShuffleVT, N0.getOperand(0));
+        if (Opc == X86ISD::VPERMV)
+          Res = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Res);
+        else if (N.getNumOperands() == 2)
+          Res = DAG.getNode(Opc, DL, ShuffleVT, Res, N.getOperand(1));
+        else
+          Res = DAG.getNode(Opc, DL, ShuffleVT, Res);
         Res = DAG.getBitcast(N0.getOperand(0).getValueType(), Res);
         return DAG.getBitcast(ShuffleVT, DAG.getNode(SrcOpcode, DL, OpVT, Res));
       }
diff --git a/llvm/test/CodeGen/X86/vector-partial-undef.ll b/llvm/test/CodeGen/X86/vector-partial-undef.ll
index fd41fd53e3be1..4753dba2d468f 100644
--- a/llvm/test/CodeGen/X86/vector-partial-undef.ll
+++ b/llvm/test/CodeGen/X86/vector-partial-undef.ll
@@ -151,9 +151,9 @@ define <8 x i32> @xor_undef_elts_alt(<4 x i32> %x) {
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
 ; AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef>
   %bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 07c770abc65d6..05b0a7c10b4e1 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2469,10 +2469,10 @@ define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) {
 ;
 ; AVX2-FAST-ALL-LABEL: combine_unneeded_subvector1:
 ; AVX2-FAST-ALL:       # %bb.0:
-; AVX2-FAST-ALL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX2-FAST-ALL-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
 ; AVX2-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
 ; AVX2-FAST-ALL-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-FAST-ALL-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX2-FAST-ALL-NEXT:    retq
 ;
 ; AVX2-FAST-PERLANE-LABEL: combine_unneeded_subvector1:

@RKSimon RKSimon merged commit 10fc06a into llvm:main Feb 18, 2025
10 checks passed
@RKSimon RKSimon deleted the x86-combine-shuffle-op-vpermv branch February 18, 2025 14:42
@llvm-ci
Copy link
Collaborator

llvm-ci commented Feb 18, 2025

LLVM Buildbot has detected a new failure on builder lldb-arm-ubuntu running on linaro-lldb-arm-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/11645

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: tools/lldb-dap/exception/cpp/TestDAP_exception_cpp.py (1153 of 2904)
UNSUPPORTED: lldb-api :: tools/lldb-dap/extendedStackTrace/TestDAP_extendedStackTrace.py (1154 of 2904)
PASS: lldb-api :: tools/lldb-dap/disconnect/TestDAP_disconnect.py (1155 of 2904)
PASS: lldb-api :: tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py (1156 of 2904)
PASS: lldb-api :: tools/lldb-dap/locations/TestDAP_locations.py (1157 of 2904)
PASS: lldb-api :: tools/lldb-dap/memory/TestDAP_memory.py (1158 of 2904)
PASS: lldb-api :: tools/lldb-dap/optimized/TestDAP_optimized.py (1159 of 2904)
PASS: lldb-api :: tools/lldb-dap/evaluate/TestDAP_evaluate.py (1160 of 2904)
PASS: lldb-api :: tools/lldb-dap/module/TestDAP_module.py (1161 of 2904)
PASS: lldb-api :: tools/lldb-dap/launch/TestDAP_launch.py (1162 of 2904)
FAIL: lldb-api :: tools/lldb-dap/output/TestDAP_output.py (1163 of 2904)
******************** TEST 'lldb-api :: tools/lldb-dap/output/TestDAP_output.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-arm-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin --arch armv8l --build-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./lib /home/tcwg-buildbot/worker/lldb-arm-ubuntu/llvm-project/lldb/test/API/tools/lldb-dap/output -p TestDAP_output.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 21.0.0git (https://github.com/llvm/llvm-project.git revision 10fc06a3215ac11220cc8e13d65ef0aa4c4089c9)
  clang revision 10fc06a3215ac11220cc8e13d65ef0aa4c4089c9
  llvm revision 10fc06a3215ac11220cc8e13d65ef0aa4c4089c9
Skipping the following test categories: ['libc++', 'dsym', 'gmodules', 'debugserver', 'objc']
========= DEBUG ADAPTER PROTOCOL LOGS =========
1739891356.708014965 --> 
Content-Length: 344

{
  "arguments": {
    "adapterID": "lldb-native",
    "clientID": "vscode",
    "columnsStartAt1": true,
    "linesStartAt1": true,
    "locale": "en-us",
    "pathFormat": "path",
    "sourceInitFile": false,
    "supportsRunInTerminalRequest": true,
    "supportsStartDebuggingRequest": true,
    "supportsVariablePaging": true,
    "supportsVariableType": true
  },
  "command": "initialize",
  "seq": 1,
  "type": "request"
}
1739891356.710498095 <-- 
Content-Length: 1631


wldfngrs pushed a commit to wldfngrs/llvm-project that referenced this pull request Feb 19, 2025
…es (llvm#127625)

Handle different src/mask operand ordering of X86ISD::VPERMV nodes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants