From 45ad537e65e9b1482ebdffeec2e4ab02a669d85f Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Thu, 31 Jul 2025 14:11:24 -0700 Subject: [PATCH 1/6] [WebAssembly] Add simd support for memcmp --- .../WebAssembly/WebAssemblyISelLowering.cpp | 60 ++++++++++++++++++- .../WebAssemblyTargetTransformInfo.cpp | 3 +- .../test/CodeGen/WebAssembly/memcmp-expand.ll | 22 +++---- 3 files changed, 68 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index cd434f7a331e4..ee16f7bf9133d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3383,8 +3383,61 @@ static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) { return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); } +static SDValue +combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + const WebAssemblySubtarget *Subtarget) { + + SDLoc DL(N); + SDValue X = N->getOperand(0); + SDValue Y = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT OpVT = X.getValueType(); + + ISD::CondCode CC = cast(N->getOperand(2))->get(); + SelectionDAG &DAG = DCI.DAG; + // We're looking for an oversized integer equality comparison. + if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 || + !Subtarget->hasSIMD128()) + return SDValue(); + + // Don't perform this combine if constructing the vector will be expensive. + auto IsVectorBitCastCheap = [](SDValue X) { + X = peekThroughBitcasts(X); + return isa(X) || X.getOpcode() == ISD::LOAD; + }; + + if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) + return SDValue(); + + // TODO: Not sure what's the purpose of this? 
I'm keeping here since RISCV has + // it + if (DCI.DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat)) + return SDValue(); + + unsigned OpSize = OpVT.getSizeInBits(); + unsigned VecSize = OpSize / 8; + + EVT VecVT = EVT::getVectorVT(*DCI.DAG.getContext(), MVT::i8, VecSize); + EVT CmpVT = EVT::getVectorVT(*DCI.DAG.getContext(), MVT::i8, VecSize); + + SDValue VecX = DAG.getBitcast(VecVT, X); + SDValue VecY = DAG.getBitcast(VecVT, Y); + + SDValue Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, CC); + + SDValue AllTrue = DAG.getZExtOrTrunc( + DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, + {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32), Cmp}), + DL, MVT::i1); + + return DAG.getSetCC(DL, VT, AllTrue, DAG.getConstant(0, DL, MVT::i1), CC); +} + static SDValue performSETCCCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const WebAssemblySubtarget *Subtarget) { if (!DCI.isBeforeLegalize()) return SDValue(); @@ -3392,6 +3445,9 @@ static SDValue performSETCCCombine(SDNode *N, if (!VT.isScalarInteger()) return SDValue(); + if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget)) + return V; + SDValue LHS = N->getOperand(0); if (LHS->getOpcode() != ISD::BITCAST) return SDValue(); @@ -3532,7 +3588,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, case ISD::BITCAST: return performBitcastCombine(N, DCI); case ISD::SETCC: - return performSETCCCombine(N, DCI); + return performSETCCCombine(N, DCI, Subtarget); case ISD::VECTOR_SHUFFLE: return performVECTOR_SHUFFLECombine(N, DCI); case ISD::SIGN_EXTEND: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 52e706514226b..08fb7586d215e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -147,7 +147,8 @@ 
WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { Options.AllowOverlappingLoads = true; - // TODO: Teach WebAssembly backend about load v128. + if (ST->hasSIMD128()) + Options.LoadSizes.push_back(16); Options.LoadSizes.append({8, 4, 2, 1}); Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index 8030438645f82..c6df6b50693fa 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -132,19 +132,13 @@ define i1 @memcmp_expand_16(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_16: ; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0 -; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0 -; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6 -; CHECK-NEXT: i32.const $push0=, 8 -; CHECK-NEXT: i32.add $push3=, $0, $pop0 -; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0 -; CHECK-NEXT: i32.const $push11=, 8 -; CHECK-NEXT: i32.add $push1=, $1, $pop11 -; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0 -; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2 -; CHECK-NEXT: i64.or $push9=, $pop8, $pop5 -; CHECK-NEXT: i64.eqz $push10=, $pop9 -; CHECK-NEXT: return $pop10 +; CHECK-NEXT: v128.load $push1=, 0($0):p2align=0 +; CHECK-NEXT: v128.load $push0=, 0($1):p2align=0 +; CHECK-NEXT: i8x16.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: i8x16.all_true $push3=, $pop2 +; CHECK-NEXT: i32.const $push4=, 1 +; CHECK-NEXT: i32.xor 
$push5=, $pop3, $pop4 +; CHECK-NEXT: return $pop5 %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) %res = icmp eq i32 %cmp_16, 0 ret i1 %res From 1ae947fdb4cc4914078343fbb5dd3e1c99f80444 Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Tue, 5 Aug 2025 14:24:58 -0700 Subject: [PATCH 2/6] Addresses PR reviews --- .../WebAssembly/WebAssemblyISelLowering.cpp | 24 +++++-------------- .../test/CodeGen/WebAssembly/memcmp-expand.ll | 5 ++-- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index ee16f7bf9133d..b29c54ebc0b0b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3409,30 +3409,18 @@ combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) return SDValue(); - // TODO: Not sure what's the purpose of this? 
I'm keeping here since RISCV has - // it - if (DCI.DAG.getMachineFunction().getFunction().hasFnAttribute( - Attribute::NoImplicitFloat)) - return SDValue(); - - unsigned OpSize = OpVT.getSizeInBits(); - unsigned VecSize = OpSize / 8; - - EVT VecVT = EVT::getVectorVT(*DCI.DAG.getContext(), MVT::i8, VecSize); - EVT CmpVT = EVT::getVectorVT(*DCI.DAG.getContext(), MVT::i8, VecSize); + EVT VecVT = MVT::v16i8; SDValue VecX = DAG.getBitcast(VecVT, X); SDValue VecY = DAG.getBitcast(VecVT, Y); - SDValue Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, CC); + SDValue Cmp = DAG.getSetCC(DL, VecVT, VecX, VecY, CC); - SDValue AllTrue = DAG.getZExtOrTrunc( - DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32), Cmp}), - DL, MVT::i1); + SDValue AllTrue = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, + {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32), Cmp}); - return DAG.getSetCC(DL, VT, AllTrue, DAG.getConstant(0, DL, MVT::i1), CC); + return DAG.getSetCC(DL, VT, AllTrue, DAG.getConstant(0, DL, MVT::i32), CC); } static SDValue performSETCCCombine(SDNode *N, diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index c6df6b50693fa..00e87917aa173 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -136,9 +136,8 @@ define i1 @memcmp_expand_16(ptr %a, ptr %b) { ; CHECK-NEXT: v128.load $push0=, 0($1):p2align=0 ; CHECK-NEXT: i8x16.eq $push2=, $pop1, $pop0 ; CHECK-NEXT: i8x16.all_true $push3=, $pop2 -; CHECK-NEXT: i32.const $push4=, 1 -; CHECK-NEXT: i32.xor $push5=, $pop3, $pop4 -; CHECK-NEXT: return $pop5 +; CHECK-NEXT: i32.eqz $push4=, $pop3 +; CHECK-NEXT: return $pop4 %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) %res = icmp eq i32 %cmp_16, 0 ret i1 %res From 1934453a9f3bbf1e871204723d57c5bb1813cddc Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Thu, 7 Aug 2025 11:27:04 -0700 Subject: [PATCH 
3/6] Addresses shortcomings, add more tests --- .../WebAssembly/WebAssemblyISelLowering.cpp | 8 +- .../test/CodeGen/WebAssembly/memcmp-expand.ll | 1 - llvm/test/CodeGen/WebAssembly/simd-setcc.ll | 89 +++++++++++++++++++ 3 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/simd-setcc.ll diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index b29c54ebc0b0b..971b1b85634e8 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -19,6 +19,7 @@ #include "WebAssemblyTargetMachine.h" #include "WebAssemblyUtilities.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -3394,7 +3395,13 @@ combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, EVT OpVT = X.getValueType(); ISD::CondCode CC = cast(N->getOperand(2))->get(); + if (!isIntEqualitySetCC(CC)) + return SDValue(); + SelectionDAG &DAG = DCI.DAG; + if (DCI.DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat)) + return SDValue(); // We're looking for an oversized integer equality comparison. 
if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 || !Subtarget->hasSIMD128()) @@ -3413,7 +3420,6 @@ combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SDValue VecX = DAG.getBitcast(VecVT, X); SDValue VecY = DAG.getBitcast(VecVT, Y); - SDValue Cmp = DAG.getSetCC(DL, VecVT, VecX, VecY, CC); SDValue AllTrue = DAG.getNode( diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index 00e87917aa173..9c520ff9a4db7 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -127,7 +127,6 @@ define i1 @memcmp_expand_8(ptr %a, ptr %b) { ret i1 %res } -; TODO: Should be using a single load i64x2 or equivalent in bitsizes define i1 @memcmp_expand_16(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_16: ; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll new file mode 100644 index 0000000000000..eb89be344e13b --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +declare i32 @memcmp(ptr, ptr, i32) + +define i1 @setcc_load(ptr %a, ptr %b) { +; CHECK-LABEL: setcc_load: +; CHECK: .functype setcc_load (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.load $push1=, 0($0):p2align=0 +; CHECK-NEXT: v128.load $push0=, 0($1):p2align=0 +; CHECK-NEXT: i8x16.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: i8x16.all_true $push3=, $pop2 +; CHECK-NEXT: i32.eqz $push4=, $pop3 +; CHECK-NEXT: return $pop4 + %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) + %res = icmp eq i32 %cmp_16, 0 + ret i1 %res +} + +; INFO: Negative 
test: noimplicitfloat disables simd +define i1 @setcc_load_should_not_vectorize(ptr %a, ptr %b) noimplicitfloat { +; CHECK-LABEL: setcc_load_should_not_vectorize: +; CHECK: .functype setcc_load_should_not_vectorize (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push4=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push3=, 0($1):p2align=0 +; CHECK-NEXT: i64.xor $push5=, $pop4, $pop3 +; CHECK-NEXT: i64.load $push1=, 8($0):p2align=0 +; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0 +; CHECK-NEXT: i64.xor $push2=, $pop1, $pop0 +; CHECK-NEXT: i64.or $push6=, $pop5, $pop2 +; CHECK-NEXT: i64.eqz $push7=, $pop6 +; CHECK-NEXT: return $pop7 + %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) + %res = icmp eq i32 %cmp_16, 0 + ret i1 %res +} + +define i1 @setcc_eq_const_i128(ptr %ptr) { +; CHECK-LABEL: setcc_eq_const_i128: +; CHECK: .functype setcc_eq_const_i128 (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.load $push0=, 0($0) +; CHECK-NEXT: v128.const $push1=, 6, 0 +; CHECK-NEXT: i8x16.eq $push2=, $pop0, $pop1 +; CHECK-NEXT: i8x16.all_true $push3=, $pop2 +; CHECK-NEXT: i32.eqz $push4=, $pop3 +; CHECK-NEXT: return $pop4 + %l = load i128, ptr %ptr + %res = icmp eq i128 %l, 6 + ret i1 %res +} + +define i1 @setcc_ne_const_i128(ptr %ptr) { +; CHECK-LABEL: setcc_ne_const_i128: +; CHECK: .functype setcc_ne_const_i128 (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.load $push0=, 0($0) +; CHECK-NEXT: v128.const $push1=, 16, 0 +; CHECK-NEXT: i8x16.ne $push2=, $pop0, $pop1 +; CHECK-NEXT: i8x16.all_true $push3=, $pop2 +; CHECK-NEXT: return $pop3 + %l = load i128, ptr %ptr + %res = icmp ne i128 %l, 16 + ret i1 %res +} + +; INFO: Negative test: only eq and ne work +define i1 @setcc_slt_const_i128(ptr %ptr) { +; CHECK-LABEL: setcc_slt_const_i128: +; CHECK: .functype setcc_slt_const_i128 (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push2=, 0($0) +; CHECK-NEXT: i64.const $push3=, 25 +; CHECK-NEXT: i64.lt_u $push4=, $pop2, $pop3 +; 
CHECK-NEXT: i64.load $push8=, 8($0) +; CHECK-NEXT: local.tee $push7=, $1=, $pop8 +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.lt_s $push1=, $pop7, $pop0 +; CHECK-NEXT: i64.eqz $push5=, $1 +; CHECK-NEXT: i32.select $push6=, $pop4, $pop1, $pop5 +; CHECK-NEXT: return $pop6 + %l = load i128, ptr %ptr + %res = icmp slt i128 %l, 25 + ret i1 %res +} From 0b7a92fdb62a96dec56bc94d2aafda9458c00696 Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Thu, 7 Aug 2025 12:13:41 -0700 Subject: [PATCH 4/6] Remove redundant include --- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 971b1b85634e8..f8272dceaf455 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -19,7 +19,6 @@ #include "WebAssemblyTargetMachine.h" #include "WebAssemblyUtilities.h" #include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" From a180a57cf63eeba685b082cc19e1d582cb5bcc35 Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Mon, 11 Aug 2025 09:01:03 -0700 Subject: [PATCH 5/6] Fix shortcomings in setcc ne --- .../WebAssembly/WebAssemblyISelLowering.cpp | 27 ++++++++++--------- llvm/test/CodeGen/WebAssembly/simd-setcc.ll | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index f8272dceaf455..920563f5d0b55 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3393,10 +3393,6 @@ combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, EVT VT = 
N->getValueType(0); EVT OpVT = X.getValueType(); - ISD::CondCode CC = cast(N->getOperand(2))->get(); - if (!isIntEqualitySetCC(CC)) - return SDValue(); - SelectionDAG &DAG = DCI.DAG; if (DCI.DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat)) @@ -3406,6 +3402,10 @@ combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, !Subtarget->hasSIMD128()) return SDValue(); + ISD::CondCode CC = cast(N->getOperand(2))->get(); + if (!isIntEqualitySetCC(CC)) + return SDValue(); + // Don't perform this combine if constructing the vector will be expensive. auto IsVectorBitCastCheap = [](SDValue X) { X = peekThroughBitcasts(X); @@ -3415,17 +3415,18 @@ combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) return SDValue(); - EVT VecVT = MVT::v16i8; + SDValue VecX = DAG.getBitcast(MVT::v16i8, X); + SDValue VecY = DAG.getBitcast(MVT::v16i8, Y); + SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC); - SDValue VecX = DAG.getBitcast(VecVT, X); - SDValue VecY = DAG.getBitcast(VecVT, Y); - SDValue Cmp = DAG.getSetCC(DL, VecVT, VecX, VecY, CC); - - SDValue AllTrue = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - {DAG.getConstant(Intrinsic::wasm_alltrue, DL, MVT::i32), Cmp}); + SDValue Intr = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, + {DAG.getConstant(CC == ISD::SETEQ ? 
Intrinsic::wasm_alltrue + : Intrinsic::wasm_anytrue, + DL, MVT::i32), + Cmp}); - return DAG.getSetCC(DL, VT, AllTrue, DAG.getConstant(0, DL, MVT::i32), CC); + return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32), CC); } static SDValue performSETCCCombine(SDNode *N, diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll index eb89be344e13b..20d82d549882c 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll @@ -61,7 +61,7 @@ define i1 @setcc_ne_const_i128(ptr %ptr) { ; CHECK-NEXT: v128.load $push0=, 0($0) ; CHECK-NEXT: v128.const $push1=, 16, 0 ; CHECK-NEXT: i8x16.ne $push2=, $pop0, $pop1 -; CHECK-NEXT: i8x16.all_true $push3=, $pop2 +; CHECK-NEXT: v128.any_true $push3=, $pop2 ; CHECK-NEXT: return $pop3 %l = load i128, ptr %ptr %res = icmp ne i128 %l, 16 From 9d2b041c6d4d41d18f89f19f54c6fcef68c5e106 Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Mon, 11 Aug 2025 11:01:40 -0700 Subject: [PATCH 6/6] Addresses nits --- .../lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 10 +++++----- llvm/test/CodeGen/WebAssembly/simd-setcc.ll | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 920563f5d0b55..61ef2f0c2fde0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3383,6 +3383,8 @@ static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) { return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); } +/// Try to convert an i128 comparison to a v16i8 comparison before type +/// legalization splits it up into chunks static SDValue combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const WebAssemblySubtarget *Subtarget) { @@ -3397,13 +3399,11 @@ combineVectorSizedSetCCEquality(SDNode *N, 
TargetLowering::DAGCombinerInfo &DCI, if (DCI.DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat)) return SDValue(); - // We're looking for an oversized integer equality comparison. - if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 || - !Subtarget->hasSIMD128()) - return SDValue(); ISD::CondCode CC = cast(N->getOperand(2))->get(); - if (!isIntEqualitySetCC(CC)) + // We're looking for an oversized integer equality comparison with SIMD + if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 || + !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC)) return SDValue(); // Don't perform this combine if constructing the vector will be expensive. diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll index 20d82d549882c..9efa63ab9bbbd 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s target triple = "wasm32-unknown-unknown"