Skip to content

[RISCV] Check isFixedLengthVector before calling getVectorNumElements in getSingleShuffleSrc. #125455

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4510,7 +4510,8 @@ static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,

// Src needs to have twice the number of elements.
unsigned NumElts = VT.getVectorNumElements();
if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
if (!Src.getValueType().isFixedLengthVector() ||
Src.getValueType().getVectorNumElements() != (NumElts * 2))
return SDValue();

// The extracts must extract the two halves of the source.
Expand Down
118 changes: 118 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/pr125306.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v | FileCheck %s

; Test for an "Invalid size request on a scalable vector". Attempts to reduce
; the test further were not successful. The failure requires a shuffle with 2
; scalable->fixed extracts from the same vector. 0 is the only valid index for a
; scalable->fixed extract so the 2 extracts must be the same. Shuffles with the
; same source are aggressively canonicalized to a unary shuffle so it requires
; the extracts to become identical through other optimizations without the
; shuffle being canonicalized before it is lowered.

; Reproducer body: issues a series of zero stores, builds three
; load/insertelement/smin chains that all read absolute addresses 29916, 28484
; and 28368, and returns a two-element shuffle of the last two chains.
define <2 x i32> @main(ptr %0) {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vse32.v v8, (zero)
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: sw zero, 80(zero)
; CHECK-NEXT: lui a1, 7
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: lui a5, 2
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v10, (a2)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: li a2, 24
; CHECK-NEXT: sh zero, -392(a3)
; CHECK-NEXT: sh zero, 534(a3)
; CHECK-NEXT: sh zero, 1460(a3)
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vse32.v v10, (a2)
; CHECK-NEXT: li a2, 40
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: sh zero, -1710(a5)
; CHECK-NEXT: sh zero, -784(a5)
; CHECK-NEXT: sh zero, 142(a5)
; CHECK-NEXT: lw a5, -304(a1)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vadd.vi v9, v11, -1
; CHECK-NEXT: vse32.v v10, (a3)
; CHECK-NEXT: sh zero, 0(a0)
; CHECK-NEXT: lw a0, -188(a1)
; CHECK-NEXT: vse32.v v10, (a2)
; CHECK-NEXT: lw a2, -188(a1)
; CHECK-NEXT: lw a3, 1244(a1)
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lw a0, 1244(a1)
; CHECK-NEXT: lw a1, -304(a1)
; CHECK-NEXT: vmv.v.x v10, a3
; CHECK-NEXT: vmv.v.x v11, a5
; CHECK-NEXT: vslide1down.vx v8, v8, zero
; CHECK-NEXT: vslide1down.vx v10, v10, zero
; CHECK-NEXT: vmin.vv v8, v10, v8
; CHECK-NEXT: vmv.v.x v10, a0
; CHECK-NEXT: vslide1down.vx v11, v11, zero
; CHECK-NEXT: vmin.vx v10, v10, a2
; CHECK-NEXT: vmin.vx v10, v10, a1
; CHECK-NEXT: vmin.vv v11, v8, v11
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: vand.vv v9, v11, v9
; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: vse32.v v9, (a4)
; CHECK-NEXT: sh zero, 0(zero)
; CHECK-NEXT: ret
entry:
store <16 x i32> zeroinitializer, ptr null, align 4
store <8 x i32> zeroinitializer, ptr %0, align 4
store <4 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 64), align 4
store i32 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 80), align 4
; First smin chain: each loaded scalar is inserted into lane 0 of an all-zero
; vector, so lanes 1-3 of every operand are zero.
%1 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 29916), align 4
%broadcast.splatinsert53 = insertelement <4 x i32> zeroinitializer, i32 %1, i64 0
%2 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28484), align 4
%broadcast.splatinsert55 = insertelement <4 x i32> zeroinitializer, i32 %2, i64 0
%3 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splatinsert53, <4 x i32> %broadcast.splatinsert55)
%4 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28368), align 4
%broadcast.splatinsert57 = insertelement <4 x i32> zeroinitializer, i32 %4, i64 0
%5 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %3, <4 x i32> %broadcast.splatinsert57)
store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 3704), align 2
store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 4630), align 2
; Mask <0, 4> selects lane 0 of %5 and lane 0 of the all-zero second operand;
; the result is only stored, not returned.
%6 = shufflevector <4 x i32> %5, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 4>
store <2 x i32> %6, ptr getelementptr inbounds nuw (i8, ptr null, i64 16), align 4
store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 5556), align 2
store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 6482), align 2
store <2 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 24), align 4
store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 7408), align 2
store i16 0, ptr getelementptr inbounds nuw (i8, ptr null, i64 8334), align 2
store <2 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 32), align 4
store i16 0, ptr %0, align 2
store <2 x i32> zeroinitializer, ptr getelementptr inbounds nuw (i8, ptr null, i64 40), align 4
; Second smin chain: reads the same three addresses as the first, but inserts
; into poison, so only lane 0 of each operand is defined.
%7 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 29916), align 4
%broadcast.splatinsert165 = insertelement <4 x i32> poison, i32 %7, i64 0
%8 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28484), align 4
%broadcast.splatinsert167 = insertelement <4 x i32> poison, i32 %8, i64 0
%9 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splatinsert165, <4 x i32> %broadcast.splatinsert167)
%10 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28368), align 4
%broadcast.splatinsert169 = insertelement <4 x i32> poison, i32 %10, i64 0
%11 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %9, <4 x i32> %broadcast.splatinsert169)
store i16 0, ptr null, align 2
; Third smin chain: repeats the second chain's loads and smin calls after the
; intervening i16 store to null.
%12 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 29916), align 4
%broadcast.splatinsert179 = insertelement <4 x i32> poison, i32 %12, i64 0
%13 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28484), align 4
%broadcast.splatinsert181 = insertelement <4 x i32> poison, i32 %13, i64 0
%14 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splatinsert179, <4 x i32> %broadcast.splatinsert181)
%15 = load i32, ptr getelementptr inbounds nuw (i8, ptr null, i64 28368), align 4
%broadcast.splatinsert183 = insertelement <4 x i32> poison, i32 %15, i64 0
%16 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %14, <4 x i32> %broadcast.splatinsert183)
; Mask <0, 4> selects lane 0 of %11 and lane 0 of %16, i.e. one lane from each
; of the two poison-based chains.
; NOTE(review): presumably this is the two-source shuffle that triggered the
; scalable-vector size assertion during lowering - confirm against the PR.
%17 = shufflevector <4 x i32> %11, <4 x i32> %16, <2 x i32> <i32 0, i32 4>
ret <2 x i32> %17
}
19 changes: 0 additions & 19 deletions llvm/utils/TableGen/DAGISelMatcherOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,25 +135,6 @@ static void ContractNodes(std::unique_ptr<Matcher> &MatcherPtr,
// variants.
}

// If we have a CheckType/CheckChildType/Record node followed by a
// CheckOpcode, invert the two nodes. We prefer to do structural checks
// before type checks, as this opens opportunities for factoring on targets
// like X86 where many operations are valid on multiple types.
if ((isa<CheckTypeMatcher>(N) || isa<CheckChildTypeMatcher>(N) ||
isa<RecordMatcher>(N)) &&
isa<CheckOpcodeMatcher>(N->getNext())) {
// Unlink the two nodes from the list.
Matcher *CheckType = MatcherPtr.release();
Matcher *CheckOpcode = CheckType->takeNext();
Matcher *Tail = CheckOpcode->takeNext();

// Relink them.
MatcherPtr.reset(CheckOpcode);
CheckOpcode->setNext(CheckType);
CheckType->setNext(Tail);
return ContractNodes(MatcherPtr, CGP);
}

ContractNodes(N->getNextPtr(), CGP);

// If we have a MoveParent followed by a MoveChild, we convert it to
Expand Down