Skip to content

AMDGPU: Add baseline tests for simplify elts of readfirstlane #128645

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Feb 25, 2025

No description provided.

Copy link
Contributor Author

arsenm commented Feb 25, 2025

@llvmbot
Copy link
Member

llvmbot commented Feb 25, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Patch is 23.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128645.diff

1 File Affected:

  • (added) llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll (+444)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
new file mode 100644
index 0000000000000..83d9d0d032ed1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -0,0 +1,444 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < %s | FileCheck %s
+
+define i16 @extract_elt0_v2i16_readfirstlane(<2 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v2i16_readfirstlane(
+; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 0
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src)
+  %elt = extractelement <2 x i16> %vec, i32 0
+  ret i16 %elt
+}
+
+define i16 @extract_elt0_v1i16_readfirstlane(<1 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v1i16_readfirstlane(
+; CHECK-SAME: <1 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <1 x i16> [[VEC]], i64 0
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> %src)
+  %elt = extractelement <1 x i16> %vec, i32 0
+  ret i16 %elt
+}
+
+define i16 @extract_elt1_v2i16_readfirstlane(<2 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt1_v2i16_readfirstlane(
+; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 1
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src)
+  %elt = extractelement <2 x i16> %vec, i32 1
+  ret i16 %elt
+}
+
+define i16 @extract_elt0_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 0
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %elt = extractelement <4 x i16> %vec, i32 0
+  ret i16 %elt
+}
+
+define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt2_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 2
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %elt = extractelement <4 x i16> %vec, i32 2
+  ret i16 %elt
+}
+
+define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 0>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 3, i32 2>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt30_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt30_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 3, i32 0>
+  ret <2 x i16> %shuffle
+}
+
+define half @extract_elt0_v2f16_readfirstlane(<2 x half> %src) {
+; CHECK-LABEL: define half @extract_elt0_v2f16_readfirstlane(
+; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 0
+; CHECK-NEXT:    ret half [[ELT]]
+;
+  %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> %src)
+  %elt = extractelement <2 x half> %vec, i32 0
+  ret half %elt
+}
+
+define half @extract_elt1_v2f16_readfirstlane(<2 x half> %src) {
+; CHECK-LABEL: define half @extract_elt1_v2f16_readfirstlane(
+; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 1
+; CHECK-NEXT:    ret half [[ELT]]
+;
+  %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> %src)
+  %elt = extractelement <2 x half> %vec, i32 1
+  ret half %elt
+}
+
+; Don't break on illegal types
+define i8 @extract_elt0_v4i8_readfirstlane(<4 x i8> %src) {
+; CHECK-LABEL: define i8 @extract_elt0_v4i8_readfirstlane(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i8> @llvm.amdgcn.readfirstlane.v4i8(<4 x i8> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x i8> [[VEC]], i64 0
+; CHECK-NEXT:    ret i8 [[ELT]]
+;
+  %vec = call <4 x i8> @llvm.amdgcn.readfirstlane.v4i8(<4 x i8> %src)
+  %elt = extractelement <4 x i8> %vec, i32 0
+  ret i8 %elt
+}
+
+; Don't break on illegal types (scalable vector case; NOTE: the function name says nxv4i32 but the operand type is <vscale x 2 x i32>)
+define i32 @extract_elt0_nxv4i32_readfirstlane(<vscale x 2 x i32> %src) {
+; CHECK-LABEL: define i32 @extract_elt0_nxv4i32_readfirstlane(
+; CHECK-SAME: <vscale x 2 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <vscale x 2 x i32> @llvm.amdgcn.readfirstlane.nxv2i32(<vscale x 2 x i32> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <vscale x 2 x i32> [[VEC]], i64 0
+; CHECK-NEXT:    ret i32 [[ELT]]
+;
+  %vec = call <vscale x 2 x i32> @llvm.amdgcn.readfirstlane.nxv2i32(<vscale x 2 x i32> %src)
+  %elt = extractelement <vscale x 2 x i32> %vec, i32 0
+  ret i32 %elt
+}
+
+define i32 @extract_elt0_v2i32_readfirstlane(<2 x i32> %src) {
+; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0
+; CHECK-NEXT:    ret i32 [[ELT]]
+;
+  %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src)
+  %elt = extractelement <2 x i32> %vec, i32 0
+  ret i32 %elt
+}
+
+define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane(<2 x ptr addrspace(3)> %src) {
+; CHECK-LABEL: define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane(
+; CHECK-SAME: <2 x ptr addrspace(3)> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x ptr addrspace(3)> [[VEC]], i64 0
+; CHECK-NEXT:    ret ptr addrspace(3) [[ELT]]
+;
+  %vec = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> %src)
+  %elt = extractelement <2 x ptr addrspace(3)> %vec, i32 0
+  ret ptr addrspace(3) %elt
+}
+
+define i64 @extract_elt0_v2i64_readfirstlane(<2 x i64> %src) {
+; CHECK-LABEL: define i64 @extract_elt0_v2i64_readfirstlane(
+; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 0
+; CHECK-NEXT:    ret i64 [[ELT]]
+;
+  %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src)
+  %elt = extractelement <2 x i64> %vec, i32 0
+  ret i64 %elt
+}
+
+define i64 @extract_elt1_v2i64_readfirstlane(<2 x i64> %src) {
+; CHECK-LABEL: define i64 @extract_elt1_v2i64_readfirstlane(
+; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 1
+; CHECK-NEXT:    ret i64 [[ELT]]
+;
+  %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src)
+  %elt = extractelement <2 x i64> %vec, i32 1
+  ret i64 %elt
+}
+
+define <3 x i16> @extract_elt012_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <3 x i16> @extract_elt012_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    ret <3 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x i16> %shuffle
+}
+
+define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <3 x i16> @extract_elt123_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <3 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 3>
+  ret <3 x i16> %shuffle
+}
+
+define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
+;
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x i32> %shuffle
+}
+
+define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
+;
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+  ret <3 x i32> %shuffle
+}
+
+define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i16> @extract_elt13_v4i16readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt13_v4i16readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 1
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 0, i32 poison>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 2
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(<2 x i32> %src) convergent {
+; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0
+; CHECK-NEXT:    ret i32 [[ELT]]
+;
+  %t = call token @llvm.experimental.convergence.entry()
+  %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) [ "convergencectrl"(token %t) ]
+  %elt = extractelement <2 x i32> %vec, i32 0
+  ret i32 %elt
+}
+
+define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) convergent {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %t = call token @llvm.experimental.convergence.entry()
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3
+  %vec = call <4 ...
[truncated]

Copy link
Contributor Author

arsenm commented Feb 28, 2025

Merge activity

  • Feb 28, 12:52 AM EST: A user started a stack merge that includes this pull request via Graphite.
  • Feb 28, 12:54 AM EST: Graphite rebased this pull request as part of a merge.
  • Feb 28, 12:57 AM EST: A user merged this pull request with Graphite.

@arsenm arsenm force-pushed the users/arsenm/amdgpu/add-baseline-tests-simplify-demanded-vector-elts-readfirstlane branch from fe33057 to 859a8cb Compare February 28, 2025 05:54
@arsenm arsenm merged commit 2fa6c52 into main Feb 28, 2025
6 of 10 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/add-baseline-tests-simplify-demanded-vector-elts-readfirstlane branch February 28, 2025 05:57
cheezeburglar pushed a commit to cheezeburglar/llvm-project that referenced this pull request Feb 28, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants