From a63e05d2e090edf7834fb62296bccd071a8e38b8 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Thu, 14 Nov 2024 11:53:39 -0700 Subject: [PATCH 01/17] [HLSL] Implement elementwise firstbitlow builtin --- clang/include/clang/Basic/Builtins.td | 6 + clang/lib/CodeGen/CGBuiltin.cpp | 9 +- clang/lib/CodeGen/CGHLSLRuntime.h | 1 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 72 ++++++++ clang/lib/Sema/SemaHLSL.cpp | 3 +- .../CodeGenHLSL/builtins/firstbitlow.hlsl | 153 ++++++++++++++++ .../BuiltIns/firstbithigh-errors.hlsl | 6 +- .../SemaHLSL/BuiltIns/firstbitlow-errors.hlsl | 26 +++ llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 + llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + llvm/lib/Target/DirectX/DXIL.td | 13 ++ .../DirectX/DirectXTargetTransformInfo.cpp | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 169 ++++++++++++++++++ llvm/test/CodeGen/DirectX/firstbitlow.ll | 47 +++++ .../test/CodeGen/DirectX/firstbitlow_error.ll | 10 ++ .../SPIRV/hlsl-intrinsics/firstbitlow.ll | 104 +++++++++++ 16 files changed, 616 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl create mode 100644 llvm/test/CodeGen/DirectX/firstbitlow.ll create mode 100644 llvm/test/CodeGen/DirectX/firstbitlow_error.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 32a09e2ceb385..a4fb671e47930 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4834,6 +4834,12 @@ def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLFirstBitLow : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_firstbitlow"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLFrac : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_elementwise_frac"]; 
let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc1..cbd4c931b05b0 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19255,7 +19255,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "hlsl.dot4add.u8packed"); } case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: { - Value *X = EmitScalarExpr(E->getArg(0)); return Builder.CreateIntrinsic( @@ -19263,6 +19262,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()), ArrayRef{X}, nullptr, "hlsl.firstbithigh"); } + case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: { + Value *X = EmitScalarExpr(E->getArg(0)); + + return Builder.CreateIntrinsic( + /*ReturnType=*/ConvertType(E->getType()), + CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef{X}, + nullptr, "hlsl.firstbitlow"); + } case Builtin::BI__builtin_hlsl_lerp: { Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index bb120c8b5e9e6..df285e185173d 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -97,6 +97,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane) GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh) GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh) + GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitLow, firstbitlow) GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp) GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp) GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 1126e13600f8a..c132c300da27a 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -1121,6 +1121,78 @@ uint3 
firstbithigh(uint64_t3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) uint4 firstbithigh(uint64_t4); +//===----------------------------------------------------------------------===// +// firstbitlow builtins +//===----------------------------------------------------------------------===// + +/// \fn T firstbitlow(T Val) +/// \brief Returns the location of the first set bit starting from the lowest +/// order bit and working upward, per component. +/// \param Val the input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint 
firstbitlow(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(uint64_t4); + //===----------------------------------------------------------------------===// // floor builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 88db3e1254119..bf74c62aa8f50 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2014,7 +2014,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } - case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: { + case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: + case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: { if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall)) return true; diff --git a/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl b/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl new file mode 100644 index 0000000000000..5d490fabc5bc8 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl @@ 
-0,0 +1,153 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s -DTARGET=spv + +#ifdef __HLSL_ENABLE_16_BIT +// CHECK-LABEL: test_firstbitlow_ushort +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16 +uint test_firstbitlow_ushort(uint16_t p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ushort2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16 +uint2 test_firstbitlow_ushort2(uint16_t2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ushort3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16 +uint3 test_firstbitlow_ushort3(uint16_t3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ushort4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16 +uint4 test_firstbitlow_ushort4(uint16_t4 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_short +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16 +uint test_firstbitlow_short(int16_t p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_short2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16 +uint2 test_firstbitlow_short2(int16_t2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_short3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16 +uint3 test_firstbitlow_short3(int16_t3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_short4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16 +uint4 test_firstbitlow_short4(int16_t4 p0) { + return firstbitlow(p0); +} +#endif // __HLSL_ENABLE_16_BIT + +// CHECK-LABEL: test_firstbitlow_uint +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32 
+uint test_firstbitlow_uint(uint p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_uint2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32 +uint2 test_firstbitlow_uint2(uint2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_uint3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32 +uint3 test_firstbitlow_uint3(uint3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_uint4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32 +uint4 test_firstbitlow_uint4(uint4 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ulong +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64 +uint test_firstbitlow_ulong(uint64_t p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ulong2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64 +uint2 test_firstbitlow_ulong2(uint64_t2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ulong3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64 +uint3 test_firstbitlow_ulong3(uint64_t3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ulong4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64 +uint4 test_firstbitlow_ulong4(uint64_t4 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_int +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32 +uint test_firstbitlow_int(int p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_int2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32 +uint2 test_firstbitlow_int2(int2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_int3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32 +uint3 test_firstbitlow_int3(int3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_int4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32 +uint4 test_firstbitlow_int4(int4 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: 
test_firstbitlow_long +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64 +uint test_firstbitlow_long(int64_t p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_long2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64 +uint2 test_firstbitlow_long2(int64_t2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_long3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64 +uint3 test_firstbitlow_long3(int64_t3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_long4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64 +uint4 test_firstbitlow_long4(int64_t4 p0) { + return firstbitlow(p0); +} diff --git a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl index 1912ab3ae806b..b4024418dbba4 100644 --- a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl @@ -17,12 +17,10 @@ double test_int_builtin(double p0) { double2 test_int_builtin_2(double2 p0) { return __builtin_hlsl_elementwise_firstbithigh(p0); - // expected-error@-1 {{1st argument must be a vector of integers - // (was 'double2' (aka 'vector'))}} + // expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector'))}} } float test_int_builtin_3(float p0) { return __builtin_hlsl_elementwise_firstbithigh(p0); - // expected-error@-1 {{1st argument must be a vector of integers - // (was 'float')}} + // expected-error@-1 {{1st argument must be a vector of integers (was 'double')}} } diff --git a/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl new file mode 100644 index 0000000000000..95c25e9e2fb60 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify 
-verify-ignore-unexpected + +int test_too_few_arg() { + return firstbitlow(); + // expected-error@-1 {{no matching function for call to 'firstbitlow'}} +} + +int test_too_many_arg(int p0) { + return firstbitlow(p0, p0); + // expected-error@-1 {{no matching function for call to 'firstbitlow'}} +} + +double test_int_builtin(double p0) { + return firstbitlow(p0); + // expected-error@-1 {{call to 'firstbitlow' is ambiguous}} +} + +double2 test_int_builtin_2(double2 p0) { + return __builtin_hlsl_elementwise_firstbitlow(p0); + // expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector'))}} +} + +float test_int_builtin_3(float p0) { + return __builtin_hlsl_elementwise_firstbitlow(p0); + // expected-error@-1 {{1st argument must be a vector of integers (was 'double')}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 5696345a617fe..1a182250b610b 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -110,6 +110,7 @@ def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0> def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>; def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; +def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>; } diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 1ae3129774e50..1b8dfc416441a 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -109,6 +109,7 @@ let TargetPrefix = "spv" in { def int_spv_firstbituhigh : 
DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; + def int_spv_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_spv_bufferUpdateCounter : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty], diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index cff6cdce813de..a208ba7663a3b 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -618,6 +618,19 @@ def CountBits : DXILOp<31, unaryBits> { let attributes = [Attributes]; } +def FirstbitLo : DXILOp<32, unaryBits> { + let Doc = "Returns the location of the first set bit starting from " + "the lowest order bit and working upward."; + let LLVMIntrinsic = int_dx_firstbitlow; + let arguments = [OverloadTy]; + let result = Int32Ty; + let overloads = + [Overloads]; + let stages = [Stages]; + // TODO: check these + let attributes = [Attributes]; +} + def FirstbitHi : DXILOp<33, unaryBits> { let Doc = "Returns the location of the first set bit starting from " "the highest order bit and working downward."; diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 2ca4e23594d56..0c0d324b21cdd 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -45,6 +45,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_splitdouble: case Intrinsic::dx_firstbituhigh: case Intrinsic::dx_firstbitshigh: + case Intrinsic::dx_firstbitlow: return true; default: return false; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 3a98b74b3d675..fe8879a699104 100644 --- 
a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -106,6 +106,18 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectFirstBitHigh64(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, bool IsSigned) const; + bool selectFirstBitLow(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectFirstBitLow16(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectFirstBitLow32(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, Register SrcReg) const; + + bool selectFirstBitLow64(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectGlobalValue(Register ResVReg, MachineInstr &I, const MachineInstr *Init = nullptr) const; @@ -2895,6 +2907,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/true); + case Intrinsic::spv_firstbitlow: // There is no CL equivalent of FindILsb + // (true?) + return selectFirstBitLow(ResVReg, ResType, I); case Intrinsic::spv_group_memory_barrier_with_group_sync: { bool Result = true; auto MemSemConstant = @@ -3292,6 +3307,160 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg, } } +bool SPIRVInstructionSelector::selectFirstBitLow16(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + // OpUConvert treats the operand bits as an unsigned i16 and zero extends it + // to an unsigned i32. As this leaves all the least significant bits unchanged + // the first set bit from the LSB side doesn't change.
+ Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + bool Result = selectNAryOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()}, + SPIRV::OpUConvert); + return Result && selectFirstBitLow32(ResVReg, ResType, I, ExtReg); +} + +bool SPIRVInstructionSelector::selectFirstBitLow32(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + Register SrcReg) const { + return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addImm(static_cast(SPIRV::InstructionSet::GLSL_std_450)) + .addImm(GL::FindILsb) + .addUse(SrcReg) + .constrainAllUses(TII, TRI, RBI); +} + +bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + Register OpReg = I.getOperand(2).getReg(); + + // 1. Split int64 into 2 pieces using a bitcast + unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType); + SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType); + MachineIRBuilder MIRBuilder(I); + SPIRVType *PostCastType = + GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder); + Register BitcastReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); + bool Result = + selectUnOpWithSrc(BitcastReg, PostCastType, I, OpReg, SPIRV::OpBitcast); + + // 2. Find the first set bit from the LSB side for all the pieces in #1 + Register FBLReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); + Result = Result && selectFirstBitLow32(FBLReg, PostCastType, I, BitcastReg); + + // 3. 
Split result vector into high bits and low bits + Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + Register LowReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + + bool ZeroAsNull = STI.isOpenCLEnv(); + bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector; + if (IsScalarRes) { + // if scalar do a vector extract + Result = Result && selectNAryOpWithSrcs( + HighReg, ResType, I, + {FBLReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)}, + SPIRV::OpVectorExtractDynamic); + Result = Result && selectNAryOpWithSrcs( + LowReg, ResType, I, + {FBLReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)}, + SPIRV::OpVectorExtractDynamic); + } else { + // if vector do a shufflevector + auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(SPIRV::OpVectorShuffle)) + .addDef(HighReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(FBLReg) + // Per the spec, repeat the vector if only one vec is needed + .addUse(FBLReg); + + // high bits are stored in even indexes. Extract them from FBLReg + for (unsigned j = 0; j < ComponentCount * 2; j += 2) { + MIB.addImm(j); + } + Result = Result && MIB.constrainAllUses(TII, TRI, RBI); + + MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(SPIRV::OpVectorShuffle)) + .addDef(LowReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(FBLReg) + // Per the spec, repeat the vector if only one vec is needed + .addUse(FBLReg); + + // low bits are stored in odd indexes. Extract them from FBLReg + for (unsigned j = 1; j < ComponentCount * 2; j += 2) { + MIB.addImm(j); + } + Result = Result && MIB.constrainAllUses(TII, TRI, RBI); + } + + // 4.
Check if result of each bottom 32 bits is == -1 + SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII); + Register NegOneReg; + Register Reg0; + Register Reg32; + unsigned SelectOp; + unsigned AddOp; + + if (IsScalarRes) { + NegOneReg = + GR.getOrCreateConstInt((unsigned)-1, I, ResType, TII, ZeroAsNull); + Reg0 = GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull); + Reg32 = GR.getOrCreateConstInt(32, I, ResType, TII, ZeroAsNull); + SelectOp = SPIRV::OpSelectSISCond; + AddOp = SPIRV::OpIAddS; + } else { + BoolType = GR.getOrCreateSPIRVVectorType(BoolType, ComponentCount, MIRBuilder); + NegOneReg = + GR.getOrCreateConstVector((unsigned)-1, I, ResType, TII, ZeroAsNull); + Reg0 = GR.getOrCreateConstVector(0, I, ResType, TII, ZeroAsNull); + Reg32 = GR.getOrCreateConstVector(32, I, ResType, TII, ZeroAsNull); + SelectOp = SPIRV::OpSelectVIVCond; + AddOp = SPIRV::OpIAddV; + } + + // Check if the low bits are == -1; true if -1 + Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType)); + Result = Result && selectNAryOpWithSrcs(BReg, BoolType, I, {LowReg, NegOneReg}, + SPIRV::OpIEqual); + + // Select high bits if true in BReg, otherwise low bits + Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + Result = Result && selectNAryOpWithSrcs(TmpReg, ResType, I, {BReg, HighReg, LowReg}, + SelectOp); + + // Add 32 for high bits, 0 for low bits + Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + Result = Result && + selectNAryOpWithSrcs(ValReg, ResType, I, {BReg, Reg32, Reg0}, SelectOp); + + return Result && + selectNAryOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp); +} + +bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + // FindILsb intrinsic only supports 32 bit integers + Register OpReg = I.getOperand(2).getReg(); + SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg); + + switch (GR.getScalarOrVectorBitWidth(OpType)) { + case 16: + 
return selectFirstBitLow16(ResVReg, ResType, I); + case 32: + return selectFirstBitLow32(ResVReg, ResType, I, OpReg); + case 64: + return selectFirstBitLow64(ResVReg, ResType, I); + default: + report_fatal_error("spv_firstbitlow only supports 16,32,64 bits."); + } +} + bool SPIRVInstructionSelector::selectAllocaArray(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { diff --git a/llvm/test/CodeGen/DirectX/firstbitlow.ll b/llvm/test/CodeGen/DirectX/firstbitlow.ll new file mode 100644 index 0000000000000..884ec1164fc99 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/firstbitlow.ll @@ -0,0 +1,47 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for firstbitlow are generated for all integer types. + +define noundef i32 @test_firstbitlow_short(i16 noundef %a) { +entry: +; CHECK: call i32 @dx.op.unaryBits.i16(i32 32, i16 %{{.*}}) + %elt.firstbitlow = call i32 @llvm.dx.firstbitlow.i16(i16 %a) + ret i32 %elt.firstbitlow +} + +define noundef i32 @test_firstbitlow_int(i32 noundef %a) { +entry: +; CHECK: call i32 @dx.op.unaryBits.i32(i32 32, i32 %{{.*}}) + %elt.firstbitlow = call i32 @llvm.dx.firstbitlow.i32(i32 %a) + ret i32 %elt.firstbitlow +} + +define noundef i32 @test_firstbitlow_long(i64 noundef %a) { +entry: +; CHECK: call i32 @dx.op.unaryBits.i64(i32 32, i64 %{{.*}}) + %elt.firstbitlow = call i32 @llvm.dx.firstbitlow.i64(i64 %a) + ret i32 %elt.firstbitlow +} + +define noundef <4 x i32> @test_firstbitlow_vec4_i32(<4 x i32> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee2]]) + ; CHECK: 
[[ee3:%.*]] = extractelement <4 x i32> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 32, i32 [[ee3]]) + ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + %2 = call <4 x i32> @llvm.dx.firstbitlow.v4i32(<4 x i32> %a) + ret <4 x i32> %2 +} + +declare i32 @llvm.dx.firstbitlow.i16(i16) +declare i32 @llvm.dx.firstbitlow.i32(i32) +declare i32 @llvm.dx.firstbitlow.i64(i64) +declare <4 x i32> @llvm.dx.firstbitlow.v4i32(<4 x i32>) diff --git a/llvm/test/CodeGen/DirectX/firstbitlow_error.ll b/llvm/test/CodeGen/DirectX/firstbitlow_error.ll new file mode 100644 index 0000000000000..d8b9333067f4a --- /dev/null +++ b/llvm/test/CodeGen/DirectX/firstbitlow_error.ll @@ -0,0 +1,10 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation firstbitlow does not support double overload type +; CHECK: invalid intrinsic signature + +define noundef double @firstbitlow_double(double noundef %a) { +entry: + %1 = call double @llvm.dx.firstbitlow.f64(double %a) + ret double %1 +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll new file mode 100644 index 0000000000000..9ebd8cc511eb6 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll @@ -0,0 +1,104 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: [[glsl_450_ext:%.+]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: OpMemoryModel Logical GLSL450 +; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0 +; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2 +; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4 +;
CHECK-DAG: [[const_zero:%.*]] = OpConstant [[u32_t]] 0 +; CHECK-DAG: [[const_zerox2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_zero]] [[const_zero]] +; CHECK-DAG: [[const_one:%.*]] = OpConstant [[u32_t]] 1 +; CHECK-DAG: [[const_thirty_two:%.*]] = OpConstant [[u32_t]] 32 +; CHECK-DAG: [[const_thirty_twox2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_thirty_two]] [[const_thirty_two]] +; CHECK-DAG: [[const_neg_one:%.*]] = OpConstant [[u32_t]] 4294967295 +; CHECK-DAG: [[const_neg_onex2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg_one]] [[const_neg_one]] +; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0 +; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2 +; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0 +; CHECK-DAG: [[u64x2_t:%.+]] = OpTypeVector [[u64_t]] 2 +; CHECK-DAG: [[bool_t:%.+]] = OpTypeBool +; CHECK-DAG: [[boolx2_t:%.+]] = OpTypeVector [[bool_t]] 2 + +; CHECK-LABEL: Begin function firstbitlow_i32 +define noundef i32 @firstbitlow_i32(i32 noundef %a) { +entry: +; CHECK: [[a:%.+]] = OpFunctionParameter [[u32_t]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindILsb [[a]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call i32 @llvm.spv.firstbitlow.i32(i32 %a) + ret i32 %elt.firstbitlow +} + +; CHECK-LABEL: Begin function firstbitlow_2xi32 +define noundef <2 x i32> @firstbitlow_2xi32(<2 x i32> noundef %a) { +entry: +; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x2_t]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i32(<2 x i32> %a) + ret <2 x i32> %elt.firstbitlow +} + +; CHECK-LABEL: Begin function firstbitlow_i16 +define noundef i32 @firstbitlow_i16(i16 noundef %a) { +entry: +; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16_t]] +; CHECK: [[a32:%.+]] = OpUConvert [[u32_t]] [[a16]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindILsb [[a32]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = 
call i32 @llvm.spv.firstbitlow.i16(i16 %a) + ret i32 %elt.firstbitlow +} + +; CHECK-LABEL: Begin function firstbitlow_v2i16 +define noundef <2 x i32> @firstbitlow_v2i16(<2 x i16> noundef %a) { +entry: +; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x2_t]] +; CHECK: [[a32:%.+]] = OpUConvert [[u32x2_t]] [[a16]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a32]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i16(<2 x i16> %a) + ret <2 x i32> %elt.firstbitlow +} + +; CHECK-LABEL: Begin function firstbitlow_i64 +define noundef i32 @firstbitlow_i64(i64 noundef %a) { +entry: +; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]] +; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]] +; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a32x2]] +; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_zero]] +; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_one]] +; CHECK: [[should_use_high:%.+]] = OpIEqual [[bool_t]] [[low_bits]] [[const_neg_one]] +; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[high_bits]] [[low_bits]] +; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[const_thirty_two]] [[const_zero]] +; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call i32 @llvm.spv.firstbitlow.i64(i64 %a) + ret i32 %elt.firstbitlow +} + +; CHECK-LABEL: Begin function firstbitlow_v2i64 +define noundef <2 x i32> @firstbitlow_v2i64(<2 x i64> noundef %a) { +entry: +; CHECK: [[a64x2:%.+]] = OpFunctionParameter [[u64x2_t]] +; CHECK: [[a32x4:%.+]] = OpBitcast [[u32x4_t]] [[a64x2]] +; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a32x4]] +; CHECK: [[high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 0 2 +; CHECK: [[low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] 
[[lsb_bits]] 1 3 +; CHECK: [[should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[low_bits]] [[const_neg_onex2]] +; CHECK: [[ans_bits:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[high_bits]] [[low_bits]] +; CHECK: [[ans_offset:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[const_thirty_twox2]] [[const_zerox2]] +; CHECK: [[ret:%.+]] = OpIAdd [[u32x2_t]] [[ans_offset]] [[ans_bits]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i64(<2 x i64> %a) + ret <2 x i32> %elt.firstbitlow +} + +;declare i16 @llvm.spv.firstbitlow.i16(i16) +;declare i32 @llvm.spv.firstbitlow.i32(i32) +;declare i64 @llvm.spv.firstbitlow.i64(i64) +;declare i16 @llvm.spv.firstbitlow.v2i16(<2 x i16>) +;declare i32 @llvm.spv.firstbitlow.v2i32(<2 x i32>) +;declare i64 @llvm.spv.firstbitlow.v2i64(<2 x i64>) From 72f1999234cfa5de5bf3e46da46225a5b1e87924 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Tue, 19 Nov 2024 10:35:52 -0700 Subject: [PATCH 02/17] format --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index fe8879a699104..dd00947f98549 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3314,8 +3314,8 @@ bool SPIRVInstructionSelector::selectFirstBitLow16(Register ResVReg, // to an unsigned i32. As this leaves all the least significant bits unchanged // the first set bit from the LSB side doesn't change. 
Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - bool Result = selectNAryOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()}, - SPIRV::OpUConvert); + bool Result = selectNAryOpWithSrcs( + ExtReg, ResType, I, {I.getOperand(2).getReg()}, SPIRV::OpUConvert); return Result && selectFirstBitLow32(ResVReg, ResType, I, ExtReg); } @@ -3343,7 +3343,8 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, MachineIRBuilder MIRBuilder(I); SPIRVType *PostCastType = GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder); - Register BitcastReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); + Register BitcastReg = + MRI->createVirtualRegister(GR.getRegClass(PostCastType)); bool Result = selectUnOpWithSrc(BitcastReg, PostCastType, I, OpReg, SPIRV::OpBitcast); @@ -3359,14 +3360,18 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector; if (IsScalarRes) { // if scalar do a vector extract - Result = Result && selectNAryOpWithSrcs( - HighReg, ResType, I, - {FBLReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)}, - SPIRV::OpVectorExtractDynamic); - Result = Result && selectNAryOpWithSrcs( - LowReg, ResType, I, - {FBLReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)}, - SPIRV::OpVectorExtractDynamic); + Result = + Result && + selectNAryOpWithSrcs( + HighReg, ResType, I, + {FBLReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)}, + SPIRV::OpVectorExtractDynamic); + Result = + Result && + selectNAryOpWithSrcs( + LowReg, ResType, I, + {FBLReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)}, + SPIRV::OpVectorExtractDynamic); } else { // if vector do a shufflevector auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), @@ -3414,7 +3419,8 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, SelectOp = SPIRV::OpSelectSISCond; AddOp = SPIRV::OpIAddS; } else { - BoolType = 
GR.getOrCreateSPIRVVectorType(BoolType, ComponentCount, MIRBuilder); + BoolType = + GR.getOrCreateSPIRVVectorType(BoolType, ComponentCount, MIRBuilder); NegOneReg = GR.getOrCreateConstVector((unsigned)-1, I, ResType, TII, ZeroAsNull); Reg0 = GR.getOrCreateConstVector(0, I, ResType, TII, ZeroAsNull); @@ -3425,18 +3431,18 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, // Check if the low bits are == -1; true if -1 Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType)); - Result = Result && selectNAryOpWithSrcs(BReg, BoolType, I, {LowReg, NegOneReg}, - SPIRV::OpIEqual); + Result = Result && selectNAryOpWithSrcs(BReg, BoolType, I, + {LowReg, NegOneReg}, SPIRV::OpIEqual); // Select high bits if true in BReg, otherwise low bits Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - Result = Result && selectNAryOpWithSrcs(TmpReg, ResType, I, {BReg, HighReg, LowReg}, - SelectOp); + Result = Result && selectNAryOpWithSrcs(TmpReg, ResType, I, + {BReg, HighReg, LowReg}, SelectOp); // Add 32 for high bits, 0 for low bits Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - Result = Result && - selectNAryOpWithSrcs(ValReg, ResType, I, {BReg, Reg32, Reg0}, SelectOp); + Result = Result && selectNAryOpWithSrcs(ValReg, ResType, I, + {BReg, Reg32, Reg0}, SelectOp); return Result && selectNAryOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp); From 8434e6ad8590baa3848192728433a0ad9fe02f4b Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Tue, 19 Nov 2024 11:56:07 -0700 Subject: [PATCH 03/17] cleanup --- llvm/lib/Target/DirectX/DXIL.td | 1 - .../Target/SPIRV/SPIRVInstructionSelector.cpp | 11 ++++---- .../SPIRV/hlsl-intrinsics/firstbitlow.ll | 26 +++++++++---------- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index a208ba7663a3b..d6d78581bafbf 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ 
b/llvm/lib/Target/DirectX/DXIL.td @@ -627,7 +627,6 @@ def FirstbitLo : DXILOp<32, unaryBits> { let overloads = [Overloads]; let stages = [Stages]; - // TODO: check these let attributes = [Attributes]; } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index dd00947f98549..e1c58f8578554 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -2908,7 +2908,6 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/true); case Intrinsic::spv_firstbitlow: // There is no CL equivlent of FindILsb - // (true?) return selectFirstBitLow(ResVReg, ResType, I); case Intrinsic::spv_group_memory_barrier_with_group_sync: { bool Result = true; @@ -3382,7 +3381,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, // Per the spec, repeat the vector if only one vec is needed .addUse(FBLReg); - // high bits are store in even indexes. Extract them from FBLReg + // high bits are stored in even indexes. Extract them from FBLReg for (unsigned j = 0; j < ComponentCount * 2; j += 2) { MIB.addImm(j); } @@ -3396,14 +3395,14 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, // Per the spec, repeat the vector if only one vec is needed .addUse(FBLReg); - // low bits are store in odd indexes. Extract them from FBLReg + // low bits are stored in odd indexes. Extract them from FBLReg for (unsigned j = 1; j < ComponentCount * 2; j += 2) { MIB.addImm(j); } Result = Result && MIB.constrainAllUses(TII, TRI, RBI); } - // 4. Check if result of each bottom 32 bits is == -1 + // 4. Check the result. 
When low bits == -1 use high, otherwise use low SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII); Register NegOneReg; Register Reg0; @@ -3429,7 +3428,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, AddOp = SPIRV::OpIAddV; } - // Check if the low bits are == -1; true if -1 + // Check if the low bits are == -1 Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType)); Result = Result && selectNAryOpWithSrcs(BReg, BoolType, I, {LowReg, NegOneReg}, SPIRV::OpIEqual); @@ -3439,7 +3438,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, Result = Result && selectNAryOpWithSrcs(TmpReg, ResType, I, {BReg, HighReg, LowReg}, SelectOp); - // Add 32 for high bits, 0 for low bits + // 5. Add 32 when high bits are used, otherwise 0 for low bits Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); Result = Result && selectNAryOpWithSrcs(ValReg, ResType, I, {BReg, Reg32, Reg0}, SelectOp); diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll index 9ebd8cc511eb6..05488479e5bd0 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll @@ -6,13 +6,13 @@ ; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0 ; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2 ; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4 -; CHECK-DAG: [[const_zero:%.*]] = OpConstant [[u32_t]] 0 -; CHECK-DAG: [[const_zerox2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_zero]] [[const_zero]] -; CHECK-DAG: [[const_one:%.*]] = OpConstant [[u32_t]] 1 -; CHECK-DAG: [[const_thirty_two:%.*]] = OpConstant [[u32_t]] 32 -; CHECK-DAG: [[const_thirty_twox2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_thirty_two]] [[const_thirty_two]] -; CHECK-DAG: [[const_neg_one:%.*]] = OpConstant [[u32_t]] 4294967295 -; CHECK-DAG: [[const_neg_onex2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg_one]] 
[[const_neg_one]] +; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0 +; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]] +; CHECK-DAG: [[const_1:%.*]] = OpConstant [[u32_t]] 1 +; CHECK-DAG: [[const_32:%.*]] = OpConstant [[u32_t]] 32 +; CHECK-DAG: [[const_32x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_32]] [[const_32]] +; CHECK-DAG: [[const_neg1:%.*]] = OpConstant [[u32_t]] 4294967295 +; CHECK-DAG: [[const_neg1x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg1]] [[const_neg1]] ; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0 ; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2 ; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0 @@ -68,11 +68,11 @@ entry: ; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]] ; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]] ; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a32x2]] -; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_zero]] -; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_one]] -; CHECK: [[should_use_high:%.+]] = OpIEqual [[bool_t]] [[low_bits]] [[const_neg_one]] +; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_0]] +; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_1]] +; CHECK: [[should_use_high:%.+]] = OpIEqual [[bool_t]] [[low_bits]] [[const_neg1]] ; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[high_bits]] [[low_bits]] -; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[const_thirty_two]] [[const_zero]] +; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_high]] [[const_32]] [[const_0]] ; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]] ; CHECK: OpReturnValue [[ret]] %elt.firstbitlow = call i32 @llvm.spv.firstbitlow.i64(i64 %a) @@ -87,9 +87,9 @@ entry: ; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a32x4]] ; 
CHECK: [[high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 0 2 ; CHECK: [[low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 1 3 -; CHECK: [[should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[low_bits]] [[const_neg_onex2]] +; CHECK: [[should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[low_bits]] [[const_neg1x2]] ; CHECK: [[ans_bits:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[high_bits]] [[low_bits]] -; CHECK: [[ans_offset:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[const_thirty_twox2]] [[const_zerox2]] +; CHECK: [[ans_offset:%.+]] = OpSelect [[u32x2_t]] [[should_use_high]] [[const_32x2]] [[const_0x2]] ; CHECK: [[ret:%.+]] = OpIAdd [[u32x2_t]] [[ans_offset]] [[ans_bits]] ; CHECK: OpReturnValue [[ret]] %elt.firstbitlow = call <2 x i32> @llvm.spv.firstbitlow.v2i64(<2 x i64> %a) From b6bdc0dffb05163dedb5e5e82bf8b9f079298225 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Fri, 22 Nov 2024 13:19:43 -0700 Subject: [PATCH 04/17] Address comments --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 327 ++++++------------ 1 file changed, 108 insertions(+), 219 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index e1c58f8578554..bca67585d2858 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -96,27 +96,20 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectFirstBitHigh(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, bool IsSigned) const; - bool selectFirstBitHigh16(Register ResVReg, const SPIRVType *ResType, - MachineInstr &I, bool IsSigned) const; - - bool selectFirstBitHigh32(Register ResVReg, const SPIRVType *ResType, - MachineInstr &I, Register SrcReg, - bool IsSigned) const; - - bool selectFirstBitHigh64(Register ResVReg, const SPIRVType *ResType, - MachineInstr &I, bool IsSigned) const; - bool selectFirstBitLow(Register ResVReg, 
const SPIRVType *ResType, MachineInstr &I) const; - bool selectFirstBitLow16(Register ResVReg, const SPIRVType *ResType, - MachineInstr &I) const; + bool selectFirstBitSet16(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, unsigned ExtendOpcode, + unsigned BitSetOpcode) const; - bool selectFirstBitLow32(Register ResVReg, const SPIRVType *ResType, - MachineInstr &I, Register SrcReg) const; + bool selectFirstBitSet32(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, Register SrcReg, + unsigned Opcode) const; - bool selectFirstBitLow64(Register ResVReg, const SPIRVType *ResType, - MachineInstr &I) const; + bool selectFirstBitSet64(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, unsigned ExtendOpcode, + unsigned BitSetOpcode, bool SwapPrimarySide) const; bool selectGlobalValue(Register ResVReg, MachineInstr &I, const MachineInstr *Init = nullptr) const; @@ -3153,187 +3146,34 @@ Register SPIRVInstructionSelector::buildPointerToResource( return AcReg; } -bool SPIRVInstructionSelector::selectFirstBitHigh16(Register ResVReg, - const SPIRVType *ResType, - MachineInstr &I, - bool IsSigned) const { - unsigned Opcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert; - // zero or sign extend +bool SPIRVInstructionSelector::selectFirstBitSet16( + Register ResVReg, const SPIRVType *ResType, MachineInstr &I, + unsigned ExtendOpcode, unsigned BitSetOpcode) const { Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - bool Result = - selectOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()}, Opcode); - return Result && selectFirstBitHigh32(ResVReg, ResType, I, ExtReg, IsSigned); -} - -bool SPIRVInstructionSelector::selectFirstBitHigh32(Register ResVReg, - const SPIRVType *ResType, - MachineInstr &I, - Register SrcReg, - bool IsSigned) const { - unsigned Opcode = IsSigned ? 
GL::FindSMsb : GL::FindUMsb; - return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst)) - .addDef(ResVReg) - .addUse(GR.getSPIRVTypeID(ResType)) - .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450)) - .addImm(Opcode) - .addUse(SrcReg) - .constrainAllUses(TII, TRI, RBI); -} - -bool SPIRVInstructionSelector::selectFirstBitHigh64(Register ResVReg, - const SPIRVType *ResType, - MachineInstr &I, - bool IsSigned) const { - Register OpReg = I.getOperand(2).getReg(); - // 1. split our int64 into 2 pieces using a bitcast - unsigned count = GR.getScalarOrVectorComponentCount(ResType); - SPIRVType *baseType = GR.retrieveScalarOrVectorIntType(ResType); - MachineIRBuilder MIRBuilder(I); - SPIRVType *postCastT = - GR.getOrCreateSPIRVVectorType(baseType, 2 * count, MIRBuilder); - Register bitcastReg = MRI->createVirtualRegister(GR.getRegClass(postCastT)); - bool Result = - selectOpWithSrcs(bitcastReg, postCastT, I, {OpReg}, SPIRV::OpBitcast); - - // 2. call firstbithigh - Register FBHReg = MRI->createVirtualRegister(GR.getRegClass(postCastT)); - Result &= selectFirstBitHigh32(FBHReg, postCastT, I, bitcastReg, IsSigned); - - // 3. 
split result vector into high bits and low bits - Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - Register LowReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - - bool ZeroAsNull = STI.isOpenCLEnv(); - bool isScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector; - if (isScalarRes) { - // if scalar do a vector extract - Result &= selectOpWithSrcs( - HighReg, ResType, I, - {FBHReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)}, - SPIRV::OpVectorExtractDynamic); - Result &= selectOpWithSrcs( - LowReg, ResType, I, - {FBHReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)}, - SPIRV::OpVectorExtractDynamic); - } else { // vector case do a shufflevector - auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(SPIRV::OpVectorShuffle)) - .addDef(HighReg) - .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(FBHReg) - .addUse(FBHReg); - // ^^ this vector will not be selected from; could be empty - unsigned j; - for (j = 0; j < count * 2; j += 2) { - MIB.addImm(j); - } - Result &= MIB.constrainAllUses(TII, TRI, RBI); - - // get low bits - MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(SPIRV::OpVectorShuffle)) - .addDef(LowReg) - .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(FBHReg) - .addUse(FBHReg); - // ^^ this vector will not be selected from; could be empty - for (j = 1; j < count * 2; j += 2) { - MIB.addImm(j); - } - Result &= MIB.constrainAllUses(TII, TRI, RBI); - } - - // 4. 
check if result of each top 32 bits is == -1 - SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII); - Register NegOneReg; - Register Reg0; - Register Reg32; - unsigned selectOp; - unsigned addOp; - if (isScalarRes) { - NegOneReg = - GR.getOrCreateConstInt((unsigned)-1, I, ResType, TII, ZeroAsNull); - Reg0 = GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull); - Reg32 = GR.getOrCreateConstInt(32, I, ResType, TII, ZeroAsNull); - selectOp = SPIRV::OpSelectSISCond; - addOp = SPIRV::OpIAddS; - } else { - BoolType = GR.getOrCreateSPIRVVectorType(BoolType, count, MIRBuilder); - NegOneReg = - GR.getOrCreateConstVector((unsigned)-1, I, ResType, TII, ZeroAsNull); - Reg0 = GR.getOrCreateConstVector(0, I, ResType, TII, ZeroAsNull); - Reg32 = GR.getOrCreateConstVector(32, I, ResType, TII, ZeroAsNull); - selectOp = SPIRV::OpSelectVIVCond; - addOp = SPIRV::OpIAddV; - } - - // check if the high bits are == -1; true if -1 - Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType)); - Result &= selectOpWithSrcs(BReg, BoolType, I, {HighReg, NegOneReg}, - SPIRV::OpIEqual); - - // Select low bits if true in BReg, otherwise high bits - Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - Result &= - selectOpWithSrcs(TmpReg, ResType, I, {BReg, LowReg, HighReg}, selectOp); - - // Add 32 for high bits, 0 for low bits - Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - Result &= selectOpWithSrcs(ValReg, ResType, I, {BReg, Reg0, Reg32}, selectOp); + bool Result = selectOpWithSrcs(ExtReg, ResType, I, {I.getOperand(2).getReg()}, + ExtendOpcode); return Result && - selectOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, addOp); -} - -bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg, - const SPIRVType *ResType, - MachineInstr &I, - bool IsSigned) const { - // FindUMsb and FindSMsb intrinsics only support 32 bit integers - Register OpReg = I.getOperand(2).getReg(); - SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg); 
- - switch (GR.getScalarOrVectorBitWidth(OpType)) { - case 16: - return selectFirstBitHigh16(ResVReg, ResType, I, IsSigned); - case 32: - return selectFirstBitHigh32(ResVReg, ResType, I, OpReg, IsSigned); - case 64: - return selectFirstBitHigh64(ResVReg, ResType, I, IsSigned); - default: - report_fatal_error( - "spv_firstbituhigh and spv_firstbitshigh only support 16,32,64 bits."); - } + selectFirstBitSet32(ResVReg, ResType, I, ExtReg, BitSetOpcode); } -bool SPIRVInstructionSelector::selectFirstBitLow16(Register ResVReg, - const SPIRVType *ResType, - MachineInstr &I) const { - // OpUConvert treats the operand bits as an unsigned i16 and zero extends it - // to an unsigned i32. As this leaves all the least significant bits unchanged - // the first set bit from the LSB side doesn't change. - Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - bool Result = selectNAryOpWithSrcs( - ExtReg, ResType, I, {I.getOperand(2).getReg()}, SPIRV::OpUConvert); - return Result && selectFirstBitLow32(ResVReg, ResType, I, ExtReg); -} - -bool SPIRVInstructionSelector::selectFirstBitLow32(Register ResVReg, +bool SPIRVInstructionSelector::selectFirstBitSet32(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, - Register SrcReg) const { + Register SrcReg, + unsigned Opcode) const { return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst)) .addDef(ResVReg) .addUse(GR.getSPIRVTypeID(ResType)) .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450)) - .addImm(GL::FindILsb) + .addImm(Opcode) .addUse(SrcReg) .constrainAllUses(TII, TRI, RBI); } -bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, - const SPIRVType *ResType, - MachineInstr &I) const { +bool SPIRVInstructionSelector::selectFirstBitSet64( + Register ResVReg, const SPIRVType *ResType, MachineInstr &I, + unsigned ExtendOpcode, unsigned BitSetOpcode, bool SwapPrimarySide) const { Register OpReg = I.getOperand(2).getReg(); // 1. 
Split int64 into 2 pieces using a bitcast @@ -3345,11 +3185,12 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, Register BitcastReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); bool Result = - selectUnOpWithSrc(BitcastReg, PostCastType, I, OpReg, SPIRV::OpBitcast); + selectOpWithSrcs(BitcastReg, PostCastType, I, {OpReg}, SPIRV::OpBitcast); - // 2. Find the first set bit from the LSB side for all the pieces in #1 - Register FBLReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); - Result = Result && selectFirstBitLow32(FBLReg, PostCastType, I, BitcastReg); + // 2. Find the first set bit from the primary side for all the pieces in #1 + Register FBPReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); + Result = Result && selectFirstBitSet32(FBPReg, PostCastType, I, BitcastReg, + BitSetOpcode); // 3. Split result vector into high bits and low bits Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); @@ -3359,31 +3200,29 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector; if (IsScalarRes) { // if scalar do a vector extract - Result = - Result && - selectNAryOpWithSrcs( - HighReg, ResType, I, - {FBLReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)}, - SPIRV::OpVectorExtractDynamic); - Result = - Result && - selectNAryOpWithSrcs( - LowReg, ResType, I, - {FBLReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)}, - SPIRV::OpVectorExtractDynamic); + Result = Result && + selectOpWithSrcs(HighReg, ResType, I, + {FBPReg, GR.getOrCreateConstInt(0, I, ResType, + TII, ZeroAsNull)}, + SPIRV::OpVectorExtractDynamic); + Result = Result && + selectOpWithSrcs(LowReg, ResType, I, + {FBPReg, GR.getOrCreateConstInt(1, I, ResType, + TII, ZeroAsNull)}, + SPIRV::OpVectorExtractDynamic); } else { // if vector do a shufflevector auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVectorShuffle)) 
.addDef(HighReg) .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(FBLReg) + .addUse(FBPReg) // Per the spec, repeat the vector if only one vec is needed - .addUse(FBLReg); + .addUse(FBPReg); // high bits are stored in even indexes. Extract them from FBLReg - for (unsigned j = 0; j < ComponentCount * 2; j += 2) { - MIB.addImm(j); + for (unsigned J = 0; J < ComponentCount * 2; J += 2) { + MIB.addImm(J); } Result = Result && MIB.constrainAllUses(TII, TRI, RBI); @@ -3391,18 +3230,19 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, TII.get(SPIRV::OpVectorShuffle)) .addDef(LowReg) .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(FBLReg) + .addUse(FBPReg) // Per the spec, repeat the vector if only one vec is needed - .addUse(FBLReg); + .addUse(FBPReg); // low bits are stored in odd indexes. Extract them from FBLReg - for (unsigned j = 1; j < ComponentCount * 2; j += 2) { - MIB.addImm(j); + for (unsigned J = 1; J < ComponentCount * 2; J += 2) { + MIB.addImm(J); } Result = Result && MIB.constrainAllUses(TII, TRI, RBI); } - // 4. Check the result. When low bits == -1 use high, otherwise use low + // 4. Check the result. 
When primary bits == -1 use secondary, otherwise use + // primary SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII); Register NegOneReg; Register Reg0; @@ -3428,23 +3268,66 @@ bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, AddOp = SPIRV::OpIAddV; } - // Check if the low bits are == -1 + Register PrimaryReg; + Register SecondaryReg; + Register PrimaryShiftReg; + Register SecondaryShiftReg; + if (SwapPrimarySide) { + PrimaryReg = LowReg; + SecondaryReg = HighReg; + PrimaryShiftReg = Reg0; + SecondaryShiftReg = Reg32; + } else { + PrimaryReg = HighReg; + SecondaryReg = LowReg; + PrimaryShiftReg = Reg32; + SecondaryShiftReg = Reg0; + } + + // Check if the primary bits are == -1 Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType)); - Result = Result && selectNAryOpWithSrcs(BReg, BoolType, I, - {LowReg, NegOneReg}, SPIRV::OpIEqual); + Result = Result && selectOpWithSrcs(BReg, BoolType, I, + {PrimaryReg, NegOneReg}, SPIRV::OpIEqual); - // Select high bits if true in BReg, otherwise low bits + // Select secondary bits if true in BReg, otherwise primary bits Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - Result = Result && selectNAryOpWithSrcs(TmpReg, ResType, I, - {BReg, HighReg, LowReg}, SelectOp); + Result = + Result && selectOpWithSrcs(TmpReg, ResType, I, + {BReg, SecondaryReg, PrimaryReg}, SelectOp); // 5. 
Add 32 when high bits are used, otherwise 0 for low bits Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - Result = Result && selectNAryOpWithSrcs(ValReg, ResType, I, - {BReg, Reg32, Reg0}, SelectOp); + Result = Result && selectOpWithSrcs( + ValReg, ResType, I, + {BReg, SecondaryShiftReg, PrimaryShiftReg}, SelectOp); return Result && - selectNAryOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp); + selectOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp); +} + +bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + bool IsSigned) const { + // FindUMsb and FindSMsb intrinsics only support 32 bit integers + Register OpReg = I.getOperand(2).getReg(); + SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg); + // zero or sign extend + unsigned ExtendOpcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert; + unsigned BitSetOpcode = IsSigned ? GL::FindSMsb : GL::FindUMsb; + + switch (GR.getScalarOrVectorBitWidth(OpType)) { + case 16: + return selectFirstBitSet16(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode); + case 32: + return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode); + case 64: + return selectFirstBitSet64(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode, + /*SwapPrimarySide=*/false); + default: + report_fatal_error( + "spv_firstbituhigh and spv_firstbitshigh only support 16,32,64 bits."); + } } bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg, @@ -3453,14 +3336,20 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg, // FindILsb intrinsic only supports 32 bit integers Register OpReg = I.getOperand(2).getReg(); SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg); + // OpUConvert treats the operand bits as an unsigned i16 and zero extends it + // to an unsigned i32. As this leaves all the least significant bits unchanged + // so the first set bit from the LSB side doesn't change. 
+ unsigned ExtendOpcode = SPIRV::OpUConvert; + unsigned BitSetOpcode = GL::FindILsb; switch (GR.getScalarOrVectorBitWidth(OpType)) { case 16: - return selectFirstBitLow16(ResVReg, ResType, I); + return selectFirstBitSet16(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode); case 32: - return selectFirstBitLow32(ResVReg, ResType, I, OpReg); + return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode); case 64: - return selectFirstBitLow64(ResVReg, ResType, I); + return selectFirstBitSet64(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode, + /*SwapPrimarySide=*/true); default: report_fatal_error("spv_firstbitlow only supports 16,32,64 bits."); } From 3c74bfe7d2835aded89dabbd2cc07cda9a987a7a Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Fri, 22 Nov 2024 13:40:45 -0700 Subject: [PATCH 05/17] cleanup --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index bca67585d2858..cb5e7c6be3573 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -108,8 +108,8 @@ class SPIRVInstructionSelector : public InstructionSelector { unsigned Opcode) const; bool selectFirstBitSet64(Register ResVReg, const SPIRVType *ResType, - MachineInstr &I, unsigned ExtendOpcode, - unsigned BitSetOpcode, bool SwapPrimarySide) const; + MachineInstr &I, unsigned BitSetOpcode, + bool SwapPrimarySide) const; bool selectGlobalValue(Register ResVReg, MachineInstr &I, const MachineInstr *Init = nullptr) const; @@ -3171,9 +3171,11 @@ bool SPIRVInstructionSelector::selectFirstBitSet32(Register ResVReg, .constrainAllUses(TII, TRI, RBI); } -bool SPIRVInstructionSelector::selectFirstBitSet64( - Register ResVReg, const SPIRVType *ResType, MachineInstr &I, - unsigned ExtendOpcode, unsigned BitSetOpcode, bool SwapPrimarySide) const { +bool 
SPIRVInstructionSelector::selectFirstBitSet64(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + unsigned BitSetOpcode, + bool SwapPrimarySide) const { Register OpReg = I.getOperand(2).getReg(); // 1. Split int64 into 2 pieces using a bitcast @@ -3188,8 +3190,8 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( selectOpWithSrcs(BitcastReg, PostCastType, I, {OpReg}, SPIRV::OpBitcast); // 2. Find the first set bit from the primary side for all the pieces in #1 - Register FBPReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); - Result = Result && selectFirstBitSet32(FBPReg, PostCastType, I, BitcastReg, + Register FBSReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); + Result = Result && selectFirstBitSet32(FBSReg, PostCastType, I, BitcastReg, BitSetOpcode); // 3. Split result vector into high bits and low bits @@ -3202,12 +3204,12 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( // if scalar do a vector extract Result = Result && selectOpWithSrcs(HighReg, ResType, I, - {FBPReg, GR.getOrCreateConstInt(0, I, ResType, + {FBSReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)}, SPIRV::OpVectorExtractDynamic); Result = Result && selectOpWithSrcs(LowReg, ResType, I, - {FBPReg, GR.getOrCreateConstInt(1, I, ResType, + {FBSReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)}, SPIRV::OpVectorExtractDynamic); } else { @@ -3216,11 +3218,11 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( TII.get(SPIRV::OpVectorShuffle)) .addDef(HighReg) .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(FBPReg) + .addUse(FBSReg) // Per the spec, repeat the vector if only one vec is needed - .addUse(FBPReg); + .addUse(FBSReg); - // high bits are stored in even indexes. Extract them from FBLReg + // high bits are stored in even indexes. 
Extract them from FBSReg for (unsigned J = 0; J < ComponentCount * 2; J += 2) { MIB.addImm(J); } @@ -3230,11 +3232,11 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( TII.get(SPIRV::OpVectorShuffle)) .addDef(LowReg) .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(FBPReg) + .addUse(FBSReg) // Per the spec, repeat the vector if only one vec is needed - .addUse(FBPReg); + .addUse(FBSReg); - // low bits are stored in odd indexes. Extract them from FBLReg + // low bits are stored in odd indexes. Extract them from FBSReg for (unsigned J = 1; J < ComponentCount * 2; J += 2) { MIB.addImm(J); } @@ -3322,7 +3324,7 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg, case 32: return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode); case 64: - return selectFirstBitSet64(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode, + return selectFirstBitSet64(ResVReg, ResType, I, BitSetOpcode, /*SwapPrimarySide=*/false); default: report_fatal_error( @@ -3348,7 +3350,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg, case 32: return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode); case 64: - return selectFirstBitSet64(ResVReg, ResType, I, ExtendOpcode, BitSetOpcode, + return selectFirstBitSet64(ResVReg, ResType, I, BitSetOpcode, /*SwapPrimarySide=*/true); default: report_fatal_error("spv_firstbitlow only supports 16,32,64 bits."); From a90026c858f8db3b9f1bcb2b45d764d255672c99 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Fri, 22 Nov 2024 16:58:29 -0700 Subject: [PATCH 06/17] Divide vectors that surpass 4 element limit --- llvm/lib/Target/DirectX/DXIL.td | 2 +- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 136 ++++++++++++++---- .../SPIRV/hlsl-intrinsics/firstbitlow.ll | 119 ++++++++++++++- 3 files changed, 230 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index d6d78581bafbf..367009d7f92e6 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ 
b/llvm/lib/Target/DirectX/DXIL.td @@ -621,7 +621,7 @@ def CountBits : DXILOp<31, unaryBits> { def FirstbitLo : DXILOp<32, unaryBits> { let Doc = "Returns the location of the first set bit starting from " "the lowest order bit and working upward."; - let LLVMIntrinsic = int_dx_firstbitlow; + let intrinsics = [ IntrinSelect<int_dx_firstbitlow> ]; let arguments = [OverloadTy]; let result = Int32Ty; let overloads = diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index cb5e7c6be3573..b2115528b8dcb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -108,8 +108,8 @@ class SPIRVInstructionSelector : public InstructionSelector { unsigned Opcode) const; bool selectFirstBitSet64(Register ResVReg, const SPIRVType *ResType, - MachineInstr &I, unsigned BitSetOpcode, - bool SwapPrimarySide) const; + MachineInstr &I, Register SrcReg, + unsigned BitSetOpcode, bool SwapPrimarySide) const; bool selectGlobalValue(Register ResVReg, MachineInstr &I, const MachineInstr *Init = nullptr) const; @@ -3171,23 +3171,116 @@ bool SPIRVInstructionSelector::selectFirstBitSet32(Register ResVReg, .constrainAllUses(TII, TRI, RBI); } -bool SPIRVInstructionSelector::selectFirstBitSet64(Register ResVReg, - const SPIRVType *ResType, - MachineInstr &I, - unsigned BitSetOpcode, - bool SwapPrimarySide) const { - Register OpReg = I.getOperand(2).getReg(); - - // 1.
Split int64 into 2 pieces using a bitcast +bool SPIRVInstructionSelector::selectFirstBitSet64( + Register ResVReg, const SPIRVType *ResType, MachineInstr &I, + Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const { unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType); SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType); + bool ZeroAsNull = STI.isOpenCLEnv(); + Register ConstIntZero = + GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull); + Register ConstIntOne = + GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull); + + // SPIRV doesn't support vectors with more than 4 components. Since the + // algoritm below converts i64 -> i32x2 and i64x4 -> i32x8 it can only + // operate on vectors with 2 or less components. When largers vectors are + // seen. Split them, recurse, then recombine them. + if (ComponentCount > 2) { + unsigned LeftComponentCount = ComponentCount / 2; + unsigned RightComponentCount = ComponentCount - LeftComponentCount; + bool LeftIsVector = LeftComponentCount > 1; + + // Split the SrcReg in half into 2 smaller vec registers + // (ie i64x4 -> i64x2, i64x2) + MachineIRBuilder MIRBuilder(I); + SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder); + SPIRVType *LeftVecOpType; + SPIRVType *LeftVecResType; + if (LeftIsVector) { + LeftVecOpType = + GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder); + LeftVecResType = GR.getOrCreateSPIRVVectorType( + BaseType, LeftComponentCount, MIRBuilder); + } else { + LeftVecOpType = OpType; + LeftVecResType = BaseType; + } + + SPIRVType *RightVecOpType = + GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder); + SPIRVType *RightVecResType = GR.getOrCreateSPIRVVectorType( + BaseType, RightComponentCount, MIRBuilder); + + Register LeftSideIn = + MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType)); + Register RightSideIn = + MRI->createVirtualRegister(GR.getRegClass(RightVecOpType)); + + bool Result; + + if 
(LeftIsVector) { + auto MIB = + BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(SPIRV::OpVectorShuffle)) + .addDef(LeftSideIn) + .addUse(GR.getSPIRVTypeID(LeftVecOpType)) + .addUse(SrcReg) + // Per the spec, repeat the vector if only one vec is needed + .addUse(SrcReg); + + for (unsigned J = 0; J < LeftComponentCount; J++) { + MIB.addImm(J); + } + + Result = MIB.constrainAllUses(TII, TRI, RBI); + } else { + Result = + selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero}, + SPIRV::OpVectorExtractDynamic); + } + + auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(SPIRV::OpVectorShuffle)) + .addDef(RightSideIn) + .addUse(GR.getSPIRVTypeID(RightVecOpType)) + .addUse(SrcReg) + // Per the spec, repeat the vector if only one vec is needed + .addUse(SrcReg); + + for (unsigned J = LeftComponentCount; J < ComponentCount; J++) { + MIB.addImm(J); + } + + Result = Result && MIB.constrainAllUses(TII, TRI, RBI); + + // Recursively call selectFirstBitSet64 on the 2 registers + Register LeftSideOut = + MRI->createVirtualRegister(GR.getRegClass(LeftVecResType)); + Register RightSideOut = + MRI->createVirtualRegister(GR.getRegClass(RightVecResType)); + Result = Result && + selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn, + BitSetOpcode, SwapPrimarySide); + Result = Result && + selectFirstBitSet64(RightSideOut, RightVecResType, I, RightSideIn, + BitSetOpcode, SwapPrimarySide); + + // Join the two resulting registers back into the return type + // (ie i32x2, i32x2 -> i32x4) + return Result && + selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut}, + SPIRV::OpCompositeConstruct); + } + + // 1. 
Split int64 into 2 pieces using a bitcast MachineIRBuilder MIRBuilder(I); SPIRVType *PostCastType = GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder); Register BitcastReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); bool Result = - selectOpWithSrcs(BitcastReg, PostCastType, I, {OpReg}, SPIRV::OpBitcast); + selectOpWithSrcs(BitcastReg, PostCastType, I, {SrcReg}, SPIRV::OpBitcast); // 2. Find the first set bit from the primary side for all the pieces in #1 Register FBSReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); @@ -3198,20 +3291,15 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(Register ResVReg, Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); Register LowReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - bool ZeroAsNull = STI.isOpenCLEnv(); bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector; if (IsScalarRes) { // if scalar do a vector extract - Result = Result && - selectOpWithSrcs(HighReg, ResType, I, - {FBSReg, GR.getOrCreateConstInt(0, I, ResType, - TII, ZeroAsNull)}, - SPIRV::OpVectorExtractDynamic); - Result = Result && - selectOpWithSrcs(LowReg, ResType, I, - {FBSReg, GR.getOrCreateConstInt(1, I, ResType, - TII, ZeroAsNull)}, - SPIRV::OpVectorExtractDynamic); + Result = + Result && selectOpWithSrcs(HighReg, ResType, I, {FBSReg, ConstIntZero}, + SPIRV::OpVectorExtractDynamic); + Result = + Result && selectOpWithSrcs(LowReg, ResType, I, {FBSReg, ConstIntOne}, + SPIRV::OpVectorExtractDynamic); } else { // if vector do a shufflevector auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), @@ -3324,7 +3412,7 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg, case 32: return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode); case 64: - return selectFirstBitSet64(ResVReg, ResType, I, BitSetOpcode, + return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode, /*SwapPrimarySide=*/false); default: report_fatal_error( @@ 
-3350,7 +3438,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg, case 32: return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode); case 64: - return selectFirstBitSet64(ResVReg, ResType, I, BitSetOpcode, + return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode, /*SwapPrimarySide=*/true); default: report_fatal_error("spv_firstbitlow only supports 16,32,64 bits."); diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll index 05488479e5bd0..f3cc73637b136 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll @@ -5,6 +5,7 @@ ; CHECK-DAG: OpMemoryModel Logical GLSL450 ; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0 ; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2 +; CHECK-DAG: [[u32x3_t:%.+]] = OpTypeVector [[u32_t]] 3 ; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4 ; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0 ; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]] @@ -15,8 +16,12 @@ ; CHECK-DAG: [[const_neg1x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg1]] [[const_neg1]] ; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0 ; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2 +; CHECK-DAG: [[u16x3_t:%.+]] = OpTypeVector [[u16_t]] 3 +; CHECK-DAG: [[u16x4_t:%.+]] = OpTypeVector [[u16_t]] 4 ; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0 ; CHECK-DAG: [[u64x2_t:%.+]] = OpTypeVector [[u64_t]] 2 +; CHECK-DAG: [[u64x3_t:%.+]] = OpTypeVector [[u64_t]] 3 +; CHECK-DAG: [[u64x4_t:%.+]] = OpTypeVector [[u64_t]] 4 ; CHECK-DAG: [[bool_t:%.+]] = OpTypeBool ; CHECK-DAG: [[boolx2_t:%.+]] = OpTypeVector [[bool_t]] 2 @@ -30,8 +35,8 @@ entry: ret i32 %elt.firstbitlow } -; CHECK-LABEL: Begin function firstbitlow_2xi32 -define noundef <2 x i32> @firstbitlow_2xi32(<2 x i32> noundef %a) { +; CHECK-LABEL: Begin function firstbitlow_v2xi32 +define noundef <2 
x i32> @firstbitlow_v2xi32(<2 x i32> noundef %a) { entry: ; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x2_t]] ; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a]] @@ -40,6 +45,26 @@ entry: ret <2 x i32> %elt.firstbitlow } +; CHECK-LABEL: Begin function firstbitlow_v3xi32 +define noundef <3 x i32> @firstbitlow_v3xi32(<3 x i32> noundef %a) { +entry: +; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x3_t]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindILsb [[a]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i32(<3 x i32> %a) + ret <3 x i32> %elt.firstbitlow +} + +; CHECK-LABEL: Begin function firstbitlow_v4xi32 +define noundef <4 x i32> @firstbitlow_v4xi32(<4 x i32> noundef %a) { +entry: +; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x4_t]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i32(<4 x i32> %a) + ret <4 x i32> %elt.firstbitlow +} + ; CHECK-LABEL: Begin function firstbitlow_i16 define noundef i32 @firstbitlow_i16(i16 noundef %a) { entry: @@ -62,6 +87,28 @@ entry: ret <2 x i32> %elt.firstbitlow } +; CHECK-LABEL: Begin function firstbitlow_v3xi16 +define noundef <3 x i32> @firstbitlow_v3xi16(<3 x i16> noundef %a) { +entry: +; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x3_t]] +; CHECK: [[a32:%.+]] = OpUConvert [[u32x3_t]] [[a16]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindILsb [[a32]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i16(<3 x i16> %a) + ret <3 x i32> %elt.firstbitlow +} + +; CHECK-LABEL: Begin function firstbitlow_v4xi16 +define noundef <4 x i32> @firstbitlow_v4xi16(<4 x i16> noundef %a) { +entry: +; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x4_t]] +; CHECK: [[a32:%.+]] = OpUConvert [[u32x4_t]] [[a16]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] 
FindILsb [[a32]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i16(<4 x i16> %a) + ret <4 x i32> %elt.firstbitlow +} + ; CHECK-LABEL: Begin function firstbitlow_i64 define noundef i32 @firstbitlow_i64(i64 noundef %a) { entry: @@ -96,6 +143,74 @@ entry: ret <2 x i32> %elt.firstbitlow } +; CHECK-LABEL: Begin function firstbitlow_v3i64 +define noundef <3 x i32> @firstbitlow_v3i64(<3 x i64> noundef %a) { +entry: +; Split the i64x3 into i64, i64x2 +; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]] +; CHECK: [[left:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_0]] +; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 1 2 + +; Do firstbitlow on i64, i64x2 +; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x2_t]] [[left]] +; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[left_cast]] +; CHECK: [[left_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_0]] +; CHECK: [[left_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_1]] +; CHECK: [[left_should_use_high:%.+]] = OpIEqual [[bool_t]] [[left_low_bits]] [[const_neg1]] +; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32_t]] [[left_should_use_high]] [[left_high_bits]] [[left_low_bits]] +; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32_t]] [[left_should_use_high]] [[const_32]] [[const_0]] +; CHECK: [[left_res:%.+]] = OpIAdd [[u32_t]] [[left_ans_offset]] [[left_ans_bits]] + +; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]] +; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[right_cast]] +; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2 +; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3 +; CHECK: [[right_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[right_low_bits]] [[const_neg1x2]] +; CHECK: [[right_ans_bits:%.+]] = OpSelect 
[[u32x2_t]] [[right_should_use_high]] [[right_high_bits]] [[right_low_bits]] +; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[const_32x2]] [[const_0x2]] +; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]] + +; Merge the resulting i32, i32x2 into the final i32x3 and return it +; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x3_t]] [[left_res]] [[right_res]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i64(<3 x i64> %a) + ret <3 x i32> %elt.firstbitlow +} + +; CHECK-LABEL: Begin function firstbitlow_v4i64 +define noundef <4 x i32> @firstbitlow_v4i64(<4 x i64> noundef %a) { +entry: +; Split the i64x4 into 2 i64x2 +; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]] +; CHECK: [[left:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1 +; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3 + +; Do firstbitlow on the 2 i64x2 +; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x4_t]] [[left]] +; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[left_cast]] +; CHECK: [[left_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 0 2 +; CHECK: [[left_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 1 3 +; CHECK: [[left_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[left_low_bits]] [[const_neg1x2]] +; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_high]] [[left_high_bits]] [[left_low_bits]] +; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_high]] [[const_32x2]] [[const_0x2]] +; CHECK: [[left_res:%.+]] = OpIAdd [[u32x2_t]] [[left_ans_offset]] [[left_ans_bits]] + +; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]] +; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[right_cast]] +; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] 
[[right_lsb_bits]] 0 2 +; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3 +; CHECK: [[right_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[right_low_bits]] [[const_neg1x2]] +; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[right_high_bits]] [[right_low_bits]] +; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[const_32x2]] [[const_0x2]] +; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]] + +; Merge the resulting 2 i32x2 into the final i32x4 and return it +; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[left_res]] [[right_res]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i64(<4 x i64> %a) + ret <4 x i32> %elt.firstbitlow +} + ;declare i16 @llvm.spv.firstbitlow.i16(i16) ;declare i32 @llvm.spv.firstbitlow.i32(i32) ;declare i64 @llvm.spv.firstbitlow.i64(i64) From e67adb99590fcc2fe256ec04e0f31c39ea315ab8 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Mon, 16 Dec 2024 10:59:17 -0700 Subject: [PATCH 07/17] Address comments --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 86 ++++++++++++------- 1 file changed, 55 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index b2115528b8dcb..4588c3bcd2e77 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -105,12 +105,17 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectFirstBitSet32(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, Register SrcReg, - unsigned Opcode) const; + unsigned BitSetOpcode) const; bool selectFirstBitSet64(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const; + bool selectFirstBitSet64Overflow(Register 
ResVReg, const SPIRVType *ResType, + MachineInstr &I, Register SrcReg, + unsigned BitSetOpcode, + bool SwapPrimarySide) const; + bool selectGlobalValue(Register ResVReg, MachineInstr &I, const MachineInstr *Init = nullptr) const; @@ -3157,51 +3162,42 @@ bool SPIRVInstructionSelector::selectFirstBitSet16( selectFirstBitSet32(ResVReg, ResType, I, ExtReg, BitSetOpcode); } -bool SPIRVInstructionSelector::selectFirstBitSet32(Register ResVReg, - const SPIRVType *ResType, - MachineInstr &I, - Register SrcReg, - unsigned Opcode) const { +bool SPIRVInstructionSelector::selectFirstBitSet32( + Register ResVReg, const SPIRVType *ResType, MachineInstr &I, + Register SrcReg, unsigned BitSetOpcode) const { return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst)) .addDef(ResVReg) .addUse(GR.getSPIRVTypeID(ResType)) .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450)) - .addImm(Opcode) + .addImm(BitSetOpcode) .addUse(SrcReg) .constrainAllUses(TII, TRI, RBI); } -bool SPIRVInstructionSelector::selectFirstBitSet64( +bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( Register ResVReg, const SPIRVType *ResType, MachineInstr &I, Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const { + unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType); SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType); bool ZeroAsNull = STI.isOpenCLEnv(); Register ConstIntZero = GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull); - Register ConstIntOne = - GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull); + unsigned LeftComponentCount = ComponentCount / 2; + unsigned RightComponentCount = ComponentCount - LeftComponentCount; + bool LeftIsVector = LeftComponentCount > 1; - // SPIRV doesn't support vectors with more than 4 components. Since the - // algoritm below converts i64 -> i32x2 and i64x4 -> i32x8 it can only - // operate on vectors with 2 or less components. When largers vectors are - // seen.
Split them, recurse, then recombine them. - if (ComponentCount > 2) { - unsigned LeftComponentCount = ComponentCount / 2; - unsigned RightComponentCount = ComponentCount - LeftComponentCount; - bool LeftIsVector = LeftComponentCount > 1; - - // Split the SrcReg in half into 2 smaller vec registers - // (ie i64x4 -> i64x2, i64x2) - MachineIRBuilder MIRBuilder(I); - SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder); - SPIRVType *LeftVecOpType; - SPIRVType *LeftVecResType; - if (LeftIsVector) { - LeftVecOpType = - GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder); - LeftVecResType = GR.getOrCreateSPIRVVectorType( - BaseType, LeftComponentCount, MIRBuilder); + // Split the SrcReg in half into 2 smaller vec registers + // (ie i64x4 -> i64x2, i64x2) + MachineIRBuilder MIRBuilder(I); + SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder); + SPIRVType *LeftVecOpType; + SPIRVType *LeftVecResType; + if (LeftIsVector) { + LeftVecOpType = + GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder); + LeftVecResType = + GR.getOrCreateSPIRVVectorType(BaseType, LeftComponentCount, MIRBuilder); } else { LeftVecOpType = OpType; LeftVecResType = BaseType; @@ -3219,6 +3215,8 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( bool Result; + // Extract the left half from the SrcReg into LeftSideIn + // accounting for the special case when it only has one element if (LeftIsVector) { auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), @@ -3240,6 +3238,9 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( SPIRV::OpVectorExtractDynamic); } + // Extract the right half from the SrcReg into RightSideIn. + // Right will always be a vector since the only time one element is left is + // when Component == 3, and in that case Left is one element. 
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVectorShuffle)) .addDef(RightSideIn) @@ -3254,7 +3255,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( Result = Result && MIB.constrainAllUses(TII, TRI, RBI); - // Recursively call selectFirstBitSet64 on the 2 registers + // Recursively call selectFirstBitSet64 on the 2 halves Register LeftSideOut = MRI->createVirtualRegister(GR.getRegClass(LeftVecResType)); Register RightSideOut = @@ -3271,6 +3272,26 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( return Result && selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut}, SPIRV::OpCompositeConstruct); +} + +bool SPIRVInstructionSelector::selectFirstBitSet64( + Register ResVReg, const SPIRVType *ResType, MachineInstr &I, + Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const { + unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType); + SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType); + bool ZeroAsNull = STI.isOpenCLEnv(); + Register ConstIntZero = + GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull); + Register ConstIntOne = + GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull); + + // SPIRV doesn't support vectors with more than 4 components. Since the + // algoritm below converts i64 -> i32x2 and i64x4 -> i32x8 it can only + // operate on vectors with 2 or less components. When largers vectors are + // seen. Split them, recurse, then recombine them. + if (ComponentCount > 2) { + return selectFirstBitSet64Overflow(ResVReg, ResType, I, SrcReg, + BitSetOpcode, SwapPrimarySide); } // 1. Split int64 into 2 pieces using a bitcast @@ -3362,6 +3383,9 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( Register SecondaryReg; Register PrimaryShiftReg; Register SecondaryShiftReg; + + // By default the emitted opcodes check for the set bit from the MSB side. 
+ // Setting SwapPrimarySide checks the set bit from the LSB side if (SwapPrimarySide) { PrimaryReg = LowReg; SecondaryReg = HighReg; From 7b1a8ccb9bef76f39947118e9236231f66ed5712 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Mon, 16 Dec 2024 11:09:19 -0700 Subject: [PATCH 08/17] format --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 119 +++++++++--------- 1 file changed, 59 insertions(+), 60 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 4588c3bcd2e77..9fe14bc415e04 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3198,80 +3198,79 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder); LeftVecResType = GR.getOrCreateSPIRVVectorType(BaseType, LeftComponentCount, MIRBuilder); - } else { - LeftVecOpType = OpType; - LeftVecResType = BaseType; - } - - SPIRVType *RightVecOpType = - GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder); - SPIRVType *RightVecResType = GR.getOrCreateSPIRVVectorType( - BaseType, RightComponentCount, MIRBuilder); - - Register LeftSideIn = - MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType)); - Register RightSideIn = - MRI->createVirtualRegister(GR.getRegClass(RightVecOpType)); - - bool Result; + } else { + LeftVecOpType = OpType; + LeftVecResType = BaseType; + } - // Extract the left half from the SrcReg into LeftSideIn - // accounting for the special case when it only has one element - if (LeftIsVector) { - auto MIB = - BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(SPIRV::OpVectorShuffle)) - .addDef(LeftSideIn) - .addUse(GR.getSPIRVTypeID(LeftVecOpType)) - .addUse(SrcReg) - // Per the spec, repeat the vector if only one vec is needed - .addUse(SrcReg); + SPIRVType *RightVecOpType = + GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder); + 
SPIRVType *RightVecResType = + GR.getOrCreateSPIRVVectorType(BaseType, RightComponentCount, MIRBuilder); - for (unsigned J = 0; J < LeftComponentCount; J++) { - MIB.addImm(J); - } + Register LeftSideIn = + MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType)); + Register RightSideIn = + MRI->createVirtualRegister(GR.getRegClass(RightVecOpType)); - Result = MIB.constrainAllUses(TII, TRI, RBI); - } else { - Result = - selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero}, - SPIRV::OpVectorExtractDynamic); - } + bool Result; - // Extract the right half from the SrcReg into RightSideIn. - // Right will always be a vector since the only time one element is left is - // when Component == 3, and in that case Left is one element. + // Extract the left half from the SrcReg into LeftSideIn + // accounting for the special case when it only has one element + if (LeftIsVector) { auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVectorShuffle)) - .addDef(RightSideIn) - .addUse(GR.getSPIRVTypeID(RightVecOpType)) + .addDef(LeftSideIn) + .addUse(GR.getSPIRVTypeID(LeftVecOpType)) .addUse(SrcReg) // Per the spec, repeat the vector if only one vec is needed .addUse(SrcReg); - for (unsigned J = LeftComponentCount; J < ComponentCount; J++) { + for (unsigned J = 0; J < LeftComponentCount; J++) { MIB.addImm(J); } - Result = Result && MIB.constrainAllUses(TII, TRI, RBI); + Result = MIB.constrainAllUses(TII, TRI, RBI); + } else { + Result = + selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero}, + SPIRV::OpVectorExtractDynamic); + } - // Recursively call selectFirstBitSet64 on the 2 halves - Register LeftSideOut = - MRI->createVirtualRegister(GR.getRegClass(LeftVecResType)); - Register RightSideOut = - MRI->createVirtualRegister(GR.getRegClass(RightVecResType)); - Result = Result && - selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn, - BitSetOpcode, SwapPrimarySide); - Result = Result && - 
selectFirstBitSet64(RightSideOut, RightVecResType, I, RightSideIn, - BitSetOpcode, SwapPrimarySide); - - // Join the two resulting registers back into the return type - // (ie i32x2, i32x2 -> i32x4) - return Result && - selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut}, - SPIRV::OpCompositeConstruct); + // Extract the right half from the SrcReg into RightSideIn. + // Right will always be a vector since the only time one element is left is + // when Component == 3, and in that case Left is one element. + auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(SPIRV::OpVectorShuffle)) + .addDef(RightSideIn) + .addUse(GR.getSPIRVTypeID(RightVecOpType)) + .addUse(SrcReg) + // Per the spec, repeat the vector if only one vec is needed + .addUse(SrcReg); + + for (unsigned J = LeftComponentCount; J < ComponentCount; J++) { + MIB.addImm(J); + } + + Result = Result && MIB.constrainAllUses(TII, TRI, RBI); + + // Recursively call selectFirstBitSet64 on the 2 halves + Register LeftSideOut = + MRI->createVirtualRegister(GR.getRegClass(LeftVecResType)); + Register RightSideOut = + MRI->createVirtualRegister(GR.getRegClass(RightVecResType)); + Result = + Result && selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn, + BitSetOpcode, SwapPrimarySide); + Result = + Result && selectFirstBitSet64(RightSideOut, RightVecResType, I, + RightSideIn, BitSetOpcode, SwapPrimarySide); + + // Join the two resulting registers back into the return type + // (ie i32x2, i32x2 -> i32x4) + return Result && + selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut}, + SPIRV::OpCompositeConstruct); } bool SPIRVInstructionSelector::selectFirstBitSet64( From 742647b68a4c676b059a67e462d4399677756742 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Mon, 16 Dec 2024 14:02:52 -0700 Subject: [PATCH 09/17] Address comments --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git 
a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 9fe14bc415e04..3872409be44c6 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3191,27 +3191,26 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( // (ie i64x4 -> i64x2, i64x2) MachineIRBuilder MIRBuilder(I); SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder); - SPIRVType *LeftVecOpType; - SPIRVType *LeftVecResType; + SPIRVType *LeftOpType; + SPIRVType *LeftResType; if (LeftIsVector) { - LeftVecOpType = + LeftOpType = GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder); - LeftVecResType = + LeftResType = GR.getOrCreateSPIRVVectorType(BaseType, LeftComponentCount, MIRBuilder); } else { - LeftVecOpType = OpType; - LeftVecResType = BaseType; + LeftOpType = OpType; + LeftResType = BaseType; } - SPIRVType *RightVecOpType = + SPIRVType *RightOpType = GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder); - SPIRVType *RightVecResType = + SPIRVType *RightResType = GR.getOrCreateSPIRVVectorType(BaseType, RightComponentCount, MIRBuilder); - Register LeftSideIn = - MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType)); + Register LeftSideIn = MRI->createVirtualRegister(GR.getRegClass(LeftOpType)); Register RightSideIn = - MRI->createVirtualRegister(GR.getRegClass(RightVecOpType)); + MRI->createVirtualRegister(GR.getRegClass(RightOpType)); bool Result; @@ -3221,7 +3220,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVectorShuffle)) .addDef(LeftSideIn) - .addUse(GR.getSPIRVTypeID(LeftVecOpType)) + .addUse(GR.getSPIRVTypeID(LeftOpType)) .addUse(SrcReg) // Per the spec, repeat the vector if only one vec is needed .addUse(SrcReg); @@ -3232,9 +3231,8 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( Result = 
MIB.constrainAllUses(TII, TRI, RBI); } else { - Result = - selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero}, - SPIRV::OpVectorExtractDynamic); + Result = selectOpWithSrcs(LeftSideIn, LeftOpType, I, {SrcReg, ConstIntZero}, + SPIRV::OpVectorExtractDynamic); } // Extract the right half from the SrcReg into RightSideIn. @@ -3243,7 +3241,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVectorShuffle)) .addDef(RightSideIn) - .addUse(GR.getSPIRVTypeID(RightVecOpType)) + .addUse(GR.getSPIRVTypeID(RightOpType)) .addUse(SrcReg) // Per the spec, repeat the vector if only one vec is needed .addUse(SrcReg); @@ -3256,15 +3254,15 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( // Recursively call selectFirstBitSet64 on the 2 halves Register LeftSideOut = - MRI->createVirtualRegister(GR.getRegClass(LeftVecResType)); + MRI->createVirtualRegister(GR.getRegClass(LeftResType)); Register RightSideOut = - MRI->createVirtualRegister(GR.getRegClass(RightVecResType)); + MRI->createVirtualRegister(GR.getRegClass(RightResType)); Result = - Result && selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn, + Result && selectFirstBitSet64(LeftSideOut, LeftResType, I, LeftSideIn, BitSetOpcode, SwapPrimarySide); Result = - Result && selectFirstBitSet64(RightSideOut, RightVecResType, I, - RightSideIn, BitSetOpcode, SwapPrimarySide); + Result && selectFirstBitSet64(RightSideOut, RightResType, I, RightSideIn, + BitSetOpcode, SwapPrimarySide); // Join the two resulting registers back into the return type // (ie i32x2, i32x2 -> i32x4) From 553335fb8f2e43bee60ec3c8d19e925231d215c1 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Mon, 16 Dec 2024 17:54:47 -0700 Subject: [PATCH 10/17] Update tests --- .../SPIRV/hlsl-intrinsics/firstbithigh.ll | 236 +++++++++++++++--- .../SPIRV/hlsl-intrinsics/firstbitlow.ll | 16 +- 2 files changed, 204 insertions(+), 48 deletions(-) diff 
--git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll index 3d35e102310f5..dee48061d2fe1 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll @@ -1,94 +1,250 @@ ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; CHECK: OpMemoryModel Logical GLSL450 -; CHECK-DAG: [[Z:%.*]] = OpConstant %[[#]] 0 -; CHECK-DAG: [[X:%.*]] = OpConstant %[[#]] 1 +; CHECK-DAG: [[glsl_450_ext:%.+]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: OpMemoryModel Logical GLSL450 +; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0 +; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2 +; CHECK-DAG: [[u32x3_t:%.+]] = OpTypeVector [[u32_t]] 3 +; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4 +; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0 +; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]] +; CHECK-DAG: [[const_1:%.*]] = OpConstant [[u32_t]] 1 +; CHECK-DAG: [[const_32:%.*]] = OpConstant [[u32_t]] 32 +; CHECK-DAG: [[const_32x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_32]] [[const_32]] +; CHECK-DAG: [[const_neg1:%.*]] = OpConstant [[u32_t]] 4294967295 +; CHECK-DAG: [[const_neg1x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg1]] [[const_neg1]] +; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0 +; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2 +; CHECK-DAG: [[u16x3_t:%.+]] = OpTypeVector [[u16_t]] 3 +; CHECK-DAG: [[u16x4_t:%.+]] = OpTypeVector [[u16_t]] 4 +; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0 +; CHECK-DAG: [[u64x2_t:%.+]] = OpTypeVector [[u64_t]] 2 +; CHECK-DAG: [[u64x3_t:%.+]] = OpTypeVector [[u64_t]] 3 +; CHECK-DAG: [[u64x4_t:%.+]] = OpTypeVector [[u64_t]] 4 +; CHECK-DAG: [[bool_t:%.+]] = OpTypeBool +; CHECK-DAG: [[boolx2_t:%.+]] = OpTypeVector 
[[bool_t]] 2 +; CHECK-LABEL: Begin function firstbituhigh_i32 define noundef i32 @firstbituhigh_i32(i32 noundef %a) { entry: -; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb %[[#]] +; CHECK: [[a:%.+]] = OpFunctionParameter [[u32_t]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindUMsb [[a]] +; CHECK: OpReturnValue [[ret]] %elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i32(i32 %a) ret i32 %elt.firstbituhigh } -define noundef <2 x i32> @firstbituhigh_2xi32(<2 x i32> noundef %a) { +; CHECK-LABEL: Begin function firstbituhigh_v2xi32 +define noundef <2 x i32> @firstbituhigh_v2xi32(<2 x i32> noundef %a) { entry: -; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb %[[#]] +; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x2_t]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[a]] +; CHECK: OpReturnValue [[ret]] %elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i32(<2 x i32> %a) ret <2 x i32> %elt.firstbituhigh } +; CHECK-LABEL: Begin function firstbituhigh_v3xi32 +define noundef <3 x i32> @firstbituhigh_v3xi32(<3 x i32> noundef %a) { +entry: +; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x3_t]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindUMsb [[a]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbituhigh = call <3 x i32> @llvm.spv.firstbituhigh.v3i32(<3 x i32> %a) + ret <3 x i32> %elt.firstbituhigh +} + +; CHECK-LABEL: Begin function firstbituhigh_v4xi32 +define noundef <4 x i32> @firstbituhigh_v4xi32(<4 x i32> noundef %a) { +entry: +; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x4_t]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[a]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbituhigh = call <4 x i32> @llvm.spv.firstbituhigh.v4i32(<4 x i32> %a) + ret <4 x i32> %elt.firstbituhigh +} + +; CHECK-LABEL: Begin function firstbituhigh_i16 define noundef i32 @firstbituhigh_i16(i16 noundef %a) { entry: -; CHECK: [[A:%.*]] = OpUConvert %[[#]] -; CHECK: %[[#]] = OpExtInst 
%[[#]] %[[#]] FindUMsb [[A]] +; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16_t]] +; CHECK: [[a32:%.+]] = OpUConvert [[u32_t]] [[a16]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindUMsb [[a32]] +; CHECK: OpReturnValue [[ret]] %elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i16(i16 %a) ret i32 %elt.firstbituhigh } -define noundef <2 x i32> @firstbituhigh_v2i16(<2 x i16> noundef %a) { +; CHECK-LABEL: Begin function firstbituhigh_v2xi16 +define noundef <2 x i32> @firstbituhigh_v2xi16(<2 x i16> noundef %a) { entry: -; CHECK: [[A:%.*]] = OpUConvert %[[#]] -; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb [[A]] +; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x2_t]] +; CHECK: [[a32:%.+]] = OpUConvert [[u32x2_t]] [[a16]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[a32]] +; CHECK: OpReturnValue [[ret]] %elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i16(<2 x i16> %a) ret <2 x i32> %elt.firstbituhigh } +; CHECK-LABEL: Begin function firstbituhigh_v3xi16 +define noundef <3 x i32> @firstbituhigh_v3xi16(<3 x i16> noundef %a) { +entry: +; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x3_t]] +; CHECK: [[a32:%.+]] = OpUConvert [[u32x3_t]] [[a16]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindUMsb [[a32]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbituhigh = call <3 x i32> @llvm.spv.firstbituhigh.v3i16(<3 x i16> %a) + ret <3 x i32> %elt.firstbituhigh +} + +; CHECK-LABEL: Begin function firstbituhigh_v4xi16 +define noundef <4 x i32> @firstbituhigh_v4xi16(<4 x i16> noundef %a) { +entry: +; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x4_t]] +; CHECK: [[a32:%.+]] = OpUConvert [[u32x4_t]] [[a16]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[a32]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbituhigh = call <4 x i32> @llvm.spv.firstbituhigh.v4i16(<4 x i16> %a) + ret <4 x i32> %elt.firstbituhigh +} + +; CHECK-LABEL: Begin function firstbituhigh_i64 define noundef 
i32 @firstbituhigh_i64(i64 noundef %a) { entry: -; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]] -; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindUMsb [[O]] -; CHECK: [[M:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[Z]] -; CHECK: [[L:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[X]] -; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]] -; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]] -; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]] -; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]] +; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]] +; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]] +; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[a32x2]] +; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_0]] +; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_1]] +; CHECK: [[should_use_low:%.+]] = OpIEqual [[bool_t]] [[high_bits]] [[const_neg1]] +; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[low_bits]] [[high_bits]] +; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[const_0]] [[const_32]] +; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]] +; CHECK: OpReturnValue [[ret]] %elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i64(i64 %a) ret i32 %elt.firstbituhigh } -define noundef <2 x i32> @firstbituhigh_v2i64(<2 x i64> noundef %a) { +; CHECK-LABEL: Begin function firstbituhigh_v2xi64 +define noundef <2 x i32> @firstbituhigh_v2xi64(<2 x i64> noundef %a) { entry: -; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]] -; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindUMsb [[O]] -; CHECK: [[M:%.*]] = OpVectorShuffle %[[#]] [[N]] [[N]] 0 -; CHECK: [[L:%.*]] = OpVectorShuffle %[[#]] [[N]] [[N]] 1 -; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]] -; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]] -; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]] -; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]] -; CHECK: OpReturnValue 
[[B]] +; CHECK: [[a64x2:%.+]] = OpFunctionParameter [[u64x2_t]] +; CHECK: [[a32x4:%.+]] = OpBitcast [[u32x4_t]] [[a64x2]] +; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[a32x4]] +; CHECK: [[high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 0 2 +; CHECK: [[low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[lsb_bits]] [[lsb_bits]] 1 3 +; CHECK: [[should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[high_bits]] [[const_neg1x2]] +; CHECK: [[ans_bits:%.+]] = OpSelect [[u32x2_t]] [[should_use_low]] [[low_bits]] [[high_bits]] +; CHECK: [[ans_offset:%.+]] = OpSelect [[u32x2_t]] [[should_use_low]] [[const_0x2]] [[const_32x2]] +; CHECK: [[ret:%.+]] = OpIAdd [[u32x2_t]] [[ans_offset]] [[ans_bits]] +; CHECK: OpReturnValue [[ret]] %elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i64(<2 x i64> %a) ret <2 x i32> %elt.firstbituhigh } +; CHECK-LABEL: Begin function firstbituhigh_v3xi64 +define noundef <3 x i32> @firstbituhigh_v3xi64(<3 x i64> noundef %a) { +entry: +; Split the i64x3 into i64, i64x2 +; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]] +; CHECK: [[left:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_0]] +; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 1 2 + +; Do firstbituhigh on i64, i64x2 +; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x2_t]] [[left]] +; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[left_cast]] +; CHECK: [[left_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_0]] +; CHECK: [[left_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_1]] +; CHECK: [[left_should_use_low:%.+]] = OpIEqual [[bool_t]] [[left_high_bits]] [[const_neg1]] +; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32_t]] [[left_should_use_low]] [[left_low_bits]] [[left_high_bits]] +; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32_t]] [[left_should_use_low]] [[const_0]] [[const_32]] +; CHECK: [[left_res:%.+]] = OpIAdd [[u32_t]] 
[[left_ans_offset]] [[left_ans_bits]] + +; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]] +; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[right_cast]] +; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2 +; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3 +; CHECK: [[right_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[right_high_bits]] [[const_neg1x2]] +; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[right_low_bits]] [[right_high_bits]] +; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[const_0x2]] [[const_32x2]] +; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]] + +; Merge the resulting i32, i32x2 into the final i32x3 and return it +; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x3_t]] [[left_res]] [[right_res]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbituhigh = call <3 x i32> @llvm.spv.firstbituhigh.v3i64(<3 x i64> %a) + ret <3 x i32> %elt.firstbituhigh +} + +; CHECK-LABEL: Begin function firstbituhigh_v4xi64 +define noundef <4 x i32> @firstbituhigh_v4xi64(<4 x i64> noundef %a) { +entry: +; Split the i64x4 into 2 i64x2 +; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]] +; CHECK: [[left:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1 +; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3 + +; Do firstbithigh on the 2 i64x2 +; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x4_t]] [[left]] +; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[left_cast]] +; CHECK: [[left_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 0 2 +; CHECK: [[left_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 1 3 +; CHECK: [[left_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[left_high_bits]] [[const_neg1x2]] +; 
CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_low]] [[left_low_bits]] [[left_high_bits]] +; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_low]] [[const_0x2]] [[const_32x2]] +; CHECK: [[left_res:%.+]] = OpIAdd [[u32x2_t]] [[left_ans_offset]] [[left_ans_bits]] + +; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]] +; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[right_cast]] +; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2 +; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3 +; CHECK: [[right_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[right_high_bits]] [[const_neg1x2]] +; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[right_low_bits]] [[right_high_bits]] +; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[const_0x2]] [[const_32x2]] +; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]] + +; Merge the resulting 2 i32x2 into the final i32x4 and return it +; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[left_res]] [[right_res]] +; CHECK: OpReturnValue [[ret]] + %elt.firstbituhigh = call <4 x i32> @llvm.spv.firstbituhigh.v4i64(<4 x i64> %a) + ret <4 x i32> %elt.firstbituhigh +} + +; CHECK-LABEL: Begin function firstbitshigh_i32 define noundef i32 @firstbitshigh_i32(i32 noundef %a) { entry: -; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindSMsb %[[#]] +; CHECK: [[a:%.+]] = OpFunctionParameter [[u32_t]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindSMsb [[a]] +; CHECK: OpReturnValue [[ret]] %elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i32(i32 %a) ret i32 %elt.firstbitshigh } +; CHECK-LABEL: Begin function firstbitshigh_i16 define noundef i32 @firstbitshigh_i16(i16 noundef %a) { entry: -; CHECK: [[A:%.*]] = OpSConvert %[[#]] -; CHECK: 
%[[#]] = OpExtInst %[[#]] %[[#]] FindSMsb %[[#]] +; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16_t]] +; CHECK: [[a32:%.+]] = OpSConvert [[u32_t]] [[a16]] +; CHECK: [[ret:%.+]] = OpExtInst [[u32_t]] [[glsl_450_ext]] FindSMsb [[a32]] +; CHECK: OpReturnValue [[ret]] %elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i16(i16 %a) ret i32 %elt.firstbitshigh } +; CHECK-LABEL: Begin function firstbitshigh_i64 define noundef i32 @firstbitshigh_i64(i64 noundef %a) { entry: -; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]] -; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindSMsb [[O]] -; CHECK: [[M:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[Z]] -; CHECK: [[L:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[X]] -; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]] -; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]] -; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]] -; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]] +; CHECK: [[a64:%.+]] = OpFunctionParameter [[u64_t]] +; CHECK: [[a32x2:%.+]] = OpBitcast [[u32x2_t]] [[a64]] +; CHECK: [[lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindSMsb [[a32x2]] +; CHECK: [[high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_0]] +; CHECK: [[low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[lsb_bits]] [[const_1]] +; CHECK: [[should_use_low:%.+]] = OpIEqual [[bool_t]] [[high_bits]] [[const_neg1]] +; CHECK: [[ans_bits:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[low_bits]] [[high_bits]] +; CHECK: [[ans_offset:%.+]] = OpSelect [[u32_t]] [[should_use_low]] [[const_0]] [[const_32]] +; CHECK: [[ret:%.+]] = OpIAdd [[u32_t]] [[ans_offset]] [[ans_bits]] +; CHECK: OpReturnValue [[ret]] %elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i64(i64 %a) ret i32 %elt.firstbitshigh } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll index f3cc73637b136..262cc2610600f 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll +++ 
b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll @@ -76,8 +76,8 @@ entry: ret i32 %elt.firstbitlow } -; CHECK-LABEL: Begin function firstbitlow_v2i16 -define noundef <2 x i32> @firstbitlow_v2i16(<2 x i16> noundef %a) { +; CHECK-LABEL: Begin function firstbitlow_v2xi16 +define noundef <2 x i32> @firstbitlow_v2xi16(<2 x i16> noundef %a) { entry: ; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x2_t]] ; CHECK: [[a32:%.+]] = OpUConvert [[u32x2_t]] [[a16]] @@ -126,8 +126,8 @@ entry: ret i32 %elt.firstbitlow } -; CHECK-LABEL: Begin function firstbitlow_v2i64 -define noundef <2 x i32> @firstbitlow_v2i64(<2 x i64> noundef %a) { +; CHECK-LABEL: Begin function firstbitlow_v2xi64 +define noundef <2 x i32> @firstbitlow_v2xi64(<2 x i64> noundef %a) { entry: ; CHECK: [[a64x2:%.+]] = OpFunctionParameter [[u64x2_t]] ; CHECK: [[a32x4:%.+]] = OpBitcast [[u32x4_t]] [[a64x2]] @@ -143,8 +143,8 @@ entry: ret <2 x i32> %elt.firstbitlow } -; CHECK-LABEL: Begin function firstbitlow_v3i64 -define noundef <3 x i32> @firstbitlow_v3i64(<3 x i64> noundef %a) { +; CHECK-LABEL: Begin function firstbitlow_v3xi64 +define noundef <3 x i32> @firstbitlow_v3xi64(<3 x i64> noundef %a) { entry: ; Split the i64x3 into i64, i64x2 ; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]] @@ -177,8 +177,8 @@ entry: ret <3 x i32> %elt.firstbitlow } -; CHECK-LABEL: Begin function firstbitlow_v4i64 -define noundef <4 x i32> @firstbitlow_v4i64(<4 x i64> noundef %a) { +; CHECK-LABEL: Begin function firstbitlow_v4xi64 +define noundef <4 x i32> @firstbitlow_v4xi64(<4 x i64> noundef %a) { entry: ; Split the i64x4 into 2 i64x2 ; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]] From 525a6620435a26cc3eb2cc7bc25262d898780f90 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Tue, 17 Dec 2024 15:18:05 -0700 Subject: [PATCH 11/17] Address comments --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 105 +++++++++--------- 1 file changed, 51 insertions(+), 54 deletions(-) diff --git 
a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 3872409be44c6..86d44705f0982 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3191,16 +3191,13 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( // (ie i64x4 -> i64x2, i64x2) MachineIRBuilder MIRBuilder(I); SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder); - SPIRVType *LeftOpType; - SPIRVType *LeftResType; + SPIRVType *LeftOpType = OpType; + SPIRVType *LeftResType = BaseType; if (LeftIsVector) { LeftOpType = GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder); LeftResType = GR.getOrCreateSPIRVVectorType(BaseType, LeftComponentCount, MIRBuilder); - } else { - LeftOpType = OpType; - LeftResType = BaseType; } SPIRVType *RightOpType = @@ -3212,8 +3209,6 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( Register RightSideIn = MRI->createVirtualRegister(GR.getRegClass(RightOpType)); - bool Result; - // Extract the left half from the SrcReg into LeftSideIn // accounting for the special case when it only has one element if (LeftIsVector) { @@ -3225,14 +3220,16 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( // Per the spec, repeat the vector if only one vec is needed .addUse(SrcReg); - for (unsigned J = 0; J < LeftComponentCount; J++) { + for (unsigned J = 0; J < LeftComponentCount; J++) MIB.addImm(J); - } - Result = MIB.constrainAllUses(TII, TRI, RBI); + if (!MIB.constrainAllUses(TII, TRI, RBI)) + return false; + } else { - Result = selectOpWithSrcs(LeftSideIn, LeftOpType, I, {SrcReg, ConstIntZero}, - SPIRV::OpVectorExtractDynamic); + if (!selectOpWithSrcs(LeftSideIn, LeftOpType, I, {SrcReg, ConstIntZero}, + SPIRV::OpVectorExtractDynamic)) + return false; } // Extract the right half from the SrcReg into RightSideIn. 
@@ -3246,28 +3243,28 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( // Per the spec, repeat the vector if only one vec is needed .addUse(SrcReg); - for (unsigned J = LeftComponentCount; J < ComponentCount; J++) { + for (unsigned J = LeftComponentCount; J < ComponentCount; J++) MIB.addImm(J); - } - Result = Result && MIB.constrainAllUses(TII, TRI, RBI); + if (!MIB.constrainAllUses(TII, TRI, RBI)) + return false; // Recursively call selectFirstBitSet64 on the 2 halves Register LeftSideOut = MRI->createVirtualRegister(GR.getRegClass(LeftResType)); Register RightSideOut = MRI->createVirtualRegister(GR.getRegClass(RightResType)); - Result = - Result && selectFirstBitSet64(LeftSideOut, LeftResType, I, LeftSideIn, - BitSetOpcode, SwapPrimarySide); - Result = - Result && selectFirstBitSet64(RightSideOut, RightResType, I, RightSideIn, - BitSetOpcode, SwapPrimarySide); + + if (!selectFirstBitSet64(LeftSideOut, LeftResType, I, LeftSideIn, + BitSetOpcode, SwapPrimarySide)) + return false; + if (!selectFirstBitSet64(RightSideOut, RightResType, I, RightSideIn, + BitSetOpcode, SwapPrimarySide)) + return false; // Join the two resulting registers back into the return type // (ie i32x2, i32x2 -> i32x4) - return Result && - selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut}, + return selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut}, SPIRV::OpCompositeConstruct); } @@ -3297,13 +3294,15 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder); Register BitcastReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); - bool Result = - selectOpWithSrcs(BitcastReg, PostCastType, I, {SrcReg}, SPIRV::OpBitcast); + + if (!selectOpWithSrcs(BitcastReg, PostCastType, I, {SrcReg}, + SPIRV::OpBitcast)) + return false; // 2. 
Find the first set bit from the primary side for all the pieces in #1 Register FBSReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType)); - Result = Result && selectFirstBitSet32(FBSReg, PostCastType, I, BitcastReg, - BitSetOpcode); + if (!selectFirstBitSet32(FBSReg, PostCastType, I, BitcastReg, BitSetOpcode)) + return false; // 3. Split result vector into high bits and low bits Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); @@ -3312,12 +3311,12 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector; if (IsScalarRes) { // if scalar do a vector extract - Result = - Result && selectOpWithSrcs(HighReg, ResType, I, {FBSReg, ConstIntZero}, - SPIRV::OpVectorExtractDynamic); - Result = - Result && selectOpWithSrcs(LowReg, ResType, I, {FBSReg, ConstIntOne}, - SPIRV::OpVectorExtractDynamic); + if (!selectOpWithSrcs(HighReg, ResType, I, {FBSReg, ConstIntZero}, + SPIRV::OpVectorExtractDynamic)) + return false; + if (!selectOpWithSrcs(LowReg, ResType, I, {FBSReg, ConstIntOne}, + SPIRV::OpVectorExtractDynamic)) + return false; } else { // if vector do a shufflevector auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), @@ -3332,7 +3331,9 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( for (unsigned J = 0; J < ComponentCount * 2; J += 2) { MIB.addImm(J); } - Result = Result && MIB.constrainAllUses(TII, TRI, RBI); + + if (!MIB.constrainAllUses(TII, TRI, RBI)) + return false; MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVectorShuffle)) @@ -3346,7 +3347,8 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( for (unsigned J = 1; J < ComponentCount * 2; J += 2) { MIB.addImm(J); } - Result = Result && MIB.constrainAllUses(TII, TRI, RBI); + if (!MIB.constrainAllUses(TII, TRI, RBI)) + return false; } // 4. Check the result. 
When primary bits == -1 use secondary, otherwise use @@ -3376,10 +3378,10 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( AddOp = SPIRV::OpIAddV; } - Register PrimaryReg; - Register SecondaryReg; - Register PrimaryShiftReg; - Register SecondaryShiftReg; + Register PrimaryReg = HighReg; + Register SecondaryReg = LowReg; + Register PrimaryShiftReg = Reg32; + Register SecondaryShiftReg = Reg0; // By default the emitted opcodes check for the set bit from the MSB side. // Setting SwapPrimarySide checks the set bit from the LSB side @@ -3388,32 +3390,27 @@ bool SPIRVInstructionSelector::selectFirstBitSet64( SecondaryReg = HighReg; PrimaryShiftReg = Reg0; SecondaryShiftReg = Reg32; - } else { - PrimaryReg = HighReg; - SecondaryReg = LowReg; - PrimaryShiftReg = Reg32; - SecondaryShiftReg = Reg0; } // Check if the primary bits are == -1 Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType)); - Result = Result && selectOpWithSrcs(BReg, BoolType, I, - {PrimaryReg, NegOneReg}, SPIRV::OpIEqual); + if (!selectOpWithSrcs(BReg, BoolType, I, {PrimaryReg, NegOneReg}, + SPIRV::OpIEqual)) + return false; // Select secondary bits if true in BReg, otherwise primary bits Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - Result = - Result && selectOpWithSrcs(TmpReg, ResType, I, - {BReg, SecondaryReg, PrimaryReg}, SelectOp); + if (!selectOpWithSrcs(TmpReg, ResType, I, {BReg, SecondaryReg, PrimaryReg}, + SelectOp)) + return false; // 5. 
Add 32 when high bits are used, otherwise 0 for low bits Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); - Result = Result && selectOpWithSrcs( - ValReg, ResType, I, - {BReg, SecondaryShiftReg, PrimaryShiftReg}, SelectOp); + if (!selectOpWithSrcs(ValReg, ResType, I, + {BReg, SecondaryShiftReg, PrimaryShiftReg}, SelectOp)) + return false; - return Result && - selectOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp); + return selectOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, AddOp); } bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg, From 08a732a4e38df2da04b79e197ff422144e4e5cf2 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 18 Dec 2024 09:47:59 -0700 Subject: [PATCH 12/17] cleanup --- llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 86d44705f0982..a85f6dcd9382e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3220,7 +3220,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( // Per the spec, repeat the vector if only one vec is needed .addUse(SrcReg); - for (unsigned J = 0; J < LeftComponentCount; J++) + for (unsigned J = 0; J < LeftComponentCount; ++J) MIB.addImm(J); if (!MIB.constrainAllUses(TII, TRI, RBI)) @@ -3243,7 +3243,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( // Per the spec, repeat the vector if only one vec is needed .addUse(SrcReg); - for (unsigned J = LeftComponentCount; J < ComponentCount; J++) + for (unsigned J = LeftComponentCount; J < ComponentCount; ++J) MIB.addImm(J); if (!MIB.constrainAllUses(TII, TRI, RBI)) From 2dfcd279cf18d26b64366873181460bc480d7f1f Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 18 Dec 2024 10:07:33 -0700 Subject: [PATCH 13/17] add assert --- 
llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index a85f6dcd9382e..ab80aa9bdef64 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3179,6 +3179,13 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const { unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType); + // SPIR-V only allow vecs of size 2,3,4. Calling with a larger vec requires + // creating a return type with an invalid vec size. If that is resolved + // then this function is valid up to vec8 as the intermediate splitting + // would create 2 vec4. + assert(ComponentCount < 5 && "Vec 5+ will generate invalid SPIR-V ops"); + + SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType); bool ZeroAsNull = STI.isOpenCLEnv(); Register ConstIntZero = From 15eaf6e97c9306129e85165a633d7b90ce89e15e Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 18 Dec 2024 13:42:37 -0700 Subject: [PATCH 14/17] use iterative approach --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 116 +++++++----------- .../SPIRV/hlsl-intrinsics/firstbithigh.ll | 99 ++++++++------- .../SPIRV/hlsl-intrinsics/firstbitlow.ll | 111 +++++++++-------- 3 files changed, 161 insertions(+), 165 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index ab80aa9bdef64..285068caac1c3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3178,100 +3178,74 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( Register ResVReg, const SPIRVType *ResType, MachineInstr &I, Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const { - unsigned ComponentCount = 
GR.getScalarOrVectorComponentCount(ResType); // SPIR-V only allow vecs of size 2,3,4. Calling with a larger vec requires - // creating a return type with an invalid vec size. If that is resolved - // then this function is valid up to vec8 as the intermediate splitting - // would create 2 vec4. + // creating a param reg and return reg with an invalid vec size. If that is + // resolved then this function is valid for vectors of any component size. + unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType); assert(ComponentCount < 5 && "Vec 5+ will generate invalid SPIR-V ops"); - - SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType); bool ZeroAsNull = STI.isOpenCLEnv(); - Register ConstIntZero = - GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull); - unsigned LeftComponentCount = ComponentCount / 2; - unsigned RightComponentCount = ComponentCount - LeftComponentCount; - bool LeftIsVector = LeftComponentCount > 1; - - // Split the SrcReg in half into 2 smaller vec registers - // (ie i64x4 -> i64x2, i64x2) MachineIRBuilder MIRBuilder(I); - SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder); - SPIRVType *LeftOpType = OpType; - SPIRVType *LeftResType = BaseType; - if (LeftIsVector) { - LeftOpType = - GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder); - LeftResType = - GR.getOrCreateSPIRVVectorType(BaseType, LeftComponentCount, MIRBuilder); - } - - SPIRVType *RightOpType = - GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder); - SPIRVType *RightResType = - GR.getOrCreateSPIRVVectorType(BaseType, RightComponentCount, MIRBuilder); - - Register LeftSideIn = MRI->createVirtualRegister(GR.getRegClass(LeftOpType)); - Register RightSideIn = - MRI->createVirtualRegister(GR.getRegClass(RightOpType)); - - // Extract the left half from the SrcReg into LeftSideIn - // accounting for the special case when it only has one element - if (LeftIsVector) { + SPIRVType *BaseType = 
GR.retrieveScalarOrVectorIntType(ResType); + SPIRVType *I64Type = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder); + SPIRVType *I64x2Type = GR.getOrCreateSPIRVVectorType(I64Type, 2, MIRBuilder); + SPIRVType *Vec2ResType = + GR.getOrCreateSPIRVVectorType(BaseType, 2, MIRBuilder); + + std::vector PartialRegs; + + // Loops 0, 2, 4, ... but stops one loop early when ComponentCount is odd + unsigned CurrentComponent = 0; + for (; CurrentComponent + 1 < ComponentCount; CurrentComponent += 2) { + Register SubVecReg = MRI->createVirtualRegister(GR.getRegClass(I64x2Type)); + auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVectorShuffle)) - .addDef(LeftSideIn) - .addUse(GR.getSPIRVTypeID(LeftOpType)) + .addDef(SubVecReg) + .addUse(GR.getSPIRVTypeID(I64x2Type)) .addUse(SrcReg) // Per the spec, repeat the vector if only one vec is needed .addUse(SrcReg); - for (unsigned J = 0; J < LeftComponentCount; ++J) - MIB.addImm(J); + MIB.addImm(CurrentComponent); + MIB.addImm(CurrentComponent + 1); if (!MIB.constrainAllUses(TII, TRI, RBI)) return false; - } else { - if (!selectOpWithSrcs(LeftSideIn, LeftOpType, I, {SrcReg, ConstIntZero}, - SPIRV::OpVectorExtractDynamic)) + Register SubVecBitSetReg = + MRI->createVirtualRegister(GR.getRegClass(Vec2ResType)); + + if (!selectFirstBitSet64(SubVecBitSetReg, Vec2ResType, I, SubVecReg, + BitSetOpcode, SwapPrimarySide)) return false; + + PartialRegs.push_back(SubVecBitSetReg); } - // Extract the right half from the SrcReg into RightSideIn. - // Right will always be a vector since the only time one element is left is - // when Component == 3, and in that case Left is one element. 
- auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(SPIRV::OpVectorShuffle)) - .addDef(RightSideIn) - .addUse(GR.getSPIRVTypeID(RightOpType)) - .addUse(SrcReg) - // Per the spec, repeat the vector if only one vec is needed - .addUse(SrcReg); + // On odd component counts we need to handle one more component + if (CurrentComponent != ComponentCount) { + Register FinalElemReg = MRI->createVirtualRegister(GR.getRegClass(I64Type)); + Register ConstIntLastIdx = GR.getOrCreateConstInt( + ComponentCount - 1, I, BaseType, TII, ZeroAsNull); - for (unsigned J = LeftComponentCount; J < ComponentCount; ++J) - MIB.addImm(J); + if (!selectOpWithSrcs(FinalElemReg, I64Type, I, {SrcReg, ConstIntLastIdx}, + SPIRV::OpVectorExtractDynamic)) + return false; - if (!MIB.constrainAllUses(TII, TRI, RBI)) - return false; + Register FinalElemBitSetReg = + MRI->createVirtualRegister(GR.getRegClass(BaseType)); - // Recursively call selectFirstBitSet64 on the 2 halves - Register LeftSideOut = - MRI->createVirtualRegister(GR.getRegClass(LeftResType)); - Register RightSideOut = - MRI->createVirtualRegister(GR.getRegClass(RightResType)); + if (!selectFirstBitSet64(FinalElemBitSetReg, BaseType, I, FinalElemReg, + BitSetOpcode, SwapPrimarySide)) + return false; - if (!selectFirstBitSet64(LeftSideOut, LeftResType, I, LeftSideIn, - BitSetOpcode, SwapPrimarySide)) - return false; - if (!selectFirstBitSet64(RightSideOut, RightResType, I, RightSideIn, - BitSetOpcode, SwapPrimarySide)) - return false; + PartialRegs.push_back(FinalElemBitSetReg); + } - // Join the two resulting registers back into the return type - // (ie i32x2, i32x2 -> i32x4) - return selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut}, + // Join all the resulting registers back into the return type in order + // (ie i32x2, i32x2, i32x1 -> i32x5) + return selectOpWithSrcs(ResVReg, ResType, I, PartialRegs, SPIRV::OpCompositeConstruct); } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll 
b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll index dee48061d2fe1..a4dd09d84d996 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll @@ -8,6 +8,7 @@ ; CHECK-DAG: [[u32x3_t:%.+]] = OpTypeVector [[u32_t]] 3 ; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4 ; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0 +; CHECK-DAG: [[const_2:%.*]] = OpConstant [[u32_t]] 2 ; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]] ; CHECK-DAG: [[const_1:%.*]] = OpConstant [[u32_t]] 1 ; CHECK-DAG: [[const_32:%.*]] = OpConstant [[u32_t]] 32 @@ -146,32 +147,37 @@ entry: ; CHECK-LABEL: Begin function firstbituhigh_v3xi64 define noundef <3 x i32> @firstbituhigh_v3xi64(<3 x i64> noundef %a) { entry: -; Split the i64x3 into i64, i64x2 +; Preamble ; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]] -; CHECK: [[left:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_0]] -; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 1 2 -; Do firstbituhigh on i64, i64x2 -; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x2_t]] [[left]] -; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb [[left_cast]] -; CHECK: [[left_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_0]] -; CHECK: [[left_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_1]] -; CHECK: [[left_should_use_low:%.+]] = OpIEqual [[bool_t]] [[left_high_bits]] [[const_neg1]] -; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32_t]] [[left_should_use_low]] [[left_low_bits]] [[left_high_bits]] -; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32_t]] [[left_should_use_low]] [[const_0]] [[const_32]] -; CHECK: [[left_res:%.+]] = OpIAdd [[u32_t]] [[left_ans_offset]] [[left_ans_bits]] +; Extract first 2 components from %a +; CHECK: [[pt1:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1 -; CHECK: [[right_cast:%.+]] = OpBitcast 
[[u32x4_t]] [[right]] -; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[right_cast]] -; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2 -; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3 -; CHECK: [[right_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[right_high_bits]] [[const_neg1x2]] -; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[right_low_bits]] [[right_high_bits]] -; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[const_0x2]] [[const_32x2]] -; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]] +; Do firstbituhigh on the first 2 components +; CHECK: [[pt1_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt1]] +; CHECK: [[pt1_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[pt1_cast]] +; CHECK: [[pt1_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 0 2 +; CHECK: [[pt1_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 1 3 +; CHECK: [[pt1_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[pt1_high_bits]] [[const_neg1x2]] +; CHECK: [[pt1_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_low]] [[pt1_low_bits]] [[pt1_high_bits]] +; CHECK: [[pt1_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_low]] [[const_0x2]] [[const_32x2]] +; CHECK: [[pt1_res:%.+]] = OpIAdd [[u32x2_t]] [[pt1_ans_offset]] [[pt1_ans_bits]] -; Merge the resulting i32, i32x2 into the final i32x3 and return it -; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x3_t]] [[left_res]] [[right_res]] +; Extract the last component from %a +; CHECK: [[pt2:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_2]] + +; Do firstbituhigh on the last component +; CHECK: [[pt2_cast:%.+]] = OpBitcast [[u32x2_t]] [[pt2]] +; CHECK: [[pt2_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindUMsb 
[[pt2_cast]] +; CHECK: [[pt2_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[pt2_lsb_bits]] [[const_0]] +; CHECK: [[pt2_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[pt2_lsb_bits]] [[const_1]] +; CHECK: [[pt2_should_use_low:%.+]] = OpIEqual [[bool_t]] [[pt2_high_bits]] [[const_neg1]] +; CHECK: [[pt2_ans_bits:%.+]] = OpSelect [[u32_t]] [[pt2_should_use_low]] [[pt2_low_bits]] [[pt2_high_bits]] +; CHECK: [[pt2_ans_offset:%.+]] = OpSelect [[u32_t]] [[pt2_should_use_low]] [[const_0]] [[const_32]] +; CHECK: [[pt2_res:%.+]] = OpIAdd [[u32_t]] [[pt2_ans_offset]] [[pt2_ans_bits]] + +; Merge the parts into the final i32x3 and return it +; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x3_t]] [[pt1_res]] [[pt2_res]] ; CHECK: OpReturnValue [[ret]] %elt.firstbituhigh = call <3 x i32> @llvm.spv.firstbituhigh.v3i64(<3 x i64> %a) ret <3 x i32> %elt.firstbituhigh @@ -180,32 +186,37 @@ entry: ; CHECK-LABEL: Begin function firstbituhigh_v4xi64 define noundef <4 x i32> @firstbituhigh_v4xi64(<4 x i64> noundef %a) { entry: -; Split the i64x4 into 2 i64x2 +; Preamble ; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]] -; CHECK: [[left:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1 -; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3 -; Do firstbithigh on the 2 i64x2 -; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x4_t]] [[left]] -; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[left_cast]] -; CHECK: [[left_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 0 2 -; CHECK: [[left_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 1 3 -; CHECK: [[left_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[left_high_bits]] [[const_neg1x2]] -; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_low]] [[left_low_bits]] [[left_high_bits]] -; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_low]] [[const_0x2]] [[const_32x2]] -; CHECK: 
[[left_res:%.+]] = OpIAdd [[u32x2_t]] [[left_ans_offset]] [[left_ans_bits]] +; Extract first 2 components from %a +; CHECK: [[pt1:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1 + +; Do firstbituhigh on the first 2 components +; CHECK: [[pt1_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt1]] +; CHECK: [[pt1_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[pt1_cast]] +; CHECK: [[pt1_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 0 2 +; CHECK: [[pt1_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 1 3 +; CHECK: [[pt1_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[pt1_high_bits]] [[const_neg1x2]] +; CHECK: [[pt1_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_low]] [[pt1_low_bits]] [[pt1_high_bits]] +; CHECK: [[pt1_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_low]] [[const_0x2]] [[const_32x2]] +; CHECK: [[pt1_res:%.+]] = OpIAdd [[u32x2_t]] [[pt1_ans_offset]] [[pt1_ans_bits]] + +; Extract last 2 components from %a +; CHECK: [[pt2:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3 -; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]] -; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[right_cast]] -; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2 -; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3 -; CHECK: [[right_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[right_high_bits]] [[const_neg1x2]] -; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[right_low_bits]] [[right_high_bits]] -; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_low]] [[const_0x2]] [[const_32x2]] -; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]] +; Do firstbituhigh on the last 2 components +; CHECK: [[pt2_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt2]] +; CHECK: 
[[pt2_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindUMsb [[pt2_cast]] +; CHECK: [[pt2_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt2_lsb_bits]] [[pt2_lsb_bits]] 0 2 +; CHECK: [[pt2_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt2_lsb_bits]] [[pt2_lsb_bits]] 1 3 +; CHECK: [[pt2_should_use_low:%.+]] = OpIEqual [[boolx2_t]] [[pt2_high_bits]] [[const_neg1x2]] +; CHECK: [[pt2_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt2_should_use_low]] [[pt2_low_bits]] [[pt2_high_bits]] +; CHECK: [[pt2_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt2_should_use_low]] [[const_0x2]] [[const_32x2]] +; CHECK: [[pt2_res:%.+]] = OpIAdd [[u32x2_t]] [[pt2_ans_offset]] [[pt2_ans_bits]] -; Merge the resulting 2 i32x2 into the final i32x4 and return it -; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[left_res]] [[right_res]] +; Merge the parts into the final i32x4 and return it +; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[pt1_res]] [[pt2_res]] ; CHECK: OpReturnValue [[ret]] %elt.firstbituhigh = call <4 x i32> @llvm.spv.firstbituhigh.v4i64(<4 x i64> %a) ret <4 x i32> %elt.firstbituhigh diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll index 262cc2610600f..6de6cdc60ea9c 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll @@ -10,6 +10,7 @@ ; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0 ; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]] ; CHECK-DAG: [[const_1:%.*]] = OpConstant [[u32_t]] 1 +; CHECK-DAG: [[const_2:%.*]] = OpConstant [[u32_t]] 2 ; CHECK-DAG: [[const_32:%.*]] = OpConstant [[u32_t]] 32 ; CHECK-DAG: [[const_32x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_32]] [[const_32]] ; CHECK-DAG: [[const_neg1:%.*]] = OpConstant [[u32_t]] 4294967295 @@ -146,32 +147,37 @@ entry: ; CHECK-LABEL: Begin function firstbitlow_v3xi64 define noundef <3 x i32> 
@firstbitlow_v3xi64(<3 x i64> noundef %a) { entry: -; Split the i64x3 into i64, i64x2 +; Preamble ; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]] -; CHECK: [[left:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_0]] -; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 1 2 - -; Do firstbitlow on i64, i64x2 -; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x2_t]] [[left]] -; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[left_cast]] -; CHECK: [[left_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_0]] -; CHECK: [[left_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_1]] -; CHECK: [[left_should_use_high:%.+]] = OpIEqual [[bool_t]] [[left_low_bits]] [[const_neg1]] -; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32_t]] [[left_should_use_high]] [[left_high_bits]] [[left_low_bits]] -; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32_t]] [[left_should_use_high]] [[const_32]] [[const_0]] -; CHECK: [[left_res:%.+]] = OpIAdd [[u32_t]] [[left_ans_offset]] [[left_ans_bits]] - -; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]] -; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[right_cast]] -; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2 -; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3 -; CHECK: [[right_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[right_low_bits]] [[const_neg1x2]] -; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[right_high_bits]] [[right_low_bits]] -; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[const_32x2]] [[const_0x2]] -; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]] - -; Merge the resulting i32, i32x2 into the final i32x3 and return it -; CHECK: [[ret:%.+]] = OpCompositeConstruct 
[[u32x3_t]] [[left_res]] [[right_res]] + +; Extract first 2 components from %a +; CHECK: [[pt1:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1 + +; Do firstbitlow on the first 2 components +; CHECK: [[pt1_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt1]] +; CHECK: [[pt1_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[pt1_cast]] +; CHECK: [[pt1_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 0 2 +; CHECK: [[pt1_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 1 3 +; CHECK: [[pt1_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[pt1_low_bits]] [[const_neg1x2]] +; CHECK: [[pt1_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_high]] [[pt1_high_bits]] [[pt1_low_bits]] +; CHECK: [[pt1_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_high]] [[const_32x2]] [[const_0x2]] +; CHECK: [[pt1_res:%.+]] = OpIAdd [[u32x2_t]] [[pt1_ans_offset]] [[pt1_ans_bits]] + +; Extract the last component from %a +; CHECK: [[pt2:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_2]] + +; Do firstbitlow on the last component +; CHECK: [[pt2_cast:%.+]] = OpBitcast [[u32x2_t]] [[pt2]] +; CHECK: [[pt2_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[pt2_cast]] +; CHECK: [[pt2_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[pt2_lsb_bits]] [[const_0]] +; CHECK: [[pt2_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[pt2_lsb_bits]] [[const_1]] +; CHECK: [[pt2_should_use_high:%.+]] = OpIEqual [[bool_t]] [[pt2_low_bits]] [[const_neg1]] +; CHECK: [[pt2_ans_bits:%.+]] = OpSelect [[u32_t]] [[pt2_should_use_high]] [[pt2_high_bits]] [[pt2_low_bits]] +; CHECK: [[pt2_ans_offset:%.+]] = OpSelect [[u32_t]] [[pt2_should_use_high]] [[const_32]] [[const_0]] +; CHECK: [[pt2_res:%.+]] = OpIAdd [[u32_t]] [[pt2_ans_offset]] [[pt2_ans_bits]] + +; Merge the parts into the final i32x3 and return it +; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x3_t]] [[pt1_res]] [[pt2_res]] ; CHECK: OpReturnValue 
[[ret]] %elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i64(<3 x i64> %a) ret <3 x i32> %elt.firstbitlow @@ -180,32 +186,37 @@ entry: ; CHECK-LABEL: Begin function firstbitlow_v4xi64 define noundef <4 x i32> @firstbitlow_v4xi64(<4 x i64> noundef %a) { entry: -; Split the i64x4 into 2 i64x2 +; Preamble ; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]] -; CHECK: [[left:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1 -; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3 - -; Do firstbitlow on the 2 i64x2 -; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x4_t]] [[left]] -; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[left_cast]] -; CHECK: [[left_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 0 2 -; CHECK: [[left_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 1 3 -; CHECK: [[left_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[left_low_bits]] [[const_neg1x2]] -; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_high]] [[left_high_bits]] [[left_low_bits]] -; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_high]] [[const_32x2]] [[const_0x2]] -; CHECK: [[left_res:%.+]] = OpIAdd [[u32x2_t]] [[left_ans_offset]] [[left_ans_bits]] - -; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]] -; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[right_cast]] -; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2 -; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3 -; CHECK: [[right_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[right_low_bits]] [[const_neg1x2]] -; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[right_high_bits]] [[right_low_bits]] -; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] 
[[const_32x2]] [[const_0x2]] -; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]] - -; Merge the resulting 2 i32x2 into the final i32x4 and return it -; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[left_res]] [[right_res]] + +; Extract first 2 components from %a +; CHECK: [[pt1:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1 + +; Do firstbitlow on the first 2 components +; CHECK: [[pt1_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt1]] +; CHECK: [[pt1_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[pt1_cast]] +; CHECK: [[pt1_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 0 2 +; CHECK: [[pt1_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt1_lsb_bits]] [[pt1_lsb_bits]] 1 3 +; CHECK: [[pt1_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[pt1_low_bits]] [[const_neg1x2]] +; CHECK: [[pt1_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_high]] [[pt1_high_bits]] [[pt1_low_bits]] +; CHECK: [[pt1_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt1_should_use_high]] [[const_32x2]] [[const_0x2]] +; CHECK: [[pt1_res:%.+]] = OpIAdd [[u32x2_t]] [[pt1_ans_offset]] [[pt1_ans_bits]] + +; Extract last 2 components from %a +; CHECK: [[pt2:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3 + +; Do firstbitlow on the last 2 components +; CHECK: [[pt2_cast:%.+]] = OpBitcast [[u32x4_t]] [[pt2]] +; CHECK: [[pt2_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[pt2_cast]] +; CHECK: [[pt2_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt2_lsb_bits]] [[pt2_lsb_bits]] 0 2 +; CHECK: [[pt2_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[pt2_lsb_bits]] [[pt2_lsb_bits]] 1 3 +; CHECK: [[pt2_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[pt2_low_bits]] [[const_neg1x2]] +; CHECK: [[pt2_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[pt2_should_use_high]] [[pt2_high_bits]] [[pt2_low_bits]] +; CHECK: [[pt2_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[pt2_should_use_high]] [[const_32x2]]
[[const_0x2]] +; CHECK: [[pt2_res:%.+]] = OpIAdd [[u32x2_t]] [[pt2_ans_offset]] [[pt2_ans_bits]] + +; Merge the parts into the final i32x4 and return it +; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[pt1_res]] [[pt2_res]] ; CHECK: OpReturnValue [[ret]] %elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i64(<4 x i64> %a) ret <4 x i32> %elt.firstbitlow From 2b8d7f0f98f81a00bc7bd6534ec41a1d6dbe25fc Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Thu, 9 Jan 2025 13:08:36 -0700 Subject: [PATCH 15/17] Address comments --- llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 6441435fca5f8..7d8182fc2d421 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3239,11 +3239,14 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( // Loops 0, 2, 4, ... 
but stops one loop early when ComponentCount is odd unsigned CurrentComponent = 0; for (; CurrentComponent + 1 < ComponentCount; CurrentComponent += 2) { - Register SubVecReg = MRI->createVirtualRegister(GR.getRegClass(I64x2Type)); + // This register holds the i64x2 sub-vector shuffled out of SrcReg; it is + // the input to the firstbitX computation below + Register BitSetResult = + MRI->createVirtualRegister(GR.getRegClass(I64x2Type)); auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVectorShuffle)) - .addDef(SubVecReg) + .addDef(BitSetResult) .addUse(GR.getSPIRVTypeID(I64x2Type)) .addUse(SrcReg) // Per the spec, repeat the vector if only one vec is needed @@ -3258,7 +3261,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( Register SubVecBitSetReg = MRI->createVirtualRegister(GR.getRegClass(Vec2ResType)); - if (!selectFirstBitSet64(SubVecBitSetReg, Vec2ResType, I, SubVecReg, + if (!selectFirstBitSet64(SubVecBitSetReg, Vec2ResType, I, BitSetResult, BitSetOpcode, SwapPrimarySide)) return false; From 2c0e21658dc00321f0cc5d05740fe883b7441520 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Fri, 10 Jan 2025 10:55:43 -0700 Subject: [PATCH 16/17] Update llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp Co-authored-by: Steven Perron --- llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 7d8182fc2d421..c1506651605fb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3220,9 +3220,9 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( Register ResVReg, const SPIRVType *ResType, MachineInstr &I, Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const { - // SPIR-V only allow vecs of size 2,3,4.
Calling with a larger vec requires - // creating a param reg and return reg with an invalid vec size. If that is - // resolved then this function is valid for vectors of any component size. + // SPIR-V allow vectors of size 2,3,4 only. Calling with a larger vectors requires + // creating a param register and return register with an invalid vector size. If that is + // resolved, then this function can be used for vectors of any component size. unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType); assert(ComponentCount < 5 && "Vec 5+ will generate invalid SPIR-V ops"); From c1b7fadadaee8c43234daa87ed2c3489ecbea06e Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Fri, 10 Jan 2025 12:54:40 -0700 Subject: [PATCH 17/17] Address comments --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index c1506651605fb..64690d16d5c41 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3220,13 +3220,13 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( Register ResVReg, const SPIRVType *ResType, MachineInstr &I, Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const { - // SPIR-V allow vectors of size 2,3,4 only. Calling with a larger vectors requires - // creating a param register and return register with an invalid vector size. If that is - // resolved, then this function can be used for vectors of any component size. + // SPIR-V allows vectors of size 2,3,4 only. Calling with larger vectors + // requires creating a param register and return register with an invalid + // vector size. If that is resolved, then this function can be used for + // vectors of any component size.
unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType); assert(ComponentCount < 5 && "Vec 5+ will generate invalid SPIR-V ops"); - bool ZeroAsNull = STI.isOpenCLEnv(); MachineIRBuilder MIRBuilder(I); SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType); SPIRVType *I64Type = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder); @@ -3249,11 +3249,9 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( .addDef(BitSetResult) .addUse(GR.getSPIRVTypeID(I64x2Type)) .addUse(SrcReg) - // Per the spec, repeat the vector if only one vec is needed - .addUse(SrcReg); - - MIB.addImm(CurrentComponent); - MIB.addImm(CurrentComponent + 1); + .addUse(SrcReg) + .addImm(CurrentComponent) + .addImm(CurrentComponent + 1); if (!MIB.constrainAllUses(TII, TRI, RBI)) return false; @@ -3270,6 +3268,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( // On odd component counts we need to handle one more component if (CurrentComponent != ComponentCount) { + bool ZeroAsNull = STI.isOpenCLEnv(); Register FinalElemReg = MRI->createVirtualRegister(GR.getRegClass(I64Type)); Register ConstIntLastIdx = GR.getOrCreateConstInt( ComponentCount - 1, I, BaseType, TII, ZeroAsNull);