diff --git a/offload/unittests/Conformance/device_code/CUDAMath.cpp b/offload/unittests/Conformance/device_code/CUDAMath.cpp index a351e924b8f89..86c5d698d80af 100644 --- a/offload/unittests/Conformance/device_code/CUDAMath.cpp +++ b/offload/unittests/Conformance/device_code/CUDAMath.cpp @@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out, runKernelBody<__nv_expm1f>(NumElements, Out, X); } +__gpu_kernel void logKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_log>(NumElements, Out, X); +} + __gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_logf>(NumElements, Out, X); diff --git a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp index 8476dcbeff0c9..7941a05010cc7 100644 --- a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp +++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp @@ -63,6 +63,7 @@ float __nv_expf(float); float __nv_exp10f(float); float __nv_exp2f(float); float __nv_expm1f(float); +double __nv_log(double); float __nv_logf(float); float __nv_log10f(float); float __nv_log1pf(float); @@ -96,6 +97,7 @@ float __ocml_exp_f32(float); float __ocml_exp10_f32(float); float __ocml_exp2_f32(float); float __ocml_expm1_f32(float); +double __ocml_log_f64(double); float __ocml_log_f32(float); float __ocml_log10_f32(float); float __ocml_log1p_f32(float); diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp index 36efe6b2696ab..55f67669872c5 100644 --- a/offload/unittests/Conformance/device_code/HIPMath.cpp +++ b/offload/unittests/Conformance/device_code/HIPMath.cpp @@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out, runKernelBody<__ocml_expm1_f32>(NumElements, Out, X); } +__gpu_kernel void logKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_log_f64>(NumElements, Out, X); +} + __gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_log_f32>(NumElements, Out, X); diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp index 8869d87017486..cf33e0a86e94c 100644 --- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp +++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp @@ -123,6 +123,11 @@ __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out, runKernelBody(NumElements, Out, X, Y); } +__gpu_kernel void logKernel(const double *X, double *Out, + size_t NumElements) noexcept { + runKernelBody(NumElements, Out, X); +} + __gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody(NumElements, Out, X); diff --git a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp index 6f7f7a9b665d0..39c6838eecf7e 100644 --- a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp +++ b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp @@ -8,8 +8,8 @@ /// /// \file /// This file contains the definition of the ExhaustiveGenerator class, a -/// concrete input generator that exhaustively creates inputs from a given -/// sequence of ranges. +/// concrete range-based generator that exhaustively creates inputs from a +/// given sequence of ranges. /// //===----------------------------------------------------------------------===// @@ -17,89 +17,62 @@ #define MATHTEST_EXHAUSTIVEGENERATOR_HPP #include "mathtest/IndexedRange.hpp" -#include "mathtest/InputGenerator.hpp" +#include "mathtest/RangeBasedGenerator.hpp" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/Parallel.h" - -#include #include #include #include #include +#include #include namespace mathtest { template class [[nodiscard]] ExhaustiveGenerator final - : public InputGenerator { - static constexpr std::size_t NumInputs = sizeof...(InTypes); - static_assert(NumInputs > 0, "The number of inputs must be at least 1"); + : public RangeBasedGenerator, InTypes...> { + + friend class RangeBasedGenerator, InTypes...>; + + using Base = RangeBasedGenerator, InTypes...>; + using IndexArrayType = std::array; + + using Base::RangesTuple; + using Base::Size; public: explicit constexpr ExhaustiveGenerator( const IndexedRange &...Ranges) noexcept - : RangesTuple(Ranges...) { - bool Overflowed = getSizeWithOverflow(Ranges..., Size); + : Base(Ranges...) { + const auto MaybeSize = getInputSpaceSize(Ranges...); + + assert(MaybeSize.has_value() && "The size is too large"); + Size = *MaybeSize; - assert(!Overflowed && "The input space size is too large"); - assert((Size > 0) && "The input space size must be at least 1"); + assert((Size > 0) && "The size must be at least 1"); IndexArrayType DimSizes = {}; std::size_t DimIndex = 0; ((DimSizes[DimIndex++] = Ranges.getSize()), ...); - Strides[NumInputs - 1] = 1; - if constexpr (NumInputs > 1) - for (int Index = static_cast(NumInputs) - 2; Index >= 0; --Index) + Strides[Base::NumInputs - 1] = 1; + if constexpr (Base::NumInputs > 1) + for (int Index = static_cast(Base::NumInputs) - 2; Index >= 0; + --Index) Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1]; } - void reset() noexcept override { NextFlatIndex = 0; } - - [[nodiscard]] std::size_t - fill(llvm::MutableArrayRef... Buffers) noexcept override { - const std::array BufferSizes = {Buffers.size()...}; - const std::size_t BufferSize = BufferSizes[0]; - assert((BufferSize != 0) && "Buffer size cannot be zero"); - assert(std::all_of(BufferSizes.begin(), BufferSizes.end(), - [&](std::size_t Size) { return Size == BufferSize; }) && - "All input buffers must have the same size"); - - if (NextFlatIndex >= Size) - return 0; - - const auto BatchSize = std::min(BufferSize, Size - NextFlatIndex); - const auto CurrentFlatIndex = NextFlatIndex; - NextFlatIndex += BatchSize; - - auto BufferPtrsTuple = std::make_tuple(Buffers.data()...); - - llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) { - writeInputs(CurrentFlatIndex, Offset, BufferPtrsTuple); - }); - - return static_cast(BatchSize); - } - private: - using RangesTupleType = std::tuple...>; - using IndexArrayType = std::array; - - static bool getSizeWithOverflow(const IndexedRange &...Ranges, - uint64_t &Size) noexcept { - Size = 1; - bool Overflowed = false; - - auto Multiplier = [&](const uint64_t RangeSize) { - if (!Overflowed) - Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size); - }; + [[nodiscard]] constexpr IndexArrayType + getNDIndex(uint64_t FlatIndex) const noexcept { + IndexArrayType NDIndex; - (Multiplier(Ranges.getSize()), ...); + for (std::size_t Index = 0; Index < Base::NumInputs; ++Index) { + NDIndex[Index] = FlatIndex / Strides[Index]; + FlatIndex -= NDIndex[Index] * Strides[Index]; + } - return Overflowed; + return NDIndex; } template @@ -109,31 +82,37 @@ class [[nodiscard]] ExhaustiveGenerator final writeInputsImpl<0>(NDIndex, Offset, BufferPtrsTuple); } - constexpr IndexArrayType getNDIndex(uint64_t FlatIndex) const noexcept { - IndexArrayType NDIndex; - - for (std::size_t Index = 0; Index < NumInputs; ++Index) { - NDIndex[Index] = FlatIndex / Strides[Index]; - FlatIndex -= NDIndex[Index] * Strides[Index]; - } - - return NDIndex; - } - template void writeInputsImpl(IndexArrayType NDIndex, uint64_t Offset, BufferPtrsTupleType BufferPtrsTuple) const noexcept { - if constexpr (Index < NumInputs) { + if constexpr (Index < Base::NumInputs) { const auto &Range = std::get(RangesTuple); std::get(BufferPtrsTuple)[Offset] = Range[NDIndex[Index]]; + writeInputsImpl(NDIndex, Offset, BufferPtrsTuple); } } - uint64_t Size = 1; - RangesTupleType RangesTuple; + [[nodiscard]] static constexpr std::optional + getInputSpaceSize(const IndexedRange &...Ranges) noexcept { + uint64_t InputSpaceSize = 1; + bool Overflowed = false; + + auto Multiplier = [&](const uint64_t RangeSize) { + if (!Overflowed) + Overflowed = + __builtin_mul_overflow(InputSpaceSize, RangeSize, &InputSpaceSize); + }; + + (Multiplier(Ranges.getSize()), ...); + + if (Overflowed) + return std::nullopt; + + return InputSpaceSize; + } + IndexArrayType Strides = {}; - uint64_t NextFlatIndex = 0; }; } // namespace mathtest diff --git a/offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp b/offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp new file mode 100644 index 0000000000000..436cd05f0a3d3 --- /dev/null +++ b/offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp @@ -0,0 +1,86 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the definition of the RandomGenerator class, a concrete +/// range-based generator that randomly creates inputs from a given sequence of +/// ranges. +/// +//===----------------------------------------------------------------------===// + +#ifndef MATHTEST_RANDOMGENERATOR_HPP +#define MATHTEST_RANDOMGENERATOR_HPP + +#include "mathtest/IndexedRange.hpp" +#include "mathtest/RandomState.hpp" +#include "mathtest/RangeBasedGenerator.hpp" + +#include +#include +#include + +namespace mathtest { + +template +class [[nodiscard]] RandomGenerator final + : public RangeBasedGenerator, InTypes...> { + + friend class RangeBasedGenerator, InTypes...>; + + using Base = RangeBasedGenerator, InTypes...>; + + using Base::RangesTuple; + using Base::Size; + +public: + explicit constexpr RandomGenerator( + SeedTy BaseSeed, uint64_t Size, + const IndexedRange &...Ranges) noexcept + : Base(Size, Ranges...), BaseSeed(BaseSeed) {} + +private: + [[nodiscard]] static uint64_t getRandomIndex(RandomState &RNG, + uint64_t RangeSize) noexcept { + if (RangeSize == 0) + return 0; + + const uint64_t Threshold = (-RangeSize) % RangeSize; + + uint64_t RandomNumber; + do { + RandomNumber = RNG.next(); + } while (RandomNumber < Threshold); + + return RandomNumber % RangeSize; + } + + template + void writeInputs(uint64_t CurrentFlatIndex, uint64_t Offset, + BufferPtrsTupleType BufferPtrsTuple) const noexcept { + + RandomState RNG(SeedTy{BaseSeed.Value ^ (CurrentFlatIndex + Offset)}); + writeInputsImpl<0>(RNG, Offset, BufferPtrsTuple); + } + + template + void writeInputsImpl(RandomState &RNG, uint64_t Offset, + BufferPtrsTupleType BufferPtrsTuple) const noexcept { + if constexpr (Index < Base::NumInputs) { + const auto &Range = std::get(RangesTuple); + const auto RandomIndex = getRandomIndex(RNG, Range.getSize()); + std::get(BufferPtrsTuple)[Offset] = Range[RandomIndex]; + + writeInputsImpl(RNG, Offset, BufferPtrsTuple); + } + } + + SeedTy BaseSeed; +}; +} // namespace mathtest + +#endif // MATHTEST_RANDOMGENERATOR_HPP diff --git a/offload/unittests/Conformance/include/mathtest/RandomState.hpp b/offload/unittests/Conformance/include/mathtest/RandomState.hpp new file mode 100644 index 0000000000000..322d53175236f --- /dev/null +++ b/offload/unittests/Conformance/include/mathtest/RandomState.hpp @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the definition of the RandomState class, a fast and +/// lightweight pseudo-random number generator. +/// +/// The implementation is based on the xorshift* generator, seeded using the +/// SplitMix64 generator for robust initialization. For more details on the +/// algorithm, see: https://en.wikipedia.org/wiki/Xorshift +/// +//===----------------------------------------------------------------------===// + +#ifndef MATHTEST_RANDOMSTATE_HPP +#define MATHTEST_RANDOMSTATE_HPP + +#include + +struct SeedTy { + uint64_t Value; +}; + +class [[nodiscard]] RandomState { + uint64_t State; + + [[nodiscard]] static constexpr uint64_t splitMix64(uint64_t X) noexcept { + X += 0x9E3779B97F4A7C15ULL; + X = (X ^ (X >> 30)) * 0xBF58476D1CE4E5B9ULL; + X = (X ^ (X >> 27)) * 0x94D049BB133111EBULL; + X = (X ^ (X >> 31)); + return X ? X : 0x9E3779B97F4A7C15ULL; + } + +public: + explicit constexpr RandomState(SeedTy Seed) noexcept + : State(splitMix64(Seed.Value)) {} + + inline uint64_t next() noexcept { + uint64_t X = State; + X ^= X >> 12; + X ^= X << 25; + X ^= X >> 27; + State = X; + return X * 0x2545F4914F6CDD1DULL; + } +}; + +#endif // MATHTEST_RANDOMSTATE_HPP diff --git a/offload/unittests/Conformance/include/mathtest/RangeBasedGenerator.hpp b/offload/unittests/Conformance/include/mathtest/RangeBasedGenerator.hpp new file mode 100644 index 0000000000000..5e1e1139aba96 --- /dev/null +++ b/offload/unittests/Conformance/include/mathtest/RangeBasedGenerator.hpp @@ -0,0 +1,86 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the definition of the RangeBasedGenerator class, a base +/// class for input generators that operate on a sequence of ranges. +/// +//===----------------------------------------------------------------------===// + +#ifndef MATHTEST_RANGEBASEDGENERATOR_HPP +#define MATHTEST_RANGEBASEDGENERATOR_HPP + +#include "mathtest/IndexedRange.hpp" +#include "mathtest/InputGenerator.hpp" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Parallel.h" + +#include +#include +#include +#include +#include +#include + +namespace mathtest { + +template +class [[nodiscard]] RangeBasedGenerator : public InputGenerator { +public: + void reset() noexcept override { NextFlatIndex = 0; } + + [[nodiscard]] std::size_t + fill(llvm::MutableArrayRef... Buffers) noexcept override { + const std::array BufferSizes = {Buffers.size()...}; + const std::size_t BufferSize = BufferSizes[0]; + assert((BufferSize != 0) && "Buffer size cannot be zero"); + assert(std::all_of(BufferSizes.begin(), BufferSizes.end(), + [&](std::size_t Size) { return Size == BufferSize; }) && + "All input buffers must have the same size"); + + if (NextFlatIndex >= Size) + return 0; + + const auto BatchSize = std::min(BufferSize, Size - NextFlatIndex); + const auto CurrentFlatIndex = NextFlatIndex; + NextFlatIndex += BatchSize; + + auto BufferPtrsTuple = std::make_tuple(Buffers.data()...); + + llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) { + static_cast(this)->writeInputs(CurrentFlatIndex, Offset, + BufferPtrsTuple); + }); + + return static_cast(BatchSize); + } + +protected: + using RangesTupleType = std::tuple...>; + + static constexpr std::size_t NumInputs = sizeof...(InTypes); + static_assert(NumInputs > 0, "The number of inputs must be at least 1"); + + explicit constexpr RangeBasedGenerator( + const IndexedRange &...Ranges) noexcept + : RangesTuple(Ranges...) {} + + explicit constexpr RangeBasedGenerator( + uint64_t Size, const IndexedRange &...Ranges) noexcept + : RangesTuple(Ranges...), Size(Size) {} + + RangesTupleType RangesTuple; + uint64_t Size = 0; + +private: + uint64_t NextFlatIndex = 0; +}; +} // namespace mathtest + +#endif // MATHTEST_RANGEBASEDGENERATOR_HPP diff --git a/offload/unittests/Conformance/tests/CMakeLists.txt b/offload/unittests/Conformance/tests/CMakeLists.txt index 8c0109ba62ce3..7d45e7a8a5865 100644 --- a/offload/unittests/Conformance/tests/CMakeLists.txt +++ b/offload/unittests/Conformance/tests/CMakeLists.txt @@ -19,6 +19,7 @@ add_conformance_test(exp10f Exp10fTest.cpp) add_conformance_test(exp2f Exp2fTest.cpp) add_conformance_test(expm1f Expm1fTest.cpp) add_conformance_test(hypotf16 Hypotf16Test.cpp) +add_conformance_test(log LogTest.cpp) add_conformance_test(logf LogfTest.cpp) add_conformance_test(log10f Log10fTest.cpp) add_conformance_test(log1pf Log1pfTest.cpp) diff --git a/offload/unittests/Conformance/tests/LogTest.cpp b/offload/unittests/Conformance/tests/LogTest.cpp new file mode 100644 index 0000000000000..ae568e2c47404 --- /dev/null +++ b/offload/unittests/Conformance/tests/LogTest.cpp @@ -0,0 +1,66 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the conformance test of the log function. +/// +//===----------------------------------------------------------------------===// + +#include "mathtest/CommandLineExtras.hpp" +#include "mathtest/IndexedRange.hpp" +#include "mathtest/RandomGenerator.hpp" +#include "mathtest/RandomState.hpp" +#include "mathtest/TestConfig.hpp" +#include "mathtest/TestRunner.hpp" + +#include "llvm/ADT/StringRef.h" + +#include +#include +#include + +namespace { + +// Disambiguate the overloaded 'log' function to select the double version +constexpr auto logd // NOLINT(readability-identifier-naming) + = static_cast(log); +} // namespace + +namespace mathtest { + +template <> struct FunctionConfig { + static constexpr llvm::StringRef Name = "log"; + static constexpr llvm::StringRef KernelName = "logKernel"; + + // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4, + // Table 68, Khronos Registry [July 10, 2025]. + static constexpr uint64_t UlpTolerance = 3; +}; +} // namespace mathtest + +int main(int argc, const char **argv) { + llvm::cl::ParseCommandLineOptions(argc, argv, + "Conformance test of the log function"); + + using namespace mathtest; + + uint64_t Seed = 42; + uint64_t Size = 1ULL << 32; + IndexedRange Range(/*Begin=*/0.0, + /*End=*/std::numeric_limits::infinity(), + /*Inclusive=*/true); + RandomGenerator Generator(SeedTy{Seed}, Size, Range); + + const auto Configs = cl::getTestConfigs(); + const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR; + const bool IsVerbose = cl::IsVerbose; + + bool Passed = runTests(Generator, Configs, DeviceBinaryDir, IsVerbose); + + return Passed ? EXIT_SUCCESS : EXIT_FAILURE; +}