-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[Offload][Conformance] Add RandomGenerator for large input spaces
#154252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-offload Author: Leandro Lacerda (leandrolcampos) ChangesThis patch implements the Architectural Refactoring To support different generation strategies in a clean and extensible way, the existing
New Components
Example Usage As a first use case and demonstration of this new capability, this patch also adds the first double-precision conformance test for the Patch is 20.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154252.diff 10 Files Affected:
diff --git a/offload/unittests/Conformance/device_code/CUDAMath.cpp b/offload/unittests/Conformance/device_code/CUDAMath.cpp
index a351e924b8f89..86c5d698d80af 100644
--- a/offload/unittests/Conformance/device_code/CUDAMath.cpp
+++ b/offload/unittests/Conformance/device_code/CUDAMath.cpp
@@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
runKernelBody<__nv_expm1f>(NumElements, Out, X);
}
+__gpu_kernel void logKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_log>(NumElements, Out, X);
+}
+
__gpu_kernel void logfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_logf>(NumElements, Out, X);
diff --git a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
index 8476dcbeff0c9..7941a05010cc7 100644
--- a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
+++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
@@ -63,6 +63,7 @@ float __nv_expf(float);
float __nv_exp10f(float);
float __nv_exp2f(float);
float __nv_expm1f(float);
+double __nv_log(double);
float __nv_logf(float);
float __nv_log10f(float);
float __nv_log1pf(float);
@@ -96,6 +97,7 @@ float __ocml_exp_f32(float);
float __ocml_exp10_f32(float);
float __ocml_exp2_f32(float);
float __ocml_expm1_f32(float);
+double __ocml_log_f64(double);
float __ocml_log_f32(float);
float __ocml_log10_f32(float);
float __ocml_log1p_f32(float);
diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp
index 36efe6b2696ab..55f67669872c5 100644
--- a/offload/unittests/Conformance/device_code/HIPMath.cpp
+++ b/offload/unittests/Conformance/device_code/HIPMath.cpp
@@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
}
+__gpu_kernel void logKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void logfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log_f32>(NumElements, Out, X);
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
index 8869d87017486..cf33e0a86e94c 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
@@ -123,6 +123,11 @@ __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
runKernelBody<hypotf16>(NumElements, Out, X, Y);
}
+__gpu_kernel void logKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<log>(NumElements, Out, X);
+}
+
__gpu_kernel void logfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<logf>(NumElements, Out, X);
diff --git a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
index 6f7f7a9b665d0..1bed91251a041 100644
--- a/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
+++ b/offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
@@ -8,8 +8,8 @@
///
/// \file
/// This file contains the definition of the ExhaustiveGenerator class, a
-/// concrete input generator that exhaustively creates inputs from a given
-/// sequence of ranges.
+/// concrete range-based generator that exhaustively creates inputs from a
+/// given sequence of ranges.
///
//===----------------------------------------------------------------------===//
@@ -17,14 +17,9 @@
#define MATHTEST_EXHAUSTIVEGENERATOR_HPP
#include "mathtest/IndexedRange.hpp"
-#include "mathtest/InputGenerator.hpp"
+#include "mathtest/RangeBasedGenerator.hpp"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/Parallel.h"
-
-#include <algorithm>
#include <array>
-#include <cassert>
#include <cstddef>
#include <cstdint>
#include <tuple>
@@ -33,73 +28,44 @@ namespace mathtest {
template <typename... InTypes>
class [[nodiscard]] ExhaustiveGenerator final
- : public InputGenerator<InTypes...> {
- static constexpr std::size_t NumInputs = sizeof...(InTypes);
- static_assert(NumInputs > 0, "The number of inputs must be at least 1");
+ : public RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...> {
+
+ friend class RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
+
+ using Base = RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
+ using IndexArrayType = std::array<uint64_t, Base::NumInputs>;
+
+ using Base::InputSpaceSize;
+ using Base::RangesTuple;
+ using Base::SizeToGenerate;
public:
explicit constexpr ExhaustiveGenerator(
const IndexedRange<InTypes> &...Ranges) noexcept
- : RangesTuple(Ranges...) {
- bool Overflowed = getSizeWithOverflow(Ranges..., Size);
-
- assert(!Overflowed && "The input space size is too large");
- assert((Size > 0) && "The input space size must be at least 1");
+ : Base(Ranges...) {
+ SizeToGenerate = InputSpaceSize;
IndexArrayType DimSizes = {};
std::size_t DimIndex = 0;
((DimSizes[DimIndex++] = Ranges.getSize()), ...);
- Strides[NumInputs - 1] = 1;
- if constexpr (NumInputs > 1)
- for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index)
+ Strides[Base::NumInputs - 1] = 1;
+ if constexpr (Base::NumInputs > 1)
+ for (int Index = static_cast<int>(Base::NumInputs) - 2; Index >= 0;
+ --Index)
Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1];
}
- void reset() noexcept override { NextFlatIndex = 0; }
-
- [[nodiscard]] std::size_t
- fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
- const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
- const std::size_t BufferSize = BufferSizes[0];
- assert((BufferSize != 0) && "Buffer size cannot be zero");
- assert(std::all_of(BufferSizes.begin(), BufferSizes.end(),
- [&](std::size_t Size) { return Size == BufferSize; }) &&
- "All input buffers must have the same size");
-
- if (NextFlatIndex >= Size)
- return 0;
-
- const auto BatchSize = std::min<uint64_t>(BufferSize, Size - NextFlatIndex);
- const auto CurrentFlatIndex = NextFlatIndex;
- NextFlatIndex += BatchSize;
-
- auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);
-
- llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) {
- writeInputs(CurrentFlatIndex, Offset, BufferPtrsTuple);
- });
-
- return static_cast<std::size_t>(BatchSize);
- }
-
private:
- using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;
- using IndexArrayType = std::array<uint64_t, NumInputs>;
-
- static bool getSizeWithOverflow(const IndexedRange<InTypes> &...Ranges,
- uint64_t &Size) noexcept {
- Size = 1;
- bool Overflowed = false;
-
- auto Multiplier = [&](const uint64_t RangeSize) {
- if (!Overflowed)
- Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size);
- };
+ constexpr IndexArrayType getNDIndex(uint64_t FlatIndex) const noexcept {
+ IndexArrayType NDIndex;
- (Multiplier(Ranges.getSize()), ...);
+ for (std::size_t Index = 0; Index < Base::NumInputs; ++Index) {
+ NDIndex[Index] = FlatIndex / Strides[Index];
+ FlatIndex -= NDIndex[Index] * Strides[Index];
+ }
- return Overflowed;
+ return NDIndex;
}
template <typename BufferPtrsTupleType>
@@ -109,31 +75,18 @@ class [[nodiscard]] ExhaustiveGenerator final
writeInputsImpl<0>(NDIndex, Offset, BufferPtrsTuple);
}
- constexpr IndexArrayType getNDIndex(uint64_t FlatIndex) const noexcept {
- IndexArrayType NDIndex;
-
- for (std::size_t Index = 0; Index < NumInputs; ++Index) {
- NDIndex[Index] = FlatIndex / Strides[Index];
- FlatIndex -= NDIndex[Index] * Strides[Index];
- }
-
- return NDIndex;
- }
-
template <std::size_t Index, typename BufferPtrsTupleType>
void writeInputsImpl(IndexArrayType NDIndex, uint64_t Offset,
BufferPtrsTupleType BufferPtrsTuple) const noexcept {
- if constexpr (Index < NumInputs) {
+ if constexpr (Index < Base::NumInputs) {
const auto &Range = std::get<Index>(RangesTuple);
std::get<Index>(BufferPtrsTuple)[Offset] = Range[NDIndex[Index]];
+
writeInputsImpl<Index + 1>(NDIndex, Offset, BufferPtrsTuple);
}
}
- uint64_t Size = 1;
- RangesTupleType RangesTuple;
IndexArrayType Strides = {};
- uint64_t NextFlatIndex = 0;
};
} // namespace mathtest
diff --git a/offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp b/offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp
new file mode 100644
index 0000000000000..0c62370f17c7a
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp
@@ -0,0 +1,88 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the RandomGenerator class, a concrete
+/// range-based generator that randomly creates inputs from a given sequence of
+/// ranges.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_RANDOMGENERATOR_HPP
+#define MATHTEST_RANDOMGENERATOR_HPP
+
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/RandomState.hpp"
+#include "mathtest/RangeBasedGenerator.hpp"
+
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+
+namespace mathtest {
+
+template <typename... InTypes>
+class [[nodiscard]] RandomGenerator final
+ : public RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...> {
+
+ friend class RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;
+
+ using Base = RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;
+
+ using Base::RangesTuple;
+ using Base::SizeToGenerate;
+
+public:
+ explicit constexpr RandomGenerator(
+ SeedTy BaseSeed, uint64_t Size,
+ const IndexedRange<InTypes> &...Ranges) noexcept
+ : Base(Ranges...), BaseSeed(BaseSeed) {
+ SizeToGenerate = Size;
+ }
+
+private:
+ static uint64_t getRandomIndex(RandomState &RNG,
+ uint64_t RangeSize) noexcept {
+ if (RangeSize == 0)
+ return 0;
+
+ const uint64_t Threshold = (-RangeSize) % RangeSize;
+
+ uint64_t RandomNumber;
+ do {
+ RandomNumber = RNG.next();
+ } while (RandomNumber < Threshold);
+
+ return RandomNumber % RangeSize;
+ }
+
+ template <typename BufferPtrsTupleType>
+ void writeInputs(uint64_t CurrentFlatIndex, uint64_t Offset,
+ BufferPtrsTupleType BufferPtrsTuple) const noexcept {
+
+ RandomState RNG(SeedTy{BaseSeed.Value ^ (CurrentFlatIndex + Offset)});
+ writeInputsImpl<0>(RNG, Offset, BufferPtrsTuple);
+ }
+
+ template <std::size_t Index, typename BufferPtrsTupleType>
+ void writeInputsImpl(RandomState &RNG, uint64_t Offset,
+ BufferPtrsTupleType BufferPtrsTuple) const noexcept {
+ if constexpr (Index < Base::NumInputs) {
+ const auto &Range = std::get<Index>(RangesTuple);
+ const auto RandomIndex = getRandomIndex(RNG, Range.getSize());
+ std::get<Index>(BufferPtrsTuple)[Offset] = Range[RandomIndex];
+
+ writeInputsImpl<Index + 1>(RNG, Offset, BufferPtrsTuple);
+ }
+ }
+
+ SeedTy BaseSeed;
+};
+} // namespace mathtest
+
+#endif // MATHTEST_RANDOMGENERATOR_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/RandomState.hpp b/offload/unittests/Conformance/include/mathtest/RandomState.hpp
new file mode 100644
index 0000000000000..a09cc18c57142
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/RandomState.hpp
@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the RandomState class, a fast and
+/// lightweight pseudo-random number generator.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_RANDOMSTATE_HPP
+#define MATHTEST_RANDOMSTATE_HPP
+
+#include <cstdint>
+
+struct SeedTy {
+ uint64_t Value;
+};
+
+class [[nodiscard]] RandomState {
+ uint64_t State;
+
+ [[nodiscard]] static uint64_t splitMix64(uint64_t X) noexcept {
+ X += 0x9E3779B97F4A7C15ULL;
+ X = (X ^ (X >> 30)) * 0xBF58476D1CE4E5B9ULL;
+ X = (X ^ (X >> 27)) * 0x94D049BB133111EBULL;
+ X = (X ^ (X >> 31));
+ return X ? X : 0x9E3779B97F4A7C15ULL;
+ }
+
+public:
+ explicit inline RandomState(SeedTy Seed) noexcept
+ : State(splitMix64(Seed.Value)) {}
+
+ [[nodiscard]] inline uint64_t next() noexcept {
+ uint64_t X = State;
+ X ^= X >> 12;
+ X ^= X << 25;
+ X ^= X >> 27;
+ State = X;
+ return X * 0x2545F4914F6CDD1DULL;
+ }
+};
+
+#endif // MATHTEST_RANDOMSTATE_HPP
diff --git a/offload/unittests/Conformance/include/mathtest/RangeBasedGenerator.hpp b/offload/unittests/Conformance/include/mathtest/RangeBasedGenerator.hpp
new file mode 100644
index 0000000000000..7e8b0889b9625
--- /dev/null
+++ b/offload/unittests/Conformance/include/mathtest/RangeBasedGenerator.hpp
@@ -0,0 +1,104 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the RangeBasedGenerator class, a base
+/// class for input generators that operate on a sequence of ranges.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef MATHTEST_RANGEBASEDGENERATOR_HPP
+#define MATHTEST_RANGEBASEDGENERATOR_HPP
+
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/InputGenerator.hpp"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Parallel.h"
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+
+namespace mathtest {
+
+template <typename Derived, typename... InTypes>
+class [[nodiscard]] RangeBasedGenerator : public InputGenerator<InTypes...> {
+public:
+ void reset() noexcept override { NextFlatIndex = 0; }
+
+ [[nodiscard]] std::size_t
+ fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
+ const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
+ const std::size_t BufferSize = BufferSizes[0];
+ assert((BufferSize != 0) && "Buffer size cannot be zero");
+ assert(std::all_of(BufferSizes.begin(), BufferSizes.end(),
+ [&](std::size_t Size) { return Size == BufferSize; }) &&
+ "All input buffers must have the same size");
+
+ if (NextFlatIndex >= SizeToGenerate)
+ return 0;
+
+ const auto BatchSize =
+ std::min<uint64_t>(BufferSize, SizeToGenerate - NextFlatIndex);
+ const auto CurrentFlatIndex = NextFlatIndex;
+ NextFlatIndex += BatchSize;
+
+ auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);
+
+ llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) {
+ static_cast<Derived *>(this)->writeInputs(CurrentFlatIndex, Offset,
+ BufferPtrsTuple);
+ });
+
+ return static_cast<std::size_t>(BatchSize);
+ }
+
+protected:
+ using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;
+
+ static constexpr std::size_t NumInputs = sizeof...(InTypes);
+ static_assert(NumInputs > 0, "The number of inputs must be at least 1");
+
+ explicit constexpr RangeBasedGenerator(
+ const IndexedRange<InTypes> &...Ranges) noexcept
+ : RangesTuple(Ranges...) {
+ bool Overflowed = getSizeWithOverflow(Ranges..., InputSpaceSize);
+
+ assert(!Overflowed && "The input space size is too large");
+ assert((InputSpaceSize > 0) && "The input space size must be at least 1");
+ }
+
+ uint64_t SizeToGenerate = 0;
+ uint64_t InputSpaceSize = 1;
+ RangesTupleType RangesTuple;
+
+private:
+ static bool getSizeWithOverflow(const IndexedRange<InTypes> &...Ranges,
+ uint64_t &Size) noexcept {
+ Size = 1;
+ bool Overflowed = false;
+
+ auto Multiplier = [&](const uint64_t RangeSize) {
+ if (!Overflowed)
+ Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size);
+ };
+
+ (Multiplier(Ranges.getSize()), ...);
+
+ return Overflowed;
+ }
+
+ uint64_t NextFlatIndex = 0;
+};
+} // namespace mathtest
+
+#endif // MATHTEST_RANGEBASEDGENERATOR_HPP
diff --git a/offload/unittests/Conformance/tests/CMakeLists.txt b/offload/unittests/Conformance/tests/CMakeLists.txt
index 8c0109ba62ce3..7d45e7a8a5865 100644
--- a/offload/unittests/Conformance/tests/CMakeLists.txt
+++ b/offload/unittests/Conformance/tests/CMakeLists.txt
@@ -19,6 +19,7 @@ add_conformance_test(exp10f Exp10fTest.cpp)
add_conformance_test(exp2f Exp2fTest.cpp)
add_conformance_test(expm1f Expm1fTest.cpp)
add_conformance_test(hypotf16 Hypotf16Test.cpp)
+add_conformance_test(log LogTest.cpp)
add_conformance_test(logf LogfTest.cpp)
add_conformance_test(log10f Log10fTest.cpp)
add_conformance_test(log1pf Log1pfTest.cpp)
diff --git a/offload/unittests/Conformance/tests/LogTest.cpp b/offload/unittests/Conformance/tests/LogTest.cpp
new file mode 100644
index 0000000000000..ae568e2c47404
--- /dev/null
+++ b/offload/unittests/Conformance/tests/LogTest.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the conformance test of the log function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "mathtest/CommandLineExtras.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/RandomGenerator.hpp"
+#include "mathtest/RandomState.hpp"
+#include "mathtest/TestConfig.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <limits>
+#include <math.h>
+
+namespace {
+
+// Disambiguate the overloaded 'log' function to select the double version
+constexpr auto logd // NOLINT(readability-identifier-naming)
+ = static_cast<double (*)(double)>(log);
+} // namespace
+
+namespace mathtest {
+
+template <> struct FunctionConfig<logd> {
+ static constexpr llvm::StringRef Name = "log";
+ static constexpr llvm::StringRef KernelName = "logKernel";
+
+ // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+ // Table 68, Khronos Registry [July 10, 2025].
+ static constexpr uint64_t UlpTolerance = 3;
+};
+} // namespace mathtest
+
+int main(int argc, const char **argv) {
+ llvm::cl::ParseCommandLineOptions(argc, argv,
+ "Conformance test of the log function");
+
+ using namespace mathtest;
+
+ uint64_t Seed = 42;
+ uint64_t Size = 1ULL << 32;
+ IndexedRange<double> Range(/*Begin=*/0.0,
+ /*End=*/std::numeric_limits<double>::infinity(),
+ /*Inclusive=*/true);
+ RandomGenerator<double> Ge...
[truncated]
|
|
Here's what I get on my NVIDIA GeForce RTX 4070 Laptop GPU. |
| for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index) | ||
| Strides[Base::NumInputs - 1] = 1; | ||
| if constexpr (Base::NumInputs > 1) | ||
| for (int Index = static_cast<int>(Base::NumInputs) - 2; Index >= 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
size_t should be used when indexing arrays.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right that size_t is the idiomatic choice for indexing in general. However, for this specific reverse loop, I've intentionally used int to avoid a bug.
The loop termination condition is Index >= 0. If Index were an unsigned type like size_t, this condition would always be true. When Index is 0 and the loop decrements (--Index), it would underflow and "wrap around" to the largest possible size_t value, leading to an infinite loop.
Using a signed int ensures that when Index becomes -1, the condition Index >= 0 correctly evaluates to false, and the loop terminates as expected.
| IndexArrayType NDIndex; | ||
|
|
||
| (Multiplier(Ranges.getSize()), ...); | ||
| for (std::size_t Index = 0; Index < Base::NumInputs; ++Index) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why std::size_t rather than size_t?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm using std::size_t here primarily for consistency with the rest of the conformance test suite's codebase.
| if (RangeSize == 0) | ||
| return 0; | ||
|
|
||
| const uint64_t Threshold = (-RangeSize) % RangeSize; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this not always 0?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a great question, For unsigned types like uint64_t, the negation -RangeSize is defined by two's complement and is equivalent to (2^64 - RangeSize). The modulo of this value correctly gives us the number of values to discard from the beginning of the random sequence to avoid modulo bias.
It's a standard technique for debiasing the modulo operator. This article by Daniel Lemire explains it well: https://lemire.me/blog/2016/06/30/fast-random-shuffling/
| auto Multiplier = [&](const uint64_t RangeSize) { | ||
| if (!Overflowed) | ||
| Overflowed = | ||
| __builtin_mul_overflow(InputSpaceSize, RangeSize, &InputSpaceSize); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLVM provides MulOverflow for this, rather than relying on a compiler-specific function.
Also, I think Overflowed |= blah might be more idiomatic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point about using LLVM's own utilities. I looked into it, and it seems llvm::MulOverflow is constrained to signed integer types. Since this function correctly operates on uint64_t, I think that __builtin_mul_overflow is the more appropriate tool here for now.
And I think that the current if (!Overflowed) check has the minor benefit of short-circuiting the remaining multiplications once an overflow is found.
| explicit constexpr RandomState(SeedTy Seed) noexcept | ||
| : State(splitMix64(Seed.Value)) {} | ||
|
|
||
| [[nodiscard]] inline uint64_t next() noexcept { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this should be nodiscard. I think it's reasonable for the user to call next() to advance the state of the RNG without using the value.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done! Thank you for this suggestion!
| /// | ||
| /// \file | ||
| /// This file contains the definition of the RandomState class, a fast and | ||
| /// lightweight pseudo-random number generator. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you include a link to where this implementation came from?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done! Thank you for this suggestion!
jhuber6
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
|
@RossBrunton do you have any concerns before I merge? |
This patch implements the
RandomGenerator, a new input generator that enables conformance testing for functions with large input spaces (e.g., double-precision math functions).Architectural Refactoring
To support different generation strategies in a clean and extensible way, the existing
ExhaustiveGeneratorwas refactored into a new class hierarchy:RangeBasedGenerator, was introduced using the Curiously Recurring Template Pattern (CRTP). It contains the common logic for generators that operate on a sequence of ranges.ExhaustiveGeneratornow inherits from this base class, simplifying its implementation.New Components
RandomGeneratorclass also inherits fromRangeBasedGenerator. It implements a strategy that randomly samples a specified number of points from the total input space.RandomStateclass (axorshift64*PRNG seeded withsplitmix64) to ensure deterministic and reproducible random streams for testing.Example Usage
As a first use case and demonstration of this new capability, this patch also adds the first double-precision conformance test for the
logfunction. This test uses the newRandomGeneratorto validate the implementations from thellvm-libm,cuda-math, andhip-mathproviders.