Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions offload/unittests/Conformance/device_code/CUDAMath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
runKernelBody<__nv_expm1f>(NumElements, Out, X);
}

/// Kernel entry point for the double-precision `log` conformance test (CUDA
/// variant, backed by `__nv_log`).
///
/// Forwards the element count, the output buffer \p Out and the input buffer
/// \p X to `runKernelBody` instantiated with `__nv_log`.
/// NOTE(review): `runKernelBody` is defined outside this view; presumably it
/// evaluates the function once per element and stores the result in \p Out —
/// confirm against its definition.
__gpu_kernel void logKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_log>(NumElements, Out, X);
}

__gpu_kernel void logfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_logf>(NumElements, Out, X);
Expand Down
2 changes: 2 additions & 0 deletions offload/unittests/Conformance/device_code/DeviceAPIs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ float __nv_expf(float);
float __nv_exp10f(float);
float __nv_exp2f(float);
float __nv_expm1f(float);
double __nv_log(double);
float __nv_logf(float);
float __nv_log10f(float);
float __nv_log1pf(float);
Expand Down Expand Up @@ -96,6 +97,7 @@ float __ocml_exp_f32(float);
float __ocml_exp10_f32(float);
float __ocml_exp2_f32(float);
float __ocml_expm1_f32(float);
double __ocml_log_f64(double);
float __ocml_log_f32(float);
float __ocml_log10_f32(float);
float __ocml_log1p_f32(float);
Expand Down
5 changes: 5 additions & 0 deletions offload/unittests/Conformance/device_code/HIPMath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
}

/// Kernel entry point for the double-precision `log` conformance test (HIP
/// variant, backed by `__ocml_log_f64`).
///
/// Forwards the element count, the output buffer \p Out and the input buffer
/// \p X to `runKernelBody` instantiated with `__ocml_log_f64`.
/// NOTE(review): `runKernelBody` is defined outside this view; presumably it
/// evaluates the function once per element and stores the result in \p Out —
/// confirm against its definition.
__gpu_kernel void logKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log_f64>(NumElements, Out, X);
}

__gpu_kernel void logfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log_f32>(NumElements, Out, X);
Expand Down
5 changes: 5 additions & 0 deletions offload/unittests/Conformance/device_code/LLVMLibm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
runKernelBody<hypotf16>(NumElements, Out, X, Y);
}

/// Kernel entry point for the double-precision `log` conformance test (LLVM
/// libm variant, backed by `log`).
///
/// Forwards the element count, the output buffer \p Out and the input buffer
/// \p X to `runKernelBody` instantiated with `log`.
/// NOTE(review): `runKernelBody` is defined outside this view; presumably it
/// evaluates the function once per element and stores the result in \p Out —
/// confirm against its definition.
__gpu_kernel void logKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<log>(NumElements, Out, X);
}

__gpu_kernel void logfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<logf>(NumElements, Out, X);
Expand Down
125 changes: 52 additions & 73 deletions offload/unittests/Conformance/include/mathtest/ExhaustiveGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,98 +8,71 @@
///
/// \file
/// This file contains the definition of the ExhaustiveGenerator class, a
/// concrete input generator that exhaustively creates inputs from a given
/// sequence of ranges.
/// concrete range-based generator that exhaustively creates inputs from a
/// given sequence of ranges.
///
//===----------------------------------------------------------------------===//

#ifndef MATHTEST_EXHAUSTIVEGENERATOR_HPP
#define MATHTEST_EXHAUSTIVEGENERATOR_HPP

#include "mathtest/IndexedRange.hpp"
#include "mathtest/InputGenerator.hpp"
#include "mathtest/RangeBasedGenerator.hpp"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Parallel.h"

#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <tuple>

namespace mathtest {

template <typename... InTypes>
class [[nodiscard]] ExhaustiveGenerator final
: public InputGenerator<InTypes...> {
static constexpr std::size_t NumInputs = sizeof...(InTypes);
static_assert(NumInputs > 0, "The number of inputs must be at least 1");
: public RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...> {

friend class RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;

using Base = RangeBasedGenerator<ExhaustiveGenerator<InTypes...>, InTypes...>;
using IndexArrayType = std::array<uint64_t, Base::NumInputs>;

using Base::RangesTuple;
using Base::Size;

public:
explicit constexpr ExhaustiveGenerator(
const IndexedRange<InTypes> &...Ranges) noexcept
: RangesTuple(Ranges...) {
bool Overflowed = getSizeWithOverflow(Ranges..., Size);
: Base(Ranges...) {
const auto MaybeSize = getInputSpaceSize(Ranges...);

assert(MaybeSize.has_value() && "The size is too large");
Size = *MaybeSize;

assert(!Overflowed && "The input space size is too large");
assert((Size > 0) && "The input space size must be at least 1");
assert((Size > 0) && "The size must be at least 1");

IndexArrayType DimSizes = {};
std::size_t DimIndex = 0;
((DimSizes[DimIndex++] = Ranges.getSize()), ...);

Strides[NumInputs - 1] = 1;
if constexpr (NumInputs > 1)
for (int Index = static_cast<int>(NumInputs) - 2; Index >= 0; --Index)
Strides[Base::NumInputs - 1] = 1;
if constexpr (Base::NumInputs > 1)
for (int Index = static_cast<int>(Base::NumInputs) - 2; Index >= 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

size_t should be used when indexing arrays.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right that size_t is the idiomatic choice for indexing in general. However, for this specific reverse loop, I've intentionally used int to avoid a bug.

The loop termination condition is Index >= 0. If Index were an unsigned type like size_t, this condition would always be true. When Index is 0 and the loop decrements (--Index), it would underflow and "wrap around" to the largest possible size_t value, leading to an infinite loop.

Using a signed int ensures that when Index becomes -1, the condition Index >= 0 correctly evaluates to false, and the loop terminates as expected.

--Index)
Strides[Index] = Strides[Index + 1] * DimSizes[Index + 1];
}

void reset() noexcept override { NextFlatIndex = 0; }

[[nodiscard]] std::size_t
fill(llvm::MutableArrayRef<InTypes>... Buffers) noexcept override {
const std::array<std::size_t, NumInputs> BufferSizes = {Buffers.size()...};
const std::size_t BufferSize = BufferSizes[0];
assert((BufferSize != 0) && "Buffer size cannot be zero");
assert(std::all_of(BufferSizes.begin(), BufferSizes.end(),
[&](std::size_t Size) { return Size == BufferSize; }) &&
"All input buffers must have the same size");

if (NextFlatIndex >= Size)
return 0;

const auto BatchSize = std::min<uint64_t>(BufferSize, Size - NextFlatIndex);
const auto CurrentFlatIndex = NextFlatIndex;
NextFlatIndex += BatchSize;

auto BufferPtrsTuple = std::make_tuple(Buffers.data()...);

llvm::parallelFor(0, BatchSize, [&](std::size_t Offset) {
writeInputs(CurrentFlatIndex, Offset, BufferPtrsTuple);
});

return static_cast<std::size_t>(BatchSize);
}

private:
using RangesTupleType = std::tuple<IndexedRange<InTypes>...>;
using IndexArrayType = std::array<uint64_t, NumInputs>;

static bool getSizeWithOverflow(const IndexedRange<InTypes> &...Ranges,
uint64_t &Size) noexcept {
Size = 1;
bool Overflowed = false;

auto Multiplier = [&](const uint64_t RangeSize) {
if (!Overflowed)
Overflowed = __builtin_mul_overflow(Size, RangeSize, &Size);
};
[[nodiscard]] constexpr IndexArrayType
getNDIndex(uint64_t FlatIndex) const noexcept {
IndexArrayType NDIndex;

(Multiplier(Ranges.getSize()), ...);
for (std::size_t Index = 0; Index < Base::NumInputs; ++Index) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why std::size_t rather than size_t?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm using std::size_t here primarily for consistency with the rest of the conformance test suite's codebase.

NDIndex[Index] = FlatIndex / Strides[Index];
FlatIndex -= NDIndex[Index] * Strides[Index];
}

return Overflowed;
return NDIndex;
}

template <typename BufferPtrsTupleType>
Expand All @@ -109,31 +82,37 @@ class [[nodiscard]] ExhaustiveGenerator final
writeInputsImpl<0>(NDIndex, Offset, BufferPtrsTuple);
}

constexpr IndexArrayType getNDIndex(uint64_t FlatIndex) const noexcept {
IndexArrayType NDIndex;

for (std::size_t Index = 0; Index < NumInputs; ++Index) {
NDIndex[Index] = FlatIndex / Strides[Index];
FlatIndex -= NDIndex[Index] * Strides[Index];
}

return NDIndex;
}

template <std::size_t Index, typename BufferPtrsTupleType>
void writeInputsImpl(IndexArrayType NDIndex, uint64_t Offset,
BufferPtrsTupleType BufferPtrsTuple) const noexcept {
if constexpr (Index < NumInputs) {
if constexpr (Index < Base::NumInputs) {
const auto &Range = std::get<Index>(RangesTuple);
std::get<Index>(BufferPtrsTuple)[Offset] = Range[NDIndex[Index]];

writeInputsImpl<Index + 1>(NDIndex, Offset, BufferPtrsTuple);
}
}

uint64_t Size = 1;
RangesTupleType RangesTuple;
[[nodiscard]] static constexpr std::optional<uint64_t>
getInputSpaceSize(const IndexedRange<InTypes> &...Ranges) noexcept {
uint64_t InputSpaceSize = 1;
bool Overflowed = false;

auto Multiplier = [&](const uint64_t RangeSize) {
if (!Overflowed)
Overflowed =
__builtin_mul_overflow(InputSpaceSize, RangeSize, &InputSpaceSize);
};

(Multiplier(Ranges.getSize()), ...);

if (Overflowed)
return std::nullopt;

return InputSpaceSize;
}

IndexArrayType Strides = {};
uint64_t NextFlatIndex = 0;
};
} // namespace mathtest

Expand Down
86 changes: 86 additions & 0 deletions offload/unittests/Conformance/include/mathtest/RandomGenerator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the definition of the RandomGenerator class, a concrete
/// range-based generator that randomly creates inputs from a given sequence of
/// ranges.
///
//===----------------------------------------------------------------------===//

#ifndef MATHTEST_RANDOMGENERATOR_HPP
#define MATHTEST_RANDOMGENERATOR_HPP

#include "mathtest/IndexedRange.hpp"
#include "mathtest/RandomState.hpp"
#include "mathtest/RangeBasedGenerator.hpp"

#include <cstddef>
#include <cstdint>
#include <tuple>

namespace mathtest {

template <typename... InTypes>
class [[nodiscard]] RandomGenerator final
: public RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...> {

friend class RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;

using Base = RangeBasedGenerator<RandomGenerator<InTypes...>, InTypes...>;

using Base::RangesTuple;
using Base::Size;

public:
/// Constructs a random generator from a base seed, a size and one indexed
/// range per input type.
///
/// \param BaseSeed Seed stored in this object; writeInputs() combines it with
///                 an element's position to seed that element's RandomState.
/// \param Size     Forwarded unchanged to the RangeBasedGenerator base.
/// \param Ranges   One IndexedRange per input, forwarded unchanged to the
///                 RangeBasedGenerator base.
explicit constexpr RandomGenerator(
SeedTy BaseSeed, uint64_t Size,
const IndexedRange<InTypes> &...Ranges) noexcept
: Base(Size, Ranges...), BaseSeed(BaseSeed) {}

private:
/// Draws an index in [0, RangeSize) from \p RNG using rejection sampling, so
/// that the final modulo does not favour small indices.
///
/// \param RNG       Generator to draw raw 64-bit values from; its state is
///                  advanced once per draw (possibly several times per call).
/// \param RangeSize Number of valid indices. A value of 0 yields 0 without
///                  consuming any value from \p RNG.
/// \return An index strictly less than \p RangeSize (or 0 when it is 0).
[[nodiscard]] static uint64_t getRandomIndex(RandomState &RNG,
uint64_t RangeSize) noexcept {
// Guard against a modulo by zero below; 0 is returned without drawing.
if (RangeSize == 0)
return 0;

// Unsigned negation wraps modulo 2^64, so (-RangeSize) == 2^64 - RangeSize and
// Threshold == 2^64 mod RangeSize. Raw values below Threshold are rejected,
// which leaves an accepted interval [Threshold, 2^64) whose length is an exact
// multiple of RangeSize (unbiased provided RNG.next() is uniform over 64 bits).
const uint64_t Threshold = (-RangeSize) % RangeSize;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this not always 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a great question, For unsigned types like uint64_t, the negation -RangeSize is defined by two's complement and is equivalent to (2^64 - RangeSize). The modulo of this value correctly gives us the number of values to discard from the beginning of the random sequence to avoid modulo bias.

It's a standard technique for debiasing the modulo operator. This article by Daniel Lemire explains it well: https://lemire.me/blog/2016/06/30/fast-random-shuffling/


// Redraw until the raw value falls in the accepted interval.
uint64_t RandomNumber;
do {
RandomNumber = RNG.next();
} while (RandomNumber < Threshold);

// Reduce the accepted raw value to an index in [0, RangeSize).
return RandomNumber % RangeSize;
}

template <typename BufferPtrsTupleType>
void writeInputs(uint64_t CurrentFlatIndex, uint64_t Offset,
BufferPtrsTupleType BufferPtrsTuple) const noexcept {

RandomState RNG(SeedTy{BaseSeed.Value ^ (CurrentFlatIndex + Offset)});
writeInputsImpl<0>(RNG, Offset, BufferPtrsTuple);
}

/// Compile-time recursion over the inputs: handles input number \p Index and
/// then continues with \p Index + 1 until every input has been written.
///
/// For the current input, a random index into its range is drawn from \p RNG
/// and the range element at that index is stored in slot \p Offset of the
/// matching buffer. The same \p RNG is shared by all inputs of one element,
/// so inputs are drawn in increasing \p Index order.
template <std::size_t Index, typename BufferPtrsTupleType>
void writeInputsImpl(RandomState &RNG, uint64_t Offset,
                     BufferPtrsTupleType BufferPtrsTuple) const noexcept {
  if constexpr (Index >= Base::NumInputs) {
    // Every input has been handled; the recursion stops here.
    return;
  } else {
    const auto &CurrentRange = std::get<Index>(RangesTuple);
    const uint64_t PickedIndex = getRandomIndex(RNG, CurrentRange.getSize());

    auto &Slot = std::get<Index>(BufferPtrsTuple)[Offset];
    Slot = CurrentRange[PickedIndex];

    writeInputsImpl<Index + 1>(RNG, Offset, BufferPtrsTuple);
  }
}

SeedTy BaseSeed;
};
} // namespace mathtest

#endif // MATHTEST_RANDOMGENERATOR_HPP
53 changes: 53 additions & 0 deletions offload/unittests/Conformance/include/mathtest/RandomState.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the definition of the RandomState class, a fast and
/// lightweight pseudo-random number generator.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you include a link to where this implementation came from?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! Thank you for this suggestion!

///
/// The implementation is based on the xorshift* generator, seeded using the
/// SplitMix64 generator for robust initialization. For more details on the
/// algorithm, see: https://en.wikipedia.org/wiki/Xorshift
///
//===----------------------------------------------------------------------===//

#ifndef MATHTEST_RANDOMSTATE_HPP
#define MATHTEST_RANDOMSTATE_HPP

#include <cstdint>

/// Wrapper that gives a raw 64-bit seed its own type, keeping it distinct
/// from other uint64_t arguments.
struct SeedTy {
  uint64_t Value;
};

/// A small 64-bit pseudo-random number generator.
///
/// The state is advanced with the xorshift64* recurrence (shifts 12/25/27
/// followed by a multiplication of the output). The initial state is derived
/// from the seed with one SplitMix64 step and is never zero.
class [[nodiscard]] RandomState {
  /// Increment used by the SplitMix64 step; also the replacement for a zero
  /// state.
  static constexpr uint64_t SplitMixIncrement = 0x9E3779B97F4A7C15ULL;

  /// Output multiplier of the xorshift64* recurrence.
  static constexpr uint64_t XorShiftStarMultiplier = 0x2545F4914F6CDD1DULL;

  uint64_t State;

  /// Scrambles \p X with one SplitMix64 step.
  ///
  /// \return The mixed value, or SplitMixIncrement if the mix produced zero.
  [[nodiscard]] static constexpr uint64_t splitMix64(uint64_t X) noexcept {
    X += SplitMixIncrement;
    X = (X ^ (X >> 30)) * 0xBF58476D1CE4E5B9ULL;
    X = (X ^ (X >> 27)) * 0x94D049BB133111EBULL;
    X = (X ^ (X >> 31));
    // A zero state would make next() return 0 forever (every shift/xor of 0
    // is 0), so it is replaced by a fixed non-zero constant.
    return X ? X : SplitMixIncrement;
  }

public:
  /// Creates a generator whose initial state is derived from \p Seed.
  explicit constexpr RandomState(SeedTy Seed) noexcept
      : State(splitMix64(Seed.Value)) {}

  /// Advances the generator by one step.
  ///
  /// Usable in constant expressions, like the constructor.
  ///
  /// \return The next 64-bit pseudo-random value.
  [[nodiscard]] constexpr uint64_t next() noexcept {
    uint64_t X = State;
    X ^= X >> 12;
    X ^= X << 25;
    X ^= X >> 27;
    State = X;
    return X * XorShiftStarMultiplier;
  }
};

#endif // MATHTEST_RANDOMSTATE_HPP
Loading
Loading