Skip to content

Added missing math APIs for devicelib. #2558

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions libdevice/cmath_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,47 @@ short _FDtest(float *px) { // categorize *px
return ret;
}

// Compares x and y and reports their ordering relationship as one of
// _FP_LT, _FP_GT or _FP_EQ. A result of '0' means the operands are
// unordered, i.e. _FDtest classifies at least one of them as _NANCODE.
DEVICE_EXTERN_C
int _fdpcomp(float x, float y) {
  // A NaN on either side makes the pair unordered; bail out before any
  // relational comparison is attempted.
  const bool unordered = _FDtest(&x) == _NANCODE || _FDtest(&y) == _NANCODE;
  if (unordered)
    return 0;

  if (x < y)
    return _FP_LT;
  if (x > y)
    return _FP_GT;
  // Neither less nor greater, and both operands are ordered: they are equal.
  return _FP_EQ;
}

// Returns 0 if the sign bit of x is not set, and non-zero otherwise.
// The sign bit is extracted by the FSIGN macro; the exact non-zero value
// is whatever that macro yields.
DEVICE_EXTERN_C
int _fdsign(float x) { return FSIGN(x); }

// fpclassify() equivalent taking a pointer to the value: translates the
// category code produced by _FDtest into the matching FP_* constant.
// Any category other than denormal, finite, infinity or NaN maps to FP_ZERO.
DEVICE_EXTERN_C
short _fdtest(float *px) {
  const short category = _FDtest(px);

  if (category == _DENORM)
    return FP_SUBNORMAL;
  if (category == _FINITE)
    return FP_NORMAL;
  if (category == _INFCODE)
    return FP_INFINITE;
  if (category == _NANCODE)
    return FP_NAN;

  return FP_ZERO;
}

DEVICE_EXTERN_C
short _FDnorm(_Fval *ps) { // normalize float fraction
short xchar;
Expand Down
44 changes: 44 additions & 0 deletions libdevice/cmath_wrapper_fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ double asinh(double x) { return __devicelib_asinh(x); }
// Double-precision atanh(): forwards the argument to __devicelib_atanh and
// returns its result unchanged.
DEVICE_EXTERN_C
double atanh(double x) { return __devicelib_atanh(x); }

// Double-precision scalbn(): forwards x and exp to __devicelib_scalbn and
// returns its result unchanged.
DEVICE_EXTERN_C
double scalbn(double x, int exp) { return __devicelib_scalbn(x, exp); }

#if defined(_WIN32)
#include <math.h>
// FLOAT PROPERTIES
Expand Down Expand Up @@ -180,6 +183,47 @@ short _Dtest(double *px) { // categorize *px
return ret;
}

// Compares x and y and reports their ordering relationship as one of
// _FP_LT, _FP_GT or _FP_EQ. A result of '0' means the operands are
// unordered, i.e. _Dtest classifies at least one of them as _NANCODE.
DEVICE_EXTERN_C
int _dpcomp(double x, double y) {
  // A NaN on either side makes the pair unordered; bail out before any
  // relational comparison is attempted.
  const bool unordered = _Dtest(&x) == _NANCODE || _Dtest(&y) == _NANCODE;
  if (unordered)
    return 0;

  if (x < y)
    return _FP_LT;
  if (x > y)
    return _FP_GT;
  // Neither less nor greater, and both operands are ordered: they are equal.
  return _FP_EQ;
}

// Returns 0 if the sign bit of x is not set, and non-zero otherwise.
// The sign bit is extracted by the DSIGN macro; the exact non-zero value
// is whatever that macro yields.
DEVICE_EXTERN_C
int _dsign(double x) { return DSIGN(x); }

// fpclassify() equivalent taking a pointer to the value: translates the
// category code produced by _Dtest into the matching FP_* constant.
// Any category other than denormal, finite, infinity or NaN maps to FP_ZERO.
DEVICE_EXTERN_C
short _dtest(double *px) {
  const short category = _Dtest(px);

  if (category == _DENORM)
    return FP_SUBNORMAL;
  if (category == _FINITE)
    return FP_NORMAL;
  if (category == _INFCODE)
    return FP_INFINITE;
  if (category == _NANCODE)
    return FP_NAN;

  return FP_ZERO;
}

DEVICE_EXTERN_C
short _Dnorm(_Dval *ps) { // normalize double fraction
short xchar;
Expand Down
3 changes: 3 additions & 0 deletions libdevice/device_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,5 +249,8 @@ float __devicelib_logbf(float x);

DEVICE_EXTERN_C
float __devicelib_scalbnf(float x, int n);

DEVICE_EXTERN_C
double __devicelib_scalbn(double x, int exp);
#endif // __SPIR__
#endif // __LIBDEVICE_DEVICE_MATH_H__
5 changes: 5 additions & 0 deletions libdevice/fallback-cmath-fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,9 @@ double __devicelib_asinh(double x) { return __spirv_ocl_asinh(x); }

// Fallback double-precision atanh: forwards the argument to
// __spirv_ocl_atanh and returns its result unchanged.
DEVICE_EXTERN_C
double __devicelib_atanh(double x) { return __spirv_ocl_atanh(x); }

// Fallback double-precision scalbn: forwards x and exp to __spirv_ocl_ldexp.
// NOTE: scalbn scales by a power of FLT_RADIX while ldexp scales by a power
// of 2, so this mapping assumes FLT_RADIX == 2 on the target device.
DEVICE_EXTERN_C
double __devicelib_scalbn(double x, int exp) {
return __spirv_ocl_ldexp(x, exp);
Copy link
Contributor

@againull againull Oct 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I correct that we assume here that FLT_RADIX is always == 2?
Should we make a comment about this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we assume it is 2. Do you suggest mentioning this in the documentation for devicelib?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure whether it is worth mentioning, or how obvious it is that FLT_RADIX is always equal to 2. If you think it is obvious, I am OK with leaving it as is.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the GCC documentation, FLT_RADIX is 2 on the majority of machines, so I would say it is quite reasonable to expect it to be 2. At the same time, I was not able to find any documentation regarding the floating-point representation for SPIR-V devices.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, thx.

}
#endif // __SPIR__
2 changes: 2 additions & 0 deletions llvm/tools/sycl-post-link/sycl-post-link.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ static std::unordered_map<std::string, DeviceLibExt> DeviceLibFuncMap = {
{"__devicelib_powf", DeviceLibExt::cl_intel_devicelib_math},
{"__devicelib_remainderf", DeviceLibExt::cl_intel_devicelib_math},
{"__devicelib_remquof", DeviceLibExt::cl_intel_devicelib_math},
{"__devicelib_scalbnf", DeviceLibExt::cl_intel_devicelib_math},
{"__devicelib_sinf", DeviceLibExt::cl_intel_devicelib_math},
{"__devicelib_sinhf", DeviceLibExt::cl_intel_devicelib_math},
{"__devicelib_sqrtf", DeviceLibExt::cl_intel_devicelib_math},
Expand Down Expand Up @@ -206,6 +207,7 @@ static std::unordered_map<std::string, DeviceLibExt> DeviceLibFuncMap = {
{"__devicelib_pow", DeviceLibExt::cl_intel_devicelib_math_fp64},
{"__devicelib_remainder", DeviceLibExt::cl_intel_devicelib_math_fp64},
{"__devicelib_remquo", DeviceLibExt::cl_intel_devicelib_math_fp64},
{"__devicelib_scalbn", DeviceLibExt::cl_intel_devicelib_math_fp64},
{"__devicelib_sin", DeviceLibExt::cl_intel_devicelib_math_fp64},
{"__devicelib_sinh", DeviceLibExt::cl_intel_devicelib_math_fp64},
{"__devicelib_sqrt", DeviceLibExt::cl_intel_devicelib_math_fp64},
Expand Down
12 changes: 12 additions & 0 deletions sycl/include/CL/sycl/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1551,11 +1551,23 @@ extern SYCL_EXTERNAL void __assert_fail(const char *expr, const char *file,
}
#elif defined(_WIN32)
extern "C" {
// TODO: documented C runtime library APIs must be recognized as
// builtins by FE. This includes _dpcomp, _dsign, _dtest,
// _fdpcomp, _fdsign, _fdtest, _hypotf, _wassert.
// APIs used by STL, such as _Cosh, are undocumented, even though
// they are open-sourced. Recognizing them as builtins is not
// straightforward currently.
extern SYCL_EXTERNAL double _Cosh(double x, double y);
extern SYCL_EXTERNAL int _dpcomp(double x, double y);
extern SYCL_EXTERNAL int _dsign(double x);
extern SYCL_EXTERNAL short _Dtest(double *px);
extern SYCL_EXTERNAL short _dtest(double *px);
extern SYCL_EXTERNAL short _Exp(double *px, double y, short eoff);
extern SYCL_EXTERNAL float _FCosh(float x, float y);
extern SYCL_EXTERNAL int _fdpcomp(float x, float y);
extern SYCL_EXTERNAL int _fdsign(float x);
extern SYCL_EXTERNAL short _FDtest(float *px);
extern SYCL_EXTERNAL short _fdtest(float *px);
extern SYCL_EXTERNAL short _FExp(float *px, float y, short eoff);
extern SYCL_EXTERNAL float _FSinh(float x, float y);
extern SYCL_EXTERNAL double _Sinh(double x, double y);
Expand Down
73 changes: 64 additions & 9 deletions sycl/test/devicelib/cmath_fp64_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,22 @@
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

#include "math_utils.hpp"
#include <CL/sycl.hpp>
#include <cmath>
#include <cstdint>
#include <iostream>
#include "math_utils.hpp"

namespace s = cl::sycl;
constexpr s::access::mode sycl_read = s::access::mode::read;
constexpr s::access::mode sycl_write = s::access::mode::write;

#define TEST_NUM 38
#define TEST_NUM 63

double ref[TEST_NUM] = {
1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5,
0, 2, 0, 0, 1, 0, 2, 0, 0, 0,
0, 0, 1, 0, 1, 2, 0, 1, 2, 5,
0, 0, 0, 0, 0.5, 0.5, NAN, NAN,};
1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0,
0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

double refIptr = 1;

Expand Down Expand Up @@ -47,6 +47,12 @@ void device_cmath_test(s::queue &deviceQueue) {
auto quo_access = buffer4.template get_access<sycl_write>(cgh);
cgh.single_task<class DeviceMathTest>([=]() {
int i = 0;
T nan = NAN;
T minus_nan = -NAN;
T infinity = INFINITY;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I understand correctly that this test intentionally uses C macros instead of std::numeric_limits<T> methods, in order to check compatibility with C?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really. The C macro is just shorter to write.

T minus_infinity = -INFINITY;
double subnormal;
*((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL;
res_access[i++] = std::cos(0.0);
res_access[i++] = std::sin(0.0);
res_access[i++] = std::log(1.0);
Expand Down Expand Up @@ -83,9 +89,58 @@ void device_cmath_test(s::queue &deviceQueue) {
res_access[i++] = std::logb(1.0);
res_access[i++] = std::remainder(0.5, 1.0);
res_access[i++] = std::remquo(0.5, 1.0, &quo_access[0]);
T a = NAN;
res_access[i++] = std::tgamma(a);
res_access[i++] = std::lgamma(a);
res_access[i++] = std::tgamma(nan);
res_access[i++] = std::lgamma(nan);
res_access[i++] = std::scalbn(1.0, 1);

res_access[i++] = !(std::signbit(infinity) == 0);
res_access[i++] = !(std::signbit(minus_infinity) != 0);
res_access[i++] = !(std::signbit(nan) == 0);
res_access[i++] = !(std::signbit(minus_nan) != 0);

res_access[i++] = !(std::isunordered(minus_nan, nan) != 0);
res_access[i++] = !(std::isunordered(minus_infinity, infinity) == 0);
res_access[i++] = !(std::isgreater(minus_infinity, infinity) == 0);
res_access[i++] = !(std::isgreater(0.0f, minus_nan) == 0);
#ifdef _WIN32
res_access[i++] = !(std::isfinite(0.0f) != 0);
res_access[i++] = !(std::isfinite(nan) == 0);
res_access[i++] = !(std::isfinite(infinity) == 0);
res_access[i++] = !(std::isfinite(minus_infinity) == 0);

res_access[i++] = !(std::isinf(0.0f) == 0);
res_access[i++] = !(std::isinf(nan) == 0);
res_access[i++] = !(std::isinf(infinity) != 0);
res_access[i++] = !(std::isinf(minus_infinity) != 0);
#else // !_WIN32
// __builtin_isfinite is unsupported.
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;

// __builtin_isinf is unsupported.
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;
#endif // !_WIN32
res_access[i++] = !(std::isnan(0.0f) == 0);
res_access[i++] = !(std::isnan(nan) != 0);
res_access[i++] = !(std::isnan(infinity) == 0);
res_access[i++] = !(std::isnan(minus_infinity) == 0);
#ifdef _WIN32
res_access[i++] = !(std::isnormal(nan) == 0);
res_access[i++] = !(std::isnormal(minus_infinity) == 0);
res_access[i++] = !(std::isnormal(subnormal) == 0);
res_access[i++] = !(std::isnormal(1.0f) != 0);
#else // !_WIN32
// __builtin_isnormal() is unsupported.
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;
#endif // !_WIN32
});
});
}
Expand Down
72 changes: 65 additions & 7 deletions sycl/test/devicelib/cmath_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,19 @@
#include "math_utils.hpp"
#include <CL/sycl.hpp>
#include <cmath>
#include <cstdint>
#include <iostream>

namespace s = cl::sycl;
constexpr s::access::mode sycl_read = s::access::mode::read;
constexpr s::access::mode sycl_write = s::access::mode::write;

#define TEST_NUM 36
#define TEST_NUM 61

float ref[TEST_NUM] = {1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0,
1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2,
0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN};
float ref[TEST_NUM] = {1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0,
0, 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0,
0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

float refIptr = 1;

Expand All @@ -39,6 +41,13 @@ template <class T> void device_cmath_test_1(s::queue &deviceQueue) {
auto quo_access = buffer3.template get_access<sycl_write>(cgh);
cgh.single_task<class DeviceMathTest1>([=]() {
int i = 0;
T nan = NAN;
T minus_nan = -NAN;
T infinity = INFINITY;
T minus_infinity = -INFINITY;
float subnormal;
*((uint32_t *)&subnormal) = 0x7FFFFF;

res_access[i++] = std::cos(0.0f);
res_access[i++] = std::sin(0.0f);
res_access[i++] = std::log(1.0f);
Expand Down Expand Up @@ -73,9 +82,58 @@ template <class T> void device_cmath_test_1(s::queue &deviceQueue) {
res_access[i++] = std::logb(1.0f);
res_access[i++] = std::remainder(0.5f, 1.0f);
res_access[i++] = std::remquo(0.5f, 1.0f, &quo_access[0]);
T a = NAN;
res_access[i++] = std::tgamma(a);
res_access[i++] = std::lgamma(a);
res_access[i++] = std::tgamma(nan);
res_access[i++] = std::lgamma(nan);
res_access[i++] = std::scalbn(1.0f, 1);

res_access[i++] = !(std::signbit(infinity) == 0);
res_access[i++] = !(std::signbit(minus_infinity) != 0);
res_access[i++] = !(std::signbit(nan) == 0);
res_access[i++] = !(std::signbit(minus_nan) != 0);

res_access[i++] = !(std::isunordered(minus_nan, nan) != 0);
res_access[i++] = !(std::isunordered(minus_infinity, infinity) == 0);
res_access[i++] = !(std::isgreater(minus_infinity, infinity) == 0);
res_access[i++] = !(std::isgreater(0.0f, minus_nan) == 0);
#ifdef _WIN32
res_access[i++] = !(std::isfinite(0.0f) != 0);
res_access[i++] = !(std::isfinite(nan) == 0);
res_access[i++] = !(std::isfinite(infinity) == 0);
res_access[i++] = !(std::isfinite(minus_infinity) == 0);

res_access[i++] = !(std::isinf(0.0f) == 0);
res_access[i++] = !(std::isinf(nan) == 0);
res_access[i++] = !(std::isinf(infinity) != 0);
res_access[i++] = !(std::isinf(minus_infinity) != 0);
#else // !_WIN32
// __builtin_isfinite is unsupported.
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;

// __builtin_isinf is unsupported.
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;
#endif // !_WIN32
res_access[i++] = !(std::isnan(0.0f) == 0);
res_access[i++] = !(std::isnan(nan) != 0);
res_access[i++] = !(std::isnan(infinity) == 0);
res_access[i++] = !(std::isnan(minus_infinity) == 0);
#ifdef _WIN32
res_access[i++] = !(std::isnormal(nan) == 0);
res_access[i++] = !(std::isnormal(minus_infinity) == 0);
res_access[i++] = !(std::isnormal(subnormal) == 0);
res_access[i++] = !(std::isnormal(1.0f) != 0);
#else // !_WIN32
// __builtin_isnormal() is unsupported.
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;
res_access[i++] = 0;
#endif // !_WIN32
});
});
}
Expand Down
Loading