Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libclc/clc/include/clc/clc_convert.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef __CLC_CLC_CONVERT_H__
#define __CLC_CLC_CONVERT_H__

#include <clc/clcmacro.h>

// Declares a single overloaded conversion function.
//
// The declared function is named __clc_convert_<TO_TYPE><SUFFIX>: TO_TYPE and
// SUFFIX are token-pasted directly onto the "__clc_convert_" prefix. It takes
// one argument of type FROM_TYPE and returns TO_TYPE.
//
// The expansion already ends with a semicolon, so an invocation of this macro
// does not need a trailing one.
#define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
_CLC_OVERLOAD _CLC_DECL TO_TYPE __clc_convert_##TO_TYPE##SUFFIX(FROM_TYPE x);

Expand Down
43 changes: 13 additions & 30 deletions libclc/clc/lib/generic/integer/clc_mad_sat.cl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/integer/clc_add_sat.h>
#include <clc/integer/clc_mad24.h>
Expand All @@ -8,34 +9,23 @@
#include <clc/relational/clc_select.h>
#include <clc/shared/clc_clamp.h>

#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY)

// Macro for defining mad_sat variants for char/uchar/short/ushort
// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs
#define __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \
_CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \
return __clc_clamp( \
(UP_TYPE)__clc_mad24((UP_TYPE)x, (UP_TYPE)y, (UP_TYPE)z), \
(UP_TYPE)LIT_PREFIX##_MIN, (UP_TYPE)LIT_PREFIX##_MAX); \
}

#define __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE, UP_TYPE, LIT_PREFIX) \
_CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \
UP_TYPE upscaled_mad = __clc_mad24(__CLC_CONVERT_TY(x, UP_TYPE), \
__CLC_CONVERT_TY(y, UP_TYPE), \
__CLC_CONVERT_TY(z, UP_TYPE)); \
UP_TYPE upscaled_mad = \
__clc_mad24(__clc_convert_##UP_TYPE(x), __clc_convert_##UP_TYPE(y), \
__clc_convert_##UP_TYPE(z)); \
UP_TYPE clamped_mad = __clc_clamp(upscaled_mad, (UP_TYPE)LIT_PREFIX##_MIN, \
(UP_TYPE)LIT_PREFIX##_MAX); \
return __CLC_CONVERT_TY(clamped_mad, TYPE); \
return __clc_convert_##TYPE(clamped_mad); \
}

#define __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(TYPE, UP_TYPE, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##2, UP_TYPE##2, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##3, UP_TYPE##3, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##4, UP_TYPE##4, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##8, UP_TYPE##8, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##16, UP_TYPE##16, LIT_PREFIX)
__CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##2, UP_TYPE##2, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##3, UP_TYPE##3, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##4, UP_TYPE##4, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##8, UP_TYPE##8, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##16, UP_TYPE##16, LIT_PREFIX)

__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(char, int, CHAR)
__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(uchar, uint, UCHAR)
Expand Down Expand Up @@ -67,20 +57,13 @@ __CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(ulong, long, ULONG)
INTTY mhi = __clc_mul_hi(x, y); \
UINTTY mlo = __clc_as_##UINTTY(x * y); \
SLONGTY m = __clc_upsample(mhi, mlo); \
m += __CLC_CONVERT_TY(z, SLONGTY); \
m += __clc_convert_##SLONGTY(z); \
m = __clc_clamp(m, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \
return __CLC_CONVERT_TY(m, INTTY); \
return __clc_convert_##INTTY(m); \
}

// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs
#define __CLC_DEFINE_SINT_MAD_SAT_ALL_TYS(INTTY, UINTTY, SLONGTY) \
_CLC_OVERLOAD _CLC_DEF INTTY __clc_mad_sat(INTTY x, INTTY y, INTTY z) { \
INTTY mhi = __clc_mul_hi(x, y); \
UINTTY mlo = __clc_as_##UINTTY(x * y); \
SLONGTY m = __clc_upsample(mhi, mlo); \
m += z; \
return __clc_clamp(m, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \
} \
__CLC_DEFINE_SINT_MAD_SAT(INTTY, UINTTY, SLONGTY) \
__CLC_DEFINE_SINT_MAD_SAT(INTTY##2, UINTTY##2, SLONGTY##2) \
__CLC_DEFINE_SINT_MAD_SAT(INTTY##3, UINTTY##3, SLONGTY##3) \
__CLC_DEFINE_SINT_MAD_SAT(INTTY##4, UINTTY##4, SLONGTY##4) \
Expand Down
33 changes: 12 additions & 21 deletions libclc/clc/lib/generic/integer/clc_mul_hi.cl
Original file line number Diff line number Diff line change
@@ -1,31 +1,24 @@
#include <clc/clc_convert.h>
#include <clc/integer/clc_hadd.h>
#include <clc/integer/definitions.h>
#include <clc/internal/clc.h>

// TODO: Replace with __clc_convert_<type> when available
#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY)

#define __CLC_MUL_HI_VEC_IMPL(BGENTYPE, GENTYPE, GENSIZE) \
_CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \
BGENTYPE large_x = __CLC_CONVERT_TY(x, BGENTYPE); \
BGENTYPE large_y = __CLC_CONVERT_TY(y, BGENTYPE); \
BGENTYPE large_mul_hi = (large_x * large_y) >> (BGENTYPE)GENSIZE; \
return __CLC_CONVERT_TY(large_mul_hi, GENTYPE); \
}

// For all types EXCEPT long, which is implemented separately
#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \
_CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \
return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE); \
BGENTYPE large_x = __clc_convert_##BGENTYPE(x); \
BGENTYPE large_y = __clc_convert_##BGENTYPE(y); \
BGENTYPE large_mul_hi = (large_x * large_y) >> (BGENTYPE)GENSIZE; \
return __clc_convert_##GENTYPE(large_mul_hi); \
}

#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \
__CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \
__CLC_MUL_HI_VEC_IMPL(BTYPE##2, TYPE##2, BITS) \
__CLC_MUL_HI_VEC_IMPL(BTYPE##3, TYPE##3, BITS) \
__CLC_MUL_HI_VEC_IMPL(BTYPE##4, TYPE##4, BITS) \
__CLC_MUL_HI_VEC_IMPL(BTYPE##8, TYPE##8, BITS) \
__CLC_MUL_HI_VEC_IMPL(BTYPE##16, TYPE##16, BITS)
__CLC_MUL_HI_IMPL(BTYPE##2, TYPE##2, BITS) \
__CLC_MUL_HI_IMPL(BTYPE##3, TYPE##3, BITS) \
__CLC_MUL_HI_IMPL(BTYPE##4, TYPE##4, BITS) \
__CLC_MUL_HI_IMPL(BTYPE##8, TYPE##8, BITS) \
__CLC_MUL_HI_IMPL(BTYPE##16, TYPE##16, BITS)

_CLC_OVERLOAD _CLC_DEF long __clc_mul_hi(long x, long y) {
long f, o, i;
Expand Down Expand Up @@ -98,8 +91,8 @@ _CLC_OVERLOAD _CLC_DEF ulong __clc_mul_hi(ulong x, ulong y) {
f = x_hi * y_hi; \
o = x_hi * y_lo; \
i = x_lo * y_hi; \
l = __CLC_CONVERT_TY(x_lo * y_lo, UTY); \
i += __CLC_CONVERT_TY(l >> (UTY)32, TY); \
l = __clc_convert_##UTY(x_lo * y_lo); \
i += __clc_convert_##TY(l >> (UTY)32); \
\
return f + (__clc_hadd(o, i) >> (TY)31); \
}
Expand Down Expand Up @@ -128,5 +121,3 @@ __CLC_MUL_HI_TYPES()
#undef __CLC_MUL_HI_LONG_VEC_IMPL
#undef __CLC_MUL_HI_DEC_IMPL
#undef __CLC_MUL_HI_IMPL
#undef __CLC_MUL_HI_VEC_IMPL
#undef __CLC_CONVERT_TY
40 changes: 18 additions & 22 deletions libclc/clc/lib/generic/integer/clc_upsample.cl
Original file line number Diff line number Diff line change
@@ -1,35 +1,31 @@
#include <clc/clc_convert.h>
#include <clc/internal/clc.h>

// TODO: Replace with __clc_convert_<type> when available
#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY)

#define __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
_CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) { \
BGENTYPE large_hi = __CLC_CONVERT_TY(hi, BGENTYPE); \
BGENTYPE large_lo = __CLC_CONVERT_TY(lo, BGENTYPE); \
BGENTYPE large_hi = __clc_convert_##BGENTYPE(hi); \
BGENTYPE large_lo = __clc_convert_##BGENTYPE(lo); \
return (large_hi << (BGENTYPE)GENSIZE) | large_lo; \
}

#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
_CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) { \
return ((BGENTYPE)hi << GENSIZE) | lo; \
} \
__CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2, GENSIZE) \
__CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3, GENSIZE) \
__CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4, GENSIZE) \
__CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8, GENSIZE) \
__CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16, GENSIZE)
#define __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(BGENTYPE, GENTYPE, UGENTYPE, \
GENSIZE) \
__CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
__CLC_UPSAMPLE_IMPL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2, GENSIZE) \
__CLC_UPSAMPLE_IMPL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3, GENSIZE) \
__CLC_UPSAMPLE_IMPL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4, GENSIZE) \
__CLC_UPSAMPLE_IMPL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8, GENSIZE) \
__CLC_UPSAMPLE_IMPL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16, GENSIZE)

#define __CLC_UPSAMPLE_TYPES() \
__CLC_UPSAMPLE_IMPL(short, char, uchar, 8) \
__CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8) \
__CLC_UPSAMPLE_IMPL(int, short, ushort, 16) \
__CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16) \
__CLC_UPSAMPLE_IMPL(long, int, uint, 32) \
__CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32)
__CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(short, char, uchar, 8) \
__CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(ushort, uchar, uchar, 8) \
__CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(int, short, ushort, 16) \
__CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(uint, ushort, ushort, 16) \
__CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(long, int, uint, 32) \
__CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(ulong, uint, uint, 32)

__CLC_UPSAMPLE_TYPES()

#undef __CLC_UPSAMPLE_TYPES
#undef __CLC_UPSAMPLE_IMPL
#undef __CLC_CONVERT_TY
17 changes: 9 additions & 8 deletions libclc/generic/lib/math/clc_exp10.cl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
*/

#include <clc/clc.h>
#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/math/clc_mad.h>
#include <clc/math/clc_subnormal_config.h>
Expand Down Expand Up @@ -70,7 +71,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_exp10(float x) {
int return_inf = x > X_MAX;
int return_zero = x < X_MIN;

int n = convert_int(x * R_64_BY_LOG10_2);
int n = __clc_convert_int(x * R_64_BY_LOG10_2);

float fn = (float)n;
int j = n & 0x3f;
Expand All @@ -89,11 +90,11 @@ _CLC_DEF _CLC_OVERLOAD float __clc_exp10(float x) {
float two_to_jby64 = USE_TABLE(exp_tbl, j);
z2 = __clc_mad(two_to_jby64, z2, two_to_jby64);

float z2s = z2 * as_float(0x1 << (m + 149));
float z2n = as_float(as_int(z2) + m2);
float z2s = z2 * __clc_as_float(0x1 << (m + 149));
float z2n = __clc_as_float(__clc_as_int(z2) + m2);
z2 = m <= -126 ? z2s : z2n;

z2 = return_inf ? as_float(PINFBITPATT_SP32) : z2;
z2 = return_inf ? __clc_as_float(PINFBITPATT_SP32) : z2;
z2 = return_zero ? 0.0f : z2;
z2 = return_nan ? x : z2;
return z2;
Expand All @@ -115,7 +116,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x) {
// ln(10)
const double R_LN10 = 0x1.26bb1bbb55516p+1;

int n = convert_int(x * R_64_BY_LOG10_2);
int n = __clc_convert_int(x * R_64_BY_LOG10_2);

double dn = (double)n;

Expand Down Expand Up @@ -144,15 +145,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x) {

int n1 = m >> 2;
int n2 = m - n1;
double z3 = z2 * as_double(((long)n1 + 1023) << 52);
z3 *= as_double(((long)n2 + 1023) << 52);
double z3 = z2 * __clc_as_double(((long)n1 + 1023) << 52);
z3 *= __clc_as_double(((long)n2 + 1023) << 52);

z2 = ldexp(z2, m);
z2 = small_value ? z3 : z2;

z2 = __clc_isnan(x) ? x : z2;

z2 = x > X_MAX ? as_double(PINFBITPATT_DP64) : z2;
z2 = x > X_MAX ? __clc_as_double(PINFBITPATT_DP64) : z2;
z2 = x < X_MIN ? 0.0 : z2;

return z2;
Expand Down
41 changes: 21 additions & 20 deletions libclc/generic/lib/math/clc_fmod.cl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
*/

#include <clc/clc.h>
#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/integer/clc_clz.h>
#include <clc/math/clc_floor.h>
Expand All @@ -31,19 +32,19 @@
#include <math/clc_remainder.h>

_CLC_DEF _CLC_OVERLOAD float __clc_fmod(float x, float y) {
int ux = as_int(x);
int ux = __clc_as_int(x);
int ax = ux & EXSIGNBIT_SP32;
float xa = as_float(ax);
float xa = __clc_as_float(ax);
int sx = ux ^ ax;
int ex = ax >> EXPSHIFTBITS_SP32;

int uy = as_int(y);
int uy = __clc_as_int(y);
int ay = uy & EXSIGNBIT_SP32;
float ya = as_float(ay);
float ya = __clc_as_float(ay);
int ey = ay >> EXPSHIFTBITS_SP32;

float xr = as_float(0x3f800000 | (ax & 0x007fffff));
float yr = as_float(0x3f800000 | (ay & 0x007fffff));
float xr = __clc_as_float(0x3f800000 | (ax & 0x007fffff));
float yr = __clc_as_float(0x3f800000 | (ay & 0x007fffff));
int c;
int k = ex - ey;

Expand All @@ -62,36 +63,36 @@ _CLC_DEF _CLC_OVERLOAD float __clc_fmod(float x, float y) {
xr = lt ? xa : xr;
yr = lt ? ya : yr;

float s = as_float(ey << EXPSHIFTBITS_SP32);
float s = __clc_as_float(ey << EXPSHIFTBITS_SP32);
xr *= lt ? 1.0f : s;

c = ax == ay;
xr = c ? 0.0f : xr;

xr = as_float(sx ^ as_int(xr));
xr = __clc_as_float(sx ^ __clc_as_int(xr));

c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 |
ay == 0;
xr = c ? as_float(QNANBITPATT_SP32) : xr;
xr = c ? __clc_as_float(QNANBITPATT_SP32) : xr;

return xr;
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_fmod, float, float);

#ifdef cl_khr_fp64
_CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
ulong ux = as_ulong(x);
ulong ux = __clc_as_ulong(x);
ulong ax = ux & ~SIGNBIT_DP64;
ulong xsgn = ux ^ ax;
double dx = as_double(ax);
int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
double dx = __clc_as_double(ax);
int xexp = __clc_convert_int(ax >> EXPSHIFTBITS_DP64);
int xexp1 = 11 - (int)__clc_clz(ax & MANTBITS_DP64);
xexp1 = xexp < 1 ? xexp1 : xexp;

ulong uy = as_ulong(y);
ulong uy = __clc_as_ulong(y);
ulong ay = uy & ~SIGNBIT_DP64;
double dy = as_double(ay);
int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
double dy = __clc_as_double(ay);
int yexp = __clc_convert_int(ay >> EXPSHIFTBITS_DP64);
int yexp1 = 11 - (int)__clc_clz(ay & MANTBITS_DP64);
yexp1 = yexp < 1 ? yexp1 : yexp;

Expand Down Expand Up @@ -151,12 +152,12 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
dx += i ? w : 0.0;

// At this point, dx lies in the range [0,dy)
double ret = as_double(xsgn ^ as_ulong(dx));
dx = as_double(ax);
double ret = __clc_as_double(xsgn ^ __clc_as_ulong(dx));
dx = __clc_as_double(ax);

// Now handle |x| == |y|
int c = dx == dy;
t = as_double(xsgn);
t = __clc_as_double(xsgn);
ret = c ? t : ret;

// Next, handle |x| < |y|
Expand All @@ -167,7 +168,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {

// |y| is 0
c = dy == 0.0;
ret = c ? as_double(QNANBITPATT_DP64) : ret;
ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret;

// y is +-Inf, NaN
c = yexp > BIASEDEMAX_DP64;
Expand All @@ -176,7 +177,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {

// x is +-Inf, NaN
c = xexp > BIASEDEMAX_DP64;
ret = c ? as_double(QNANBITPATT_DP64) : ret;
ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret;

return ret;
}
Expand Down
Loading