diff --git a/libclc/clc/include/clc/clc_convert.h b/libclc/clc/include/clc/clc_convert.h index 20bbd57540b30..12cd988d59c54 100644 --- a/libclc/clc/include/clc/clc_convert.h +++ b/libclc/clc/include/clc/clc_convert.h @@ -1,6 +1,8 @@ #ifndef __CLC_CLC_CONVERT_H__ #define __CLC_CLC_CONVERT_H__ +#include + #define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \ _CLC_OVERLOAD _CLC_DECL TO_TYPE __clc_convert_##TO_TYPE##SUFFIX(FROM_TYPE x); diff --git a/libclc/clc/lib/generic/integer/clc_mad_sat.cl b/libclc/clc/lib/generic/integer/clc_mad_sat.cl index 4e559dba2b2f5..530e9c84b10a0 100644 --- a/libclc/clc/lib/generic/integer/clc_mad_sat.cl +++ b/libclc/clc/lib/generic/integer/clc_mad_sat.cl @@ -1,3 +1,4 @@ +#include #include #include #include @@ -8,34 +9,23 @@ #include #include -#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY) - -// Macro for defining mad_sat variants for char/uchar/short/ushort -// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs #define __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \ _CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \ - return __clc_clamp( \ - (UP_TYPE)__clc_mad24((UP_TYPE)x, (UP_TYPE)y, (UP_TYPE)z), \ - (UP_TYPE)LIT_PREFIX##_MIN, (UP_TYPE)LIT_PREFIX##_MAX); \ - } - -#define __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE, UP_TYPE, LIT_PREFIX) \ - _CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \ - UP_TYPE upscaled_mad = __clc_mad24(__CLC_CONVERT_TY(x, UP_TYPE), \ - __CLC_CONVERT_TY(y, UP_TYPE), \ - __CLC_CONVERT_TY(z, UP_TYPE)); \ + UP_TYPE upscaled_mad = \ + __clc_mad24(__clc_convert_##UP_TYPE(x), __clc_convert_##UP_TYPE(y), \ + __clc_convert_##UP_TYPE(z)); \ UP_TYPE clamped_mad = __clc_clamp(upscaled_mad, (UP_TYPE)LIT_PREFIX##_MIN, \ (UP_TYPE)LIT_PREFIX##_MAX); \ - return __CLC_CONVERT_TY(clamped_mad, TYPE); \ + return __clc_convert_##TYPE(clamped_mad); \ } #define __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(TYPE, UP_TYPE, LIT_PREFIX) \ __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \ - __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##2, UP_TYPE##2, LIT_PREFIX) \ - __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##3, UP_TYPE##3, LIT_PREFIX) \ - __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##4, UP_TYPE##4, LIT_PREFIX) \ - __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##8, UP_TYPE##8, LIT_PREFIX) \ - __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##16, UP_TYPE##16, LIT_PREFIX) + __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##2, UP_TYPE##2, LIT_PREFIX) \ + __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##3, UP_TYPE##3, LIT_PREFIX) \ + __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##4, UP_TYPE##4, LIT_PREFIX) \ + __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##8, UP_TYPE##8, LIT_PREFIX) \ + __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##16, UP_TYPE##16, LIT_PREFIX) __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(char, int, CHAR) __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(uchar, uint, UCHAR) @@ -67,20 +57,13 @@ __CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(ulong, long, ULONG) INTTY mhi = __clc_mul_hi(x, y); \ UINTTY mlo = __clc_as_##UINTTY(x * y); \ SLONGTY m = __clc_upsample(mhi, mlo); \ - m += __CLC_CONVERT_TY(z, SLONGTY); \ + m += __clc_convert_##SLONGTY(z); \ m = __clc_clamp(m, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \ - return __CLC_CONVERT_TY(m, INTTY); \ + return __clc_convert_##INTTY(m); \ } -// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs #define __CLC_DEFINE_SINT_MAD_SAT_ALL_TYS(INTTY, UINTTY, SLONGTY) \ - _CLC_OVERLOAD _CLC_DEF INTTY __clc_mad_sat(INTTY x, INTTY y, INTTY z) { \ - INTTY mhi = __clc_mul_hi(x, y); \ - UINTTY mlo = __clc_as_##UINTTY(x * y); \ - SLONGTY m = __clc_upsample(mhi, mlo); \ - m += z; \ - return __clc_clamp(m, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \ - } \ + __CLC_DEFINE_SINT_MAD_SAT(INTTY, UINTTY, SLONGTY) \ __CLC_DEFINE_SINT_MAD_SAT(INTTY##2, UINTTY##2, SLONGTY##2) \ __CLC_DEFINE_SINT_MAD_SAT(INTTY##3, UINTTY##3, SLONGTY##3) \ __CLC_DEFINE_SINT_MAD_SAT(INTTY##4, UINTTY##4, SLONGTY##4) \ diff --git a/libclc/clc/lib/generic/integer/clc_mul_hi.cl b/libclc/clc/lib/generic/integer/clc_mul_hi.cl index cf4acc5429cb4..28457ac6126dd 100644 --- a/libclc/clc/lib/generic/integer/clc_mul_hi.cl +++ b/libclc/clc/lib/generic/integer/clc_mul_hi.cl @@ -1,31 +1,24 @@ +#include #include #include #include -// TODO: Replace with __clc_convert_ when available -#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY) - -#define __CLC_MUL_HI_VEC_IMPL(BGENTYPE, GENTYPE, GENSIZE) \ - _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \ - BGENTYPE large_x = __CLC_CONVERT_TY(x, BGENTYPE); \ - BGENTYPE large_y = __CLC_CONVERT_TY(y, BGENTYPE); \ - BGENTYPE large_mul_hi = (large_x * large_y) >> (BGENTYPE)GENSIZE; \ - return __CLC_CONVERT_TY(large_mul_hi, GENTYPE); \ - } - // For all types EXCEPT long, which is implemented separately #define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \ _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \ - return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE); \ + BGENTYPE large_x = __clc_convert_##BGENTYPE(x); \ + BGENTYPE large_y = __clc_convert_##BGENTYPE(y); \ + BGENTYPE large_mul_hi = (large_x * large_y) >> (BGENTYPE)GENSIZE; \ + return __clc_convert_##GENTYPE(large_mul_hi); \ } #define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \ __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \ - __CLC_MUL_HI_VEC_IMPL(BTYPE##2, TYPE##2, BITS) \ - __CLC_MUL_HI_VEC_IMPL(BTYPE##3, TYPE##3, BITS) \ - __CLC_MUL_HI_VEC_IMPL(BTYPE##4, TYPE##4, BITS) \ - __CLC_MUL_HI_VEC_IMPL(BTYPE##8, TYPE##8, BITS) \ - __CLC_MUL_HI_VEC_IMPL(BTYPE##16, TYPE##16, BITS) + __CLC_MUL_HI_IMPL(BTYPE##2, TYPE##2, BITS) \ + __CLC_MUL_HI_IMPL(BTYPE##3, TYPE##3, BITS) \ + __CLC_MUL_HI_IMPL(BTYPE##4, TYPE##4, BITS) \ + __CLC_MUL_HI_IMPL(BTYPE##8, TYPE##8, BITS) \ + __CLC_MUL_HI_IMPL(BTYPE##16, TYPE##16, BITS) _CLC_OVERLOAD _CLC_DEF long __clc_mul_hi(long x, long y) { long f, o, i; @@ -98,8 +91,8 @@ _CLC_OVERLOAD _CLC_DEF ulong __clc_mul_hi(ulong x, ulong y) { f = x_hi * y_hi; \ o = x_hi * y_lo; \ i = x_lo * y_hi; \ - l = __CLC_CONVERT_TY(x_lo * y_lo, UTY); \ - i += __CLC_CONVERT_TY(l >> (UTY)32, TY); \ + l = __clc_convert_##UTY(x_lo * y_lo); \ + i += __clc_convert_##TY(l >> (UTY)32); \ \ return f + (__clc_hadd(o, i) >> (TY)31); \ } @@ -128,5 +121,3 @@ __CLC_MUL_HI_TYPES() #undef __CLC_MUL_HI_LONG_VEC_IMPL #undef __CLC_MUL_HI_DEC_IMPL #undef __CLC_MUL_HI_IMPL -#undef __CLC_MUL_HI_VEC_IMPL -#undef __CLC_CONVERT_TY diff --git a/libclc/clc/lib/generic/integer/clc_upsample.cl b/libclc/clc/lib/generic/integer/clc_upsample.cl index d53ef7240bfc2..a13a6ed88ddcd 100644 --- a/libclc/clc/lib/generic/integer/clc_upsample.cl +++ b/libclc/clc/lib/generic/integer/clc_upsample.cl @@ -1,35 +1,31 @@ +#include #include -// TODO: Replace with __clc_convert_ when available -#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY) - -#define __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \ +#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \ _CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) { \ - BGENTYPE large_hi = __CLC_CONVERT_TY(hi, BGENTYPE); \ - BGENTYPE large_lo = __CLC_CONVERT_TY(lo, BGENTYPE); \ + BGENTYPE large_hi = __clc_convert_##BGENTYPE(hi); \ + BGENTYPE large_lo = __clc_convert_##BGENTYPE(lo); \ return (large_hi << (BGENTYPE)GENSIZE) | large_lo; \ } -#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \ - _CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) { \ - return ((BGENTYPE)hi << GENSIZE) | lo; \ - } \ - __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2, GENSIZE) \ - __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3, GENSIZE) \ - __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4, GENSIZE) \ - __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8, GENSIZE) \ - __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16, GENSIZE) +#define __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(BGENTYPE, GENTYPE, UGENTYPE, \ + GENSIZE) \ + __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \ + __CLC_UPSAMPLE_IMPL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2, GENSIZE) \ + __CLC_UPSAMPLE_IMPL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3, GENSIZE) \ + __CLC_UPSAMPLE_IMPL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4, GENSIZE) \ + __CLC_UPSAMPLE_IMPL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8, GENSIZE) \ + __CLC_UPSAMPLE_IMPL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16, GENSIZE) #define __CLC_UPSAMPLE_TYPES() \ - __CLC_UPSAMPLE_IMPL(short, char, uchar, 8) \ - __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8) \ - __CLC_UPSAMPLE_IMPL(int, short, ushort, 16) \ - __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16) \ - __CLC_UPSAMPLE_IMPL(long, int, uint, 32) \ - __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32) + __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(short, char, uchar, 8) \ + __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(ushort, uchar, uchar, 8) \ + __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(int, short, ushort, 16) \ + __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(uint, ushort, ushort, 16) \ + __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(long, int, uint, 32) \ + __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(ulong, uint, uint, 32) __CLC_UPSAMPLE_TYPES() #undef __CLC_UPSAMPLE_TYPES #undef __CLC_UPSAMPLE_IMPL -#undef __CLC_CONVERT_TY diff --git a/libclc/generic/lib/math/clc_exp10.cl b/libclc/generic/lib/math/clc_exp10.cl index 0eb53d013a85a..4f839a9815ac0 100644 --- a/libclc/generic/lib/math/clc_exp10.cl +++ b/libclc/generic/lib/math/clc_exp10.cl @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -70,7 +71,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_exp10(float x) { int return_inf = x > X_MAX; int return_zero = x < X_MIN; - int n = convert_int(x * R_64_BY_LOG10_2); + int n = __clc_convert_int(x * R_64_BY_LOG10_2); float fn = (float)n; int j = n & 0x3f; @@ -89,11 +90,11 @@ _CLC_DEF _CLC_OVERLOAD float __clc_exp10(float x) { float two_to_jby64 = USE_TABLE(exp_tbl, j); z2 = __clc_mad(two_to_jby64, z2, two_to_jby64); - float z2s = z2 * as_float(0x1 << (m + 149)); - float z2n = as_float(as_int(z2) + m2); + float z2s = z2 * __clc_as_float(0x1 << (m + 149)); + float z2n = __clc_as_float(__clc_as_int(z2) + m2); z2 = m <= -126 ? z2s : z2n; - z2 = return_inf ? as_float(PINFBITPATT_SP32) : z2; + z2 = return_inf ? __clc_as_float(PINFBITPATT_SP32) : z2; z2 = return_zero ? 0.0f : z2; z2 = return_nan ? x : z2; return z2; @@ -115,7 +116,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x) { // ln(10) const double R_LN10 = 0x1.26bb1bbb55516p+1; - int n = convert_int(x * R_64_BY_LOG10_2); + int n = __clc_convert_int(x * R_64_BY_LOG10_2); double dn = (double)n; @@ -144,15 +145,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x) { int n1 = m >> 2; int n2 = m - n1; - double z3 = z2 * as_double(((long)n1 + 1023) << 52); - z3 *= as_double(((long)n2 + 1023) << 52); + double z3 = z2 * __clc_as_double(((long)n1 + 1023) << 52); + z3 *= __clc_as_double(((long)n2 + 1023) << 52); z2 = ldexp(z2, m); z2 = small_value ? z3 : z2; z2 = __clc_isnan(x) ? x : z2; - z2 = x > X_MAX ? as_double(PINFBITPATT_DP64) : z2; + z2 = x > X_MAX ? __clc_as_double(PINFBITPATT_DP64) : z2; z2 = x < X_MIN ? 0.0 : z2; return z2; diff --git a/libclc/generic/lib/math/clc_fmod.cl b/libclc/generic/lib/math/clc_fmod.cl index a4a2ab791df68..31a5d4dc05c03 100644 --- a/libclc/generic/lib/math/clc_fmod.cl +++ b/libclc/generic/lib/math/clc_fmod.cl @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -31,19 +32,19 @@ #include _CLC_DEF _CLC_OVERLOAD float __clc_fmod(float x, float y) { - int ux = as_int(x); + int ux = __clc_as_int(x); int ax = ux & EXSIGNBIT_SP32; - float xa = as_float(ax); + float xa = __clc_as_float(ax); int sx = ux ^ ax; int ex = ax >> EXPSHIFTBITS_SP32; - int uy = as_int(y); + int uy = __clc_as_int(y); int ay = uy & EXSIGNBIT_SP32; - float ya = as_float(ay); + float ya = __clc_as_float(ay); int ey = ay >> EXPSHIFTBITS_SP32; - float xr = as_float(0x3f800000 | (ax & 0x007fffff)); - float yr = as_float(0x3f800000 | (ay & 0x007fffff)); + float xr = __clc_as_float(0x3f800000 | (ax & 0x007fffff)); + float yr = __clc_as_float(0x3f800000 | (ay & 0x007fffff)); int c; int k = ex - ey; @@ -62,17 +63,17 @@ _CLC_DEF _CLC_OVERLOAD float __clc_fmod(float x, float y) { xr = lt ? xa : xr; yr = lt ? ya : yr; - float s = as_float(ey << EXPSHIFTBITS_SP32); + float s = __clc_as_float(ey << EXPSHIFTBITS_SP32); xr *= lt ? 1.0f : s; c = ax == ay; xr = c ? 0.0f : xr; - xr = as_float(sx ^ as_int(xr)); + xr = __clc_as_float(sx ^ __clc_as_int(xr)); c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 | ay == 0; - xr = c ? as_float(QNANBITPATT_SP32) : xr; + xr = c ? __clc_as_float(QNANBITPATT_SP32) : xr; return xr; } @@ -80,18 +81,18 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_fmod, float, float); #ifdef cl_khr_fp64 _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) { - ulong ux = as_ulong(x); + ulong ux = __clc_as_ulong(x); ulong ax = ux & ~SIGNBIT_DP64; ulong xsgn = ux ^ ax; - double dx = as_double(ax); - int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); + double dx = __clc_as_double(ax); + int xexp = __clc_convert_int(ax >> EXPSHIFTBITS_DP64); int xexp1 = 11 - (int)__clc_clz(ax & MANTBITS_DP64); xexp1 = xexp < 1 ? xexp1 : xexp; - ulong uy = as_ulong(y); + ulong uy = __clc_as_ulong(y); ulong ay = uy & ~SIGNBIT_DP64; - double dy = as_double(ay); - int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); + double dy = __clc_as_double(ay); + int yexp = __clc_convert_int(ay >> EXPSHIFTBITS_DP64); int yexp1 = 11 - (int)__clc_clz(ay & MANTBITS_DP64); yexp1 = yexp < 1 ? yexp1 : yexp; @@ -151,12 +152,12 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) { dx += i ? w : 0.0; // At this point, dx lies in the range [0,dy) - double ret = as_double(xsgn ^ as_ulong(dx)); - dx = as_double(ax); + double ret = __clc_as_double(xsgn ^ __clc_as_ulong(dx)); + dx = __clc_as_double(ax); // Now handle |x| == |y| int c = dx == dy; - t = as_double(xsgn); + t = __clc_as_double(xsgn); ret = c ? t : ret; // Next, handle |x| < |y| @@ -167,7 +168,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) { // |y| is 0 c = dy == 0.0; - ret = c ? as_double(QNANBITPATT_DP64) : ret; + ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret; // y is +-Inf, NaN c = yexp > BIASEDEMAX_DP64; @@ -176,7 +177,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) { // x is +=Inf, NaN c = xexp > BIASEDEMAX_DP64; - ret = c ? as_double(QNANBITPATT_DP64) : ret; + ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret; return ret; } diff --git a/libclc/generic/lib/math/clc_pow.cl b/libclc/generic/lib/math/clc_pow.cl index 5dcd392c0f7ed..fce9573c39bac 100644 --- a/libclc/generic/lib/math/clc_pow.cl +++ b/libclc/generic/lib/math/clc_pow.cl @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -68,18 +69,18 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) { - int ix = as_int(x); + int ix = __clc_as_int(x); int ax = ix & EXSIGNBIT_SP32; int xpos = ix == ax; - int iy = as_int(y); + int iy = __clc_as_int(y); int ay = iy & EXSIGNBIT_SP32; int ypos = iy == ay; /* Extra precise log calculation * First handle case that x is close to 1 */ - float r = 1.0f - as_float(ax); + float r = 1.0f - __clc_as_float(ax); int near1 = __clc_fabs(r) < 0x1.0p-4f; float r2 = r * r; @@ -103,7 +104,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) { /* Computations for x not near 1 */ int m = (int)(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; float mf = (float)m; - int ixs = as_int(as_float(ax | 0x3f800000) - 1.0f); + int ixs = __clc_as_int(__clc_as_float(ax | 0x3f800000) - 1.0f); float mfs = (float)((ixs >> EXPSHIFTBITS_SP32) - 253); int c = m == -127; int ixn = c ? ixs : ax; @@ -112,8 +113,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) { int indx = (ixn & 0x007f0000) + ((ixn & 0x00008000) << 1); /* F - Y */ - float f = as_float(0x3f000000 | indx) - - as_float(0x3f000000 | (ixn & MANTBITS_SP32)); + float f = __clc_as_float(0x3f000000 | indx) - + __clc_as_float(0x3f000000 | (ixn & MANTBITS_SP32)); indx = indx >> 16; float2 tv = USE_TABLE(log_inv_tbl_ep, indx); @@ -141,10 +142,10 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) { lh = near1 ? lh_near1 : lh; l = near1 ? l_near1 : l; - float gh = as_float(as_int(l) & 0xfffff000); + float gh = __clc_as_float(__clc_as_int(l) & 0xfffff000); float gt = ((ltt - (lt - lth)) + ((lh - l) + lt)) + (l - gh); - float yh = as_float(iy & 0xfffff000); + float yh = __clc_as_float(iy & 0xfffff000); float yt = y - yh; @@ -155,7 +156,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) { /* Extra precise exp of ylogx */ /* 64/log2 : 92.332482616893657 */ const float R_64_BY_LOG2 = 0x1.715476p+6f; - int n = convert_int(ylogx * R_64_BY_LOG2); + int n = __clc_convert_int(ylogx * R_64_BY_LOG2); float nf = (float)n; int j = n & 0x3f; @@ -178,14 +179,14 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) { float expylogx = __clc_mad(tv.s0, poly, __clc_mad(tv.s1, poly, tv.s1)) + tv.s0; - float sexpylogx = expylogx * as_float(0x1 << (m + 149)); - float texpylogx = as_float(as_int(expylogx) + m2); + float sexpylogx = expylogx * __clc_as_float(0x1 << (m + 149)); + float texpylogx = __clc_as_float(__clc_as_int(expylogx) + m2); expylogx = m < -125 ? sexpylogx : texpylogx; /* Result is +-Inf if (ylogx + ylogx_t) > 128*log2 */ expylogx = (ylogx > 0x1.62e430p+6f) | (ylogx == 0x1.62e430p+6f & ylogx_t > -0x1.05c610p-22f) - ? as_float(PINFBITPATT_SP32) + ? __clc_as_float(PINFBITPATT_SP32) : expylogx; /* Result is 0 if ylogx < -149*log2 */ @@ -205,9 +206,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) { inty = yexp < 1 ? 0 : inty; inty = yexp > 24 ? 2 : inty; - float signval = as_float((as_uint(expylogx) ^ SIGNBIT_SP32)); + float signval = __clc_as_float((__clc_as_uint(expylogx) ^ SIGNBIT_SP32)); expylogx = ((inty == 1) & !xpos) ? signval : expylogx; - int ret = as_int(expylogx); + int ret = __clc_as_int(expylogx); /* Corner case handling */ ret = (!xpos & (inty == 0)) ? QNANBITPATT_SP32 : ret; @@ -236,7 +237,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) { ret = ay == 0 ? 0x3f800000 : ret; ret = ix == 0x3f800000 ? 0x3f800000 : ret; - return as_float(ret); + return __clc_as_float(ret); } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_pow, float, float) @@ -245,11 +246,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) { const double real_log2_tail = 5.76999904754328540596e-08; const double real_log2_lead = 6.93147122859954833984e-01; - long ux = as_long(x); + long ux = __clc_as_long(x); long ax = ux & (~SIGNBIT_DP64); int xpos = ax == ux; - long uy = as_long(y); + long uy = __clc_as_long(y); long ay = uy & (~SIGNBIT_DP64); int ypos = ay == uy; @@ -261,7 +262,8 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) { double xexp = (double)exp; long mantissa = ax & 0x000FFFFFFFFFFFFFL; - long temp_ux = as_long(as_double(0x3ff0000000000000L | mantissa) - 1.0); + long temp_ux = + __clc_as_long(__clc_as_double(0x3ff0000000000000L | mantissa) - 1.0); exp = ((temp_ux & 0x7FF0000000000000L) >> 52) - 2045; double xexp1 = (double)exp; long mantissa1 = temp_ux & 0x000FFFFFFFFFFFFFL; @@ -273,14 +275,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) { ((mantissa & 0x0000080000000000) << 1); int index = rax >> 44; - double F = as_double(rax | 0x3FE0000000000000L); - double Y = as_double(mantissa | 0x3FE0000000000000L); + double F = __clc_as_double(rax | 0x3FE0000000000000L); + double Y = __clc_as_double(mantissa | 0x3FE0000000000000L); double f = F - Y; double2 tv = USE_TABLE(log_f_inv_tbl, index); double log_h = tv.s0; double log_t = tv.s1; double f_inv = (log_h + log_t) * f; - double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L); + double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L); double r2 = fma(-F, r1, f) * (log_h + log_t); double r = r1 + r2; @@ -304,11 +306,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) { double resT_h = poly0h; double H = resT + resH; - double H_h = as_double(as_long(H) & 0xfffffffff8000000L); + double H_h = __clc_as_double(__clc_as_long(H) & 0xfffffffff8000000L); double T = (resH - H + resT) + (resT_t - (resT + resT_h)) + (H - H_h); H = H_h; - double y_head = as_double(uy & 0xfffffffff8000000L); + double y_head = __clc_as_double(uy & 0xfffffffff8000000L); double y_tail = y - y_head; double temp = fma(y_tail, H, fma(y_head, T, y_tail * T)); @@ -354,7 +356,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) { expv = fma(f, q, f2) + f1; expv = ldexp(expv, m); - expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv; + expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv; expv = v < min_exp_arg ? 0.0 : expv; } @@ -376,7 +378,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) { expv *= (inty == 1) & !xpos ? -1.0 : 1.0; - long ret = as_long(expv); + long ret = __clc_as_long(expv); // Now all the edge cases ret = !xpos & (inty == 0) ? QNANBITPATT_DP64 : ret; @@ -410,7 +412,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) { ret = ay == 0L ? 0x3ff0000000000000L : ret; ret = ux == 0x3ff0000000000000L ? 0x3ff0000000000000L : ret; - return as_double(ret); + return __clc_as_double(ret); } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pow, double, double) #endif diff --git a/libclc/generic/lib/math/clc_pown.cl b/libclc/generic/lib/math/clc_pown.cl index a0f968c238e99..a613b2998c3f6 100644 --- a/libclc/generic/lib/math/clc_pown.cl +++ b/libclc/generic/lib/math/clc_pown.cl @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -67,17 +68,17 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) { float y = (float)ny; - int ix = as_int(x); + int ix = __clc_as_int(x); int ax = ix & EXSIGNBIT_SP32; int xpos = ix == ax; - int iy = as_int(y); + int iy = __clc_as_int(y); int ay = iy & EXSIGNBIT_SP32; int ypos = iy == ay; // Extra precise log calculation // First handle case that x is close to 1 - float r = 1.0f - as_float(ax); + float r = 1.0f - __clc_as_float(ax); int near1 = __clc_fabs(r) < 0x1.0p-4f; float r2 = r * r; @@ -101,7 +102,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) { // Computations for x not near 1 int m = (int)(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; float mf = (float)m; - int ixs = as_int(as_float(ax | 0x3f800000) - 1.0f); + int ixs = __clc_as_int(__clc_as_float(ax | 0x3f800000) - 1.0f); float mfs = (float)((ixs >> EXPSHIFTBITS_SP32) - 253); int c = m == -127; int ixn = c ? ixs : ax; @@ -110,8 +111,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) { int indx = (ixn & 0x007f0000) + ((ixn & 0x00008000) << 1); // F - Y - float f = as_float(0x3f000000 | indx) - - as_float(0x3f000000 | (ixn & MANTBITS_SP32)); + float f = __clc_as_float(0x3f000000 | indx) - + __clc_as_float(0x3f000000 | (ixn & MANTBITS_SP32)); indx = indx >> 16; float2 tv = USE_TABLE(log_inv_tbl_ep, indx); @@ -139,10 +140,10 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) { lh = near1 ? lh_near1 : lh; l = near1 ? l_near1 : l; - float gh = as_float(as_int(l) & 0xfffff000); + float gh = __clc_as_float(__clc_as_int(l) & 0xfffff000); float gt = ((ltt - (lt - lth)) + ((lh - l) + lt)) + (l - gh); - float yh = as_float(iy & 0xfffff000); + float yh = __clc_as_float(iy & 0xfffff000); float yt = (float)(ny - (int)yh); @@ -153,7 +154,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) { // Extra precise exp of ylogx // 64/log2 : 92.332482616893657 const float R_64_BY_LOG2 = 0x1.715476p+6f; - int n = convert_int(ylogx * R_64_BY_LOG2); + int n = __clc_convert_int(ylogx * R_64_BY_LOG2); float nf = (float)n; int j = n & 0x3f; @@ -176,14 +177,14 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) { float expylogx = __clc_mad(tv.s0, poly, __clc_mad(tv.s1, poly, tv.s1)) + tv.s0; - float sexpylogx = expylogx * as_float(0x1 << (m + 149)); - float texpylogx = as_float(as_int(expylogx) + m2); + float sexpylogx = expylogx * __clc_as_float(0x1 << (m + 149)); + float texpylogx = __clc_as_float(__clc_as_int(expylogx) + m2); expylogx = m < -125 ? sexpylogx : texpylogx; // Result is +-Inf if (ylogx + ylogx_t) > 128*log2 expylogx = ((ylogx > 0x1.62e430p+6f) | (ylogx == 0x1.62e430p+6f & ylogx_t > -0x1.05c610p-22f)) - ? as_float(PINFBITPATT_SP32) + ? __clc_as_float(PINFBITPATT_SP32) : expylogx; // Result is 0 if ylogx < -149*log2 @@ -196,9 +197,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) { int inty = 2 - (ny & 1); - float signval = as_float((as_uint(expylogx) ^ SIGNBIT_SP32)); + float signval = __clc_as_float((__clc_as_uint(expylogx) ^ SIGNBIT_SP32)); expylogx = ((inty == 1) & !xpos) ? signval : expylogx; - int ret = as_int(expylogx); + int ret = __clc_as_int(expylogx); // Corner case handling int xinf = xpos ? PINFBITPATT_SP32 : NINFBITPATT_SP32; @@ -218,7 +219,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) { ret = ax > PINFBITPATT_SP32 ? ix : ret; ret = ny == 0 ? 0x3f800000 : ret; - return as_float(ret); + return __clc_as_float(ret); } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_pown, float, int) @@ -229,11 +230,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) { double y = (double)ny; - long ux = as_long(x); + long ux = __clc_as_long(x); long ax = ux & (~SIGNBIT_DP64); int xpos = ax == ux; - long uy = as_long(y); + long uy = __clc_as_long(y); long ay = uy & (~SIGNBIT_DP64); int ypos = ay == uy; @@ -245,7 +246,8 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) { double xexp = (double)exp; long mantissa = ax & 0x000FFFFFFFFFFFFFL; - long temp_ux = as_long(as_double(0x3ff0000000000000L | mantissa) - 1.0); + long temp_ux = + __clc_as_long(__clc_as_double(0x3ff0000000000000L | mantissa) - 1.0); exp = ((temp_ux & 0x7FF0000000000000L) >> 52) - 2045; double xexp1 = (double)exp; long mantissa1 = temp_ux & 0x000FFFFFFFFFFFFFL; @@ -257,14 +259,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) { ((mantissa & 0x0000080000000000) << 1); int index = rax >> 44; - double F = as_double(rax | 0x3FE0000000000000L); - double Y = as_double(mantissa | 0x3FE0000000000000L); + double F = __clc_as_double(rax | 0x3FE0000000000000L); + double Y = __clc_as_double(mantissa | 0x3FE0000000000000L); double f = F - Y; double2 tv = USE_TABLE(log_f_inv_tbl, index); double log_h = tv.s0; double log_t = tv.s1; double f_inv = (log_h + log_t) * f; - double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L); + double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L); double r2 = fma(-F, r1, f) * (log_h + log_t); double r = r1 + r2; @@ -288,15 +290,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) { double resT_h = poly0h; double H = resT + resH; - double H_h = as_double(as_long(H) & 0xfffffffff8000000L); + double H_h = __clc_as_double(__clc_as_long(H) & 0xfffffffff8000000L); double T = (resH - H + resT) + (resT_t - (resT + resT_h)) + (H - H_h); H = H_h; - double y_head = as_double(uy & 0xfffffffff8000000L); + double y_head = __clc_as_double(uy & 0xfffffffff8000000L); double y_tail = y - y_head; int mask_2_24 = ay > 0x4170000000000000; // 2^24 - int nyh = convert_int(y_head); + int nyh = __clc_convert_int(y_head); int nyt = ny - nyh; double y_tail1 = (double)nyt; y_tail = mask_2_24 ? y_tail1 : y_tail; @@ -344,7 +346,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) { expv = fma(f, q, f2) + f1; expv = ldexp(expv, m); - expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv; + expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv; expv = v < min_exp_arg ? 0.0 : expv; } @@ -357,7 +359,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) { expv *= ((inty == 1) & !xpos) ? -1.0 : 1.0; - long ret = as_long(expv); + long ret = __clc_as_long(expv); // Now all the edge cases long xinf = xpos ? PINFBITPATT_DP64 : NINFBITPATT_DP64; @@ -378,7 +380,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) { ret = ax > PINFBITPATT_DP64 ? ux : ret; ret = ny == 0 ? 0x3ff0000000000000L : ret; - return as_double(ret); + return __clc_as_double(ret); } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pown, double, int) #endif @@ -388,7 +390,7 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pown, double, int) #pragma OPENCL EXTENSION cl_khr_fp16 : enable _CLC_OVERLOAD _CLC_DEF half __clc_pown(half x, int y) { - return (half)__clc_pown((float)x, y); + return (half)__clc_pown((float)x, y); } _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_pown, half, int); diff --git a/libclc/generic/lib/math/clc_powr.cl b/libclc/generic/lib/math/clc_powr.cl index 7e1a6f2a02e7a..7876acaee89a6 100644 --- a/libclc/generic/lib/math/clc_powr.cl +++ b/libclc/generic/lib/math/clc_powr.cl @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -65,17 +66,17 @@ // ((((expT * poly) + expT) + expH*poly) + expH) _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) { - int ix = as_int(x); + int ix = __clc_as_int(x); int ax = ix & EXSIGNBIT_SP32; int xpos = ix == ax; - int iy = as_int(y); + int iy = __clc_as_int(y); int ay = iy & EXSIGNBIT_SP32; int ypos = iy == ay; // Extra precise log calculation // First handle case that x is close to 1 - float r = 1.0f - as_float(ax); + float r = 1.0f - __clc_as_float(ax); int near1 = __clc_fabs(r) < 0x1.0p-4f; float r2 = r * r; @@ -99,7 +100,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) { // Computations for x not near 1 int m = (int)(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; float mf = (float)m; - int ixs = as_int(as_float(ax | 0x3f800000) - 1.0f); + int ixs = __clc_as_int(__clc_as_float(ax | 0x3f800000) - 1.0f); float mfs = (float)((ixs >> EXPSHIFTBITS_SP32) - 253); int c = m == -127; int ixn = c ? ixs : ax; @@ -108,8 +109,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) { int indx = (ixn & 0x007f0000) + ((ixn & 0x00008000) << 1); // F - Y - float f = as_float(0x3f000000 | indx) - - as_float(0x3f000000 | (ixn & MANTBITS_SP32)); + float f = __clc_as_float(0x3f000000 | indx) - + __clc_as_float(0x3f000000 | (ixn & MANTBITS_SP32)); indx = indx >> 16; float2 tv = USE_TABLE(log_inv_tbl_ep, indx); @@ -137,10 +138,10 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) { lh = near1 ? lh_near1 : lh; l = near1 ? l_near1 : l; - float gh = as_float(as_int(l) & 0xfffff000); + float gh = __clc_as_float(__clc_as_int(l) & 0xfffff000); float gt = ((ltt - (lt - lth)) + ((lh - l) + lt)) + (l - gh); - float yh = as_float(iy & 0xfffff000); + float yh = __clc_as_float(iy & 0xfffff000); float yt = y - yh; @@ -151,7 +152,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) { // Extra precise exp of ylogx // 64/log2 : 92.332482616893657 const float R_64_BY_LOG2 = 0x1.715476p+6f; - int n = convert_int(ylogx * R_64_BY_LOG2); + int n = __clc_convert_int(ylogx * R_64_BY_LOG2); float nf = (float)n; int j = n & 0x3f; @@ -173,14 +174,14 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) { float expylogx = __clc_mad(tv.s0, poly, __clc_mad(tv.s1, poly, tv.s1)) + tv.s0; - float sexpylogx = expylogx * as_float(0x1 << (m + 149)); - float texpylogx = as_float(as_int(expylogx) + m2); + float sexpylogx = expylogx * __clc_as_float(0x1 << (m + 149)); + float texpylogx = __clc_as_float(__clc_as_int(expylogx) + m2); expylogx = m < -125 ? sexpylogx : texpylogx; // Result is +-Inf if (ylogx + ylogx_t) > 128*log2 expylogx = ((ylogx > 0x1.62e430p+6f) | (ylogx == 0x1.62e430p+6f & ylogx_t > -0x1.05c610p-22f)) - ? as_float(PINFBITPATT_SP32) + ? __clc_as_float(PINFBITPATT_SP32) : expylogx; // Result is 0 if ylogx < -149*log2 @@ -199,9 +200,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) { inty = yexp < 1 ? 0 : inty; inty = yexp > 24 ? 2 : inty; - float signval = as_float((as_uint(expylogx) ^ SIGNBIT_SP32)); + float signval = __clc_as_float((__clc_as_uint(expylogx) ^ SIGNBIT_SP32)); expylogx = ((inty == 1) & !xpos) ? signval : expylogx; - int ret = as_int(expylogx); + int ret = __clc_as_int(expylogx); // Corner case handling ret = ax < 0x3f800000 & iy == NINFBITPATT_SP32 ? PINFBITPATT_SP32 : ret; @@ -223,7 +224,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) { ret = ax > PINFBITPATT_SP32 ? ix : ret; ret = ay > PINFBITPATT_SP32 ? iy : ret; - return as_float(ret); + return __clc_as_float(ret); } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_powr, float, float) @@ -232,11 +233,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) { const double real_log2_tail = 5.76999904754328540596e-08; const double real_log2_lead = 6.93147122859954833984e-01; - long ux = as_long(x); + long ux = __clc_as_long(x); long ax = ux & (~SIGNBIT_DP64); int xpos = ax == ux; - long uy = as_long(y); + long uy = __clc_as_long(y); long ay = uy & (~SIGNBIT_DP64); int ypos = ay == uy; @@ -248,7 +249,8 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) { double xexp = (double)exp; long mantissa = ax & 0x000FFFFFFFFFFFFFL; - long temp_ux = as_long(as_double(0x3ff0000000000000L | mantissa) - 1.0); + long temp_ux = + __clc_as_long(__clc_as_double(0x3ff0000000000000L | mantissa) - 1.0); exp = ((temp_ux & 0x7FF0000000000000L) >> 52) - 2045; double xexp1 = (double)exp; long mantissa1 = temp_ux & 0x000FFFFFFFFFFFFFL; @@ -260,14 +262,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) { ((mantissa & 0x0000080000000000) << 1); int index = rax >> 44; - double F = as_double(rax | 0x3FE0000000000000L); - double Y = as_double(mantissa | 0x3FE0000000000000L); + double F = __clc_as_double(rax | 0x3FE0000000000000L); + double Y = __clc_as_double(mantissa | 0x3FE0000000000000L); double f = F - Y; double2 tv = USE_TABLE(log_f_inv_tbl, index); double log_h = tv.s0; double log_t = tv.s1; double f_inv = (log_h + log_t) * f; - double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L); + double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L); double r2 = fma(-F, r1, f) * (log_h + log_t); double r = r1 + r2; @@ -291,11 +293,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) { double resT_h = poly0h; double H = resT + resH; - double H_h = as_double(as_long(H) & 0xfffffffff8000000L); + double H_h = __clc_as_double(__clc_as_long(H) & 0xfffffffff8000000L); double T = (resH - H + resT) + (resT_t - (resT + resT_h)) + (H - H_h); H = H_h; - double y_head = as_double(uy & 0xfffffffff8000000L); + double y_head = __clc_as_double(uy & 0xfffffffff8000000L); double y_tail = y - y_head; double temp = fma(y_tail, H, fma(y_head, T, y_tail * T)); @@ -341,7 +343,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) { expv = fma(f, q, f2) + f1; expv = ldexp(expv, m); - expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv; + expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv; expv = v < min_exp_arg ? 0.0 : expv; } @@ -363,7 +365,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) { expv *= ((inty == 1) & !xpos) ? -1.0 : 1.0; - long ret = as_long(expv); + long ret = __clc_as_long(expv); // Now all the edge cases ret = ax < 0x3ff0000000000000L & uy == NINFBITPATT_DP64 ? PINFBITPATT_DP64 @@ -389,7 +391,8 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) { ret = ax > PINFBITPATT_DP64 ? ux : ret; ret = ay > PINFBITPATT_DP64 ? uy : ret; - return as_double(ret); + return __clc_as_double(ret); } -_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_powr, double, double) +_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_powr, double, + double) #endif diff --git a/libclc/generic/lib/math/clc_remainder.cl b/libclc/generic/lib/math/clc_remainder.cl index 31d17d5aaf6b6..6302b9776782f 100644 --- a/libclc/generic/lib/math/clc_remainder.cl +++ b/libclc/generic/lib/math/clc_remainder.cl @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -31,19 +32,19 @@ #include _CLC_DEF _CLC_OVERLOAD float __clc_remainder(float x, float y) { - int ux = as_int(x); + int ux = __clc_as_int(x); int ax = ux & EXSIGNBIT_SP32; - float xa = as_float(ax); + float xa = __clc_as_float(ax); int sx = ux ^ ax; int ex = ax >> EXPSHIFTBITS_SP32; - int uy = as_int(y); + int uy = __clc_as_int(y); int ay = uy & EXSIGNBIT_SP32; - float ya = as_float(ay); + float ya = __clc_as_float(ay); int ey = ay >> EXPSHIFTBITS_SP32; - float xr = as_float(0x3f800000 | (ax & 0x007fffff)); - float yr = as_float(0x3f800000 | (ay & 0x007fffff)); + float xr = __clc_as_float(0x3f800000 | (ax & 0x007fffff)); + float yr = __clc_as_float(0x3f800000 | (ay & 0x007fffff)); int c; int k = ex - ey; @@ -71,17 +72,17 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remainder(float x, float y) { xr -= c ? yr : 0.0f; q += c; - float s = as_float(ey << EXPSHIFTBITS_SP32); + float s = __clc_as_float(ey << EXPSHIFTBITS_SP32); xr *= lt ? 1.0f : s; c = ax == ay; xr = c ? 0.0f : xr; - xr = as_float(sx ^ as_int(xr)); + xr = __clc_as_float(sx ^ __clc_as_int(xr)); c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 | ay == 0; - xr = c ? as_float(QNANBITPATT_SP32) : xr; + xr = c ? __clc_as_float(QNANBITPATT_SP32) : xr; return xr; } @@ -90,18 +91,18 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_remainder, float, #ifdef cl_khr_fp64 _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) { - ulong ux = as_ulong(x); + ulong ux = __clc_as_ulong(x); ulong ax = ux & ~SIGNBIT_DP64; ulong xsgn = ux ^ ax; - double dx = as_double(ax); - int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); + double dx = __clc_as_double(ax); + int xexp = __clc_convert_int(ax >> EXPSHIFTBITS_DP64); int xexp1 = 11 - (int)__clc_clz(ax & MANTBITS_DP64); xexp1 = xexp < 1 ? xexp1 : xexp; - ulong uy = as_ulong(y); + ulong uy = __clc_as_ulong(y); ulong ay = uy & ~SIGNBIT_DP64; - double dy = as_double(ay); - int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); + double dy = __clc_as_double(ay); + int yexp = __clc_convert_int(ay >> EXPSHIFTBITS_DP64); int yexp1 = 11 - (int)__clc_clz(ay & MANTBITS_DP64); yexp1 = yexp < 1 ? yexp1 : yexp; @@ -181,12 +182,12 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) { dx = dy < 0x1.0p+1022 ? dxl : dxg; - double ret = as_double(xsgn ^ as_ulong(dx)); - dx = as_double(ax); + double ret = __clc_as_double(xsgn ^ __clc_as_ulong(dx)); + dx = __clc_as_double(ax); // Now handle |x| == |y| int c = dx == dy; - t = as_double(xsgn); + t = __clc_as_double(xsgn); ret = c ? t : ret; // Next, handle |x| < |y| @@ -203,7 +204,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) { // |y| is 0 c = dy == 0.0; - ret = c ? as_double(QNANBITPATT_DP64) : ret; + ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret; // y is +-Inf, NaN c = yexp > BIASEDEMAX_DP64; @@ -212,7 +213,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) { // x is +=Inf, NaN c = xexp > BIASEDEMAX_DP64; - ret = c ? as_double(QNANBITPATT_DP64) : ret; + ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret; return ret; } diff --git a/libclc/generic/lib/math/clc_remquo.cl b/libclc/generic/lib/math/clc_remquo.cl index af3e7a2b07500..699517e180708 100644 --- a/libclc/generic/lib/math/clc_remquo.cl +++ b/libclc/generic/lib/math/clc_remquo.cl @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -34,20 +35,20 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y, __private int *quo) { x = __clc_flush_denormal_if_not_supported(x); y = __clc_flush_denormal_if_not_supported(y); - int ux = as_int(x); + int ux = __clc_as_int(x); int ax = ux & EXSIGNBIT_SP32; - float xa = as_float(ax); + float xa = __clc_as_float(ax); int sx = ux ^ ax; int ex = ax >> EXPSHIFTBITS_SP32; - int uy = as_int(y); + int uy = __clc_as_int(y); int ay = uy & EXSIGNBIT_SP32; - float ya = as_float(ay); + float ya = __clc_as_float(ay); int sy = uy ^ ay; int ey = ay >> EXPSHIFTBITS_SP32; - float xr = as_float(0x3f800000 | (ax & 0x007fffff)); - float yr = as_float(0x3f800000 | (ay & 0x007fffff)); + float xr = __clc_as_float(0x3f800000 | (ax & 0x007fffff)); + float yr = __clc_as_float(0x3f800000 | (ay & 0x007fffff)); int c; int k = ex - ey; @@ -75,7 +76,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y, xr -= c ? yr : 0.0f; q += c; - float s = as_float(ey << EXPSHIFTBITS_SP32); + float s = __clc_as_float(ey << EXPSHIFTBITS_SP32); xr *= lt ? 1.0f : s; int qsgn = sx == sy ? 1 : -1; @@ -85,12 +86,12 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y, quot = c ? qsgn : quot; xr = c ? 0.0f : xr; - xr = as_float(sx ^ as_int(xr)); + xr = __clc_as_float(sx ^ __clc_as_int(xr)); c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 | ay == 0; quot = c ? 0 : quot; - xr = c ? as_float(QNANBITPATT_SP32) : xr; + xr = c ? __clc_as_float(QNANBITPATT_SP32) : xr; *quo = quot; @@ -130,18 +131,18 @@ __VEC_REMQUO(float, 16, 8) #ifdef cl_khr_fp64 _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, __private int *pquo) { - ulong ux = as_ulong(x); + ulong ux = __clc_as_ulong(x); ulong ax = ux & ~SIGNBIT_DP64; ulong xsgn = ux ^ ax; - double dx = as_double(ax); - int xexp = convert_int(ax >> EXPSHIFTBITS_DP64); + double dx = __clc_as_double(ax); + int xexp = __clc_convert_int(ax >> EXPSHIFTBITS_DP64); int xexp1 = 11 - (int)__clc_clz(ax & MANTBITS_DP64); xexp1 = xexp < 1 ? xexp1 : xexp; - ulong uy = as_ulong(y); + ulong uy = __clc_as_ulong(y); ulong ay = uy & ~SIGNBIT_DP64; - double dy = as_double(ay); - int yexp = convert_int(ay >> EXPSHIFTBITS_DP64); + double dy = __clc_as_double(ay); + int yexp = __clc_convert_int(ay >> EXPSHIFTBITS_DP64); int yexp1 = 11 - (int)__clc_clz(ay & MANTBITS_DP64); yexp1 = yexp < 1 ? yexp1 : yexp; @@ -223,12 +224,12 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, lt += dy < 0x1.0p+1022 ? al : ag; int quo = ((int)lt & 0x7f) * qsgn; - double ret = as_double(xsgn ^ as_ulong(dx)); - dx = as_double(ax); + double ret = __clc_as_double(xsgn ^ __clc_as_ulong(dx)); + dx = __clc_as_double(ax); // Now handle |x| == |y| int c = dx == dy; - t = as_double(xsgn); + t = __clc_as_double(xsgn); quo = c ? qsgn : quo; ret = c ? t : ret; @@ -249,7 +250,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, // |y| is 0 c = dy == 0.0; quo = c ? 0 : quo; - ret = c ? as_double(QNANBITPATT_DP64) : ret; + ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret; // y is +-Inf, NaN c = yexp > BIASEDEMAX_DP64; @@ -260,7 +261,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, // x is +=Inf, NaN c = xexp > BIASEDEMAX_DP64; quo = c ? 0 : quo; - ret = c ? as_double(QNANBITPATT_DP64) : ret; + ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret; *pquo = quo; return ret; diff --git a/libclc/generic/lib/math/clc_rootn.cl b/libclc/generic/lib/math/clc_rootn.cl index 42b983784c14d..dabaa2a4f3f2a 100644 --- a/libclc/generic/lib/math/clc_rootn.cl +++ b/libclc/generic/lib/math/clc_rootn.cl @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -67,17 +68,17 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) { float y = MATH_RECIP((float)ny); - int ix = as_int(x); + int ix = __clc_as_int(x); int ax = ix & EXSIGNBIT_SP32; int xpos = ix == ax; - int iy = as_int(y); + int iy = __clc_as_int(y); int ay = iy & EXSIGNBIT_SP32; int ypos = iy == ay; // Extra precise log calculation // First handle case that x is close to 1 - float r = 1.0f - as_float(ax); + float r = 1.0f - __clc_as_float(ax); int near1 = __clc_fabs(r) < 0x1.0p-4f; float r2 = r * r; @@ -101,7 +102,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) { // Computations for x not near 1 int m = (int)(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; float mf = (float)m; - int ixs = as_int(as_float(ax | 0x3f800000) - 1.0f); + int ixs = __clc_as_int(__clc_as_float(ax | 0x3f800000) - 1.0f); float mfs = (float)((ixs >> EXPSHIFTBITS_SP32) - 253); int c = m == -127; int ixn = c ? ixs : ax; @@ -110,8 +111,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) { int indx = (ixn & 0x007f0000) + ((ixn & 0x00008000) << 1); // F - Y - float f = as_float(0x3f000000 | indx) - - as_float(0x3f000000 | (ixn & MANTBITS_SP32)); + float f = __clc_as_float(0x3f000000 | indx) - + __clc_as_float(0x3f000000 | (ixn & MANTBITS_SP32)); indx = indx >> 16; float2 tv = USE_TABLE(log_inv_tbl_ep, indx); @@ -139,13 +140,13 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) { lh = near1 ? lh_near1 : lh; l = near1 ? l_near1 : l; - float gh = as_float(as_int(l) & 0xfffff000); + float gh = __clc_as_float(__clc_as_int(l) & 0xfffff000); float gt = ((ltt - (lt - lth)) + ((lh - l) + lt)) + (l - gh); - float yh = as_float(iy & 0xfffff000); + float yh = __clc_as_float(iy & 0xfffff000); float fny = (float)ny; - float fnyh = as_float(as_int(fny) & 0xfffff000); + float fnyh = __clc_as_float(__clc_as_int(fny) & 0xfffff000); float fnyt = (float)(ny - (int)fnyh); float yt = MATH_DIVIDE(__clc_mad(-fnyt, yh, __clc_mad(-fnyh, yh, 1.0f)), fny); @@ -155,7 +156,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) { // Extra precise exp of ylogx const float R_64_BY_LOG2 = 0x1.715476p+6f; // 64/log2 : 92.332482616893657 - int n = convert_int(ylogx * R_64_BY_LOG2); + int n = __clc_convert_int(ylogx * R_64_BY_LOG2); float nf = (float)n; int j = n & 0x3f; @@ -179,16 +180,16 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) { float expylogx = __clc_mad(tv.s0, poly, __clc_mad(tv.s1, poly, tv.s1)) + tv.s0; float sexpylogx = __clc_fp32_subnormals_supported() - ? expylogx * as_float(0x1 << (m + 149)) + ? expylogx * __clc_as_float(0x1 << (m + 149)) : 0.0f; - float texpylogx = as_float(as_int(expylogx) + m2); + float texpylogx = __clc_as_float(__clc_as_int(expylogx) + m2); expylogx = m < -125 ? sexpylogx : texpylogx; // Result is +-Inf if (ylogx + ylogx_t) > 128*log2 expylogx = ((ylogx > 0x1.62e430p+6f) | (ylogx == 0x1.62e430p+6f & ylogx_t > -0x1.05c610p-22f)) - ? as_float(PINFBITPATT_SP32) + ? __clc_as_float(PINFBITPATT_SP32) : expylogx; // Result is 0 if ylogx < -149*log2 @@ -201,9 +202,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) { int inty = 2 - (ny & 1); - float signval = as_float((as_uint(expylogx) ^ SIGNBIT_SP32)); + float signval = __clc_as_float((__clc_as_uint(expylogx) ^ SIGNBIT_SP32)); expylogx = ((inty == 1) & !xpos) ? signval : expylogx; - int ret = as_int(expylogx); + int ret = __clc_as_int(expylogx); // Corner case handling ret = (!xpos & (inty == 2)) ? QNANBITPATT_SP32 : ret; @@ -221,7 +222,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) { ret = ax > PINFBITPATT_SP32 ? ix : ret; ret = ny == 0 ? QNANBITPATT_SP32 : ret; - return as_float(ret); + return __clc_as_float(ret); } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_rootn, float, int) @@ -233,11 +234,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) { double dny = (double)ny; double y = 1.0 / dny; - long ux = as_long(x); + long ux = __clc_as_long(x); long ax = ux & (~SIGNBIT_DP64); int xpos = ax == ux; - long uy = as_long(y); + long uy = __clc_as_long(y); long ay = uy & (~SIGNBIT_DP64); int ypos = ay == uy; @@ -249,7 +250,8 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) { double xexp = (double)exp; long mantissa = ax & 0x000FFFFFFFFFFFFFL; - long temp_ux = as_long(as_double(0x3ff0000000000000L | mantissa) - 1.0); + long temp_ux = + __clc_as_long(__clc_as_double(0x3ff0000000000000L | mantissa) - 1.0); exp = ((temp_ux & 0x7FF0000000000000L) >> 52) - 2045; double xexp1 = (double)exp; long mantissa1 = temp_ux & 0x000FFFFFFFFFFFFFL; @@ -261,14 +263,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) { ((mantissa & 0x0000080000000000) << 1); int index = rax >> 44; - double F = as_double(rax | 0x3FE0000000000000L); - double Y = as_double(mantissa | 0x3FE0000000000000L); + double F = __clc_as_double(rax | 0x3FE0000000000000L); + double Y = __clc_as_double(mantissa | 0x3FE0000000000000L); double f = F - Y; double2 tv = USE_TABLE(log_f_inv_tbl, index); double log_h = tv.s0; double log_t = tv.s1; double f_inv = (log_h + log_t) * f; - double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L); + double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L); double r2 = fma(-F, r1, f) * (log_h + log_t); double r = r1 + r2; @@ -292,14 +294,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) { double resT_h = poly0h; double H = resT + resH; - double H_h = as_double(as_long(H) & 0xfffffffff8000000L); + double H_h = __clc_as_double(__clc_as_long(H) & 0xfffffffff8000000L); double T = (resH - H + resT) + (resT_t - (resT + resT_h)) + (H - H_h); H = H_h; - double y_head = as_double(uy & 0xfffffffff8000000L); + double y_head = __clc_as_double(uy & 0xfffffffff8000000L); double y_tail = y - y_head; - double fnyh = as_double(as_long(dny) & 0xfffffffffff00000); + double fnyh = __clc_as_double(__clc_as_long(dny) & 0xfffffffffff00000); double fnyt = (double)(ny - (int)fnyh); y_tail = fma(-fnyt, y_head, fma(-fnyh, y_head, 1.0)) / dny; @@ -346,7 +348,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) { expv = fma(f, q, f2) + f1; expv = ldexp(expv, m); - expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv; + expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv; expv = v < min_exp_arg ? 0.0 : expv; } @@ -359,7 +361,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) { expv *= ((inty == 1) & !xpos) ? -1.0 : 1.0; - long ret = as_long(expv); + long ret = __clc_as_long(expv); // Now all the edge cases ret = (!xpos & (inty == 2)) ? QNANBITPATT_DP64 : ret; @@ -377,7 +379,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) { ret = ((ux == PINFBITPATT_DP64) & ypos) ? PINFBITPATT_DP64 : ret; ret = ax > PINFBITPATT_DP64 ? ux : ret; ret = ny == 0 ? QNANBITPATT_DP64 : ret; - return as_double(ret); + return __clc_as_double(ret); } _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_rootn, double, int) #endif @@ -387,7 +389,7 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_rootn, double, int) #pragma OPENCL EXTENSION cl_khr_fp16 : enable _CLC_OVERLOAD _CLC_DEF half __clc_rootn(half x, int y) { - return (half)__clc_rootn((float)x, y); + return (half)__clc_rootn((float)x, y); } _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_rootn, half, int);