Skip to content

Commit 760eeac

Browse files
authored
[libclc] Reduce bithacking in CLC frexp (#129871)
Also replace some magic constants with named ones. Checking against FP zero and using the isnan and isinf functions allows the optimizer to create one unified @llvm.is.fpclass intrinsic call. This results in fewer, more canonical IR instructions.
1 parent adb5d6a commit 760eeac

File tree

2 files changed

+32
-22
lines changed

2 files changed

+32
-22
lines changed

libclc/clc/lib/generic/math/clc_frexp.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <clc/clc_convert.h>
2424
#include <clc/internal/clc.h>
2525
#include <clc/math/math.h>
26+
#include <clc/relational/clc_isinf.h>
27+
#include <clc/relational/clc_isnan.h>
2628
#include <clc/relational/clc_select.h>
2729
#include <clc/utils.h>
2830

libclc/clc/lib/generic/math/clc_frexp.inc

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,22 @@
2828
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
2929
__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
3030
__CLC_INTN i = __CLC_AS_INTN(x);
31-
__CLC_INTN ai = i & 0x7fffffff;
32-
__CLC_INTN d = ai > 0 & ai < 0x00800000;
33-
/* scale subnormal by 2^26 without multiplying */
31+
__CLC_INTN ai = i & EXSIGNBIT_SP32;
32+
33+
// Scale subnormal by 2^26 without multiplying
34+
__CLC_INTN is_subnormal = ai > 0 && ai < 0x00800000;
3435
__CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0d800000) - 0x1.0p-100f;
35-
ai = __clc_select(ai, __CLC_AS_INTN(s), d);
36-
__CLC_INTN e =
37-
(ai >> 23) - 126 - __clc_select((__CLC_INTN)0, (__CLC_INTN)26, d);
38-
__CLC_INTN t = ai == (__CLC_INTN)0 | e == (__CLC_INTN)129;
39-
i = (i & (__CLC_INTN)0x80000000) | (__CLC_INTN)0x3f000000 | (ai & 0x007fffff);
40-
*ep = __clc_select(e, (__CLC_INTN)0, t);
41-
return __clc_select(__CLC_AS_GENTYPE(i), x, t);
36+
ai = __clc_select(ai, __CLC_AS_INTN(s), is_subnormal);
37+
__CLC_INTN e = (ai >> EXPSHIFTBITS_SP32) - 126 -
38+
__clc_select((__CLC_INTN)0, (__CLC_INTN)26, is_subnormal);
39+
40+
i = (i & (__CLC_INTN)SIGNBIT_SP32) | (__CLC_INTN)HALFEXPBITS_SP32 |
41+
(ai & (__CLC_INTN)MANTBITS_SP32);
42+
43+
__CLC_INTN is_inf_nan_or_zero =
44+
x == __CLC_FP_LIT(0.0) || __clc_isinf(x) || __clc_isnan(x);
45+
*ep = __clc_select(e, (__CLC_INTN)0, is_inf_nan_or_zero);
46+
return __clc_select(__CLC_AS_GENTYPE(i), x, is_inf_nan_or_zero);
4247
}
4348
#endif
4449

@@ -56,19 +61,22 @@ __clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
5661
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
5762
__clc_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
5863
__CLC_LONGN i = __CLC_AS_LONGN(x);
59-
__CLC_LONGN ai = i & 0x7fffffffffffffffL;
60-
__CLC_LONGN d = ai > 0 & ai < 0x0010000000000000L;
61-
// scale subnormal by 2^54 without multiplying
64+
__CLC_LONGN ai = i & EXSIGNBIT_DP64;
65+
66+
// Scale subnormal by 2^54 without multiplying
67+
__CLC_LONGN is_subnormal = ai > 0 && ai < 0x0010000000000000L;
6268
__CLC_GENTYPE s = __CLC_AS_GENTYPE(ai | 0x0370000000000000L) - 0x1.0p-968;
63-
ai = __clc_select(ai, __CLC_AS_LONGN(s), d);
64-
__CLC_LONGN e = (ai >> 52) - (__CLC_LONGN)1022 -
65-
__clc_select((__CLC_LONGN)0, (__CLC_LONGN)54, d);
66-
__CLC_LONGN t = ai == 0 | e == 1025;
67-
i = (i & (__CLC_LONGN)0x8000000000000000L) |
68-
(__CLC_LONGN)0x3fe0000000000000L |
69-
(ai & (__CLC_LONGN)0x000fffffffffffffL);
70-
*ep = __CLC_CONVERT_INTN(__clc_select(e, 0L, t));
71-
return __clc_select(__CLC_AS_GENTYPE(i), x, t);
69+
ai = __clc_select(ai, __CLC_AS_LONGN(s), is_subnormal);
70+
__CLC_LONGN e = (ai >> EXPSHIFTBITS_DP64) - (__CLC_LONGN)1022 -
71+
__clc_select((__CLC_LONGN)0, (__CLC_LONGN)54, is_subnormal);
72+
73+
i = (i & (__CLC_LONGN)SIGNBIT_DP64) | (__CLC_LONGN)HALFEXPBITS_DP64 |
74+
(ai & (__CLC_LONGN)MANTBITS_DP64);
75+
76+
__CLC_LONGN is_inf_nan_or_zero =
77+
x == __CLC_FP_LIT(0.0) || __clc_isinf(x) || __clc_isnan(x);
78+
*ep = __CLC_CONVERT_INTN(__clc_select(e, 0L, is_inf_nan_or_zero));
79+
return __clc_select(__CLC_AS_GENTYPE(i), x, is_inf_nan_or_zero);
7280
}
7381

7482
#endif

0 commit comments

Comments
 (0)