@@ -220,42 +220,61 @@ pub inline fn floatEpsAt(comptime T: type, x: T) T {
220
220
}
221
221
}
222
222
223
/// Returns the inf value for a floating point `Type`.
///
/// `Type` may also be `comptime_float`; in that case the value is constructed
/// as an `f128` (any concrete float type would do) and coerced to `Type` by
/// the return.
pub inline fn inf(comptime Type: type) Type {
    // Concrete float type used for the construction.
    const RuntimeType = switch (Type) {
        comptime_float => f128, // any float type will do
        else => Type,
    };
    // Exponent one past `floatExponentMax`, mantissa exactly `mantissaOne`
    // (no additional mantissa bits set).
    return reconstructFloat(
        RuntimeType,
        floatExponentMax(RuntimeType) + 1,
        mantissaOne(RuntimeType),
    );
}
227
231
228
/// Returns the canonical quiet NaN representation for a floating point `Type`.
///
/// `Type` may also be `comptime_float`; in that case the value is constructed
/// as an `f128` (any concrete float type would do) and coerced to `Type` by
/// the return.
pub inline fn nan(comptime Type: type) Type {
    // Concrete float type used for the construction.
    const RuntimeType = switch (Type) {
        comptime_float => f128, // any float type will do
        else => Type,
    };
    return reconstructFloat(
        RuntimeType,
        // Exponent one past `floatExponentMax`.
        floatExponentMax(RuntimeType) + 1,
        // `mantissaOne` plus bit `floatFractionalBits - 1` of the mantissa.
        mantissaOne(RuntimeType) | (1 << (floatFractionalBits(RuntimeType) - 1)),
    );
}
236
244
237
/// Returns a signalling NaN representation for a floating point `Type`.
///
/// `Type` may also be `comptime_float`; in that case the value is constructed
/// as an `f128` (any concrete float type would do) and coerced to `Type` by
/// the return.
///
/// TODO: LLVM is known to miscompile on some architectures to quiet NaN -
///       this is tracked by https://github.com/ziglang/zig/issues/14366
pub inline fn snan(comptime Type: type) Type {
    // Concrete float type used for the construction.
    const RuntimeType = switch (Type) {
        comptime_float => f128, // any float type will do
        else => Type,
    };
    return reconstructFloat(
        RuntimeType,
        // Exponent one past `floatExponentMax`.
        floatExponentMax(RuntimeType) + 1,
        // `mantissaOne` plus bit `floatFractionalBits - 2` of the mantissa,
        // i.e. one position below the bit that `nan` sets.
        mantissaOne(RuntimeType) | (1 << (floatFractionalBits(RuntimeType) - 2)),
    );
}
248
260
249
/// Checks the bit-layout helpers for the floating point `Type` against each
/// other and against `@bitSizeOf(Type)`.
///
/// Returns an error from `expect` as soon as one of the checks fails.
fn floatBits(comptime Type: type) !void {
    // (1 +) for the sign bit, since it is separate from the other bits
    const size = 1 + floatExponentBits(Type) + floatMantissaBits(Type);
    try expect(@bitSizeOf(Type) == size);

    // The fractional part can never be wider than the stored mantissa.
    try expect(floatFractionalBits(Type) <= floatMantissaBits(Type));

    // for machine epsilon, assert expmin <= -prec <= expmax
    const negated_precision = -floatFractionalBits(Type);
    try expect(floatExponentMin(Type) <= negated_precision);
    try expect(negated_precision <= floatExponentMax(Type));
}
271
// Runs the `floatBits` layout checks for every float type listed below.
// The calls are written out one per type rather than looped.
test floatBits {
    try floatBits(f16);
    try floatBits(f32);
    try floatBits(f64);
    try floatBits(f80);
    try floatBits(f128);
    try floatBits(c_longdouble);
}
260
279
261
280
test inf {
0 commit comments