Skip to content

Commit 31f942d

Browse files
committed
BUG: Correct handling of nans
Improve consistency of NaN handling. Prevent NaNs from producing values from int functions.
1 parent c59585e commit 31f942d

File tree

8 files changed

+80
-63
lines changed

8 files changed

+80
-63
lines changed

numpy/random/randomgen/common.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ cdef enum ConstraintType:
1616
CONS_NONE
1717
CONS_NON_NEGATIVE
1818
CONS_POSITIVE
19+
CONS_POSITIVE_NOT_NAN
1920
CONS_BOUNDED_0_1
2021
CONS_BOUNDED_0_1_NOTNAN
2122
CONS_BOUNDED_GT_0_1

numpy/random/randomgen/common.pyx

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -295,52 +295,57 @@ cdef uint64_t MAXSIZE = <uint64_t>sys.maxsize
295295

296296
cdef int check_array_constraint(np.ndarray val, object name, constraint_type cons) except -1:
297297
if cons == CONS_NON_NEGATIVE:
298-
if np.any(np.signbit(val)) or np.any(np.isnan(val)):
298+
if np.any(np.logical_and(np.logical_not(np.isnan(val)), np.signbit(val))):
299299
raise ValueError(name + " < 0")
300-
elif cons == CONS_POSITIVE:
301-
if not np.all(np.greater(val, 0)):
300+
elif cons == CONS_POSITIVE or cons == CONS_POSITIVE_NOT_NAN:
301+
if cons == CONS_POSITIVE_NOT_NAN and np.any(np.isnan(val)):
302+
raise ValueError(name + " must not be NaN")
303+
elif np.any(np.less_equal(val, 0)):
302304
raise ValueError(name + " <= 0")
303305
elif cons == CONS_BOUNDED_0_1:
304306
if not np.all(np.greater_equal(val, 0)) or \
305307
not np.all(np.less_equal(val, 1)):
306-
raise ValueError(name + " < 0 or " + name + " > 1")
308+
raise ValueError("{0} < 0 , {0} > 1 or {0} contains NaNs".format(name))
307309
elif cons == CONS_BOUNDED_GT_0_1:
308310
if not np.all(np.greater(val, 0)) or not np.all(np.less_equal(val, 1)):
309-
raise ValueError(name + " <= 0 or " + name + " > 1")
311+
raise ValueError("{0} <= 0 , {0} > 1 or {0} contains NaNs".format(name))
310312
elif cons == CONS_GT_1:
311313
if not np.all(np.greater(val, 1)):
312-
raise ValueError(name + " <= 1")
314+
raise ValueError("{0} <= 1 or {0} contains NaNs".format(name))
313315
elif cons == CONS_GTE_1:
314316
if not np.all(np.greater_equal(val, 1)):
315-
raise ValueError(name + " < 1")
317+
raise ValueError("{0} < 1 or {0} contains NaNs".format(name))
316318
elif cons == CONS_POISSON:
317319
if not np.all(np.less_equal(val, POISSON_LAM_MAX)):
318-
raise ValueError(name + " value too large")
319-
if not np.all(np.greater_equal(val, 0.0)):
320-
raise ValueError(name + " < 0")
320+
raise ValueError("{0} value too large".format(name))
321+
elif not np.all(np.greater_equal(val, 0.0)):
322+
raise ValueError("{0} < 0 or {0} contains NaNs".format(name))
321323

322324
return 0
323325

324326

325327
cdef int check_constraint(double val, object name, constraint_type cons) except -1:
328+
cdef bint is_nan
326329
if cons == CONS_NON_NEGATIVE:
327-
if np.signbit(val) or np.isnan(val):
330+
if not np.isnan(val) and np.signbit(val):
328331
raise ValueError(name + " < 0")
329-
elif cons == CONS_POSITIVE:
330-
if not (val > 0):
332+
elif cons == CONS_POSITIVE or cons == CONS_POSITIVE_NOT_NAN:
333+
if cons == CONS_POSITIVE_NOT_NAN and np.isnan(val):
334+
raise ValueError(name + " must not be NaN")
335+
elif val <= 0:
331336
raise ValueError(name + " <= 0")
332337
elif cons == CONS_BOUNDED_0_1:
333338
if not (val >= 0) or not (val <= 1):
334-
raise ValueError(name + " < 0 or " + name + " > 1")
339+
raise ValueError("{0} < 0 , {0} > 1 or {0} is NaN".format(name))
335340
elif cons == CONS_GT_1:
336341
if not (val > 1):
337-
raise ValueError(name + " <= 1")
342+
raise ValueError("{0} <= 1 or {0} is NaN".format(name))
338343
elif cons == CONS_GTE_1:
339344
if not (val >= 1):
340-
raise ValueError(name + " < 1")
345+
raise ValueError("{0} < 1 or {0} is NaN".format(name))
341346
elif cons == CONS_POISSON:
342347
if not (val >= 0):
343-
raise ValueError(name + " < 0")
348+
raise ValueError("{0} < 0 or {0} is NaN".format(name))
344349
elif not (val <= POISSON_LAM_MAX):
345350
raise ValueError(name + " value too large")
346351

numpy/random/randomgen/generator.pyx

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -903,7 +903,7 @@ cdef class RandomGenerator:
903903
Parameters
904904
----------
905905
d0, d1, ..., dn : int, optional
906-
The dimensions of the returned array, should all be positive.
906+
The dimensions of the returned array, must be non-negative.
907907
If no argument is given a single Python float is returned.
908908
dtype : {str, dtype}, optional
909909
Desired dtype of the result, either 'd' (or 'float64') or 'f'
@@ -953,7 +953,7 @@ cdef class RandomGenerator:
953953
Parameters
954954
----------
955955
d0, d1, ..., dn : int, optional
956-
The dimensions of the returned array, should be all positive.
956+
The dimensions of the returned array, must be non-negative.
957957
If no argument is given a single Python float is returned.
958958
dtype : {str, dtype}, optional
959959
Desired dtype of the result, either 'd' (or 'float64') or 'f'
@@ -1442,7 +1442,7 @@ cdef class RandomGenerator:
14421442
14431443
Samples are drawn from an F distribution with specified parameters,
14441444
`dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
1445-
freedom in denominator), where both parameters should be greater than
1445+
freedom in denominator), where both parameters must be greater than
14461446
zero.
14471447
14481448
The random variate of the F distribution (also known as the
@@ -1453,9 +1453,9 @@ cdef class RandomGenerator:
14531453
Parameters
14541454
----------
14551455
dfnum : float or array_like of floats
1456-
Degrees of freedom in numerator, should be > 0.
1456+
Degrees of freedom in numerator, must be > 0.
14571457
dfden : float or array_like of floats
1458-
Degrees of freedom in denominator, should be > 0.
1458+
Degrees of freedom in denominator, must be > 0.
14591459
size : int or tuple of ints, optional
14601460
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
14611461
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -1536,15 +1536,15 @@ cdef class RandomGenerator:
15361536
Parameters
15371537
----------
15381538
dfnum : float or array_like of floats
1539-
Numerator degrees of freedom, should be > 0.
1539+
Numerator degrees of freedom, must be > 0.
15401540
15411541
.. versionchanged:: 1.14.0
15421542
Earlier NumPy versions required dfnum > 1.
15431543
dfden : float or array_like of floats
1544-
Denominator degrees of freedom, should be > 0.
1544+
Denominator degrees of freedom, must be > 0.
15451545
nonc : float or array_like of floats
15461546
Non-centrality parameter, the sum of the squares of the numerator
1547-
means, should be >= 0.
1547+
means, must be >= 0.
15481548
size : int or tuple of ints, optional
15491549
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
15501550
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -1613,7 +1613,7 @@ cdef class RandomGenerator:
16131613
Parameters
16141614
----------
16151615
df : float or array_like of floats
1616-
Number of degrees of freedom, should be > 0.
1616+
Number of degrees of freedom, must be > 0.
16171617
size : int or tuple of ints, optional
16181618
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
16191619
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -1679,12 +1679,12 @@ cdef class RandomGenerator:
16791679
Parameters
16801680
----------
16811681
df : float or array_like of floats
1682-
Degrees of freedom, should be > 0.
1682+
Degrees of freedom, must be > 0.
16831683
16841684
.. versionchanged:: 1.10.0
16851685
Earlier NumPy versions required df > 1.
16861686
nonc : float or array_like of floats
1687-
Non-centrality, should be non-negative.
1687+
Non-centrality, must be non-negative.
16881688
size : int or tuple of ints, optional
16891689
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
16901690
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -1825,7 +1825,7 @@ cdef class RandomGenerator:
18251825
Parameters
18261826
----------
18271827
df : float or array_like of floats
1828-
Degrees of freedom, should be > 0.
1828+
Degrees of freedom, must be > 0.
18291829
size : int or tuple of ints, optional
18301830
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
18311831
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -2015,7 +2015,7 @@ cdef class RandomGenerator:
20152015
Parameters
20162016
----------
20172017
a : float or array_like of floats
2018-
Shape of the distribution. Must all be positive.
2018+
Shape of the distribution. Must be positive.
20192019
size : int or tuple of ints, optional
20202020
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
20212021
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -2831,9 +2831,9 @@ cdef class RandomGenerator:
28312831
Lower limit.
28322832
mode : float or array_like of floats
28332833
The value where the peak of the distribution occurs.
2834-
The value should fulfill the condition ``left <= mode <= right``.
2834+
The value must fulfill the condition ``left <= mode <= right``.
28352835
right : float or array_like of floats
2836-
Upper limit, should be larger than `left`.
2836+
Upper limit, must be larger than `left`.
28372837
size : int or tuple of ints, optional
28382838
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
28392839
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -3128,7 +3128,7 @@ cdef class RandomGenerator:
31283128
31293129
"""
31303130
return disc(&random_negative_binomial, self._brng, size, self.lock, 2, 0,
3131-
n, 'n', CONS_POSITIVE,
3131+
n, 'n', CONS_POSITIVE_NOT_NAN,
31323132
p, 'p', CONS_BOUNDED_0_1,
31333133
0.0, '', CONS_NONE)
31343134

@@ -3144,7 +3144,7 @@ cdef class RandomGenerator:
31443144
Parameters
31453145
----------
31463146
lam : float or array_like of floats
3147-
Expectation of interval, should be >= 0. A sequence of expectation
3147+
Expectation of interval, must be >= 0. A sequence of expectation
31483148
intervals must be broadcastable over the requested size.
31493149
size : int or tuple of ints, optional
31503150
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
@@ -3483,7 +3483,7 @@ cdef class RandomGenerator:
34833483
34843484
Notes
34853485
-----
3486-
The probability density for the Log Series distribution is
3486+
The probability mass function for the Log Series distribution is
34873487
34883488
.. math:: P(k) = \\frac{-p^k}{k \\ln(1-p)},
34893489
@@ -3717,7 +3717,7 @@ cdef class RandomGenerator:
37173717
Number of experiments.
37183718
pvals : sequence of floats, length p
37193719
Probabilities of each of the ``p`` different outcomes. These
3720-
should sum to 1 (however, the last element is always assumed to
3720+
must sum to 1 (however, the last element is always assumed to
37213721
account for the remaining probability, as long as
37223722
``sum(pvals[:-1]) <= 1``).
37233723
size : int or tuple of ints, optional

numpy/random/randomgen/mtrand.pyx

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#cython: wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3
33
import operator
44
import warnings
5-
from collections.abc import Mapping
5+
66
from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
77
from cpython cimport (Py_INCREF, PyFloat_AsDouble)
88
from libc cimport string
@@ -250,7 +250,7 @@ cdef class RandomState:
250250
Vol. 8, No. 1, pp. 3-30, Jan. 1998.
251251
252252
"""
253-
if isinstance(state, Mapping):
253+
if isinstance(state, dict):
254254
if 'brng' not in state or 'state' not in state:
255255
raise ValueError('state dictionary is not valid.')
256256
st = state
@@ -955,7 +955,7 @@ cdef class RandomState:
955955
Parameters
956956
----------
957957
d0, d1, ..., dn : int, optional
958-
The dimensions of the returned array, should all be positive.
958+
The dimensions of the returned array, must be non-negative.
959959
If no argument is given a single Python float is returned.
960960
961961
Returns
@@ -1001,7 +1001,7 @@ cdef class RandomState:
10011001
Parameters
10021002
----------
10031003
d0, d1, ..., dn : int, optional
1004-
The dimensions of the returned array, should be all positive.
1004+
The dimensions of the returned array, must be non-negative.
10051005
If no argument is given a single Python float is returned.
10061006
10071007
Returns
@@ -1458,7 +1458,7 @@ cdef class RandomState:
14581458
14591459
Samples are drawn from an F distribution with specified parameters,
14601460
`dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
1461-
freedom in denominator), where both parameters should be greater than
1461+
freedom in denominator), where both parameters must be greater than
14621462
zero.
14631463
14641464
The random variate of the F distribution (also known as the
@@ -1469,9 +1469,9 @@ cdef class RandomState:
14691469
Parameters
14701470
----------
14711471
dfnum : float or array_like of floats
1472-
Degrees of freedom in numerator, should be > 0.
1472+
Degrees of freedom in numerator, must be > 0.
14731473
dfden : float or array_like of floats
1474-
Degrees of freedom in denominator, should be > 0.
1474+
Degrees of freedom in denominator, must be > 0.
14751475
size : int or tuple of ints, optional
14761476
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
14771477
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -1552,15 +1552,15 @@ cdef class RandomState:
15521552
Parameters
15531553
----------
15541554
dfnum : float or array_like of floats
1555-
Numerator degrees of freedom, should be > 0.
1555+
Numerator degrees of freedom, must be > 0.
15561556
15571557
.. versionchanged:: 1.14.0
15581558
Earlier NumPy versions required dfnum > 1.
15591559
dfden : float or array_like of floats
1560-
Denominator degrees of freedom, should be > 0.
1560+
Denominator degrees of freedom, must be > 0.
15611561
nonc : float or array_like of floats
15621562
Non-centrality parameter, the sum of the squares of the numerator
1563-
means, should be >= 0.
1563+
means, must be >= 0.
15641564
size : int or tuple of ints, optional
15651565
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
15661566
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -1629,7 +1629,7 @@ cdef class RandomState:
16291629
Parameters
16301630
----------
16311631
df : float or array_like of floats
1632-
Number of degrees of freedom, should be > 0.
1632+
Number of degrees of freedom, must be > 0.
16331633
size : int or tuple of ints, optional
16341634
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
16351635
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -1695,12 +1695,12 @@ cdef class RandomState:
16951695
Parameters
16961696
----------
16971697
df : float or array_like of floats
1698-
Degrees of freedom, should be > 0.
1698+
Degrees of freedom, must be > 0.
16991699
17001700
.. versionchanged:: 1.10.0
17011701
Earlier NumPy versions required df > 1.
17021702
nonc : float or array_like of floats
1703-
Non-centrality, should be non-negative.
1703+
Non-centrality, must be non-negative.
17041704
size : int or tuple of ints, optional
17051705
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
17061706
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -1841,7 +1841,7 @@ cdef class RandomState:
18411841
Parameters
18421842
----------
18431843
df : float or array_like of floats
1844-
Degrees of freedom, should be > 0.
1844+
Degrees of freedom, must be > 0.
18451845
size : int or tuple of ints, optional
18461846
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
18471847
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -2031,7 +2031,7 @@ cdef class RandomState:
20312031
Parameters
20322032
----------
20332033
a : float or array_like of floats
2034-
Shape of the distribution. Must all be positive.
2034+
Shape of the distribution. Must be positive.
20352035
size : int or tuple of ints, optional
20362036
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
20372037
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -2847,9 +2847,9 @@ cdef class RandomState:
28472847
Lower limit.
28482848
mode : float or array_like of floats
28492849
The value where the peak of the distribution occurs.
2850-
The value should fulfill the condition ``left <= mode <= right``.
2850+
The value must fulfill the condition ``left <= mode <= right``.
28512851
right : float or array_like of floats
2852-
Upper limit, should be larger than `left`.
2852+
Upper limit, must be larger than `left`.
28532853
size : int or tuple of ints, optional
28542854
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
28552855
``m * n * k`` samples are drawn. If size is ``None`` (default),
@@ -3160,7 +3160,7 @@ cdef class RandomState:
31603160
Parameters
31613161
----------
31623162
lam : float or array_like of floats
3163-
Expectation of interval, should be >= 0. A sequence of expectation
3163+
Expectation of interval, must be >= 0. A sequence of expectation
31643164
intervals must be broadcastable over the requested size.
31653165
size : int or tuple of ints, optional
31663166
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
@@ -3735,7 +3735,7 @@ cdef class RandomState:
37353735
Number of experiments.
37363736
pvals : sequence of floats, length p
37373737
Probabilities of each of the ``p`` different outcomes. These
3738-
should sum to 1 (however, the last element is always assumed to
3738+
must sum to 1 (however, the last element is always assumed to
37393739
account for the remaining probability, as long as
37403740
``sum(pvals[:-1]) <= 1``).
37413741
size : int or tuple of ints, optional

numpy/random/randomgen/src/distributions/distributions.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,9 @@ int64_t random_binomial(brng_t *brng_state, double p, int64_t n,
899899
}
900900

901901
double random_noncentral_chisquare(brng_t *brng_state, double df, double nonc) {
902+
if (npy_isnan(nonc)){
903+
return NPY_NAN;
904+
}
902905
if (nonc == 0) {
903906
return random_chisquare(brng_state, df);
904907
}
@@ -939,7 +942,9 @@ double random_vonmises(brng_t *brng_state, double mu, double kappa) {
939942
double U, V, W, Y, Z;
940943
double result, mod;
941944
int neg;
942-
945+
if (npy_isnan(kappa)){
946+
return NPY_NAN;
947+
}
943948
if (kappa < 1e-8) {
944949
return M_PI * (2 * next_double(brng_state) - 1);
945950
} else {

0 commit comments

Comments
 (0)