From 31f942d773e74a94c829b70064c6b2b9e78c86a0 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 8 Apr 2019 10:11:07 +0100 Subject: [PATCH] BUG: Correct handling of nans Improve consistency of nan handling Prevent nans prducing values from int functions --- numpy/random/randomgen/common.pxd | 1 + numpy/random/randomgen/common.pyx | 39 +++++++++++-------- numpy/random/randomgen/generator.pyx | 38 +++++++++--------- numpy/random/randomgen/mtrand.pyx | 38 +++++++++--------- .../src/distributions/distributions.c | 7 +++- .../src/distributions/distributions.h | 2 +- .../src/legacy/distributions-boxmuller.c | 10 ++++- .../randomgen/tests/test_against_numpy.py | 8 ++-- 8 files changed, 80 insertions(+), 63 deletions(-) diff --git a/numpy/random/randomgen/common.pxd b/numpy/random/randomgen/common.pxd index 37182a29d6bf..f6748e5aa27d 100644 --- a/numpy/random/randomgen/common.pxd +++ b/numpy/random/randomgen/common.pxd @@ -16,6 +16,7 @@ cdef enum ConstraintType: CONS_NONE CONS_NON_NEGATIVE CONS_POSITIVE + CONS_POSITIVE_NOT_NAN CONS_BOUNDED_0_1 CONS_BOUNDED_0_1_NOTNAN CONS_BOUNDED_GT_0_1 diff --git a/numpy/random/randomgen/common.pyx b/numpy/random/randomgen/common.pyx index 2ade3e821cc4..1f7cd40cac76 100644 --- a/numpy/random/randomgen/common.pyx +++ b/numpy/random/randomgen/common.pyx @@ -295,52 +295,57 @@ cdef uint64_t MAXSIZE = sys.maxsize cdef int check_array_constraint(np.ndarray val, object name, constraint_type cons) except -1: if cons == CONS_NON_NEGATIVE: - if np.any(np.signbit(val)) or np.any(np.isnan(val)): + if np.any(np.logical_and(np.logical_not(np.isnan(val)), np.signbit(val))): raise ValueError(name + " < 0") - elif cons == CONS_POSITIVE: - if not np.all(np.greater(val, 0)): + elif cons == CONS_POSITIVE or cons == CONS_POSITIVE_NOT_NAN: + if cons == CONS_POSITIVE_NOT_NAN and np.any(np.isnan(val)): + raise ValueError(name + " must not be NaN") + elif np.any(np.less_equal(val, 0)): raise ValueError(name + " <= 0") elif cons == CONS_BOUNDED_0_1: if not np.all(np.greater_equal(val, 0)) or \ not np.all(np.less_equal(val, 1)): - raise ValueError(name + " < 0 or " + name + " > 1") + raise ValueError("{0} < 0 , {0} > 1 or {0} contains NaNs".format(name)) elif cons == CONS_BOUNDED_GT_0_1: if not np.all(np.greater(val, 0)) or not np.all(np.less_equal(val, 1)): - raise ValueError(name + " <= 0 or " + name + " > 1") + raise ValueError("{0} <= 0 , {0} > 1 or {0} contains NaNs".format(name)) elif cons == CONS_GT_1: if not np.all(np.greater(val, 1)): - raise ValueError(name + " <= 1") + raise ValueError("{0} <= 1 or {0} contains NaNs".format(name)) elif cons == CONS_GTE_1: if not np.all(np.greater_equal(val, 1)): - raise ValueError(name + " < 1") + raise ValueError("{0} < 1 or {0} contains NaNs".format(name)) elif cons == CONS_POISSON: if not np.all(np.less_equal(val, POISSON_LAM_MAX)): - raise ValueError(name + " value too large") - if not np.all(np.greater_equal(val, 0.0)): - raise ValueError(name + " < 0") + raise ValueError("{0} value too large".format(name)) + elif not np.all(np.greater_equal(val, 0.0)): + raise ValueError("{0} < 0 or {0} contains NaNs".format(name)) return 0 cdef int check_constraint(double val, object name, constraint_type cons) except -1: + cdef bint is_nan if cons == CONS_NON_NEGATIVE: - if np.signbit(val) or np.isnan(val): + if not np.isnan(val) and np.signbit(val): raise ValueError(name + " < 0") - elif cons == CONS_POSITIVE: - if not (val > 0): + elif cons == CONS_POSITIVE or cons == CONS_POSITIVE_NOT_NAN: + if cons == CONS_POSITIVE_NOT_NAN and np.isnan(val): + raise ValueError(name + " must not be NaN") + elif val <= 0: raise ValueError(name + " <= 0") elif cons == CONS_BOUNDED_0_1: if not (val >= 0) or not (val <= 1): - raise ValueError(name + " < 0 or " + name + " > 1") + raise ValueError("{0} < 0 , {0} > 1 or {0} is NaN".format(name)) elif cons == CONS_GT_1: if not (val > 1): - raise ValueError(name + " <= 1") + raise ValueError("{0} <= 1 or {0} is NaN".format(name)) elif cons == CONS_GTE_1: if not (val >= 1): - raise ValueError(name + " < 1") + raise ValueError("{0} < 1 or {0} is NaN".format(name)) elif cons == CONS_POISSON: if not (val >= 0): - raise ValueError(name + " < 0") + raise ValueError("{0} < 0 or {0} is NaN".format(name)) elif not (val <= POISSON_LAM_MAX): raise ValueError(name + " value too large") diff --git a/numpy/random/randomgen/generator.pyx b/numpy/random/randomgen/generator.pyx index b05f490334fe..a244bca578d3 100644 --- a/numpy/random/randomgen/generator.pyx +++ b/numpy/random/randomgen/generator.pyx @@ -903,7 +903,7 @@ cdef class RandomGenerator: Parameters ---------- d0, d1, ..., dn : int, optional - The dimensions of the returned array, should all be positive. + The dimensions of the returned array, must be non-negative. If no argument is given a single Python float is returned. dtype : {str, dtype}, optional Desired dtype of the result, either 'd' (or 'float64') or 'f' @@ -953,7 +953,7 @@ cdef class RandomGenerator: Parameters ---------- d0, d1, ..., dn : int, optional - The dimensions of the returned array, should be all positive. + The dimensions of the returned array, must be non-negative. If no argument is given a single Python float is returned. dtype : {str, dtype}, optional Desired dtype of the result, either 'd' (or 'float64') or 'f' @@ -1442,7 +1442,7 @@ cdef class RandomGenerator: Samples are drawn from an F distribution with specified parameters, `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of - freedom in denominator), where both parameters should be greater than + freedom in denominator), where both parameters must be greater than zero. The random variate of the F distribution (also known as the @@ -1453,9 +1453,9 @@ cdef class RandomGenerator: Parameters ---------- dfnum : float or array_like of floats - Degrees of freedom in numerator, should be > 0. + Degrees of freedom in numerator, must be > 0. dfden : float or array_like of float - Degrees of freedom in denominator, should be > 0. + Degrees of freedom in denominator, must be > 0. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -1536,15 +1536,15 @@ cdef class RandomGenerator: Parameters ---------- dfnum : float or array_like of floats - Numerator degrees of freedom, should be > 0. + Numerator degrees of freedom, must be > 0. .. versionchanged:: 1.14.0 Earlier NumPy versions required dfnum > 1. dfden : float or array_like of floats - Denominator degrees of freedom, should be > 0. + Denominator degrees of freedom, must be > 0. nonc : float or array_like of floats Non-centrality parameter, the sum of the squares of the numerator - means, should be >= 0. + means, must be >= 0. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -1613,7 +1613,7 @@ cdef class RandomGenerator: Parameters ---------- df : float or array_like of floats - Number of degrees of freedom, should be > 0. + Number of degrees of freedom, must be > 0. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -1679,12 +1679,12 @@ cdef class RandomGenerator: Parameters ---------- df : float or array_like of floats - Degrees of freedom, should be > 0. + Degrees of freedom, must be > 0. .. versionchanged:: 1.10.0 Earlier NumPy versions required dfnum > 1. nonc : float or array_like of floats - Non-centrality, should be non-negative. + Non-centrality, must be non-negative. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -1825,7 +1825,7 @@ cdef class RandomGenerator: Parameters ---------- df : float or array_like of floats - Degrees of freedom, should be > 0. + Degrees of freedom, must be > 0. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -2015,7 +2015,7 @@ cdef class RandomGenerator: Parameters ---------- a : float or array_like of floats - Shape of the distribution. Must all be positive. + Shape of the distribution. Must be positive. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -2831,9 +2831,9 @@ cdef class RandomGenerator: Lower limit. mode : float or array_like of floats The value where the peak of the distribution occurs. - The value should fulfill the condition ``left <= mode <= right``. + The value must fulfill the condition ``left <= mode <= right``. right : float or array_like of floats - Upper limit, should be larger than `left`. + Upper limit, must be larger than `left`. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -3128,7 +3128,7 @@ cdef class RandomGenerator: """ return disc(&random_negative_binomial, self._brng, size, self.lock, 2, 0, - n, 'n', CONS_POSITIVE, + n, 'n', CONS_POSITIVE_NOT_NAN, p, 'p', CONS_BOUNDED_0_1, 0.0, '', CONS_NONE) @@ -3144,7 +3144,7 @@ cdef class RandomGenerator: Parameters ---------- lam : float or array_like of floats - Expectation of interval, should be >= 0. A sequence of expectation + Expectation of interval, must be >= 0. A sequence of expectation intervals must be broadcastable over the requested size. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then @@ -3483,7 +3483,7 @@ cdef class RandomGenerator: Notes ----- - The probability density for the Log Series distribution is + The probability mass function for the Log Series distribution is .. math:: P(k) = \\frac{-p^k}{k \\ln(1-p)}, @@ -3717,7 +3717,7 @@ cdef class RandomGenerator: Number of experiments. pvals : sequence of floats, length p Probabilities of each of the ``p`` different outcomes. These - should sum to 1 (however, the last element is always assumed to + must sum to 1 (however, the last element is always assumed to account for the remaining probability, as long as ``sum(pvals[:-1]) <= 1)``. size : int or tuple of ints, optional diff --git a/numpy/random/randomgen/mtrand.pyx b/numpy/random/randomgen/mtrand.pyx index 2f89f5eb0cd7..875ba491d3a7 100644 --- a/numpy/random/randomgen/mtrand.pyx +++ b/numpy/random/randomgen/mtrand.pyx @@ -2,7 +2,7 @@ #cython: wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3 import operator import warnings -from collections.abc import Mapping + from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer from cpython cimport (Py_INCREF, PyFloat_AsDouble) from libc cimport string @@ -250,7 +250,7 @@ cdef class RandomState: Vol. 8, No. 1, pp. 3-30, Jan. 1998. """ - if isinstance(state, Mapping): + if isinstance(state, dict): if 'brng' not in state or 'state' not in state: raise ValueError('state dictionary is not valid.') st = state @@ -955,7 +955,7 @@ cdef class RandomState: Parameters ---------- d0, d1, ..., dn : int, optional - The dimensions of the returned array, should all be positive. + The dimensions of the returned array, must be non-negative. If no argument is given a single Python float is returned. Returns @@ -1001,7 +1001,7 @@ cdef class RandomState: Parameters ---------- d0, d1, ..., dn : int, optional - The dimensions of the returned array, should be all positive. + The dimensions of the returned array, must be non-negative. If no argument is given a single Python float is returned. Returns @@ -1458,7 +1458,7 @@ cdef class RandomState: Samples are drawn from an F distribution with specified parameters, `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of - freedom in denominator), where both parameters should be greater than + freedom in denominator), where both parameters must be greater than zero. The random variate of the F distribution (also known as the @@ -1469,9 +1469,9 @@ cdef class RandomState: Parameters ---------- dfnum : float or array_like of floats - Degrees of freedom in numerator, should be > 0. + Degrees of freedom in numerator, must be > 0. dfden : float or array_like of float - Degrees of freedom in denominator, should be > 0. + Degrees of freedom in denominator, must be > 0. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -1552,15 +1552,15 @@ cdef class RandomState: Parameters ---------- dfnum : float or array_like of floats - Numerator degrees of freedom, should be > 0. + Numerator degrees of freedom, must be > 0. .. versionchanged:: 1.14.0 Earlier NumPy versions required dfnum > 1. dfden : float or array_like of floats - Denominator degrees of freedom, should be > 0. + Denominator degrees of freedom, must be > 0. nonc : float or array_like of floats Non-centrality parameter, the sum of the squares of the numerator - means, should be >= 0. + means, must be >= 0. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -1629,7 +1629,7 @@ cdef class RandomState: Parameters ---------- df : float or array_like of floats - Number of degrees of freedom, should be > 0. + Number of degrees of freedom, must be > 0. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -1695,12 +1695,12 @@ cdef class RandomState: Parameters ---------- df : float or array_like of floats - Degrees of freedom, should be > 0. + Degrees of freedom, must be > 0. .. versionchanged:: 1.10.0 Earlier NumPy versions required dfnum > 1. nonc : float or array_like of floats - Non-centrality, should be non-negative. + Non-centrality, must be non-negative. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -1841,7 +1841,7 @@ cdef class RandomState: Parameters ---------- df : float or array_like of floats - Degrees of freedom, should be > 0. + Degrees of freedom, must be > 0. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -2031,7 +2031,7 @@ cdef class RandomState: Parameters ---------- a : float or array_like of floats - Shape of the distribution. Must all be positive. + Shape of the distribution. Must be positive. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -2847,9 +2847,9 @@ cdef class RandomState: Lower limit. mode : float or array_like of floats The value where the peak of the distribution occurs. - The value should fulfill the condition ``left <= mode <= right``. + The value must fulfill the condition ``left <= mode <= right``. right : float or array_like of floats - Upper limit, should be larger than `left`. + Upper limit, must be larger than `left`. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -3160,7 +3160,7 @@ cdef class RandomState: Parameters ---------- lam : float or array_like of floats - Expectation of interval, should be >= 0. A sequence of expectation + Expectation of interval, must be >= 0. A sequence of expectation intervals must be broadcastable over the requested size. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then @@ -3735,7 +3735,7 @@ cdef class RandomState: Number of experiments. pvals : sequence of floats, length p Probabilities of each of the ``p`` different outcomes. These - should sum to 1 (however, the last element is always assumed to + must sum to 1 (however, the last element is always assumed to account for the remaining probability, as long as ``sum(pvals[:-1]) <= 1)``. size : int or tuple of ints, optional diff --git a/numpy/random/randomgen/src/distributions/distributions.c b/numpy/random/randomgen/src/distributions/distributions.c index 20cdbe8632bf..83806de38997 100644 --- a/numpy/random/randomgen/src/distributions/distributions.c +++ b/numpy/random/randomgen/src/distributions/distributions.c @@ -899,6 +899,9 @@ int64_t random_binomial(brng_t *brng_state, double p, int64_t n, } double random_noncentral_chisquare(brng_t *brng_state, double df, double nonc) { + if (npy_isnan(nonc)){ + return NPY_NAN; + } if (nonc == 0) { return random_chisquare(brng_state, df); } @@ -939,7 +942,9 @@ double random_vonmises(brng_t *brng_state, double mu, double kappa) { double U, V, W, Y, Z; double result, mod; int neg; - + if (npy_isnan(kappa)){ + return NPY_NAN; + } if (kappa < 1e-8) { return M_PI * (2 * next_double(brng_state) - 1); } else { diff --git a/numpy/random/randomgen/src/distributions/distributions.h b/numpy/random/randomgen/src/distributions/distributions.h index 5cf9c72b25bc..7ca31a16cba6 100644 --- a/numpy/random/randomgen/src/distributions/distributions.h +++ b/numpy/random/randomgen/src/distributions/distributions.h @@ -20,7 +20,7 @@ typedef int bool; #include "Python.h" #include "numpy/npy_common.h" -#include +#include "numpy/npy_math.h" #ifdef _WIN32 #if _MSC_VER == 1500 diff --git a/numpy/random/randomgen/src/legacy/distributions-boxmuller.c b/numpy/random/randomgen/src/legacy/distributions-boxmuller.c index 768de066ced2..e96d3f19a0cb 100644 --- a/numpy/random/randomgen/src/legacy/distributions-boxmuller.c +++ b/numpy/random/randomgen/src/legacy/distributions-boxmuller.c @@ -12,6 +12,7 @@ double legacy_gauss(aug_brng_t *aug_state) { return temp; } else { double f, x1, x2, r2; + double f, x1, x2, r2; do { x1 = 2.0 * legacy_double(aug_state) - 1.0; @@ -103,6 +104,7 @@ double legacy_chisquare(aug_brng_t *aug_state, double df) { double legacy_noncentral_chisquare(aug_brng_t *aug_state, double df, double nonc) { + double out; if (nonc == 0) { return legacy_chisquare(aug_state, df); } @@ -112,7 +114,13 @@ double legacy_noncentral_chisquare(aug_brng_t *aug_state, double df, return Chi2 + n * n; } else { const long i = random_poisson(aug_state->basicrng, nonc / 2.0); - return legacy_chisquare(aug_state, df + 2 * i); + out = legacy_chisquare(aug_state, df + 2 * i); + /* Insert nan guard here to avoid changing the stream */ + if (npy_isnan(nonc)){ + return NPY_NAN; + } else { + return out; + } } } diff --git a/numpy/random/randomgen/tests/test_against_numpy.py b/numpy/random/randomgen/tests/test_against_numpy.py index 8a9ef2962750..b0fcdfb710f1 100644 --- a/numpy/random/randomgen/tests/test_against_numpy.py +++ b/numpy/random/randomgen/tests/test_against_numpy.py @@ -570,11 +570,9 @@ def test_multivariate_normal(self): self._is_state_common_legacy() -funcs = [generator.exponential, - generator.zipf, - generator.chisquare, - generator.logseries, - generator.poisson] +funcs = [randomgen.generator.zipf, + randomgen.generator.logseries, + randomgen.generator.poisson] ids = [f.__name__ for f in funcs]