From 31f942d773e74a94c829b70064c6b2b9e78c86a0 Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Mon, 8 Apr 2019 10:11:07 +0100
Subject: [PATCH] BUG: Correct handling of nans

Improve consistency of nan handling
Prevent nans prducing values from int functions
---
 numpy/random/randomgen/common.pxd             |  1 +
 numpy/random/randomgen/common.pyx             | 39 +++++++++++--------
 numpy/random/randomgen/generator.pyx          | 38 +++++++++---------
 numpy/random/randomgen/mtrand.pyx             | 38 +++++++++---------
 .../src/distributions/distributions.c         |  7 +++-
 .../src/distributions/distributions.h         |  2 +-
 .../src/legacy/distributions-boxmuller.c      | 10 ++++-
 .../randomgen/tests/test_against_numpy.py     |  8 ++--
 8 files changed, 80 insertions(+), 63 deletions(-)

diff --git a/numpy/random/randomgen/common.pxd b/numpy/random/randomgen/common.pxd
index 37182a29d6bf..f6748e5aa27d 100644
--- a/numpy/random/randomgen/common.pxd
+++ b/numpy/random/randomgen/common.pxd
@@ -16,6 +16,7 @@ cdef enum ConstraintType:
     CONS_NONE
     CONS_NON_NEGATIVE
     CONS_POSITIVE
+    CONS_POSITIVE_NOT_NAN
     CONS_BOUNDED_0_1
     CONS_BOUNDED_0_1_NOTNAN
     CONS_BOUNDED_GT_0_1
diff --git a/numpy/random/randomgen/common.pyx b/numpy/random/randomgen/common.pyx
index 2ade3e821cc4..1f7cd40cac76 100644
--- a/numpy/random/randomgen/common.pyx
+++ b/numpy/random/randomgen/common.pyx
@@ -295,52 +295,57 @@ cdef uint64_t MAXSIZE = <uint64_t>sys.maxsize
 
 cdef int check_array_constraint(np.ndarray val, object name, constraint_type cons) except -1:
     if cons == CONS_NON_NEGATIVE:
-        if np.any(np.signbit(val)) or np.any(np.isnan(val)):
+        if np.any(np.logical_and(np.logical_not(np.isnan(val)), np.signbit(val))):
             raise ValueError(name + " < 0")
-    elif cons == CONS_POSITIVE:
-        if not np.all(np.greater(val, 0)):
+    elif cons == CONS_POSITIVE or cons == CONS_POSITIVE_NOT_NAN:
+        if cons == CONS_POSITIVE_NOT_NAN and np.any(np.isnan(val)):
+            raise ValueError(name + " must not be NaN")
+        elif np.any(np.less_equal(val, 0)):
             raise ValueError(name + " <= 0")
     elif cons == CONS_BOUNDED_0_1:
         if not np.all(np.greater_equal(val, 0)) or \
                 not np.all(np.less_equal(val, 1)):
-            raise ValueError(name + " < 0 or " + name + " > 1")
+            raise ValueError("{0} < 0 , {0} > 1 or {0} contains NaNs".format(name))
     elif cons == CONS_BOUNDED_GT_0_1:
         if not np.all(np.greater(val, 0)) or not np.all(np.less_equal(val, 1)):
-            raise ValueError(name + " <= 0 or " + name + " > 1")
+            raise ValueError("{0} <= 0 , {0} > 1 or {0} contains NaNs".format(name))
     elif cons == CONS_GT_1:
         if not np.all(np.greater(val, 1)):
-            raise ValueError(name + " <= 1")
+            raise ValueError("{0} <= 1 or {0} contains NaNs".format(name))
     elif cons == CONS_GTE_1:
         if not np.all(np.greater_equal(val, 1)):
-            raise ValueError(name + " < 1")
+            raise ValueError("{0} < 1 or {0} contains NaNs".format(name))
     elif cons == CONS_POISSON:
         if not np.all(np.less_equal(val, POISSON_LAM_MAX)):
-            raise ValueError(name + " value too large")
-        if not np.all(np.greater_equal(val, 0.0)):
-            raise ValueError(name + " < 0")
+            raise ValueError("{0} value too large".format(name))
+        elif not np.all(np.greater_equal(val, 0.0)):
+            raise ValueError("{0} < 0 or {0} contains NaNs".format(name))
 
     return 0
 
 
 cdef int check_constraint(double val, object name, constraint_type cons) except -1:
+    cdef bint is_nan
     if cons == CONS_NON_NEGATIVE:
-        if np.signbit(val) or np.isnan(val):
+        if not np.isnan(val) and np.signbit(val):
             raise ValueError(name + " < 0")
-    elif cons == CONS_POSITIVE:
-        if not (val > 0):
+    elif cons == CONS_POSITIVE or cons == CONS_POSITIVE_NOT_NAN:
+        if cons == CONS_POSITIVE_NOT_NAN and np.isnan(val):
+            raise ValueError(name + " must not be NaN")
+        elif val <= 0:
             raise ValueError(name + " <= 0")
     elif cons == CONS_BOUNDED_0_1:
         if not (val >= 0) or not (val <= 1):
-            raise ValueError(name + " < 0 or " + name + " > 1")
+            raise ValueError("{0} < 0 , {0} > 1 or {0} is NaN".format(name))
     elif cons == CONS_GT_1:
         if not (val > 1):
-            raise ValueError(name + " <= 1")
+            raise ValueError("{0} <= 1 or {0} is NaN".format(name))
     elif cons == CONS_GTE_1:
         if not (val >= 1):
-            raise ValueError(name + " < 1")
+            raise ValueError("{0} < 1 or {0} is NaN".format(name))
     elif cons == CONS_POISSON:
         if not (val >= 0):
-            raise ValueError(name + " < 0")
+            raise ValueError("{0} < 0 or {0} is NaN".format(name))
         elif not (val <= POISSON_LAM_MAX):
             raise ValueError(name + " value too large")
 
diff --git a/numpy/random/randomgen/generator.pyx b/numpy/random/randomgen/generator.pyx
index b05f490334fe..a244bca578d3 100644
--- a/numpy/random/randomgen/generator.pyx
+++ b/numpy/random/randomgen/generator.pyx
@@ -903,7 +903,7 @@ cdef class RandomGenerator:
         Parameters
         ----------
         d0, d1, ..., dn : int, optional
-            The dimensions of the returned array, should all be positive.
+            The dimensions of the returned array, must be non-negative.
             If no argument is given a single Python float is returned.
         dtype : {str, dtype}, optional
             Desired dtype of the result, either 'd' (or 'float64') or 'f'
@@ -953,7 +953,7 @@ cdef class RandomGenerator:
         Parameters
         ----------
         d0, d1, ..., dn : int, optional
-            The dimensions of the returned array, should be all positive.
+            The dimensions of the returned array, must be non-negative.
             If no argument is given a single Python float is returned.
         dtype : {str, dtype}, optional
             Desired dtype of the result, either 'd' (or 'float64') or 'f'
@@ -1442,7 +1442,7 @@ cdef class RandomGenerator:
 
         Samples are drawn from an F distribution with specified parameters,
         `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
-        freedom in denominator), where both parameters should be greater than
+        freedom in denominator), where both parameters must be greater than
         zero.
 
         The random variate of the F distribution (also known as the
@@ -1453,9 +1453,9 @@ cdef class RandomGenerator:
         Parameters
         ----------
         dfnum : float or array_like of floats
-            Degrees of freedom in numerator, should be > 0.
+            Degrees of freedom in numerator, must be > 0.
         dfden : float or array_like of float
-            Degrees of freedom in denominator, should be > 0.
+            Degrees of freedom in denominator, must be > 0.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -1536,15 +1536,15 @@ cdef class RandomGenerator:
         Parameters
         ----------
         dfnum : float or array_like of floats
-            Numerator degrees of freedom, should be > 0.
+            Numerator degrees of freedom, must be > 0.
 
             .. versionchanged:: 1.14.0
                Earlier NumPy versions required dfnum > 1.
         dfden : float or array_like of floats
-            Denominator degrees of freedom, should be > 0.
+            Denominator degrees of freedom, must be > 0.
         nonc : float or array_like of floats
             Non-centrality parameter, the sum of the squares of the numerator
-            means, should be >= 0.
+            means, must be >= 0.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -1613,7 +1613,7 @@ cdef class RandomGenerator:
         Parameters
         ----------
         df : float or array_like of floats
-             Number of degrees of freedom, should be > 0.
+             Number of degrees of freedom, must be > 0.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -1679,12 +1679,12 @@ cdef class RandomGenerator:
         Parameters
         ----------
         df : float or array_like of floats
-            Degrees of freedom, should be > 0.
+            Degrees of freedom, must be > 0.
 
             .. versionchanged:: 1.10.0
                Earlier NumPy versions required dfnum > 1.
         nonc : float or array_like of floats
-            Non-centrality, should be non-negative.
+            Non-centrality, must be non-negative.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -1825,7 +1825,7 @@ cdef class RandomGenerator:
         Parameters
         ----------
         df : float or array_like of floats
-            Degrees of freedom, should be > 0.
+            Degrees of freedom, must be > 0.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -2015,7 +2015,7 @@ cdef class RandomGenerator:
         Parameters
         ----------
         a : float or array_like of floats
-            Shape of the distribution. Must all be positive.
+            Shape of the distribution. Must be positive.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -2831,9 +2831,9 @@ cdef class RandomGenerator:
             Lower limit.
         mode : float or array_like of floats
             The value where the peak of the distribution occurs.
-            The value should fulfill the condition ``left <= mode <= right``.
+            The value must fulfill the condition ``left <= mode <= right``.
         right : float or array_like of floats
-            Upper limit, should be larger than `left`.
+            Upper limit, must be larger than `left`.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -3128,7 +3128,7 @@ cdef class RandomGenerator:
 
         """
         return disc(&random_negative_binomial, self._brng, size, self.lock, 2, 0,
-                    n, 'n', CONS_POSITIVE,
+                    n, 'n', CONS_POSITIVE_NOT_NAN,
                     p, 'p', CONS_BOUNDED_0_1,
                     0.0, '', CONS_NONE)
 
@@ -3144,7 +3144,7 @@ cdef class RandomGenerator:
         Parameters
         ----------
         lam : float or array_like of floats
-            Expectation of interval, should be >= 0. A sequence of expectation
+            Expectation of interval, must be >= 0. A sequence of expectation
             intervals must be broadcastable over the requested size.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
@@ -3483,7 +3483,7 @@ cdef class RandomGenerator:
 
         Notes
         -----
-        The probability density for the Log Series distribution is
+        The probability mass function for the Log Series distribution is
 
         .. math:: P(k) = \\frac{-p^k}{k \\ln(1-p)},
 
@@ -3717,7 +3717,7 @@ cdef class RandomGenerator:
             Number of experiments.
         pvals : sequence of floats, length p
             Probabilities of each of the ``p`` different outcomes.  These
-            should sum to 1 (however, the last element is always assumed to
+            must sum to 1 (however, the last element is always assumed to
             account for the remaining probability, as long as
             ``sum(pvals[:-1]) <= 1)``.
         size : int or tuple of ints, optional
diff --git a/numpy/random/randomgen/mtrand.pyx b/numpy/random/randomgen/mtrand.pyx
index 2f89f5eb0cd7..875ba491d3a7 100644
--- a/numpy/random/randomgen/mtrand.pyx
+++ b/numpy/random/randomgen/mtrand.pyx
@@ -2,7 +2,7 @@
 #cython: wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3
 import operator
 import warnings
-from collections.abc import Mapping
+
 from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
 from cpython cimport (Py_INCREF, PyFloat_AsDouble)
 from libc cimport string
@@ -250,7 +250,7 @@ cdef class RandomState:
            Vol. 8, No. 1, pp. 3-30, Jan. 1998.
 
         """
-        if isinstance(state, Mapping):
+        if isinstance(state, dict):
             if 'brng' not in state or 'state' not in state:
                 raise ValueError('state dictionary is not valid.')
             st = state
@@ -955,7 +955,7 @@ cdef class RandomState:
         Parameters
         ----------
         d0, d1, ..., dn : int, optional
-            The dimensions of the returned array, should all be positive.
+            The dimensions of the returned array, must be non-negative.
             If no argument is given a single Python float is returned.
 
         Returns
@@ -1001,7 +1001,7 @@ cdef class RandomState:
         Parameters
         ----------
         d0, d1, ..., dn : int, optional
-            The dimensions of the returned array, should be all positive.
+            The dimensions of the returned array, must be non-negative.
             If no argument is given a single Python float is returned.
 
         Returns
@@ -1458,7 +1458,7 @@ cdef class RandomState:
 
         Samples are drawn from an F distribution with specified parameters,
         `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
-        freedom in denominator), where both parameters should be greater than
+        freedom in denominator), where both parameters must be greater than
         zero.
 
         The random variate of the F distribution (also known as the
@@ -1469,9 +1469,9 @@ cdef class RandomState:
         Parameters
         ----------
         dfnum : float or array_like of floats
-            Degrees of freedom in numerator, should be > 0.
+            Degrees of freedom in numerator, must be > 0.
         dfden : float or array_like of float
-            Degrees of freedom in denominator, should be > 0.
+            Degrees of freedom in denominator, must be > 0.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -1552,15 +1552,15 @@ cdef class RandomState:
         Parameters
         ----------
         dfnum : float or array_like of floats
-            Numerator degrees of freedom, should be > 0.
+            Numerator degrees of freedom, must be > 0.
 
             .. versionchanged:: 1.14.0
                Earlier NumPy versions required dfnum > 1.
         dfden : float or array_like of floats
-            Denominator degrees of freedom, should be > 0.
+            Denominator degrees of freedom, must be > 0.
         nonc : float or array_like of floats
             Non-centrality parameter, the sum of the squares of the numerator
-            means, should be >= 0.
+            means, must be >= 0.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -1629,7 +1629,7 @@ cdef class RandomState:
         Parameters
         ----------
         df : float or array_like of floats
-             Number of degrees of freedom, should be > 0.
+             Number of degrees of freedom, must be > 0.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -1695,12 +1695,12 @@ cdef class RandomState:
         Parameters
         ----------
         df : float or array_like of floats
-            Degrees of freedom, should be > 0.
+            Degrees of freedom, must be > 0.
 
             .. versionchanged:: 1.10.0
                Earlier NumPy versions required dfnum > 1.
         nonc : float or array_like of floats
-            Non-centrality, should be non-negative.
+            Non-centrality, must be non-negative.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -1841,7 +1841,7 @@ cdef class RandomState:
         Parameters
         ----------
         df : float or array_like of floats
-            Degrees of freedom, should be > 0.
+            Degrees of freedom, must be > 0.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -2031,7 +2031,7 @@ cdef class RandomState:
         Parameters
         ----------
         a : float or array_like of floats
-            Shape of the distribution. Must all be positive.
+            Shape of the distribution. Must be positive.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -2847,9 +2847,9 @@ cdef class RandomState:
             Lower limit.
         mode : float or array_like of floats
             The value where the peak of the distribution occurs.
-            The value should fulfill the condition ``left <= mode <= right``.
+            The value must fulfill the condition ``left <= mode <= right``.
         right : float or array_like of floats
-            Upper limit, should be larger than `left`.
+            Upper limit, must be larger than `left`.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
             ``m * n * k`` samples are drawn.  If size is ``None`` (default),
@@ -3160,7 +3160,7 @@ cdef class RandomState:
         Parameters
         ----------
         lam : float or array_like of floats
-            Expectation of interval, should be >= 0. A sequence of expectation
+            Expectation of interval, must be >= 0. A sequence of expectation
             intervals must be broadcastable over the requested size.
         size : int or tuple of ints, optional
             Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
@@ -3735,7 +3735,7 @@ cdef class RandomState:
             Number of experiments.
         pvals : sequence of floats, length p
             Probabilities of each of the ``p`` different outcomes.  These
-            should sum to 1 (however, the last element is always assumed to
+            must sum to 1 (however, the last element is always assumed to
             account for the remaining probability, as long as
             ``sum(pvals[:-1]) <= 1)``.
         size : int or tuple of ints, optional
diff --git a/numpy/random/randomgen/src/distributions/distributions.c b/numpy/random/randomgen/src/distributions/distributions.c
index 20cdbe8632bf..83806de38997 100644
--- a/numpy/random/randomgen/src/distributions/distributions.c
+++ b/numpy/random/randomgen/src/distributions/distributions.c
@@ -899,6 +899,9 @@ int64_t random_binomial(brng_t *brng_state, double p, int64_t n,
 }
 
 double random_noncentral_chisquare(brng_t *brng_state, double df, double nonc) {
+  if (npy_isnan(nonc)){
+    return NPY_NAN;
+  }
   if (nonc == 0) {
     return random_chisquare(brng_state, df);
   }
@@ -939,7 +942,9 @@ double random_vonmises(brng_t *brng_state, double mu, double kappa) {
   double U, V, W, Y, Z;
   double result, mod;
   int neg;
-
+  if (npy_isnan(kappa)){
+    return NPY_NAN;
+  }
   if (kappa < 1e-8) {
     return M_PI * (2 * next_double(brng_state) - 1);
   } else {
diff --git a/numpy/random/randomgen/src/distributions/distributions.h b/numpy/random/randomgen/src/distributions/distributions.h
index 5cf9c72b25bc..7ca31a16cba6 100644
--- a/numpy/random/randomgen/src/distributions/distributions.h
+++ b/numpy/random/randomgen/src/distributions/distributions.h
@@ -20,7 +20,7 @@ typedef int bool;
 
 #include "Python.h"
 #include "numpy/npy_common.h"
-#include <math.h>
+#include "numpy/npy_math.h"
 
 #ifdef _WIN32
 #if _MSC_VER == 1500
diff --git a/numpy/random/randomgen/src/legacy/distributions-boxmuller.c b/numpy/random/randomgen/src/legacy/distributions-boxmuller.c
index 768de066ced2..e96d3f19a0cb 100644
--- a/numpy/random/randomgen/src/legacy/distributions-boxmuller.c
+++ b/numpy/random/randomgen/src/legacy/distributions-boxmuller.c
@@ -12,6 +12,7 @@ double legacy_gauss(aug_brng_t *aug_state) {
     return temp;
   } else {
     double f, x1, x2, r2;
+    double f, x1, x2, r2;
 
     do {
       x1 = 2.0 * legacy_double(aug_state) - 1.0;
@@ -103,6 +104,7 @@ double legacy_chisquare(aug_brng_t *aug_state, double df) {
 
 double legacy_noncentral_chisquare(aug_brng_t *aug_state, double df,
                                    double nonc) {
+  double out;
   if (nonc == 0) {
     return legacy_chisquare(aug_state, df);
   }
@@ -112,7 +114,13 @@ double legacy_noncentral_chisquare(aug_brng_t *aug_state, double df,
     return Chi2 + n * n;
   } else {
     const long i = random_poisson(aug_state->basicrng, nonc / 2.0);
-    return legacy_chisquare(aug_state, df + 2 * i);
+    out = legacy_chisquare(aug_state, df + 2 * i);
+    /* Insert nan guard here to avoid changing the stream */
+    if (npy_isnan(nonc)){
+      return NPY_NAN;
+    } else {
+    return out;
+    }
   }
 }
 
diff --git a/numpy/random/randomgen/tests/test_against_numpy.py b/numpy/random/randomgen/tests/test_against_numpy.py
index 8a9ef2962750..b0fcdfb710f1 100644
--- a/numpy/random/randomgen/tests/test_against_numpy.py
+++ b/numpy/random/randomgen/tests/test_against_numpy.py
@@ -570,11 +570,9 @@ def test_multivariate_normal(self):
         self._is_state_common_legacy()
 
 
-funcs = [generator.exponential,
-         generator.zipf,
-         generator.chisquare,
-         generator.logseries,
-         generator.poisson]
+funcs = [randomgen.generator.zipf,
+         randomgen.generator.logseries,
+         randomgen.generator.poisson]
 ids = [f.__name__ for f in funcs]