diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index ce56790ee8..9806620c3c 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1,3 +1,4 @@ +# coding: utf-8 """ pymc3.distributions @@ -17,8 +18,11 @@ from pymc3.theanof import floatX from . import transforms -from .dist_math import (bound, logpow, gammaln, betaln, std_cdf, i0, i1, - alltrue_elemwise) +from .dist_math import ( + alltrue_elemwise, betaln, bound, gammaln, i0, i1, logpow, normallogcdf, + std_cdf, zvalue +) + from .distribution import Continuous, draw_values, generate_samples, Bound __all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace', @@ -183,6 +187,17 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): return tt.zeros_like(value) + def logcdf(self, value): + return tt.switch( + tt.eq(value, -np.inf), + -np.inf, + tt.switch( + tt.eq(value, np.inf), + 0, + tt.log(0.5) + ) + ) + class Normal(Continuous): R""" @@ -247,16 +262,7 @@ def logp(self, value): sd > 0) def logcdf(self, value): - mu = self.mu - sd = self.sd - z = zvalue(value, mu=mu, sd=sd) - - return tt.switch( - tt.lt(z, -1.0), - tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - - tt.sqr(z) / 2, - tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.) - ) + return normallogcdf(value, mu=self.mu, sd=self.sd) class HalfNormal(PositiveContinuous): @@ -373,6 +379,9 @@ class Wald(PositiveContinuous): .. [Michael1976] Michael, J. R., Schucany, W. R. and Hass, R. W. (1976). Generating Random Variates Using Transformations with Multiple Roots. The American Statistician, Vol. 30, No. 2, pp. 88-90 + + .. [Giner2016] Göknur Giner, Gordon K. Smyth (2016) + statmod: Probability Calculations for the Inverse Gaussian Distribution """ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): @@ -381,7 +390,7 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): self.alpha = alpha = tt.as_tensor_variable(alpha) self.mu = mu = tt.as_tensor_variable(mu) self.lam = lam = tt.as_tensor_variable(lam) - self.phi = phi =tt.as_tensor_variable(phi) + self.phi = phi = tt.as_tensor_variable(phi) self.mean = self.mu + self.alpha self.mode = self.mu * (tt.sqrt(1. + (1.5 * self.mu / self.lam)**2) @@ -439,6 +448,46 @@ def logp(self, value): value > 0, value - alpha > 0, mu > 0, lam > 0, alpha >= 0) + def logcdf(self, value): + # Distribution parameters + mu = self.mu + lam = self.lam + alpha = self.alpha + + value -= alpha + q = value / mu + l = lam * mu + r = tt.sqrt(value * lam) + + a = normallogcdf((q - 1.)/r) + b = 2./l + normallogcdf(-(q + 1.)/r) + return tt.switch( + ( + # Left limit + tt.lt(value, 0) | + (tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf)) | + (tt.lt(value, mu) & tt.eq(lam, 0)) + ), + -np.inf, + tt.switch( + ( + # Right limit + tt.eq(value, np.inf) | + (tt.eq(lam, 0) & tt.gt(value, mu)) | + (tt.gt(value, 0) & tt.eq(lam, np.inf)) | + # Degenerate distribution + ( + tt.lt(mu, np.inf) & + tt.eq(mu, value) & + tt.eq(lam, 0) + ) | + (tt.eq(value, 0) & tt.eq(lam, np.inf)) + ), + 0, + a + tt.log1p(tt.exp(b - a)) + ) + ) + def cont_fraction_beta(value, a, b, max_iter=200): '''Evaluates the continued fraction form of the incomplete Beta function. @@ -627,6 +676,31 @@ def logp(self, value): lam = self.lam return bound(tt.log(lam) - lam * value, value > 0, lam > 0) + def logcdf(self, value): + """ + Compute the log CDF for the Exponential distribution + + References + ---------- + .. [Machler2012] Martin Mächler (2012). + "Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr + package" + """ + value = floatX(tt.as_tensor(value)) + lam = self.lam + a = lam * value + return tt.switch( + tt.le(value, 0.0), + -np.inf, + tt.switch( + tt.le(a, tt.log(2.0)), + tt.log(-tt.expm1(-a)), + tt.log1p(-tt.exp(-a)), + ) + ) + + + class Laplace(Continuous): R""" @@ -892,6 +966,20 @@ def logp(self, value): - logpow(value, alpha + 1), value >= m, alpha > 0, m > 0) + def logcdf(self, value): + m = self.m + alpha = self.alpha + arg = (m / value) ** alpha + return tt.switch( + tt.lt(value, m), + -np.inf, + tt.switch( + tt.le(arg, 1e-5), + tt.log1p(-arg), + tt.log(1 - arg) + ) + ) + class Cauchy(Continuous): R""" diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index e609ed011b..0b790ccdd1 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -213,3 +213,16 @@ def logdet(m): result = k * tt.log(2 * np.pi) - log_det result += delta.dot(T).dot(delta) return -1 / 2. * result + + +def normallogcdf(value, mu=0, sd=1): + """ + Normal log CDF. Useful for many log CDF methods. + """ + z = zvalue(value, mu=mu, sd=sd) + return tt.switch( + tt.lt(z, -1.0), + tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - + tt.sqr(z) / 2, + tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.) + ) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index fb12c0a66d..4d0f288cc7 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -385,6 +385,10 @@ def test_discrete_unif(self): def test_flat(self): self.pymc3_matches_scipy(Flat, Runif, {}, lambda value: 0) + self.check_logcdf(Flat, Runif, {}, lambda value: np.log(0.5)) + # Check infinite cases individually. + assert 0. == Flat.dist().logcdf(np.inf).tag.test_value + assert -np.inf == Flat.dist().logcdf(-np.inf).tag.test_value def test_normal(self): self.pymc3_matches_scipy(Normal, R, {'mu': R, 'sd': Rplus}, @@ -403,8 +407,10 @@ def test_chi_squared(self): lambda value, nu: sp.chi2.logpdf(value, df=nu)) def test_wald_scipy(self): - self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus}, - lambda value, mu: sp.invgauss.logpdf(value, mu)) + self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus}, + lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha)) + self.check_logcdf(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus}, + lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha)) @pytest.mark.parametrize('value,mu,lam,phi,alpha,logp', [ (.5, .001, .5, None, 0., -124500.7257914), @@ -441,6 +447,8 @@ def test_beta(self): def test_exponential(self): self.pymc3_matches_scipy(Exponential, Rplus, {'lam': Rplus}, lambda value, lam: sp.expon.logpdf(value, 0, 1 / lam)) + self.check_logcdf(Exponential, Rplus, {'lam': Rplus}, + lambda value, lam: sp.expon.logcdf(value, 0, 1 / lam)) def test_geometric(self): self.pymc3_matches_scipy(Geometric, Nat, {'p': Unit}, @@ -495,6 +503,8 @@ def test_inverse_gamma(self): def test_pareto(self): self.pymc3_matches_scipy(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig}, lambda value, alpha, m: sp.pareto.logpdf(value, alpha, scale=m)) + self.check_logcdf(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig}, + lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m)) def test_weibull(self): self.pymc3_matches_scipy(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig},