diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 72bb927a42..bdc816a341 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -6,6 +6,8 @@ - Track the model log-likelihood as a sampler stat for NUTS and HMC samplers (accessible as `trace.get_sampler_stats('model_logp')`) (#3134) +- Add Incomplete Beta function `incomplete_beta(a, b, value)` +- Add log CDF functions to continuous distributions: `Beta`, `Cauchy`, `ExGaussian`, `Exponential`, `Flat`, `Gumbel`, `HalfCauchy`, `HalfFlat`, `HalfNormal`, `Laplace`, `Logistic`, `Lognormal`, `Normal`, `Pareto`, `StudentT`, `Triangular`, `Uniform`, `Wald`, `Weibull`. ### Maintenance diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index e264220219..dc74c7f1d6 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1,3 +1,4 @@ +# coding: utf-8 """ A collection of common probability distributions for stochastic nodes in PyMC. @@ -19,8 +20,8 @@ from .special import log_i0 from ..math import invlogit, logit, logdiffexp from .dist_math import ( - bound, logpow, gammaln, betaln, std_cdf, alltrue_elemwise, - SplineWrapper, i0e, normal_lcdf, normal_lccdf + alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow, + normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue, ) from .distribution import Continuous, draw_values, generate_samples @@ -239,6 +240,18 @@ def _repr_latex_(self, name=None, dist=None): return r'${} \sim \text{{Uniform}}(\mathit{{lower}}={},~\mathit{{upper}}={})$'.format( name, get_variable_name(lower), get_variable_name(upper)) + def logcdf(self, value): + return tt.switch( + tt.or_(tt.lt(value, self.lower), tt.gt(value, self.upper)), + -np.inf, + tt.switch( + tt.eq(value, self.upper), + 0, + tt.log((value - self.lower)) - + tt.log((self.upper - self.lower)) + ) + ) + class Flat(Continuous): """ @@ -284,6 +297,17 @@ def _repr_latex_(self, name=None, dist=None): name = r'\text{%s}' % name return r'${} \sim \text{{Flat}}()$'.format(name) + def logcdf(self, value): + return tt.switch( + tt.eq(value, -np.inf), + -np.inf, + tt.switch( + tt.eq(value, np.inf), + 0, + tt.log(0.5) + ) + ) + class HalfFlat(PositiveContinuous): """Improper flat prior over the positive reals.""" @@ -326,6 +350,17 @@ def _repr_latex_(self, name=None, dist=None): name = r'\text{%s}' % name return r'${} \sim \text{{HalfFlat}}()$'.format(name) + def logcdf(self, value): + return tt.switch( + tt.lt(value, np.inf), + -np.inf, + tt.switch( + tt.eq(value, np.inf), + 0, + -np.inf + ) + ) + class Normal(Continuous): R""" @@ -457,6 +492,9 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(mu), get_variable_name(sd)) + def logcdf(self, value): + return normal_lcdf(self.mu, self.sd, value) + class TruncatedNormal(BoundedContinuous): R""" @@ -778,6 +816,15 @@ def _repr_latex_(self, name=None, dist=None): return r'${} \sim \text{{HalfNormal}}(\mathit{{sd}}={})$'.format(name, get_variable_name(sd)) + def logcdf(self, value): + sd = self.sd + z = zvalue(value, mu=0, sd=sd) + return tt.switch( + tt.lt(z, -1.0), + tt.log(tt.erfcx(-z / tt.sqrt(2.))) - tt.sqr(z), + tt.log1p(-tt.erfc(z / tt.sqrt(2.))) + ) + class Wald(PositiveContinuous): R""" @@ -852,6 +899,9 @@ class Wald(PositiveContinuous): .. [Michael1976] Michael, J. R., Schucany, W. R. and Hass, R. W. (1976). Generating Random Variates Using Transformations with Multiple Roots. The American Statistician, Vol. 30, No. 2, pp. 88-90 + + .. [Giner2016] Göknur Giner, Gordon K. Smyth (2016) + statmod: Probability Calculations for the Inverse Gaussian Distribution """ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): @@ -959,6 +1009,46 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(lam), get_variable_name(alpha)) + def logcdf(self, value): + # Distribution parameters + mu = self.mu + lam = self.lam + alpha = self.alpha + + value -= alpha + q = value / mu + l = lam * mu + r = tt.sqrt(value * lam) + + a = normal_lcdf(0, 1, (q - 1.)/r) + b = 2./l + normal_lcdf(0, 1, -(q + 1.)/r) + return tt.switch( + ( + # Left limit + tt.lt(value, 0) | + (tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf)) | + (tt.lt(value, mu) & tt.eq(lam, 0)) + ), + -np.inf, + tt.switch( + ( + # Right limit + tt.eq(value, np.inf) | + (tt.eq(lam, 0) & tt.gt(value, mu)) | + (tt.gt(value, 0) & tt.eq(lam, np.inf)) | + # Degenerate distribution + ( + tt.lt(mu, np.inf) & + tt.eq(mu, value) & + tt.eq(lam, 0) + ) | + (tt.eq(value, 0) & tt.eq(lam, np.inf)) + ), + 0, + a + tt.log1p(tt.exp(b - a)) + ) + ) + class Beta(UnitContinuous): R""" @@ -1101,6 +1191,20 @@ def logp(self, value): value >= 0, value <= 1, alpha > 0, beta > 0) + def logcdf(self, value): + value = floatX(tt.as_tensor(value)) + a = floatX(tt.as_tensor(self.alpha)) + b = floatX(tt.as_tensor(self.beta)) + return tt.switch( + tt.le(value, 0), + -np.inf, + tt.switch( + tt.ge(value, 1), + 0, + tt.log(incomplete_beta(a, b, value)) + ) + ) + def _repr_latex_(self, name=None, dist=None): if dist is None: dist = self @@ -1227,6 +1331,7 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(a), get_variable_name(b)) + class Exponential(PositiveContinuous): R""" Exponential log-likelihood. @@ -1322,6 +1427,30 @@ def _repr_latex_(self, name=None, dist=None): return r'${} \sim \text{{Exponential}}(\mathit{{lam}}={})$'.format(name, get_variable_name(lam)) + def logcdf(self, value): + """ + Compute the log CDF for the Exponential distribution + + References + ---------- + .. [Machler2012] Martin Mächler (2012). + "Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr + package" + """ + value = floatX(tt.as_tensor(value)) + lam = self.lam + a = lam * value + return tt.switch( + tt.le(value, 0.0), + -np.inf, + tt.switch( + tt.le(a, tt.log(2.0)), + tt.log(-tt.expm1(-a)), + tt.log1p(-tt.exp(-a)), + ) + ) + + class Laplace(Continuous): R""" Laplace log-likelihood. @@ -1424,6 +1553,20 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(mu), get_variable_name(b)) + def logcdf(self, value): + a = self.mu + b = self.b + y = (value - a) / b + return tt.switch( + tt.le(value, a), + tt.log(0.5) + y, + tt.switch( + tt.gt(y, 1), + tt.log1p(-0.5 * tt.exp(-y)), + tt.log(1 - 0.5 * tt.exp(-y)) + ) + ) + class Lognormal(PositiveContinuous): R""" @@ -1559,6 +1702,22 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(mu), get_variable_name(tau)) + def logcdf(self, value): + mu = self.mu + sd = self.sd + z = zvalue(tt.log(value), mu=mu, sd=sd) + + return tt.switch( + tt.le(value, 0), + -np.inf, + tt.switch( + tt.lt(z, -1.0), + tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - + tt.sqr(z) / 2, + tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.) + ) + ) + class StudentT(Continuous): R""" @@ -1698,6 +1857,15 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(mu), get_variable_name(lam)) + def logcdf(self, value): + nu = self.nu + mu = self.mu + sd = self.sd + t = (value - mu)/sd + sqrt_t2_nu = tt.sqrt(t**2 + nu) + x = (t + sqrt_t2_nu)/(2.0 * sqrt_t2_nu) + return tt.log(incomplete_beta(nu/2., nu/2., x)) + class Pareto(Continuous): R""" @@ -1820,6 +1988,20 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(alpha), get_variable_name(m)) + def logcdf(self, value): + m = self.m + alpha = self.alpha + arg = (m / value) ** alpha + return tt.switch( + tt.lt(value, m), + -np.inf, + tt.switch( + tt.le(arg, 1e-5), + tt.log1p(-arg), + tt.log(1 - arg) + ) + ) + class Cauchy(Continuous): R""" @@ -1930,6 +2112,11 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(alpha), get_variable_name(beta)) + def logcdf(self, value): + return tt.log( + 0.5 + tt.arctan((value - self.alpha) / self.beta) / np.pi + ) + class HalfCauchy(PositiveContinuous): R""" @@ -2030,6 +2217,15 @@ def _repr_latex_(self, name=None, dist=None): return r'${} \sim \text{{HalfCauchy}}(\mathit{{beta}}={})$'.format(name, get_variable_name(beta)) + def logcdf(self, value): + return tt.switch( + tt.le(value, 0), + -np.inf, + tt.log( + 2 * tt.arctan(value / self.beta) / np.pi + )) + + class Gamma(PositiveContinuous): R""" Gamma log-likelihood. @@ -2459,6 +2655,28 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(alpha), get_variable_name(beta)) + def logcdf(self, value): + ''' + Compute the log CDF for the Weibull distribution + + References + ---------- + .. [Machler2012] Martin Mächler (2012). + "Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr + package" + ''' + alpha = self.alpha + beta = self.beta + a = (value / beta)**alpha + return tt.switch( + tt.le(value, 0.0), + -np.inf, + tt.switch( + tt.le(a, tt.log(2.0)), + tt.log(-tt.expm1(-a)), + tt.log1p(-tt.exp(-a))) + ) + class HalfStudentT(PositiveContinuous): R""" @@ -2729,6 +2947,30 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(sigma), get_variable_name(nu)) + def logcdf(self, value): + """ + Compute the log CDF for the ExGaussian distribution + + References + ---------- + .. [Rigby2005] R.A. Rigby (2005). + "Generalized additive models for location, scale and shape" + http://dx.doi.org/10.1111/j.1467-9876.2005.00510.x + """ + mu = self.mu + sigma = self.sigma + sigma_2 = sigma**2 + nu = self.nu + z = value - mu - sigma_2/nu + return tt.switch( + tt.gt(nu, 0.05 * sigma), + tt.log(std_cdf((value - mu)/sigma) - + std_cdf(z/sigma) * tt.exp( + ((mu + (sigma_2/nu))**2 - + (mu**2) - + 2 * value * ((sigma_2)/nu))/(2 * sigma_2))), + normal_lcdf(mu, sigma, value)) + class VonMises(Continuous): R""" @@ -3093,6 +3335,24 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(lower), get_variable_name(upper)) + def logcdf(self, value): + l = self.lower + u = self.upper + c = self.c + return tt.switch( + tt.le(value, l), + -np.inf, + tt.switch( + tt.le(value, c), + tt.log(((value - l) ** 2) / ((u - l) * (c - l))), + tt.switch( + tt.lt(value, u), + tt.log1p(-((u - value) ** 2) / ((u - l) * (u - c))), + 0 + ) + ) + ) + class Gumbel(Continuous): R""" @@ -3198,6 +3458,12 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(mu), get_variable_name(beta)) + def logcdf(self, value): + beta = self.beta + mu = self.mu + + return -tt.exp(-(value - mu)/beta) + class Rice(PositiveContinuous): R""" @@ -3388,6 +3654,30 @@ def _repr_latex_(self, name=None, dist=None): get_variable_name(mu), get_variable_name(s)) + def logcdf(self, value): + """ + Compute the log CDF for the Logistic distribution + + References + ---------- + .. [Machler2012] Martin Mächler (2012). + "Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr + package" + """ + mu = self.mu + s = self.s + a = -(value - mu)/s + return - tt.switch( + tt.le(a, -37), + tt.exp(a), + tt.switch( + tt.le(a, 18), + tt.log1p(tt.exp(a)), + tt.switch( + tt.le(a, 33.3), + tt.exp(-a) + a, + a))) + class LogitNormal(UnitContinuous): R""" diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index ec87d86573..7ea1b6b28d 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -11,6 +11,8 @@ import theano from theano.scalar import UnaryScalarOp, upgrade_to_float from theano.tensor.slinalg import Cholesky +from theano.scan_module import until +from theano import scan from .special import gammaln from pymc3.theanof import floatX @@ -308,3 +310,195 @@ def random_choice(*args, **kwargs): samples = np.random.choice(k, p=p, size=size) return samples + +def zvalue(value, sd, mu): + """ + Calculate the z-value for a normal distribution. + """ + return (value - mu) / sd + + +def incomplete_beta_cfe(a, b, x, small): + '''Incomplete beta continued fraction expansions + based on Cephes library by Steve Moshier (incbet.c). + small: Choose element-wise which continued fraction expansion to use. + ''' + BIG = tt.constant(4.503599627370496e15, dtype='float64') + BIGINV = tt.constant(2.22044604925031308085e-16, dtype='float64') + THRESH = tt.constant(3. * np.MachAr().eps, dtype='float64') + + zero = tt.constant(0., dtype='float64') + one = tt.constant(1., dtype='float64') + two = tt.constant(2., dtype='float64') + + r = one + k1 = a + k3 = a + k4 = a + one + k5 = one + k8 = a + two + + k2 = tt.switch(small, a + b, b - one) + k6 = tt.switch(small, b - one, a + b) + k7 = tt.switch(small, k4, a + one) + k26update = tt.switch(small, one, -one) + x = tt.switch(small, x, x / (one - x)) + + pkm2 = zero + qkm2 = one + pkm1 = one + qkm1 = one + r = one + + def _step( + i, + pkm1, pkm2, qkm1, qkm2, + k1, k2, k3, k4, k5, k6, k7, k8, r + ): + xk = -(x * k1 * k2) / (k3 * k4) + pk = pkm1 + pkm2 * xk + qk = qkm1 + qkm2 * xk + pkm2 = pkm1 + pkm1 = pk + qkm2 = qkm1 + qkm1 = qk + + xk = (x * k5 * k6) / (k7 * k8) + pk = pkm1 + pkm2 * xk + qk = qkm1 + qkm2 * xk + pkm2 = pkm1 + pkm1 = pk + qkm2 = qkm1 + qkm1 = qk + + old_r = r + r = tt.switch(tt.eq(qk, zero), r, pk/qk) + + k1 += one + k2 += k26update + k3 += two + k4 += two + k5 += one + k6 -= k26update + k7 += two + k8 += two + + big_cond = tt.gt(tt.abs_(qk) + tt.abs_(pk), BIG) + biginv_cond = tt.or_( + tt.lt(tt.abs_(qk), BIGINV), + tt.lt(tt.abs_(pk), BIGINV) + ) + + pkm2 = tt.switch(big_cond, pkm2 * BIGINV, pkm2) + pkm1 = tt.switch(big_cond, pkm1 * BIGINV, pkm1) + qkm2 = tt.switch(big_cond, qkm2 * BIGINV, qkm2) + qkm1 = tt.switch(big_cond, qkm1 * BIGINV, qkm1) + + pkm2 = tt.switch(biginv_cond, pkm2 * BIG, pkm2) + pkm1 = tt.switch(biginv_cond, pkm1 * BIG, pkm1) + qkm2 = tt.switch(biginv_cond, qkm2 * BIG, qkm2) + qkm1 = tt.switch(biginv_cond, qkm1 * BIG, qkm1) + + return ((pkm1, pkm2, qkm1, qkm2, + k1, k2, k3, k4, k5, k6, k7, k8, r), + until(tt.abs_(old_r - r) < (THRESH * tt.abs_(r)))) + + (pkm1, pkm2, qkm1, qkm2, + k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan( + _step, + sequences=[tt.arange(0, 300)], + outputs_info=[ + e for e in + tt.cast((pkm1, pkm2, qkm1, qkm2, + k1, k2, k3, k4, k5, k6, k7, k8, r), + 'float64') + ] + ) + + return r[-1] + + +def incomplete_beta_ps(a, b, value): + '''Power series for incomplete beta + Use when b*x is small and value not too close to 1. + Based on Cephes library by Steve Moshier (incbet.c) + ''' + one = tt.constant(1, dtype='float64') + ai = one / a + u = (one - b) * value + t1 = u / (a + one) + t = u + threshold = np.MachAr().eps * ai + s = tt.constant(0, dtype='float64') + + def _step(i, t, s): + t *= (i - b) * value / i + step = t / (a + i) + s += step + return ((t, s), until(tt.abs_(step) < threshold)) + + (t, s), _ = scan( + _step, + sequences=[tt.arange(2, 302)], + outputs_info=[ + e for e in + tt.cast((t, s), + 'float64') + ] + ) + + s = s[-1] + t1 + ai + + t = ( + gammaln(a + b) - gammaln(a) - gammaln(b) + + a * tt.log(value) + + tt.log(s) + ) + return tt.exp(t) + + +def incomplete_beta(a, b, value): + '''Incomplete beta implementation + Power series and continued fraction expansions chosen for best numerical + convergence across the board based on inputs. + ''' + machep = tt.constant(np.MachAr().eps, dtype='float64') + one = tt.constant(1, dtype='float64') + w = one - value + + ps = incomplete_beta_ps(a, b, value) + + flip = tt.gt(value, (a / (a + b))) + aa, bb = a, b + a = tt.switch(flip, bb, aa) + b = tt.switch(flip, aa, bb) + xc = tt.switch(flip, value, w) + x = tt.switch(flip, w, value) + + tps = incomplete_beta_ps(a, b, x) + tps = tt.switch(tt.le(tps, machep), one - machep, one - tps) + + # Choose which continued fraction expansion for best convergence. + small = tt.lt(x * (a + b - 2.0) - (a - one), 0.0) + cfe = incomplete_beta_cfe(a, b, x, small) + w = tt.switch(small, cfe, cfe / xc) + + # Direct incomplete beta accounting for flipped a, b. + t = tt.exp( + a * tt.log(x) + b * tt.log(xc) + + gammaln(a + b) - gammaln(a) - gammaln(b) + + tt.log(w / a) + ) + + t = tt.switch( + flip, + tt.switch(tt.le(t, machep), one - machep, one - t), + t + ) + return tt.switch( + tt.and_(flip, tt.and_(tt.le((b * x), one), tt.le(x, 0.95))), + tps, + tt.switch( + tt.and_(tt.le(b * value, one), tt.le(value, 0.95)), + ps, + t)) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 0bb1c859f4..17a1cf7ecb 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -447,6 +447,19 @@ def check_logp(self, model, value, domain, paramdomains, logp_reference, decimal decimal = select_by_precision(float64=6, float32=3) assert_almost_equal(logp(pt), logp_reference(pt), decimal=decimal, err_msg=str(pt)) + def check_logcdf(self, pymc3_dist, domain, paramdomains, scipy_logcdf, decimal=None): + domains = paramdomains.copy() + domains['value'] = domain + if decimal is None: + decimal = select_by_precision(float64=6, float32=3) + for pt in product(domains, n_samples=100): + params = dict(pt) + scipy_cdf = scipy_logcdf(**params) + value = params.pop('value') + dist = pymc3_dist.dist(**params) + assert_almost_equal(dist.logcdf(value).tag.test_value, scipy_cdf, + decimal=decimal, err_msg=str(pt)) + def check_int_to_1(self, model, value, domain, paramdomains): pdf = model.fastfn(exp(model.logpt)) for pt in product(paramdomains, n_samples=10): @@ -498,11 +511,15 @@ def test_uniform(self): self.pymc3_matches_scipy( Uniform, Runif, {'lower': -Rplusunif, 'upper': Rplusunif}, lambda value, lower, upper: sp.uniform.logpdf(value, lower, upper - lower)) + self.check_logcdf(Uniform, Runif, {'lower': -Rplusunif, 'upper': Rplusunif}, + lambda value, lower, upper: sp.uniform.logcdf(value, lower, upper - lower)) def test_triangular(self): self.pymc3_matches_scipy( Triangular, Runif, {'lower': -Rplusunif, 'c': Runif, 'upper': Rplusunif}, lambda value, c, lower, upper: sp.triang.logpdf(value, c-lower, lower, upper-lower)) + self.check_logcdf(Triangular, Runif, {'lower': -Rplusunif, 'c': Runif, 'upper': Rplusunif}, + lambda value, c, lower, upper: sp.triang.logcdf(value, c-lower, lower, upper-lower)) def test_bound_normal(self): PositiveNormal = Bound(Normal, lower=0.) @@ -522,6 +539,10 @@ def test_flat(self): with Model(): x = Flat('a') assert_allclose(x.tag.test_value, 0) + self.check_logcdf(Flat, Runif, {}, lambda value: np.log(0.5)) + # Check infinite cases individually. + assert 0. == Flat.dist().logcdf(np.inf).tag.test_value + assert -np.inf == Flat.dist().logcdf(-np.inf).tag.test_value def test_half_flat(self): self.pymc3_matches_scipy(HalfFlat, Rplus, {}, lambda value: 0) @@ -529,12 +550,18 @@ def test_half_flat(self): x = HalfFlat('a', shape=2) assert_allclose(x.tag.test_value, 1) assert x.tag.test_value.shape == (2,) + self.check_logcdf(HalfFlat, Runif, {}, lambda value: -np.inf) + # Check infinite cases individually. + assert 0. == HalfFlat.dist().logcdf(np.inf).tag.test_value + assert -np.inf == HalfFlat.dist().logcdf(-np.inf).tag.test_value def test_normal(self): self.pymc3_matches_scipy(Normal, R, {'mu': R, 'sd': Rplus}, lambda value, mu, sd: sp.norm.logpdf(value, mu, sd), decimal=select_by_precision(float64=6, float32=1) ) + self.check_logcdf(Normal, R, {'mu': R, 'sd': Rplus}, + lambda value, mu, sd: sp.norm.logcdf(value, mu, sd)) def test_truncated_normal(self): def scipy_logp(value, mu, sd, lower, upper): @@ -558,16 +585,21 @@ def test_half_normal(self): lambda value, sd: sp.halfnorm.logpdf(value, scale=sd), decimal=select_by_precision(float64=6, float32=-1) ) + self.check_logcdf(HalfNormal, Rplus, {'sd': Rplus}, + lambda value, sd: sp.halfnorm.logcdf(value, scale=sd)) def test_chi_squared(self): self.pymc3_matches_scipy(ChiSquared, Rplus, {'nu': Rplusdunif}, lambda value, nu: sp.chi2.logpdf(value, df=nu)) + @pytest.mark.xfail(reason="Poor CDF in SciPy. See scipy/scipy#869 for details.") def test_wald_scipy(self): - self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus}, - lambda value, mu: sp.invgauss.logpdf(value, mu), + self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus}, + lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha), decimal=select_by_precision(float64=6, float32=1) ) + self.check_logcdf(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus}, + lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha)) @pytest.mark.parametrize('value,mu,lam,phi,alpha,logp', [ (.5, .001, .5, None, 0., -124500.7257914), @@ -599,6 +631,8 @@ def test_beta(self): self.pymc3_matches_scipy(Beta, Unit, {'alpha': Rplus, 'beta': Rplus}, lambda value, alpha, beta: sp.beta.logpdf(value, alpha, beta)) self.pymc3_matches_scipy(Beta, Unit, {'mu': Unit, 'sd': Rplus}, beta_mu_sd) + self.check_logcdf(Beta, Unit, {'alpha': Rplus, 'beta': Rplus}, + lambda value, alpha, beta: sp.beta.logcdf(value, alpha, beta)) def test_kumaraswamy(self): # Scipy does not have a built-in Kumaraswamy pdf @@ -609,6 +643,8 @@ def scipy_log_pdf(value, a, b): def test_exponential(self): self.pymc3_matches_scipy(Exponential, Rplus, {'lam': Rplus}, lambda value, lam: sp.expon.logpdf(value, 0, 1 / lam)) + self.check_logcdf(Exponential, Rplus, {'lam': Rplus}, + lambda value, lam: sp.expon.logcdf(value, 0, 1 / lam)) def test_geometric(self): self.pymc3_matches_scipy(Geometric, Nat, {'p': Unit}, @@ -623,23 +659,33 @@ def test_fun(value, mu, alpha): def test_laplace(self): self.pymc3_matches_scipy(Laplace, R, {'mu': R, 'b': Rplus}, lambda value, mu, b: sp.laplace.logpdf(value, mu, b)) + self.check_logcdf(Laplace, R, {'mu': R, 'b': Rplus}, + lambda value, mu, b: sp.laplace.logcdf(value, mu, b)) def test_lognormal(self): self.pymc3_matches_scipy( Lognormal, Rplus, {'mu': R, 'tau': Rplusbig}, lambda value, mu, tau: floatX(sp.lognorm.logpdf(value, tau**-.5, 0, np.exp(mu)))) + self.check_logcdf(Lognormal, Rplus, {'mu': R, 'tau': Rplusbig}, + lambda value, mu, tau: sp.lognorm.logcdf(value, tau**-.5, 0, np.exp(mu))) def test_t(self): self.pymc3_matches_scipy(StudentT, R, {'nu': Rplus, 'mu': R, 'lam': Rplus}, lambda value, nu, mu, lam: sp.t.logpdf(value, nu, mu, lam**-0.5)) + self.check_logcdf(StudentT, R, {'nu': Rplus, 'mu': R, 'lam': Rplus}, + lambda value, nu, mu, lam: sp.t.logcdf(value, nu, mu, lam**-0.5)) def test_cauchy(self): self.pymc3_matches_scipy(Cauchy, R, {'alpha': R, 'beta': Rplusbig}, lambda value, alpha, beta: sp.cauchy.logpdf(value, alpha, beta)) + self.check_logcdf(Cauchy, R, {'alpha': R, 'beta': Rplusbig}, + lambda value, alpha, beta: sp.cauchy.logcdf(value, alpha, beta)) def test_half_cauchy(self): self.pymc3_matches_scipy(HalfCauchy, Rplus, {'beta': Rplusbig}, lambda value, beta: sp.halfcauchy.logpdf(value, scale=beta)) + self.check_logcdf(HalfCauchy, Rplus, {'beta': Rplusbig}, + lambda value, beta: sp.halfcauchy.logcdf(value, scale=beta)) def test_gamma(self): self.pymc3_matches_scipy( @@ -659,12 +705,17 @@ def test_inverse_gamma(self): def test_pareto(self): self.pymc3_matches_scipy(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig}, lambda value, alpha, m: sp.pareto.logpdf(value, alpha, scale=m)) + self.check_logcdf(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig}, + lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m)) @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues") def test_weibull(self): self.pymc3_matches_scipy(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig}, lambda value, alpha, beta: sp.exponweib.logpdf(value, 1, alpha, scale=beta), ) + self.check_logcdf(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig}, + lambda value, alpha, beta: + sp.exponweib.logcdf(value, 1, alpha, scale=beta),) def test_half_studentt(self): # this is only testing for nu=1 (halfcauchy) @@ -1064,6 +1115,25 @@ def test_ex_gaussian(self, value, mu, sigma, nu, logp): pt = {'eg': value} assert_almost_equal(model.fastlogp(pt), logp, decimal=select_by_precision(float64=6, float32=2), err_msg=str(pt)) + @pytest.mark.parametrize('value,mu,sigma,nu,logcdf', [ + (0.5, -50.000, 0.500, 0.500, 0.0000000), + (1.0, -1.000, 0.001, 0.001, 0.0000000), + (2.0, 0.001, 1.000, 1.000, -0.2365674), + (5.0, 0.500, 2.500, 2.500, -0.2886489), + (7.5, 2.000, 5.000, 5.000, -0.5655104), + (15.0, 5.000, 7.500, 7.500, -0.4545255), + (50.0, 50.000, 10.000, 10.000, -1.433714), + (1000.0, 500.000, 10.000, 20.000, -1.573708e-11), + ]) + def test_ex_gaussian_cdf(self, value, mu, sigma, nu, logcdf): + """Log probabilities calculated using the pexGAUS function from the R package gamlss. + See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or http://www.gamlss.org/.""" + assert_almost_equal( + ExGaussian.dist(mu=mu, sigma=sigma, nu=nu).logcdf(value).tag.test_value, + logcdf, + decimal=select_by_precision(float64=6, float32=2), + err_msg=str((value, mu, sigma, nu, logcdf))) + @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") def test_vonmises(self): self.pymc3_matches_scipy( @@ -1075,10 +1145,17 @@ def gumbel(value, mu, beta): return floatX(sp.gumbel_r.logpdf(value, loc=mu, scale=beta)) self.pymc3_matches_scipy(Gumbel, R, {'mu': R, 'beta': Rplusbig}, gumbel) + def gumbellcdf(value, mu, beta): + return floatX(sp.gumbel_r.logcdf(value, loc=mu, scale=beta)) + self.check_logcdf(Gumbel, R, {'mu': R, 'beta': Rplusbig}, gumbellcdf) + def test_logistic(self): self.pymc3_matches_scipy(Logistic, R, {'mu': R, 's': Rplus}, lambda value, mu, s: sp.logistic.logpdf(value, mu, s), decimal=select_by_precision(float64=6, float32=1)) + self.check_logcdf(Logistic, R, {'mu': R, 's': Rplus}, + lambda value, mu, s: sp.logistic.logcdf(value, mu, s), + decimal=select_by_precision(float64=6, float32=1)) def test_logitnormal(self): self.pymc3_matches_scipy(LogitNormal, Unit, {'mu': R, 'sd': Rplus},