Skip to content

WIP: Implement more continuous log CDF functions #2048

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 24, 2017
114 changes: 101 additions & 13 deletions pymc3/distributions/continuous.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# coding: utf-8
"""
pymc3.distributions

Expand All @@ -17,8 +18,11 @@
from pymc3.theanof import floatX
from . import transforms

from .dist_math import (bound, logpow, gammaln, betaln, std_cdf, i0, i1,
alltrue_elemwise)
from .dist_math import (
alltrue_elemwise, betaln, bound, gammaln, i0, i1, logpow, normallogcdf,
std_cdf, zvalue
)

from .distribution import Continuous, draw_values, generate_samples, Bound

__all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace',
Expand Down Expand Up @@ -183,6 +187,17 @@ def random(self, point=None, size=None, repeat=None):
def logp(self, value):
return tt.zeros_like(value)

def logcdf(self, value):
return tt.switch(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you check vector value for this op?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think all of these are element-wise operations.
When a tensor is passed as argument they would behave as expected:

>>> tt.eq(np.array([1, 2, np.nan, -np.inf, np.inf]), np.inf).eval()
array([False, False, False, False,  True], dtype=bool)

tt.eq(value, -np.inf),
-np.inf,
tt.switch(
tt.eq(value, np.inf),
0,
tt.log(0.5)
)
)


class Normal(Continuous):
R"""
Expand Down Expand Up @@ -247,16 +262,7 @@ def logp(self, value):
sd > 0)

def logcdf(self, value):
mu = self.mu
sd = self.sd
z = zvalue(value, mu=mu, sd=sd)

return tt.switch(
tt.lt(z, -1.0),
tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) -
tt.sqr(z) / 2,
tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
)
return normallogcdf(value, mu=self.mu, sd=self.sd)


class HalfNormal(PositiveContinuous):
Expand Down Expand Up @@ -373,6 +379,9 @@ class Wald(PositiveContinuous):
.. [Michael1976] Michael, J. R., Schucany, W. R. and Hass, R. W. (1976).
Generating Random Variates Using Transformations with Multiple Roots.
The American Statistician, Vol. 30, No. 2, pp. 88-90

.. [Giner2016] Göknur Giner, Gordon K. Smyth (2016)
statmod: Probability Calculations for the Inverse Gaussian Distribution
"""

def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs):
Expand All @@ -381,7 +390,7 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs):
self.alpha = alpha = tt.as_tensor_variable(alpha)
self.mu = mu = tt.as_tensor_variable(mu)
self.lam = lam = tt.as_tensor_variable(lam)
self.phi = phi =tt.as_tensor_variable(phi)
self.phi = phi = tt.as_tensor_variable(phi)

self.mean = self.mu + self.alpha
self.mode = self.mu * (tt.sqrt(1. + (1.5 * self.mu / self.lam)**2)
Expand Down Expand Up @@ -439,6 +448,46 @@ def logp(self, value):
value > 0, value - alpha > 0,
mu > 0, lam > 0, alpha >= 0)

def logcdf(self, value):
# Distribution parameters
mu = self.mu
lam = self.lam
alpha = self.alpha

value -= alpha
q = value / mu
l = lam * mu
r = tt.sqrt(value * lam)

a = normallogcdf((q - 1.)/r)
b = 2./l + normallogcdf(-(q + 1.)/r)
return tt.switch(
(
# Left limit
tt.lt(value, 0) |
(tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf)) |
(tt.lt(value, mu) & tt.eq(lam, 0))
),
-np.inf,
tt.switch(
(
# Right limit
tt.eq(value, np.inf) |
(tt.eq(lam, 0) & tt.gt(value, mu)) |
(tt.gt(value, 0) & tt.eq(lam, np.inf)) |
# Degenerate distribution
(
tt.lt(mu, np.inf) &
tt.eq(mu, value) &
tt.eq(lam, 0)
) |
(tt.eq(value, 0) & tt.eq(lam, np.inf))
),
0,
a + tt.log1p(tt.exp(b - a))
)
)


def cont_fraction_beta(value, a, b, max_iter=200):
'''Evaluates the continued fraction form of the incomplete Beta function.
Expand Down Expand Up @@ -627,6 +676,31 @@ def logp(self, value):
lam = self.lam
return bound(tt.log(lam) - lam * value, value > 0, lam > 0)

def logcdf(self, value):
"""
Compute the log CDF for the Exponential distribution

References
----------
.. [Machler2012] Martin Mächler (2012).
"Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr
package"
"""
value = floatX(tt.as_tensor(value))
lam = self.lam
a = lam * value
return tt.switch(
tt.le(value, 0.0),
-np.inf,
tt.switch(
tt.le(a, tt.log(2.0)),
tt.log(-tt.expm1(-a)),
tt.log1p(-tt.exp(-a)),
)
)




class Laplace(Continuous):
R"""
Expand Down Expand Up @@ -892,6 +966,20 @@ def logp(self, value):
- logpow(value, alpha + 1),
value >= m, alpha > 0, m > 0)

def logcdf(self, value):
m = self.m
alpha = self.alpha
arg = (m / value) ** alpha
return tt.switch(
tt.lt(value, m),
-np.inf,
tt.switch(
tt.le(arg, 1e-5),
tt.log1p(-arg),
tt.log(1 - arg)
)
)


class Cauchy(Continuous):
R"""
Expand Down
13 changes: 13 additions & 0 deletions pymc3/distributions/dist_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,16 @@ def logdet(m):
result = k * tt.log(2 * np.pi) - log_det
result += delta.dot(T).dot(delta)
return -1 / 2. * result


def normallogcdf(value, mu=0, sd=1):
"""
Normal log CDF. Useful for many log CDF methods.
"""
z = zvalue(value, mu=mu, sd=sd)
return tt.switch(
tt.lt(z, -1.0),
tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) -
tt.sqr(z) / 2,
tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
)
14 changes: 12 additions & 2 deletions pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,10 @@ def test_discrete_unif(self):

def test_flat(self):
self.pymc3_matches_scipy(Flat, Runif, {}, lambda value: 0)
self.check_logcdf(Flat, Runif, {}, lambda value: np.log(0.5))
# Check infinite cases individually.
assert 0. == Flat.dist().logcdf(np.inf).tag.test_value
assert -np.inf == Flat.dist().logcdf(-np.inf).tag.test_value

def test_normal(self):
self.pymc3_matches_scipy(Normal, R, {'mu': R, 'sd': Rplus},
Expand All @@ -403,8 +407,10 @@ def test_chi_squared(self):
lambda value, nu: sp.chi2.logpdf(value, df=nu))

def test_wald_scipy(self):
self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus},
lambda value, mu: sp.invgauss.logpdf(value, mu))
self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus},
lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha))
self.check_logcdf(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus},
lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha))

@pytest.mark.parametrize('value,mu,lam,phi,alpha,logp', [
(.5, .001, .5, None, 0., -124500.7257914),
Expand Down Expand Up @@ -441,6 +447,8 @@ def test_beta(self):
def test_exponential(self):
self.pymc3_matches_scipy(Exponential, Rplus, {'lam': Rplus},
lambda value, lam: sp.expon.logpdf(value, 0, 1 / lam))
self.check_logcdf(Exponential, Rplus, {'lam': Rplus},
lambda value, lam: sp.expon.logcdf(value, 0, 1 / lam))

def test_geometric(self):
self.pymc3_matches_scipy(Geometric, Nat, {'p': Unit},
Expand Down Expand Up @@ -495,6 +503,8 @@ def test_inverse_gamma(self):
def test_pareto(self):
self.pymc3_matches_scipy(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig},
lambda value, alpha, m: sp.pareto.logpdf(value, alpha, scale=m))
self.check_logcdf(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig},
lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m))

def test_weibull(self):
self.pymc3_matches_scipy(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig},
Expand Down