Skip to content

Commit 2df35b3

Browse files
authored
Merge pull request #2048 from domenzain/cdf_methods
WIP: Implement more continuous log CDF functions
2 parents 269766b + 9ebaa17 commit 2df35b3

File tree

3 files changed

+126
-15
lines changed

3 files changed

+126
-15
lines changed

pymc3/distributions/continuous.py

Lines changed: 101 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# coding: utf-8
12
"""
23
pymc3.distributions
34
@@ -17,8 +18,11 @@
1718
from pymc3.theanof import floatX
1819
from . import transforms
1920

20-
from .dist_math import (bound, logpow, gammaln, betaln, std_cdf, i0, i1,
21-
alltrue_elemwise)
21+
from .dist_math import (
22+
alltrue_elemwise, betaln, bound, gammaln, i0, i1, logpow, normallogcdf,
23+
std_cdf, zvalue
24+
)
25+
2226
from .distribution import Continuous, draw_values, generate_samples, Bound
2327

2428
__all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace',
@@ -183,6 +187,17 @@ def random(self, point=None, size=None, repeat=None):
183187
def logp(self, value):
184188
return tt.zeros_like(value)
185189

190+
def logcdf(self, value):
191+
return tt.switch(
192+
tt.eq(value, -np.inf),
193+
-np.inf,
194+
tt.switch(
195+
tt.eq(value, np.inf),
196+
0,
197+
tt.log(0.5)
198+
)
199+
)
200+
186201

187202
class Normal(Continuous):
188203
R"""
@@ -247,16 +262,7 @@ def logp(self, value):
247262
sd > 0)
248263

249264
def logcdf(self, value):
250-
mu = self.mu
251-
sd = self.sd
252-
z = zvalue(value, mu=mu, sd=sd)
253-
254-
return tt.switch(
255-
tt.lt(z, -1.0),
256-
tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) -
257-
tt.sqr(z) / 2,
258-
tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
259-
)
265+
return normallogcdf(value, mu=self.mu, sd=self.sd)
260266

261267

262268
class HalfNormal(PositiveContinuous):
@@ -373,6 +379,9 @@ class Wald(PositiveContinuous):
373379
.. [Michael1976] Michael, J. R., Schucany, W. R. and Hass, R. W. (1976).
374380
Generating Random Variates Using Transformations with Multiple Roots.
375381
The American Statistician, Vol. 30, No. 2, pp. 88-90
382+
383+
.. [Giner2016] Göknur Giner, Gordon K. Smyth (2016)
384+
statmod: Probability Calculations for the Inverse Gaussian Distribution
376385
"""
377386

378387
def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs):
@@ -381,7 +390,7 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs):
381390
self.alpha = alpha = tt.as_tensor_variable(alpha)
382391
self.mu = mu = tt.as_tensor_variable(mu)
383392
self.lam = lam = tt.as_tensor_variable(lam)
384-
self.phi = phi =tt.as_tensor_variable(phi)
393+
self.phi = phi = tt.as_tensor_variable(phi)
385394

386395
self.mean = self.mu + self.alpha
387396
self.mode = self.mu * (tt.sqrt(1. + (1.5 * self.mu / self.lam)**2)
@@ -439,6 +448,46 @@ def logp(self, value):
439448
value > 0, value - alpha > 0,
440449
mu > 0, lam > 0, alpha >= 0)
441450

451+
def logcdf(self, value):
452+
# Distribution parameters
453+
mu = self.mu
454+
lam = self.lam
455+
alpha = self.alpha
456+
457+
value -= alpha
458+
q = value / mu
459+
l = lam * mu
460+
r = tt.sqrt(value * lam)
461+
462+
a = normallogcdf((q - 1.)/r)
463+
b = 2./l + normallogcdf(-(q + 1.)/r)
464+
return tt.switch(
465+
(
466+
# Left limit
467+
tt.lt(value, 0) |
468+
(tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf)) |
469+
(tt.lt(value, mu) & tt.eq(lam, 0))
470+
),
471+
-np.inf,
472+
tt.switch(
473+
(
474+
# Right limit
475+
tt.eq(value, np.inf) |
476+
(tt.eq(lam, 0) & tt.gt(value, mu)) |
477+
(tt.gt(value, 0) & tt.eq(lam, np.inf)) |
478+
# Degenerate distribution
479+
(
480+
tt.lt(mu, np.inf) &
481+
tt.eq(mu, value) &
482+
tt.eq(lam, 0)
483+
) |
484+
(tt.eq(value, 0) & tt.eq(lam, np.inf))
485+
),
486+
0,
487+
a + tt.log1p(tt.exp(b - a))
488+
)
489+
)
490+
442491

443492
def cont_fraction_beta(value, a, b, max_iter=200):
444493
'''Evaluates the continued fraction form of the incomplete Beta function.
@@ -627,6 +676,31 @@ def logp(self, value):
627676
lam = self.lam
628677
return bound(tt.log(lam) - lam * value, value > 0, lam > 0)
629678

679+
def logcdf(self, value):
680+
"""
681+
Compute the log CDF for the Exponential distribution
682+
683+
References
684+
----------
685+
.. [Machler2012] Martin Mächler (2012).
686+
"Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr
687+
package"
688+
"""
689+
value = floatX(tt.as_tensor(value))
690+
lam = self.lam
691+
a = lam * value
692+
return tt.switch(
693+
tt.le(value, 0.0),
694+
-np.inf,
695+
tt.switch(
696+
tt.le(a, tt.log(2.0)),
697+
tt.log(-tt.expm1(-a)),
698+
tt.log1p(-tt.exp(-a)),
699+
)
700+
)
701+
702+
703+
630704

631705
class Laplace(Continuous):
632706
R"""
@@ -892,6 +966,20 @@ def logp(self, value):
892966
- logpow(value, alpha + 1),
893967
value >= m, alpha > 0, m > 0)
894968

969+
def logcdf(self, value):
970+
m = self.m
971+
alpha = self.alpha
972+
arg = (m / value) ** alpha
973+
return tt.switch(
974+
tt.lt(value, m),
975+
-np.inf,
976+
tt.switch(
977+
tt.le(arg, 1e-5),
978+
tt.log1p(-arg),
979+
tt.log(1 - arg)
980+
)
981+
)
982+
895983

896984
class Cauchy(Continuous):
897985
R"""

pymc3/distributions/dist_math.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,3 +213,16 @@ def logdet(m):
213213
result = k * tt.log(2 * np.pi) - log_det
214214
result += delta.dot(T).dot(delta)
215215
return -1 / 2. * result
216+
217+
218+
def normallogcdf(value, mu=0, sd=1):
219+
"""
220+
Normal log CDF. Useful for many log CDF methods.
221+
"""
222+
z = zvalue(value, mu=mu, sd=sd)
223+
return tt.switch(
224+
tt.lt(z, -1.0),
225+
tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) -
226+
tt.sqr(z) / 2,
227+
tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
228+
)

pymc3/tests/test_distributions.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,10 @@ def test_discrete_unif(self):
385385

386386
def test_flat(self):
387387
self.pymc3_matches_scipy(Flat, Runif, {}, lambda value: 0)
388+
self.check_logcdf(Flat, Runif, {}, lambda value: np.log(0.5))
389+
# Check infinite cases individually.
390+
assert 0. == Flat.dist().logcdf(np.inf).tag.test_value
391+
assert -np.inf == Flat.dist().logcdf(-np.inf).tag.test_value
388392

389393
def test_normal(self):
390394
self.pymc3_matches_scipy(Normal, R, {'mu': R, 'sd': Rplus},
@@ -403,8 +407,10 @@ def test_chi_squared(self):
403407
lambda value, nu: sp.chi2.logpdf(value, df=nu))
404408

405409
def test_wald_scipy(self):
406-
self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus},
407-
lambda value, mu: sp.invgauss.logpdf(value, mu))
410+
self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus},
411+
lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha))
412+
self.check_logcdf(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus},
413+
lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha))
408414

409415
@pytest.mark.parametrize('value,mu,lam,phi,alpha,logp', [
410416
(.5, .001, .5, None, 0., -124500.7257914),
@@ -441,6 +447,8 @@ def test_beta(self):
441447
def test_exponential(self):
442448
self.pymc3_matches_scipy(Exponential, Rplus, {'lam': Rplus},
443449
lambda value, lam: sp.expon.logpdf(value, 0, 1 / lam))
450+
self.check_logcdf(Exponential, Rplus, {'lam': Rplus},
451+
lambda value, lam: sp.expon.logcdf(value, 0, 1 / lam))
444452

445453
def test_geometric(self):
446454
self.pymc3_matches_scipy(Geometric, Nat, {'p': Unit},
@@ -495,6 +503,8 @@ def test_inverse_gamma(self):
495503
def test_pareto(self):
496504
self.pymc3_matches_scipy(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig},
497505
lambda value, alpha, m: sp.pareto.logpdf(value, alpha, scale=m))
506+
self.check_logcdf(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig},
507+
lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m))
498508

499509
def test_weibull(self):
500510
self.pymc3_matches_scipy(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig},

0 commit comments

Comments
 (0)