Skip to content

Add CDF methods to continuous distributions #2073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
a2c3d97
Normal CDF and test (does not yet pass)
fonnesbeck Mar 6, 2017
f8c015f
Fixed incorrect calculation in zvalue
fonnesbeck Mar 6, 2017
e4e8734
Implement log CDF for Uniform distribution
domenzain Mar 6, 2017
4fb89a0
Implement log CDF for HalfNormal distribution
domenzain Mar 6, 2017
14908eb
Remove useless operation
domenzain Mar 6, 2017
46a1cbb
Implement log CDF for Lognormal distribution
domenzain Mar 7, 2017
dc8cf20
Implement log CDF for Cauchy distribution
domenzain Mar 7, 2017
a9f1b02
Implement log CDF for Laplace distribution
domenzain Mar 7, 2017
6c76f1f
Implement log CDF for Triangular distribution
domenzain Mar 7, 2017
ae6f1da
Merge pull request #1871 from domenzain/cdf_methods
fonnesbeck Mar 15, 2017
467c2f9
Implement log CDF for Pareto distribution
domenzain Apr 18, 2017
af32ac6
Implement log CDF for Flat distribution
domenzain Apr 18, 2017
8f341dc
Implement log CDF for Wald distribution
domenzain Apr 18, 2017
fb229bf
Implement log CDF for Exponential distribution
domenzain Apr 18, 2017
8dd6e79
Test for Flat distribution infinite cases individually
domenzain Apr 20, 2017
de5826f
Merge branch 'master' into cdf_methods
fonnesbeck Apr 22, 2017
da16cce
Use decrement
domenzain Apr 22, 2017
72a25c0
Draft beta CDF
fonnesbeck Apr 23, 2017
3dc5573
Put log CDF for Normal in dist_math for reuse
domenzain Apr 24, 2017
e09b250
Use tensor floatX
domenzain Apr 24, 2017
d63e7f5
Merge branch 'cdf_methods' of github.com:domenzain/pymc3 into cdf_met…
domenzain Apr 24, 2017
269766b
Working beta CDF and test
fonnesbeck Apr 24, 2017
9ebaa17
Merge branch 'cdf_methods' into cdf_methods
fonnesbeck Apr 24, 2017
2df35b3
Merge pull request #2048 from domenzain/cdf_methods
fonnesbeck Apr 24, 2017
2e4a4c8
Replaced check_logcdf with pymc3_matches_scipy
fonnesbeck Apr 24, 2017
a54c225
Removed defaults for zvalue.
fonnesbeck Apr 24, 2017
c66ecd0
Added missing check_logcdf to test_distributions
fonnesbeck Apr 24, 2017
43990d1
Improved precision of beta logcdf
fonnesbeck Apr 24, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 235 additions & 2 deletions pymc3/distributions/continuous.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# coding: utf-8
"""
pymc3.distributions

Expand All @@ -9,13 +10,19 @@

import numpy as np
import theano.tensor as tt
from theano.scan_module import until
from theano import scan, shared, Constant
from scipy import stats
import warnings

from pymc3.theanof import floatX
from . import transforms

from .dist_math import bound, logpow, gammaln, betaln, std_cdf, i0, i1, alltrue_elemwise
from .dist_math import (
alltrue_elemwise, betaln, bound, gammaln, i0, i1, logpow, normallogcdf,
std_cdf, zvalue
)

from .distribution import Continuous, draw_values, generate_samples, Bound

__all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace',
Expand Down Expand Up @@ -151,6 +158,18 @@ def logp(self, value):
return bound(-tt.log(upper - lower),
value >= lower, value <= upper)

def logcdf(self, value):
return tt.switch(
tt.or_(tt.lt(value, self.lower), tt.gt(value, self.upper)),
-np.inf,
tt.switch(
tt.eq(value, self.upper),
0,
tt.log((value - self.lower)) -
tt.log((self.upper - self.lower))
)
)


class Flat(Continuous):
"""
Expand All @@ -168,6 +187,17 @@ def random(self, point=None, size=None, repeat=None):
def logp(self, value):
return tt.zeros_like(value)

def logcdf(self, value):
return tt.switch(
tt.eq(value, -np.inf),
-np.inf,
tt.switch(
tt.eq(value, np.inf),
0,
tt.log(0.5)
)
)


class Normal(Continuous):
R"""
Expand Down Expand Up @@ -231,6 +261,9 @@ def logp(self, value):
return bound((-tau * (value - mu)**2 + tt.log(tau / np.pi / 2.)) / 2.,
sd > 0)

def logcdf(self, value):
return normallogcdf(value, mu=self.mu, sd=self.sd)


class HalfNormal(PositiveContinuous):
R"""
Expand Down Expand Up @@ -282,6 +315,15 @@ def logp(self, value):
value >= 0,
tau > 0, sd > 0)

def logcdf(self, value):
sd = self.sd
z = zvalue(value, mu=0, sd=sd)
return tt.switch(
tt.lt(z, -1.0),
tt.log(tt.erfcx(-z / tt.sqrt(2.))) - tt.sqr(z),
tt.log1p(-tt.erfc(z / tt.sqrt(2.)))
)


class Wald(PositiveContinuous):
R"""
Expand Down Expand Up @@ -337,6 +379,9 @@ class Wald(PositiveContinuous):
.. [Michael1976] Michael, J. R., Schucany, W. R. and Hass, R. W. (1976).
Generating Random Variates Using Transformations with Multiple Roots.
The American Statistician, Vol. 30, No. 2, pp. 88-90

.. [Giner2016] Göknur Giner, Gordon K. Smyth (2016)
statmod: Probability Calculations for the Inverse Gaussian Distribution
"""

def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs):
Expand All @@ -345,7 +390,7 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs):
self.alpha = alpha = tt.as_tensor_variable(alpha)
self.mu = mu = tt.as_tensor_variable(mu)
self.lam = lam = tt.as_tensor_variable(lam)
self.phi = phi =tt.as_tensor_variable(phi)
self.phi = phi = tt.as_tensor_variable(phi)

self.mean = self.mu + self.alpha
self.mode = self.mu * (tt.sqrt(1. + (1.5 * self.mu / self.lam)**2)
Expand Down Expand Up @@ -403,6 +448,85 @@ def logp(self, value):
value > 0, value - alpha > 0,
mu > 0, lam > 0, alpha >= 0)

def logcdf(self, value):
# Distribution parameters
mu = self.mu
lam = self.lam
alpha = self.alpha

value -= alpha
q = value / mu
l = lam * mu
r = tt.sqrt(value * lam)

a = normallogcdf((q - 1.)/r)
b = 2./l + normallogcdf(-(q + 1.)/r)
return tt.switch(
(
# Left limit
tt.lt(value, 0) |
(tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf)) |
(tt.lt(value, mu) & tt.eq(lam, 0))
),
-np.inf,
tt.switch(
(
# Right limit
tt.eq(value, np.inf) |
(tt.eq(lam, 0) & tt.gt(value, mu)) |
(tt.gt(value, 0) & tt.eq(lam, np.inf)) |
# Degenerate distribution
(
tt.lt(mu, np.inf) &
tt.eq(mu, value) &
tt.eq(lam, 0)
) |
(tt.eq(value, 0) & tt.eq(lam, np.inf))
),
0,
a + tt.log1p(tt.exp(b - a))
)
)


def cont_fraction_beta(value, a, b, max_iter=200):
'''Evaluates the continued fraction form of the incomplete Beta function.
Derived from implementation by Ali Shoaib (https://goo.gl/HxjIJx).
'''

EPS = 3.0e-7
qab = a + b
qap = a + 1.0
qam = a - 1.0

def _step(i, az, bm, am, bz):

tem = i + i
d = i * (b - i) * value / ((qam + tem) * (a + tem))
d =- (a + i) * i * value / ((qap + tem) * (a + tem))

ap = az + d * am
bp = bz + d * bm

app = ap + d * az
bpp = bp + d * bz

aold = az

am = ap / bpp
bm = bp / bpp
az = app / bpp

bz = tt.constant(1.0, dtype='float64')

return (az, bm, am, bz), until(abs(az - aold) < (EPS * abs(az)))

(az, bm, am, bz), _ = scan(_step,
sequences=[tt.arange(1, max_iter)],
outputs_info=[*tt.cast((1., 1., 1., 1. - qab * value / qap), 'float64')])

return az[-1]


class Beta(UnitContinuous):
R"""
Expand Down Expand Up @@ -491,6 +615,25 @@ def logp(self, value):
value >= 0, value <= 1,
alpha > 0, beta > 0)

def logcdf(self, value):
a = self.alpha
b = self.beta
log_beta = tt.gammaln(a+b) - tt.gammaln(a) - tt.gammaln(b)
log_beta += a * tt.log(value) + b * tt.log(1 - value)
return tt.switch(
tt.le(value, 0),
-np.inf,
tt.switch(
tt.ge(value, 1),
0,
tt.switch(
tt.lt(value, (a + 1) / (a + b + 2)),
tt.log(tt.exp(log_beta) * cont_fraction_beta(value, a, b) / a),
tt.log(1. - tt.exp(log_beta) * cont_fraction_beta(1. - value, b, a) / b)
)
)
)


class Exponential(PositiveContinuous):
R"""
Expand Down Expand Up @@ -533,6 +676,31 @@ def logp(self, value):
lam = self.lam
return bound(tt.log(lam) - lam * value, value > 0, lam > 0)

def logcdf(self, value):
"""
Compute the log CDF for the Exponential distribution

References
----------
.. [Machler2012] Martin Mächler (2012).
"Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr
package"
"""
value = floatX(tt.as_tensor(value))
lam = self.lam
a = lam * value
return tt.switch(
tt.le(value, 0.0),
-np.inf,
tt.switch(
tt.le(a, tt.log(2.0)),
tt.log(-tt.expm1(-a)),
tt.log1p(-tt.exp(-a)),
)
)




class Laplace(Continuous):
R"""
Expand Down Expand Up @@ -578,6 +746,20 @@ def logp(self, value):

return -tt.log(2 * b) - abs(value - mu) / b

def logcdf(self, value):
a = self.mu
b = self.b
y = (value - a) / b
return tt.switch(
tt.le(value, a),
tt.log(0.5) + y,
tt.switch(
tt.gt(y, 1),
tt.log1p(-0.5 * tt.exp(-y)),
tt.log(1 - 0.5 * tt.exp(-y))
)
)


class Lognormal(PositiveContinuous):
R"""
Expand Down Expand Up @@ -642,6 +824,22 @@ def logp(self, value):
- tt.log(value),
tau > 0)

def logcdf(self, value):
mu = self.mu
sd = self.sd
z = zvalue(tt.log(value), mu=mu, sd=sd)

return tt.switch(
tt.le(value, 0),
-np.inf,
tt.switch(
tt.lt(z, -1.0),
tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) -
tt.sqr(z) / 2,
tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
)
)


class StudentT(Continuous):
R"""
Expand Down Expand Up @@ -768,6 +966,20 @@ def logp(self, value):
- logpow(value, alpha + 1),
value >= m, alpha > 0, m > 0)

def logcdf(self, value):
m = self.m
alpha = self.alpha
arg = (m / value) ** alpha
return tt.switch(
tt.lt(value, m),
-np.inf,
tt.switch(
tt.le(arg, 1e-5),
tt.log1p(-arg),
tt.log(1 - arg)
)
)


class Cauchy(Continuous):
R"""
Expand Down Expand Up @@ -820,6 +1032,9 @@ def logp(self, value):
- tt.log1p(((value - alpha) / beta)**2),
beta > 0)

def logcdf (self, value):
return tt.log(0.5 + tt.arctan ((value - self.alpha) / self.beta) / np.pi)


class HalfCauchy(PositiveContinuous):
R"""
Expand Down Expand Up @@ -1346,3 +1561,21 @@ def logp(self, value):
tt.switch(tt.eq(value, c), tt.log(2 / (upper - lower)),
tt.switch(alltrue_elemwise([c < value, value <= upper]),
tt.log(2 * (upper - value) / ((upper - lower) * (upper - c))),np.inf)))

def logcdf(self, value):
l = self.lower
u = self.upper
c = self.c
return tt.switch(
tt.le(value, l),
-np.inf,
tt.switch(
tt.le(value, c),
tt.log(((value - l) ** 2) / ((u - l) * (c - l))),
tt.switch(
tt.lt(value, u),
tt.log1p(-((u - value) ** 2) / ((u - l) * (u - c))),
0
)
)
)
20 changes: 20 additions & 0 deletions pymc3/distributions/dist_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,23 @@ def logdet(m):
result = k * tt.log(2 * np.pi) - log_det
result += delta.dot(T).dot(delta)
return -1 / 2. * result


def zvalue(value, sd, mu):
"""
Calculate the z-value for a normal distribution.
"""
return (value - mu) / sd


def normallogcdf(value, mu=0, sd=1):
"""
Normal log CDF. Useful for many log CDF methods.
"""
z = zvalue(value, mu=mu, sd=sd)
return tt.switch(
tt.lt(z, -1.0),
tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) -
tt.sqr(z) / 2,
tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
)
Loading