From 663241c7200a46c7e9aee75a026c0a73f491ab84 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 13 Apr 2021 16:55:22 +0200 Subject: [PATCH 1/9] Refactor several distributions --- pymc3/distributions/continuous.py | 291 +++++++---------------- pymc3/distributions/discrete.py | 112 +++------ pymc3/tests/test_distributions.py | 38 ++- pymc3/tests/test_distributions_random.py | 172 +++++++++++++- pymc3/tests/test_transforms.py | 32 ++- 5 files changed, 342 insertions(+), 303 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 3ffa68df93..6b3d5e9170 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -26,6 +26,7 @@ from aesara.assert_op import Assert from aesara.tensor.random.basic import ( BetaRV, + WeibullRV, cauchy, exponential, gamma, @@ -33,9 +34,13 @@ halfcauchy, halfnormal, invgamma, + logistic, + lognormal, normal, pareto, + triangular, uniform, + vonmises, ) from aesara.tensor.random.op import RandomVariable from aesara.tensor.var import TensorVariable @@ -110,6 +115,10 @@ class BoundedContinuous(Continuous): """Base class for bounded continuous distributions""" +class CircularContinuous(Continuous): + """Base class for circular continuous distributions""" + + @logp_transform.register(PositiveContinuous) def pos_cont_transform(op): return transforms.log @@ -123,7 +132,18 @@ def unit_cont_transform(op): @logp_transform.register(BoundedContinuous) def bounded_cont_transform(op): def transform_params(rv_var): - _, _, _, lower, upper = rv_var.owner.inputs + _, _, _, *args = rv_var.owner.inputs + if hasattr(op, "bound_args"): + # Use explicit argument position for lower and upper bound + # TODO: Enforce this for all bounded rvs? + lower = args[op.bound_args[0]] + upper = args[op.bound_args[1]] + else: + # Assume first argument is lower bound and second is upper bound + # Will fail if number of arguments is different than 2 + lower, upper = args + + print(lower.eval(), upper.eval()) lower = at.as_tensor_variable(lower) if lower is not None else None upper = at.as_tensor_variable(upper) if upper is not None else None return lower, upper @@ -131,6 +151,11 @@ def transform_params(rv_var): return transforms.interval(transform_params) +@logp_transform.register(CircularContinuous) +def circ_cont_transform(op): + return transforms.circular + + def assert_negative_support(var, label, distname, value=-1e-6): msg = f"The variable specified for {label} has negative support for {distname}, " msg += "likely making it unsuitable for this parameter." 
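For orientation, the interval transform that `bounded_cont_transform` registers above maps a value constrained to (lower, upper) onto the whole real line with a logit and back with a logistic; the bounds are read out of `rv_var.owner.inputs`, whose first three entries (rng, size, dtype in Aesara's convention) are skipped before the distribution parameters. A minimal numpy sketch of the forward/backward pair (illustrative helper names, not the pymc3 implementation):

import numpy as np

def interval_forward(x, lower, upper):
    # constrained value in (lower, upper) -> unconstrained real number
    return np.log(x - lower) - np.log(upper - x)

def interval_backward(y, lower, upper):
    # unconstrained real number -> value back in (lower, upper)
    return lower + (upper - lower) / (1.0 + np.exp(-y))

# round trip: forward then backward recovers the original value
y = interval_forward(0.25, 0.0, 1.0)
assert np.isclose(interval_backward(y, 0.0, 1.0), 0.25)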
@@ -1716,50 +1741,24 @@ class Lognormal(PositiveContinuous): x = pm.Lognormal('x', mu=2, tau=1/100) """ - def __init__(self, mu=0, sigma=None, tau=None, sd=None, *args, **kwargs): - super().__init__(*args, **kwargs) + rv_op = lognormal + + @classmethod + def dist(cls, mu=0, sigma=None, tau=None, sd=None, *args, **kwargs): if sd is not None: sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.mu = mu = at.as_tensor_variable(floatX(mu)) - self.tau = tau = at.as_tensor_variable(tau) - self.sigma = self.sd = sigma = at.as_tensor_variable(sigma) - - self.mean = at.exp(self.mu + 1.0 / (2 * self.tau)) - self.median = at.exp(self.mu) - self.mode = at.exp(self.mu - 1.0 / self.tau) - self.variance = (at.exp(1.0 / self.tau) - 1) * at.exp(2 * self.mu + 1.0 / self.tau) - - assert_negative_support(tau, "tau", "Lognormal") - assert_negative_support(sigma, "sigma", "Lognormal") - - def _random(self, mu, tau, size=None): - samples = np.random.normal(size=size) - return np.exp(mu + (tau ** -0.5) * samples) - - def random(self, point=None, size=None): - """ - Draw random values from Lognormal distribution. + mu = at.as_tensor_variable(floatX(mu)) + sigma = at.as_tensor_variable(floatX(sigma)) - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). + assert_negative_support(tau, "tau", "LogNormal") + assert_negative_support(sigma, "sigma", "LogNormal") - Returns - ------- - array - """ - # mu, tau = draw_values([self.mu, self.tau], point=point, size=size) - # return generate_samples(self._random, mu, tau, dist_shape=self.shape, size=size) + return super().dist([mu, sigma], *args, **kwargs) - def logp(self, value): + def logp(value, mu, sigma): """ Calculate log-probability of Lognormal distribution at specified value. @@ -1773,8 +1772,7 @@ def logp(self, value): ------- TensorVariable """ - mu = self.mu - tau = self.tau + tau, sigma = get_tau_sigma(tau=None, sigma=sigma) return bound( -0.5 * tau * (at.log(value) - mu) ** 2 + 0.5 * at.log(tau / (2.0 * np.pi)) @@ -1782,10 +1780,7 @@ def logp(self, value): tau > 0, ) - def _distr_parameters_for_repr(self): - return ["mu", "tau"] - - def logcdf(self, value): + def logcdf(value, mu, sigma): """ Compute the log of the cumulative distribution function for Lognormal distribution at the specified value. @@ -1800,14 +1795,11 @@ def logcdf(self, value): ------- TensorVariable """ - mu = self.mu - sigma = self.sigma - tau = self.tau return bound( normal_lcdf(mu, sigma, at.log(value)), 0 < value, - 0 < tau, + 0 < sigma, ) @@ -2646,6 +2638,17 @@ def __init__(self, nu, *args, **kwargs): super().__init__(alpha=nu / 2.0, beta=0.5, *args, **kwargs) +class WeibullBetaRV(WeibullRV): + ndims_params = [0, 0] + + @classmethod + def rng_fn(cls, rng, alpha, beta, size): + return beta * rng.weibull(alpha, size=size) + + +weibull_beta = WeibullBetaRV() + + class Weibull(PositiveContinuous): r""" Weibull log-likelihood. @@ -2691,45 +2694,19 @@ class Weibull(PositiveContinuous): Scale parameter (beta > 0). 
""" - def __init__(self, alpha, beta, *args, **kwargs): - super().__init__(*args, **kwargs) - self.alpha = alpha = at.as_tensor_variable(floatX(alpha)) - self.beta = beta = at.as_tensor_variable(floatX(beta)) - self.mean = beta * at.exp(gammaln(1 + 1.0 / alpha)) - self.median = beta * at.exp(gammaln(at.log(2))) ** (1.0 / alpha) - self.variance = beta ** 2 * at.exp(gammaln(1 + 2.0 / alpha)) - self.mean ** 2 - self.mode = at.switch( - alpha >= 1, beta * ((alpha - 1) / alpha) ** (1 / alpha), 0 - ) # Reference: https://en.wikipedia.org/wiki/Weibull_distribution + rv_op = weibull_beta + + @classmethod + def dist(cls, alpha, beta, *args, **kwargs): + alpha = at.as_tensor_variable(floatX(alpha)) + beta = at.as_tensor_variable(floatX(beta)) assert_negative_support(alpha, "alpha", "Weibull") assert_negative_support(beta, "beta", "Weibull") - def random(self, point=None, size=None): - """ - Draw random values from Weibull distribution. + return super().dist([alpha, beta], *args, **kwargs) - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) - # - # def _random(a, b, size=None): - # return b * (-np.log(np.random.uniform(size=size))) ** (1 / a) - # - # return generate_samples(_random, alpha, beta, dist_shape=self.shape, size=size) - - def logp(self, value): + def logp(value, alpha, beta): """ Calculate log-probability of Weibull distribution at specified value. @@ -2743,8 +2720,6 @@ def logp(self, value): ------- TensorVariable """ - alpha = self.alpha - beta = self.beta return bound( at.log(alpha) - at.log(beta) @@ -2755,7 +2730,7 @@ def logp(self, value): beta > 0, ) - def logcdf(self, value): + def logcdf(value, alpha, beta): r""" Compute the log of the cumulative distribution function for Weibull distribution at the specified value. @@ -2770,8 +2745,6 @@ def logcdf(self, value): ------- TensorVariable """ - alpha = self.alpha - beta = self.beta a = (value / beta) ** alpha return bound( log1mexp(a), @@ -3099,7 +3072,7 @@ def _distr_parameters_for_repr(self): return ["mu", "sigma", "nu"] -class VonMises(Continuous): +class VonMises(CircularContinuous): r""" Univariate VonMises log-likelihood. @@ -3144,38 +3117,16 @@ class VonMises(Continuous): Concentration (\frac{1}{kappa} is analogous to \sigma^2). """ - def __init__(self, mu=0.0, kappa=None, transform="circular", *args, **kwargs): - if transform == "circular": - transform = transforms.Circular() - super().__init__(transform=transform, *args, **kwargs) - self.mean = self.median = self.mode = self.mu = mu = at.as_tensor_variable(floatX(mu)) - self.kappa = kappa = at.as_tensor_variable(floatX(kappa)) + rv_op = vonmises + @classmethod + def dist(cls, mu=0.0, kappa=None, *args, **kwargs): + mu = at.as_tensor_variable(floatX(mu)) + kappa = at.as_tensor_variable(floatX(kappa)) assert_negative_support(kappa, "kappa", "VonMises") + return super().dist([mu, kappa], *args, **kwargs) - def random(self, point=None, size=None): - """ - Draw random values from VonMises distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). 
- - Returns - ------- - array - """ - # mu, kappa = draw_values([self.mu, self.kappa], point=point, size=size) - # return generate_samples( - # stats.vonmises.rvs, loc=mu, kappa=kappa, dist_shape=self.shape, size=size - # ) - - def logp(self, value): + def logp(value, mu, kappa): """ Calculate log-probability of VonMises distribution at specified value. @@ -3189,8 +3140,6 @@ def logp(self, value): ------- TensorVariable """ - mu = self.mu - kappa = self.kappa return bound( kappa * at.cos(mu - value) - (at.log(2 * np.pi) + log_i0(kappa)), kappa > 0, @@ -3198,9 +3147,6 @@ def logp(self, value): value <= np.pi, ) - def _distr_parameters_for_repr(self): - return ["mu", "kappa"] - class SkewNormal(Continuous): r""" @@ -3388,45 +3334,18 @@ class Triangular(BoundedContinuous): Upper limit. """ - def __init__(self, lower=0, upper=1, c=0.5, *args, **kwargs): - self.median = self.mean = self.c = c = at.as_tensor_variable(floatX(c)) - self.lower = lower = at.as_tensor_variable(floatX(lower)) - self.upper = upper = at.as_tensor_variable(floatX(upper)) - - super().__init__(lower=lower, upper=upper, *args, **kwargs) + rv_op = triangular + rv_op.bound_args = [0, 2] # TODO: Find less hacking solution? - def random(self, point=None, size=None): - """ - Draw random values from Triangular distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # c, lower, upper = draw_values([self.c, self.lower, self.upper], point=point, size=size) - # return generate_samples( - # self._random, c=c, lower=lower, upper=upper, size=size, dist_shape=self.shape - # ) + @classmethod + def dist(cls, lower=0, upper=1, c=0.5, *args, **kwargs): + lower = at.as_tensor_variable(floatX(lower)) + upper = at.as_tensor_variable(floatX(upper)) + c = at.as_tensor_variable(floatX(c)) - def _random(self, c, lower, upper, size): - """Wrapper around stats.triang.rvs that converts Triangular's - parametrization to scipy.triang. All parameter arrays should have - been broadcasted properly by generate_samples at this point and size is - the scipy.rvs representation. - """ - scale = upper - lower - return stats.triang.rvs(c=(c - lower) / scale, loc=lower, scale=scale, size=size) + return super().dist([lower, c, upper], *args, **kwargs) - def logp(self, value): + def logp(value, lower, c, upper): """ Calculate log-probability of Triangular distribution at specified value. @@ -3440,9 +3359,6 @@ def logp(self, value): ------- TensorVariable """ - c = self.c - lower = self.lower - upper = self.upper return bound( at.switch( at.lt(value, c), @@ -3451,9 +3367,11 @@ def logp(self, value): ), lower <= value, value <= upper, + lower <= c, + c <= upper, ) - def logcdf(self, value): + def logcdf(value, lower, c, upper): """ Compute the log of the cumulative distribution function for Triangular distribution at the specified value. @@ -3468,9 +3386,6 @@ def logcdf(self, value): ------- TensorVariable """ - c = self.c - lower = self.lower - upper = self.upper return bound( at.switch( at.le(value, lower), @@ -3485,7 +3400,8 @@ def logcdf(self, value): ), ), ), - lower <= upper, + lower <= c, + c <= upper, ) @@ -3810,39 +3726,15 @@ class Logistic(Continuous): Scale (s > 0). 
""" - def __init__(self, mu=0.0, s=1.0, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.mu = at.as_tensor_variable(floatX(mu)) - self.s = at.as_tensor_variable(floatX(s)) - - self.mean = self.mode = mu - self.variance = s ** 2 * np.pi ** 2 / 3.0 - - def random(self, point=None, size=None): - """ - Draw random values from Logistic distribution. + rv_op = logistic - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # mu, s = draw_values([self.mu, self.s], point=point, size=size) - # - # return generate_samples( - # stats.logistic.rvs, loc=mu, scale=s, dist_shape=self.shape, size=size - # ) + @classmethod + def dist(cls, mu=0.0, s=1.0, *args, **kwargs): + mu = at.as_tensor_variable(floatX(mu)) + s = at.as_tensor_variable(floatX(s)) + return super().dist([mu, s], *args, **kwargs) - def logp(self, value): + def logp(value, mu, s): """ Calculate log-probability of Logistic distribution at specified value. @@ -3856,15 +3748,13 @@ def logp(self, value): ------- TensorVariable """ - mu = self.mu - s = self.s return bound( -(value - mu) / s - at.log(s) - 2 * at.log1p(at.exp(-(value - mu) / s)), s > 0, ) - def logcdf(self, value): + def logcdf(value, mu, s): r""" Compute the log of the cumulative distribution function for Logistic distribution at the specified value. @@ -3879,8 +3769,7 @@ def logcdf(self, value): ------- TensorVariable """ - mu = self.mu - s = self.s + return bound( -log1pexp(-(value - mu) / s), 0 < s, diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 633539d178..6faccb6f49 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -21,6 +21,8 @@ bernoulli, binomial, categorical, + geometric, + hypergeometric, nbinom, poisson, ) @@ -833,32 +835,14 @@ class Geometric(Discrete): Probability of success on an individual trial (0 < p <= 1). """ - def __init__(self, p, *args, **kwargs): - super().__init__(*args, **kwargs) - self.p = p = at.as_tensor_variable(floatX(p)) - self.mode = 1 - - def random(self, point=None, size=None): - r""" - Draw random values from Geometric distribution. + rv_op = geometric - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # p = draw_values([self.p], point=point, size=size)[0] - # return generate_samples(np.random.geometric, p, dist_shape=self.shape, size=size) + @classmethod + def dist(cls, p, *args, **kwargs): + p = at.as_tensor_variable(floatX(p)) + return super().dist([p], *args, **kwargs) - def logp(self, value): + def logp(value, p): r""" Calculate log-probability of Geometric distribution at specified value. @@ -872,10 +856,14 @@ def logp(self, value): ------- TensorVariable """ - p = self.p - return bound(at.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1) + return bound( + at.log(p) + logpow(1 - p, value - 1), + 0 <= p, + p <= 1, + value >= 1, + ) - def logcdf(self, value): + def logcdf(value, p): """ Compute the log of the cumulative distribution function for Geometric distribution at the specified value. 
@@ -890,7 +878,6 @@ def logcdf(self, value): ------- TensorVariable """ - p = self.p return bound( log1mexp(-at.log1p(-p) * value), @@ -947,43 +934,16 @@ class HyperGeometric(Discrete): Number of samples drawn from the population """ - def __init__(self, N, k, n, *args, **kwargs): - super().__init__(*args, **kwargs) - self.N = intX(N) - self.k = intX(k) - self.n = intX(n) - self.mode = intX(at.floor((n + 1) * (k + 1) / (N + 2))) - - def random(self, point=None, size=None): - r""" - Draw random values from HyperGeometric distribution. - - Parameters - ---------- - point : dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size : int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - - # N, k, n = draw_values([self.N, self.k, self.n], point=point, size=size) - # return generate_samples(self._random, N, k, n, dist_shape=self.shape, size=size) + rv_op = hypergeometric - def _random(self, M, n, N, size=None): - r"""Wrapper around scipy stat's hypergeom.rvs""" - try: - samples = stats.hypergeom.rvs(M=M, n=n, N=N, size=size) - return samples - except ValueError: - raise ValueError("Domain error in arguments") + @classmethod + def dist(cls, N, k, n, *args, **kwargs): + good = at.as_tensor_variable(intX(k)) + bad = at.as_tensor_variable(intX(N - k)) + n = at.as_tensor_variable(intX(n)) + return super().dist([good, bad, n], *args, **kwargs) - def logp(self, value): + def logp(value, good, bad, n): r""" Calculate log-probability of HyperGeometric distribution at specified value. @@ -997,11 +957,8 @@ def logp(self, value): ------- TensorVariable """ - N = self.N - k = self.k - n = self.n - tot, good = N, k - bad = tot - good + + tot = good + bad result = ( betaln(good + 1, 1) + betaln(bad + 1, 1) @@ -1011,11 +968,11 @@ def logp(self, value): - betaln(tot + 1, 1) ) # value in [max(0, n - N + k), min(k, n)] - lower = at.switch(at.gt(n - N + k, 0), n - N + k, 0) - upper = at.switch(at.lt(k, n), k, n) + lower = at.switch(at.gt(n - tot + good, 0), n - tot + good, 0) + upper = at.switch(at.lt(good, n), good, n) return bound(result, lower <= value, value <= upper) - def logcdf(self, value): + def logcdf(value, good, bad, n): """ Compute the log of the cumulative distribution function for HyperGeometric distribution at the specified value. @@ -1035,23 +992,24 @@ def logcdf(self, value): f"HyperGeometric.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) + N = good + bad # TODO: Use lower upper in locgdf for smarter logsumexp? 
- N = self.N - n = self.n - k = self.k safe_lower = at.switch(at.lt(value, 0), value, 0) return bound( at.switch( at.lt(value, n), - logsumexp(self.logp(at.arange(safe_lower, value + 1)), keepdims=False), + logsumexp( + HyperGeometric.logp(at.arange(safe_lower, value + 1), good, bad, n), + keepdims=False, + ), 0, ), 0 <= value, 0 < N, - 0 <= k, + 0 <= good, 0 <= n, - k <= N, + good <= N, n <= N, ) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index c6869ec110..1db5e97e76 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -883,7 +883,6 @@ def test_uniform(self): assert logpt(invalid_dist, np.array(0.5)).eval() == -np.inf assert logcdf(invalid_dist, np.array(2.0)).eval() == -np.inf - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_triangular(self): self.check_logp( Triangular, @@ -898,12 +897,20 @@ def test_triangular(self): lambda value, c, lower, upper: sp.triang.logcdf(value, c - lower, lower, upper - lower), skip_paramdomain_outside_edge_test=True, ) - # Custom logp check for invalid value - valid_dist = Triangular.dist(lower=0, upper=1, c=2.0) - assert np.all(logpt(valid_dist, np.array([1.9, 2.0, 2.1])).tag.test_value == -np.inf) + + # Custom logp/logcdf check for values outside of domain + valid_dist = Triangular.dist(lower=0, upper=1, c=0.9, size=2) + with aesara.config.change_flags(mode=Mode("py")): + assert np.all(logpt(valid_dist, np.array([-1, 2])).eval() == -np.inf) + assert np.all(logcdf(valid_dist, np.array([-1, 2])).eval() == [-np.inf, 0]) # Custom logp / logcdf check for invalid parameters - invalid_dist = Triangular.dist(lower=1, upper=0, c=2.0) + invalid_dist = Triangular.dist(lower=1, upper=0, c=0.1) + with aesara.config.change_flags(mode=Mode("py")): + assert logpt(invalid_dist, 0.5).eval() == -np.inf + assert logcdf(invalid_dist, 2).eval() == -np.inf + + invalid_dist = Triangular.dist(lower=0, upper=1, c=2.0) with aesara.config.change_flags(mode=Mode("py")): assert logpt(invalid_dist, 0.5).eval() == -np.inf assert logcdf(invalid_dist, 2).eval() == -np.inf @@ -1121,7 +1128,6 @@ def test_exponential(self): lambda value, lam: sp.expon.logcdf(value, 0, 1 / lam), ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_geometric(self): self.check_logp( Geometric, @@ -1141,7 +1147,6 @@ def test_geometric(self): {"p": Unit}, ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_hypergeometric(self): def modified_scipy_hypergeom_logpmf(value, N, k, n): # Convert nan to -np.inf @@ -1263,7 +1268,6 @@ def test_laplace_asymmetric(self): laplace_asymmetric_logpdf, ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_lognormal(self): self.check_logp( Lognormal, @@ -1271,12 +1275,26 @@ def test_lognormal(self): {"mu": R, "tau": Rplusbig}, lambda value, mu, tau: floatX(sp.lognorm.logpdf(value, tau ** -0.5, 0, np.exp(mu))), ) + self.check_logp( + Lognormal, + Rplus, + {"mu": R, "sigma": Rplusbig}, + lambda value, mu, sigma: floatX(sp.lognorm.logpdf(value, sigma, 0, np.exp(mu))), + n_samples=5, # Just testing alternative parametrization + ) self.check_logcdf( Lognormal, Rplus, {"mu": R, "tau": Rplusbig}, lambda value, mu, tau: sp.lognorm.logcdf(value, tau ** -0.5, 0, np.exp(mu)), ) + self.check_logcdf( + Lognormal, + Rplus, + {"mu": R, "sigma": Rplusbig}, + lambda value, mu, sigma: sp.lognorm.logcdf(value, sigma, 0, np.exp(mu)), + n_samples=5, # Just testing alternative parametrization + ) @pytest.mark.xfail(reason="Distribution not 
refactored yet") def test_t(self): @@ -1413,7 +1431,6 @@ def test_pareto(self): lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m), ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_weibull_logp(self): self.check_logp( Weibull, @@ -1422,7 +1439,6 @@ def test_weibull_logp(self): lambda value, alpha, beta: sp.exponweib.logpdf(value, 1, alpha, scale=beta), ) - @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.xfail( condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to inf issues", @@ -2381,7 +2397,6 @@ def test_ex_gaussian_cdf_outside_edges(self): ) @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_vonmises(self): self.check_logp( VonMises, @@ -2402,7 +2417,6 @@ def gumbellcdf(value, mu, beta): self.check_logcdf(Gumbel, R, {"mu": R, "beta": Rplusbig}, gumbellcdf) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_logistic(self): self.check_logp( Logistic, diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 03190f3bcd..28d425a689 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -297,7 +297,7 @@ class TestAsymmetricLaplace(BaseTestCases.BaseTestCase): params = {"kappa": 1.0, "b": 1.0, "mu": 0.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") +@pytest.mark.skip(reason="This test is covered by Aesara") class TestLognormal(BaseTestCases.BaseTestCase): distribution = pm.Lognormal params = {"mu": 1.0, "tau": 1.0} @@ -315,7 +315,6 @@ class TestChiSquared(BaseTestCases.BaseTestCase): params = {"nu": 2.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestWeibull(BaseTestCases.BaseTestCase): distribution = pm.Weibull params = {"alpha": 1.0, "beta": 1.0} @@ -327,7 +326,7 @@ class TestExGaussian(BaseTestCases.BaseTestCase): params = {"mu": 0.0, "sigma": 1.0, "nu": 1.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") +@pytest.mark.skip(reason="This test is covered by Aesara") class TestVonMises(BaseTestCases.BaseTestCase): distribution = pm.VonMises params = {"mu": 0.0, "kappa": 1.0} @@ -381,13 +380,13 @@ class TestDiscreteUniform(BaseTestCases.BaseTestCase): params = {"lower": 0.0, "upper": 10.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") +@pytest.mark.skip(reason="This test is covered by Aesara") class TestGeometric(BaseTestCases.BaseTestCase): distribution = pm.Geometric params = {"p": 0.5} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") +@pytest.mark.skip(reason="This test is covered by Aesara") class TestHyperGeometric(BaseTestCases.BaseTestCase): distribution = pm.HyperGeometric params = {"N": 50, "k": 25, "n": 10} @@ -867,7 +866,7 @@ def ref_rand(size, kappa, b, mu): pymc3_random(pm.AsymmetricLaplace, {"b": Rplus, "kappa": Rplus, "mu": R}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") + @pytest.mark.skip(reason="This test is covered by Aesara") def test_lognormal(self): def ref_rand(size, mu, tau): return np.exp(mu + (tau ** -0.5) * st.norm.rvs(loc=0.0, scale=1.0, size=size)) @@ -888,14 +887,14 @@ def ref_rand(size, mu, sigma, nu): pymc3_random(pm.ExGaussian, {"mu": R, "sigma": Rplus, "nu": Rplus}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This 
distribution has not been refactored for v4") + @pytest.mark.skip(reason="This test is covered by Aesara") def test_vonmises(self): def ref_rand(size, mu, kappa): - return st.vonmises.rvs(size=size, loc=mu, kappa=kappa) + x = st.vonmises.rvs(size=size, loc=mu, kappa=kappa) pymc3_random(pm.VonMises, {"mu": R, "kappa": Rplus}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") + @pytest.mark.skip(reason="This test is covered by Aesara") def test_triangular(self): def ref_rand(size, lower, upper, c): scale = upper - lower @@ -933,11 +932,34 @@ def test_beta_binomial(self): def _beta_bin(self, n, alpha, beta, size=None): return st.binom.rvs(n, st.beta.rvs(a=alpha, b=beta, size=size)) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") + @pytest.mark.skip(reason="This test is covered by Aesara") + def test_bernoulli(self): + pymc3_random_discrete( + pm.Bernoulli, {"p": Unit}, ref_rand=lambda size, p=None: st.bernoulli.rvs(p, size=size) + ) + + @pytest.mark.skip(reason="This test is covered by Aesara") + def test_poisson(self): + pymc3_random_discrete(pm.Poisson, {"mu": Rplusbig}, size=500, ref_rand=st.poisson.rvs) + + @pytest.mark.skip(reason="This test is covered by Aesara") + def test_negative_binomial(self): + def ref_rand(size, alpha, mu): + return st.nbinom.rvs(alpha, alpha / (mu + alpha), size=size) + + pymc3_random_discrete( + pm.NegativeBinomial, + {"mu": Rplusbig, "alpha": Rplusbig}, + size=100, + fails=50, + ref_rand=ref_rand, + ) + + @pytest.mark.skip(reason="This test is covered by Aesara") def test_geometric(self): pymc3_random_discrete(pm.Geometric, {"p": Unit}, size=500, fails=50, ref_rand=nr.geometric) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") + @pytest.mark.skip(reason="This test is covered by Aesara") def test_hypergeometric(self): def ref_rand(size, N, k, n): return st.hypergeom.rvs(M=N, n=k, N=n, size=size) @@ -973,6 +995,22 @@ def ref_rand(size, q, beta): pm.DiscreteWeibull, {"q": Unit, "beta": Rplusdunif}, ref_rand=ref_rand ) + # TODO: Remove wasteful test + def test_weibull(self): + def ref_rand(size, alpha, beta): + u = np.random.uniform(size=size) + return beta * np.power(-np.log(u), 1 / alpha) + + pymc3_random(pm.Weibull, {"alpha": Rplusbig, "beta": Rplusbig}, ref_rand=ref_rand) + + @pytest.mark.skip(reason="This test is covered by Aesara") + @pytest.mark.parametrize("s", [2, 3, 4]) + def test_categorical_random(self, s): + def ref_rand(size, p): + return nr.choice(np.arange(p.shape[0]), p=p, size=size) + + pymc3_random_discrete(pm.Categorical, {"p": Simplex(s)}, ref_rand=ref_rand) + @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_constant_dist(self): def ref_rand(size, c): @@ -1940,3 +1978,115 @@ def test_with_cov_rv(self, sample_shape, dist_shape, mu_shape): prior = pm.sample_prior_predictive(samples=sample_shape) assert prior["mv"].shape == to_tuple(sample_shape) + dist_shape + + +def test_exponential_parameterization(): + test_lambda = floatX(10.0) + + exp_pymc = pm.Exponential.dist(lam=test_lambda) + (rv_scale,) = exp_pymc.owner.inputs[3:] + + npt.assert_almost_equal(rv_scale.eval(), 1 / test_lambda) + + +def test_gamma_parameterization(): + + test_alpha = floatX(10.0) + test_beta = floatX(100.0) + + gamma_pymc = pm.Gamma.dist(alpha=test_alpha, beta=test_beta) + rv_alpha, rv_inv_beta = gamma_pymc.owner.inputs[3:] + + assert np.array_equal(rv_alpha.eval(), test_alpha) + + decimal = select_by_precision(float64=6, 
float32=3) + + npt.assert_almost_equal(rv_inv_beta.eval(), 1.0 / test_beta, decimal) + + test_mu = test_alpha / test_beta + test_sigma = np.sqrt(test_mu / test_beta) + + gamma_pymc = pm.Gamma.dist(mu=test_mu, sigma=test_sigma) + rv_alpha, rv_inv_beta = gamma_pymc.owner.inputs[3:] + + npt.assert_almost_equal(rv_alpha.eval(), test_alpha, decimal) + npt.assert_almost_equal(rv_inv_beta.eval(), 1.0 / test_beta, decimal) + + +def test_geometric_parameterization(): + test_p = floatX(0.9) + + geom_pymc = pm.Geometric.dist(p=test_p) + (rv_p,) = geom_pymc.owner.inputs[3:] + + npt.assert_almost_equal(rv_p.eval(), test_p) + + +def test_hypergeometric_parameterization(): + test_N = intX(20.0) + test_k = intX(12.0) + test_n = intX(5) + + hypergeom_pymc = pm.HyperGeometric.dist(N=test_N, k=test_k, n=test_n) + rv_good, rv_bad, rv_sample = hypergeom_pymc.owner.inputs[3:] + + npt.assert_almost_equal(rv_good.eval(), test_k) + npt.assert_almost_equal(rv_bad.eval(), test_N - test_k) + npt.assert_almost_equal(rv_sample.eval(), test_n) + + +def test_logistic_parameterization(): + test_mu = floatX(1.0) + test_s = floatX(2) + + logistic_pymc = pm.Logistic.dist(mu=test_mu, s=test_s) + rv_loc, rv_scale = logistic_pymc.owner.inputs[3:] + + npt.assert_almost_equal(rv_loc.eval(), test_mu) + npt.assert_almost_equal(rv_scale.eval(), test_s) + + +def test_lognormal_parameterization(): + test_mu = floatX(1.0) + test_tau = floatX(5) + + lognormal_pymc = pm.Lognormal.dist(mu=test_mu, tau=test_tau) + rv_loc, rv_scale = lognormal_pymc.owner.inputs[3:] + + npt.assert_almost_equal(rv_loc.eval(), test_mu) + npt.assert_almost_equal(rv_scale.eval(), test_tau ** -0.5) + + +def test_triangular_parameterization(): + test_lower = floatX(0) + test_upper = floatX(1) + test_c = float(0.5) + + triang_pymc = pm.Triangular.dist(lower=test_lower, upper=test_upper, c=test_c) + rv_left, rv_mode, rv_right = triang_pymc.owner.inputs[3:] + + npt.assert_almost_equal(rv_left.eval(), test_lower) + npt.assert_almost_equal(rv_mode.eval(), test_c) + npt.assert_almost_equal(rv_right.eval(), test_upper) + + +def test_vonmises_parameterization(): + test_mu = floatX(-2.1) + test_kappa = floatX(5) + + vonmises_pymc = pm.VonMises.dist(mu=test_mu, kappa=test_kappa) + rv_mu, rv_kappa = vonmises_pymc.owner.inputs[3:] + + npt.assert_almost_equal(rv_mu.eval(), test_mu) + npt.assert_almost_equal(rv_kappa.eval(), test_kappa) + + +def test_weibull_parameterization(): + test_alpha = floatX(1) + test_beta = floatX(2) + + weibull_pymc = pm.Weibull.dist(alpha=test_alpha, beta=test_beta) + rv_alpha, rv_beta = weibull_pymc.owner.inputs[3:] + + npt.assert_almost_equal(rv_alpha.eval(), test_alpha) + npt.assert_almost_equal(rv_beta.eval(), test_beta) diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py index e040e6e244..c2a8f0eb00 100644 --- a/pymc3/tests/test_transforms.py +++ b/pymc3/tests/test_transforms.py @@ -368,10 +368,30 @@ def transform_params(rv_var): ) self.check_transform_elementwise_logp(model) + @pytest.mark.parametrize( + "lower, c, upper, size", + [ + (0.0, 1.0, 2.0, 2), + (-10, 0, 200, (2, 3)), + (np.zeros(3), np.ones(3), np.ones(3), (4, 3)), + ], + ) + def test_triangular(self, lower, c, upper, size): + def transform_params(rv_var): + _, _, _, lower, _, upper = rv_var.owner.inputs + lower = at.as_tensor_variable(lower) if lower is not None else None + upper = at.as_tensor_variable(upper) if upper is not None else None + return lower, upper + + interval = tr.Interval(transform_params) + model = self.build_model( + pm.Triangular, 
{"lower": lower, "c": c, "upper": upper}, size=size, transform=interval + ) + self.check_transform_elementwise_logp(model) + @pytest.mark.parametrize( "mu,kappa,shape", [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))] ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_vonmises(self, mu, kappa, shape): model = self.build_model( pm.VonMises, {"mu": mu, "kappa": kappa}, shape=shape, transform=tr.circular @@ -470,7 +490,6 @@ def transform_params(rv_var): @pytest.mark.parametrize( "mu,kappa,shape", [(0.0, 1.0, (2,)), (np.zeros(3), np.ones(3), (4, 3))] ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_vonmises_ordered(self, mu, kappa, shape): testval = np.sort(np.abs(np.random.rand(*shape))) model = self.build_model( @@ -514,3 +533,12 @@ def test_mvnormal_ordered(self, mu, cov, size, shape): pm.MvNormal, {"mu": mu, "cov": cov}, size=size, testval=testval, transform=tr.ordered ) self.check_vectortransform_elementwise_logp(model, vect_opt=1) + + +def test_triangular_transform(): + with pm.Model() as m: + x = pm.Triangular("x", lower=0, c=1, upper=2) + + transform = x.tag.value_var.tag.transform + assert np.isclose(transform.backward(x, -np.inf).eval(), 0) + assert np.isclose(transform.backward(x, np.inf).eval(), 2) From 6c247638c506e4b3d6eff86ff24c5e98d34ae055 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 16 Apr 2021 10:17:45 +0200 Subject: [PATCH 2/9] Fix continuous bounded default transform --- pymc3/distributions/continuous.py | 55 +++++++++++++++-------------- pymc3/distributions/distribution.py | 7 ---- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 6b3d5e9170..081f30fd08 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -66,6 +66,7 @@ from pymc3.distributions.distribution import Continuous from pymc3.distributions.special import log_i0 from pymc3.math import invlogit, log1mexp, log1pexp, logdiffexp, logit +from pymc3.util import UNSET __all__ = [ "Uniform", @@ -111,10 +112,6 @@ class UnitContinuous(Continuous): """Base class for continuous distributions on [0,1]""" -class BoundedContinuous(Continuous): - """Base class for bounded continuous distributions""" - - class CircularContinuous(Continuous): """Base class for circular continuous distributions""" @@ -129,31 +126,36 @@ def unit_cont_transform(op): return transforms.logodds -@logp_transform.register(BoundedContinuous) -def bounded_cont_transform(op): - def transform_params(rv_var): - _, _, _, *args = rv_var.owner.inputs - if hasattr(op, "bound_args"): - # Use explicit argument position for lower and upper bound - # TODO: Enforce this for all bounded rvs? 
- lower = args[op.bound_args[0]] - upper = args[op.bound_args[1]] - else: - # Assume first argument is lower bound and second is upper bound - # Will fail if number of arguments is different than 2 - lower, upper = args +@logp_transform.register(CircularContinuous) +def circ_cont_transform(op): + return transforms.circular - print(lower.eval(), upper.eval()) - lower = at.as_tensor_variable(lower) if lower is not None else None - upper = at.as_tensor_variable(upper) if upper is not None else None - return lower, upper - return transforms.interval(transform_params) +class BoundedContinuous(Continuous): + """Base class for bounded continuous distributions""" + transform_args = None -@logp_transform.register(CircularContinuous) -def circ_cont_transform(op): - return transforms.circular + def __new__(cls, *args, **kwargs): + transform = kwargs.get("transform", UNSET) + if transform is UNSET: + kwargs["transform"] = cls.default_transform() + return super().__new__(cls, *args, **kwargs) + + @classmethod + def default_transform(cls): + if cls.transform_args is None: + raise ValueError(f"Must specify transform args for {cls.__name__} bounded distribution") + + def transform_params(rv_var): + _, _, _, *args = rv_var.owner.inputs + lower = args[cls.transform_args[0]] + upper = args[cls.transform_args[1]] + lower = at.as_tensor_variable(lower) if lower is not None else None + upper = at.as_tensor_variable(upper) if upper is not None else None + return lower, upper + + return transforms.interval(transform_params) def assert_negative_support(var, label, distname, value=-1e-6): @@ -242,6 +244,7 @@ class Uniform(BoundedContinuous): Upper limit. """ rv_op = uniform + transform_args = [0, 1] # Lower, Upper @classmethod def dist(cls, lower=0, upper=1, **kwargs): @@ -3335,7 +3338,7 @@ class Triangular(BoundedContinuous): """ rv_op = triangular - rv_op.bound_args = [0, 2] # TODO: Find less hacking solution? + transform_args = [0, 2] # lower, upper @classmethod def dist(cls, lower=0, upper=1, c=0.5, *args, **kwargs): diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 59d2e185a1..a6f9922371 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -109,13 +109,6 @@ def logcdf(op, var, rvs_to_values, *dist_params, **kwargs): value_var = rvs_to_values.get(var, var) return class_logcdf(value_var, *dist_params, **kwargs) - # class_transform = clsdict.get("transform") - # if class_transform: - # - # @logp_transform.register(rv_type) - # def transform(op, *args, **kwargs): - # return class_transform(*args, **kwargs) - # Register the Aesara `RandomVariable` type as a subclass of this # `Distribution` type. 
new_cls.register(rv_type) From 62c824a308e4f1402bec676e30b5b2ac890bfa41 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 16 Apr 2021 16:38:10 +0200 Subject: [PATCH 3/9] Add 32bit xfail to Weibull logp --- pymc3/tests/test_distributions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 1db5e97e76..46a7a83e41 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1431,6 +1431,10 @@ def test_pareto(self): lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m), ) + @pytest.mark.xfail( + condition=(aesara.config.floatX == "float32"), + reason="Fails on float32 due to numerical issues", + ) def test_weibull_logp(self): self.check_logp( Weibull, From ee2e706da5bb72f9cb7260c3a682a5e71a039e4b Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 21 Apr 2021 08:41:58 +0200 Subject: [PATCH 4/9] Add TODO reminder for Weibull --- pymc3/distributions/continuous.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 081f30fd08..a300c9780e 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -2641,6 +2641,7 @@ def __init__(self, nu, *args, **kwargs): super().__init__(alpha=nu / 2.0, beta=0.5, *args, **kwargs) +# TODO: Remove this once logpt for multiplication is working! class WeibullBetaRV(WeibullRV): ndims_params = [0, 0] From c5db0e5ffa79e8d8090ef4d5cae4c60bcb7e2776 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 26 Apr 2021 10:19:23 +0200 Subject: [PATCH 5/9] Refactor random tests --- pymc3/tests/test_distributions_random.py | 362 +++++++++++------------ 1 file changed, 181 insertions(+), 181 deletions(-) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 28d425a689..cfa91ba37a 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -36,7 +36,7 @@ from pymc3.distributions.multivariate import quaddist_matrix from pymc3.distributions.shape_utils import to_tuple from pymc3.exceptions import ShapeError -from pymc3.tests.helpers import SeededTest +from pymc3.tests.helpers import SeededTest, select_by_precision from pymc3.tests.test_distributions import ( Domain, I, @@ -267,12 +267,6 @@ class TestSkewNormal(BaseTestCases.BaseTestCase): params = {"mu": 0.0, "sigma": 1.0, "alpha": 5.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestTriangular(BaseTestCases.BaseTestCase): - distribution = pm.Triangular - params = {"c": 0.5, "lower": 0.0, "upper": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestWald(BaseTestCases.BaseTestCase): distribution = pm.Wald @@ -297,12 +291,6 @@ class TestAsymmetricLaplace(BaseTestCases.BaseTestCase): params = {"kappa": 1.0, "b": 1.0, "mu": 0.0} -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestLognormal(BaseTestCases.BaseTestCase): - distribution = pm.Lognormal - params = {"mu": 1.0, "tau": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestStudentT(BaseTestCases.BaseTestCase): distribution = pm.StudentT @@ -315,29 +303,12 @@ class TestChiSquared(BaseTestCases.BaseTestCase): params = {"nu": 2.0} -class TestWeibull(BaseTestCases.BaseTestCase): - distribution = pm.Weibull - params = {"alpha": 1.0, "beta": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class 
TestExGaussian(BaseTestCases.BaseTestCase): distribution = pm.ExGaussian params = {"mu": 0.0, "sigma": 1.0, "nu": 1.0} -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestVonMises(BaseTestCases.BaseTestCase): - distribution = pm.VonMises - params = {"mu": 0.0, "kappa": 1.0} - - -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestLogistic(BaseTestCases.BaseTestCase): - distribution = pm.Logistic - params = {"mu": 0.0, "s": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestLogitNormal(BaseTestCases.BaseTestCase): distribution = pm.LogitNormal @@ -380,18 +351,6 @@ class TestDiscreteUniform(BaseTestCases.BaseTestCase): params = {"lower": 0.0, "upper": 10.0} -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestGeometric(BaseTestCases.BaseTestCase): - distribution = pm.Geometric - params = {"p": 0.5} - - -@pytest.mark.skip(reason="This test is covered by Aesara") -class TestHyperGeometric(BaseTestCases.BaseTestCase): - distribution = pm.HyperGeometric - params = {"N": 50, "k": 25, "n": 10} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestMoyal(BaseTestCases.BaseTestCase): distribution = pm.Moyal @@ -406,7 +365,7 @@ class BaseTestDistribution(SeededTest): expected_rv_op_params = dict() tests_to_run = [] size = 15 - decimal = 6 + decimal = select_by_precision(float64=6, float32=3) sizes_to_check: Optional[List] = None sizes_expected: Optional[List] = None @@ -453,15 +412,15 @@ def check_rv_size(self): sizes_expected = self.sizes_expected or [(), (), (1,), (1,), (5,), (4, 5), (2, 4, 2)] for size, expected in zip(sizes_to_check, sizes_expected): actual = change_rv_size(self.pymc_rv, size).eval().shape - assert actual == expected + assert actual == expected, f"size={size}, expected={expected}, actual={actual}" # test negative sizes raise for size in [-2, (3, -2)]: with pytest.raises(ValueError): change_rv_size(self.pymc_rv, size).eval() - # test multi-parameters sampling for univariate distributions - if self.pymc_dist.rv_op.ndim_supp == 0: + # test multi-parameters sampling for univariate distributions (with univariate inputs) + if self.pymc_dist.rv_op.ndim_supp == 0 and sum(self.pymc_dist.rv_op.ndims_params) == 0: params = { k: p * np.ones(self.repeated_params_shape) for k, p in self.pymc_dist_params.items() } @@ -511,8 +470,8 @@ def seeded_discrete_weibul_rng_fn(self): reference_dist = seeded_discrete_weibul_rng_fn tests_to_run = [ "check_pymc_params_match_rv_op", - "check_rv_size", "check_pymc_draws_match_reference", + "check_rv_size", ] @@ -521,11 +480,9 @@ class TestGumbel(BaseTestDistribution): pymc_dist_params = {"mu": 1.5, "beta": 3.0} expected_rv_op_params = {"mu": 1.5, "beta": 3.0} reference_dist_params = {"loc": 1.5, "scale": 3.0} - size = 15 reference_dist = seeded_scipy_distribution_builder("gumbel_r") tests_to_run = [ "check_pymc_params_match_rv_op", - "check_rv_size", "check_pymc_draws_match_reference", ] @@ -539,16 +496,16 @@ class TestNormal(BaseTestDistribution): reference_dist = seeded_numpy_distribution_builder("normal") tests_to_run = [ "check_pymc_params_match_rv_op", - "check_rv_size", "check_pymc_draws_match_reference", + "check_rv_size", ] class TestNormalTau(BaseTestDistribution): pymc_dist = pm.Normal tau, sigma = get_tau_sigma(tau=25.0) - pymc_dist_params = {"mu": 1.0, "sigma": sigma} - expected_rv_op_params = {"mu": 1.0, "sigma": 0.2} + pymc_dist_params = {"mu": 1.0, "tau": tau} + expected_rv_op_params = 
{"mu": 1.0, "sigma": sigma} tests_to_run = ["check_pymc_params_match_rv_op"] @@ -570,14 +527,19 @@ class TestHalfNormal(BaseTestDistribution): pymc_dist = pm.HalfNormal pymc_dist_params = {"sigma": 10.0} expected_rv_op_params = {"mean": 0, "sigma": 10.0} - tests_to_run = ["check_pymc_params_match_rv_op"] + reference_dist_params = {"loc": 0, "scale": 10.0} + reference_dist = seeded_scipy_distribution_builder("halfnorm") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + ] class TestHalfNormalTau(BaseTestDistribution): pymc_dist = pm.Normal tau, sigma = get_tau_sigma(tau=25.0) - pymc_dist_params = {"sigma": sigma} - expected_rv_op_params = {"mu": 0.0, "sigma": 0.2} + pymc_dist_params = {"tau": tau} + expected_rv_op_params = {"mu": 0.0, "sigma": sigma} tests_to_run = ["check_pymc_params_match_rv_op"] @@ -599,8 +561,8 @@ class TestBeta(BaseTestDistribution): ) tests_to_run = [ "check_pymc_params_match_rv_op", - "check_rv_size", "check_pymc_draws_match_reference", + "check_rv_size", ] @@ -617,29 +579,49 @@ class TestBetaMuSigma(BaseTestDistribution): class TestExponential(BaseTestDistribution): pymc_dist = pm.Exponential pymc_dist_params = {"lam": 10.0} - expected_rv_op_params = {"lam": 1.0 / pymc_dist_params["lam"]} - tests_to_run = ["check_pymc_params_match_rv_op"] + expected_rv_op_params = {"mu": 1.0 / pymc_dist_params["lam"]} + reference_dist_params = {"scale": 1.0 / pymc_dist_params["lam"]} + reference_dist = seeded_numpy_distribution_builder("exponential") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + ] class TestCauchy(BaseTestDistribution): pymc_dist = pm.Cauchy pymc_dist_params = {"alpha": 2.0, "beta": 5.0} expected_rv_op_params = {"alpha": 2.0, "beta": 5.0} - tests_to_run = ["check_pymc_params_match_rv_op"] + reference_dist_params = {"loc": 2.0, "scale": 5.0} + reference_dist = seeded_scipy_distribution_builder("cauchy") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + ] -class TestHalfCauchyn(BaseTestDistribution): +class TestHalfCauchy(BaseTestDistribution): pymc_dist = pm.HalfCauchy pymc_dist_params = {"beta": 5.0} expected_rv_op_params = {"alpha": 0.0, "beta": 5.0} - tests_to_run = ["check_pymc_params_match_rv_op"] + reference_dist_params = {"loc": 0.0, "scale": 5.0} + reference_dist = seeded_scipy_distribution_builder("halfcauchy") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + ] class TestGamma(BaseTestDistribution): pymc_dist = pm.Gamma pymc_dist_params = {"alpha": 2.0, "beta": 5.0} expected_rv_op_params = {"alpha": 2.0, "beta": 1 / 5.0} - tests_to_run = ["check_pymc_params_match_rv_op"] + reference_dist_params = {"shape": 2.0, "scale": 1 / 5.0} + reference_dist = seeded_numpy_distribution_builder("gamma") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + ] class TestGammaMuSigma(BaseTestDistribution): @@ -656,7 +638,12 @@ class TestInverseGamma(BaseTestDistribution): pymc_dist = pm.InverseGamma pymc_dist_params = {"alpha": 2.0, "beta": 5.0} expected_rv_op_params = {"alpha": 2.0, "beta": 5.0} - tests_to_run = ["check_pymc_params_match_rv_op"] + reference_dist_params = {"a": 2.0, "scale": 5.0} + reference_dist = seeded_scipy_distribution_builder("invgamma") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + ] class TestInverseGammaMuSigma(BaseTestDistribution): @@ -703,7 +690,12 @@ class 
TestBernoulli(BaseTestDistribution): pymc_dist = pm.Bernoulli pymc_dist_params = {"p": 0.33} expected_rv_op_params = {"p": 0.33} - tests_to_run = ["check_pymc_params_match_rv_op"] + reference_dist_params = {"p": 0.33} + reference_dist = seeded_scipy_distribution_builder("bernoulli") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + ] @pytest.mark.skip("Still not implemented") @@ -733,7 +725,16 @@ class TestMvNormal(BaseTestDistribution): } sizes_to_check = [None, (1), (2, 3)] sizes_expected = [(2,), (1, 2), (2, 3, 2)] - tests_to_run = ["check_pymc_params_match_rv_op", "check_rv_size"] + reference_dist_params = { + "mean": np.array([1.0, 2.0]), + "cov": np.array([[2.0, 0.0], [0.0, 3.5]]), + } + reference_dist = seeded_numpy_distribution_builder("multivariate_normal") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] class TestMvNormalChol(BaseTestDistribution): @@ -766,7 +767,15 @@ class TestDirichlet(BaseTestDistribution): pymc_dist = pm.Dirichlet pymc_dist_params = {"a": np.array([1.0, 2.0])} expected_rv_op_params = {"a": np.array([1.0, 2.0])} - tests_to_run = ["check_pymc_params_match_rv_op"] + sizes_to_check = [None, (1), (4,), (3, 4)] + sizes_expected = [(2,), (1, 2), (4, 2), (3, 4, 2)] + reference_dist_params = {"alpha": np.array([1.0, 2.0])} + reference_dist = seeded_numpy_distribution_builder("dirichlet") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] class TestMultinomial(BaseTestDistribution): @@ -775,16 +784,118 @@ class TestMultinomial(BaseTestDistribution): expected_rv_op_params = {"n": 85, "p": np.array([0.28, 0.62, 0.10])} sizes_to_check = [None, (1), (4,), (3, 2)] sizes_expected = [(3,), (1, 3), (4, 3), (3, 2, 3)] - tests_to_run = ["check_pymc_params_match_rv_op", "check_rv_size"] + reference_dist_params = {"n": 85, "pvals": np.array([0.28, 0.62, 0.10])} + reference_dist = seeded_numpy_distribution_builder("multinomial") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] class TestCategorical(BaseTestDistribution): pymc_dist = pm.Categorical pymc_dist_params = {"p": np.array([0.28, 0.62, 0.10])} expected_rv_op_params = {"p": np.array([0.28, 0.62, 0.10])} + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_size", + ] + + +class TestGeometric(BaseTestDistribution): + pymc_dist = pm.Geometric + pymc_dist_params = {"p": 0.9} + expected_rv_op_params = {"p": 0.9} + tests_to_run = ["check_pymc_params_match_rv_op"] + + +class TestHyperGeometric(BaseTestDistribution): + pymc_dist = pm.HyperGeometric + pymc_dist_params = {"N": 20, "k": 12, "n": 5} + expected_rv_op_params = { + "ngood": pymc_dist_params["k"], + "nbad": pymc_dist_params["N"] - pymc_dist_params["k"], + "nsample": pymc_dist_params["n"], + } + reference_dist_params = expected_rv_op_params + reference_dist = seeded_numpy_distribution_builder("hypergeometric") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + ] + + +class TestLogistic(BaseTestDistribution): + pymc_dist = pm.Logistic + pymc_dist_params = {"mu": 1.0, "s": 2.0} + expected_rv_op_params = {"mu": 1.0, "s": 2.0} + tests_to_run = ["check_pymc_params_match_rv_op"] + + +class TestLognormal(BaseTestDistribution): + pymc_dist = pm.Lognormal + pymc_dist_params = {"mu": 1.0, "sigma": 5.0} + expected_rv_op_params = {"mu": 1.0, "sigma": 5.0} + tests_to_run = 
["check_pymc_params_match_rv_op"] + + +class TestLognormalTau(BaseTestDistribution): + pymc_dist = pm.Lognormal + tau, sigma = get_tau_sigma(tau=25.0) + pymc_dist_params = {"mu": 1.0, "tau": 25.0} + expected_rv_op_params = {"mu": 1.0, "sigma": sigma} + tests_to_run = ["check_pymc_params_match_rv_op"] + + +class TestLognormalSd(BaseTestDistribution): + pymc_dist = pm.Lognormal + pymc_dist_params = {"mu": 1.0, "sd": 5.0} + expected_rv_op_params = {"mu": 1.0, "sigma": 5.0} tests_to_run = ["check_pymc_params_match_rv_op"] +class TestTriangular(BaseTestDistribution): + pymc_dist = pm.Triangular + pymc_dist_params = {"lower": 0, "upper": 1, "c": 0.5} + expected_rv_op_params = {"lower": 0, "c": 0.5, "upper": 1} + reference_dist_params = {"left": 0, "mode": 0.5, "right": 1} + reference_dist = seeded_numpy_distribution_builder("triangular") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + ] + + +class TestVonMises(BaseTestDistribution): + pymc_dist = pm.VonMises + pymc_dist_params = {"mu": -2.1, "kappa": 5} + expected_rv_op_params = {"mu": -2.1, "kappa": 5} + tests_to_run = ["check_pymc_params_match_rv_op"] + + +class TestWeibull(BaseTestDistribution): + def weibull_rng_fn(self, size, alpha, beta, std_weibull_rng_fct): + return beta * std_weibull_rng_fct(alpha, size=size) + + def seeded_weibul_rng_fn(self): + std_weibull_rng_fct = functools.partial( + getattr(np.random.RandomState, "weibull"), self.get_random_state() + ) + return functools.partial(self.weibull_rng_fn, std_weibull_rng_fct=std_weibull_rng_fct) + + pymc_dist = pm.Weibull + pymc_dist_params = {"alpha": 1.0, "beta": 2.0} + expected_rv_op_params = {"alpha": 1.0, "beta": 2.0} + reference_dist_params = {"alpha": 1.0, "beta": 2.0} + reference_dist = seeded_weibul_rng_fn + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + class TestScalarParameterSamples(SeededTest): @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_bounded(self): @@ -985,6 +1096,7 @@ def ref_rand(size, lower, upper): pm.DiscreteUniform, {"lower": -NatSmall, "upper": NatSmall}, ref_rand=ref_rand ) + @pytest.mark.skip(reason="This test is covered by Aesara") def test_discrete_weibull(self): def ref_rand(size, q, beta): u = np.random.uniform(size=size) @@ -995,7 +1107,7 @@ def ref_rand(size, q, beta): pm.DiscreteWeibull, {"q": Unit, "beta": Rplusdunif}, ref_rand=ref_rand ) - # TODO: Remove wasteful test + @pytest.mark.skip(reason="This test is covered by Aesara") def test_weibull(self): def ref_rand(size, alpha, beta): u = np.random.uniform(size=size) @@ -1227,7 +1339,7 @@ def test_dirichlet_multinomial_dist_ShapeError(self, n, a, shape, expectation): with expectation: m.random() - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") + @pytest.mark.skip(reason="This test is covered by Aesara") def test_logistic(self): def ref_rand(size, mu, s): return st.logistic.rvs(loc=mu, scale=s, size=size) @@ -1978,115 +2090,3 @@ def test_with_cov_rv(self, sample_shape, dist_shape, mu_shape): prior = pm.sample_prior_predictive(samples=sample_shape) assert prior["mv"].shape == to_tuple(sample_shape) + dist_shape - - -def test_exponential_parameterization(): - test_lambda = floatX(10.0) - - exp_pymc = pm.Exponential.dist(lam=test_lambda) - (rv_scale,) = exp_pymc.owner.inputs[3:] - - npt.assert_almost_equal(rv_scale.eval(), 1 / test_lambda) - - -def test_gamma_parameterization(): - - test_alpha = floatX(10.0) - test_beta 
= floatX(100.0) - - gamma_pymc = pm.Gamma.dist(alpha=test_alpha, beta=test_beta) - rv_alpha, rv_inv_beta = gamma_pymc.owner.inputs[3:] - - assert np.array_equal(rv_alpha.eval(), test_alpha) - - decimal = select_by_precision(float64=6, float32=3) - - npt.assert_almost_equal(rv_inv_beta.eval(), 1.0 / test_beta, decimal) - - test_mu = test_alpha / test_beta - test_sigma = np.sqrt(test_mu / test_beta) - - gamma_pymc = pm.Gamma.dist(mu=test_mu, sigma=test_sigma) - rv_alpha, rv_inv_beta = gamma_pymc.owner.inputs[3:] - - npt.assert_almost_equal(rv_alpha.eval(), test_alpha, decimal) - npt.assert_almost_equal(rv_inv_beta.eval(), 1.0 / test_beta, decimal) - - -def test_geometric_parameterization(): - test_p = floatX(0.9) - - geom_pymc = pm.Geometric.dist(p=test_p) - (rv_p,) = geom_pymc.owner.inputs[3:] - - npt.assert_almost_equal(rv_p.eval(), test_p) - - -def test_hypergeometric_parameterization(): - test_N = intX(20.0) - test_k = intX(12.0) - test_n = intX(5) - - hypergeom_pymc = pm.HyperGeometric.dist(N=test_N, k=test_k, n=test_n) - rv_good, rv_bad, rv_sample = hypergeom_pymc.owner.inputs[3:] - - npt.assert_almost_equal(rv_good.eval(), test_k) - npt.assert_almost_equal(rv_bad.eval(), test_N - test_k) - npt.assert_almost_equal(rv_sample.eval(), test_n) - - -def test_logistic_parameterization(): - test_mu = floatX(1.0) - test_s = floatX(2) - - logistic_pymc = pm.Logistic.dist(mu=test_mu, s=test_s) - rv_loc, rv_scale = logistic_pymc.owner.inputs[3:] - - npt.assert_almost_equal(rv_loc.eval(), test_mu) - npt.assert_almost_equal(rv_scale.eval(), test_s) - - -def test_lognormal_parameterization(): - test_mu = floatX(1.0) - test_tau = floatX(5) - - lognormal_pymc = pm.Lognormal.dist(mu=test_mu, tau=test_tau) - rv_loc, rv_scale = lognormal_pymc.owner.inputs[3:] - - npt.assert_almost_equal(rv_loc.eval(), test_mu) - npt.assert_almost_equal(rv_scale.eval(), test_tau ** -0.5) - - -def test_triangular_parameterization(): - test_lower = floatX(0) - test_upper = floatX(1) - test_c = float(0.5) - - triang_pymc = pm.Triangular.dist(lower=test_lower, upper=test_upper, c=test_c) - rv_left, rv_mode, rv_right = triang_pymc.owner.inputs[3:] - - npt.assert_almost_equal(rv_left.eval(), test_lower) - npt.assert_almost_equal(rv_mode.eval(), test_c) - npt.assert_almost_equal(rv_right.eval(), test_upper) - - -def test_vonmises_parameterization(): - test_mu = floatX(-2.1) - test_kappa = floatX(5) - - vonmises_pymc = pm.VonMises.dist(mu=test_mu, kappa=test_kappa) - rv_mu, rv_kappa = vonmises_pymc.owner.inputs[3:] - - npt.assert_almost_equal(rv_mu.eval(), test_mu) - npt.assert_almost_equal(rv_kappa.eval(), test_kappa) - - -def test_weibull_parameterization(): - test_alpha = floatX(1) - test_beta = floatX(2) - - weibull_pymc = pm.Weibull.dist(alpha=test_alpha, beta=test_beta) - rv_alpha, rv_beta = weibull_pymc.owner.inputs[3:] - - npt.assert_almost_equal(rv_alpha.eval(), test_alpha) - npt.assert_almost_equal(rv_beta.eval(), test_beta) From faa06001ff2d17d31e9ed23cd5e1b5fa5dd03e77 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 3 May 2021 15:44:03 +0200 Subject: [PATCH 6/9] Remove tests covered by Aesara --- pymc3/tests/test_distributions_random.py | 106 ----------------------- 1 file changed, 106 deletions(-) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index cfa91ba37a..a2e9e0cefc 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -49,10 +49,7 @@ RealMatrix, Rplus, Rplusbig, - Rplusdunif, - Runif, Simplex, - Unit, Vector, 
build_model, product, @@ -977,13 +974,6 @@ def ref_rand(size, kappa, b, mu): pymc3_random(pm.AsymmetricLaplace, {"b": Rplus, "kappa": Rplus, "mu": R}, ref_rand=ref_rand) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_lognormal(self): - def ref_rand(size, mu, tau): - return np.exp(mu + (tau ** -0.5) * st.norm.rvs(loc=0.0, scale=1.0, size=size)) - - pymc3_random(pm.Lognormal, {"mu": R, "tau": Rplusbig}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_student_t(self): def ref_rand(size, nu, mu, lam): @@ -998,24 +988,6 @@ def ref_rand(size, mu, sigma, nu): pymc3_random(pm.ExGaussian, {"mu": R, "sigma": Rplus, "nu": Rplus}, ref_rand=ref_rand) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_vonmises(self): - def ref_rand(size, mu, kappa): - x = st.vonmises.rvs(size=size, loc=mu, kappa=kappa) - - pymc3_random(pm.VonMises, {"mu": R, "kappa": Rplus}, ref_rand=ref_rand) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_triangular(self): - def ref_rand(size, lower, upper, c): - scale = upper - lower - c_ = (c - lower) / scale - return st.triang.rvs(size=size, loc=lower, scale=scale, c=c_) - - pymc3_random( - pm.Triangular, {"lower": Runif, "upper": Runif + 3, "c": Runif + 1}, ref_rand=ref_rand - ) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_flat(self): with pm.Model(): @@ -1043,50 +1015,6 @@ def test_beta_binomial(self): def _beta_bin(self, n, alpha, beta, size=None): return st.binom.rvs(n, st.beta.rvs(a=alpha, b=beta, size=size)) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_bernoulli(self): - pymc3_random_discrete( - pm.Bernoulli, {"p": Unit}, ref_rand=lambda size, p=None: st.bernoulli.rvs(p, size=size) - ) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_poisson(self): - pymc3_random_discrete(pm.Poisson, {"mu": Rplusbig}, size=500, ref_rand=st.poisson.rvs) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_negative_binomial(self): - def ref_rand(size, alpha, mu): - return st.nbinom.rvs(alpha, alpha / (mu + alpha), size=size) - - pymc3_random_discrete( - pm.NegativeBinomial, - {"mu": Rplusbig, "alpha": Rplusbig}, - size=100, - fails=50, - ref_rand=ref_rand, - ) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_geometric(self): - pymc3_random_discrete(pm.Geometric, {"p": Unit}, size=500, fails=50, ref_rand=nr.geometric) - - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_hypergeometric(self): - def ref_rand(size, N, k, n): - return st.hypergeom.rvs(M=N, n=k, N=n, size=size) - - pymc3_random_discrete( - pm.HyperGeometric, - { - "N": Domain([10, 11, 12, 13], "int64"), - "k": Domain([4, 5, 6, 7], "int64"), - "n": Domain([6, 7, 8, 9], "int64"), - }, - size=500, - fails=50, - ref_rand=ref_rand, - ) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_discrete_uniform(self): def ref_rand(size, lower, upper): @@ -1096,33 +1024,6 @@ def ref_rand(size, lower, upper): pm.DiscreteUniform, {"lower": -NatSmall, "upper": NatSmall}, ref_rand=ref_rand ) - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_discrete_weibull(self): - def ref_rand(size, q, beta): - u = np.random.uniform(size=size) - - return np.ceil(np.power(np.log(1 - u) / np.log(q), 1.0 / beta)) - 1 - - pymc3_random_discrete( - pm.DiscreteWeibull, {"q": Unit, "beta": Rplusdunif}, ref_rand=ref_rand - ) - 
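
For reference, the Weibull draws removed here rely on the inverse-CDF identity F(x) = 1 - exp(-(x / beta) ** alpha). A minimal standalone sketch checking that identity against scipy's parameterization; plain numpy/scipy only, not part of the patch, with the sample size and tolerance chosen arbitrarily for illustration:

import numpy as np
import scipy.stats as st

rng = np.random.default_rng(123)
alpha, beta = 2.5, 3.0  # shape, scale
n = 200_000

# Inverse-CDF sampling, as in the removed ref_rand helper below:
# u ~ U(0, 1)  =>  x = beta * (-log(1 - u)) ** (1 / alpha)
u = rng.uniform(size=n)
draws_icdf = beta * np.power(-np.log(1 - u), 1.0 / alpha)

# Scaling a standard Weibull draw, as the new TestWeibull reference sampler does
draws_scaled = beta * rng.weibull(alpha, size=n)

# Both agree with scipy's weibull_min, whose shape parameter is c and scale is beta
ref_mean = st.weibull_min(c=alpha, scale=beta).mean()
assert abs(draws_icdf.mean() - ref_mean) < 0.05
assert abs(draws_scaled.mean() - ref_mean) < 0.05
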
- @pytest.mark.skip(reason="This test is covered by Aesara") - def test_weibull(self): - def ref_rand(size, alpha, beta): - u = np.random.uniform(size=size) - return beta * np.power(-np.log(u), 1 / alpha) - - pymc3_random(pm.Weibull, {"alpha": Rplusbig, "beta": Rplusbig}, ref_rand=ref_rand) - - @pytest.mark.skip(reason="This test is covered by Aesara") - @pytest.mark.parametrize("s", [2, 3, 4]) - def test_categorical_random(self, s): - def ref_rand(size, p): - return nr.choice(np.arange(p.shape[0]), p=p, size=size) - - pymc3_random_discrete(pm.Categorical, {"p": Simplex(s)}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_constant_dist(self): def ref_rand(size, c): @@ -1339,13 +1240,6 @@ def test_dirichlet_multinomial_dist_ShapeError(self, n, a, shape, expectation): with expectation: m.random() - @pytest.mark.skip(reason="This test is covered by Aesara") - def test_logistic(self): - def ref_rand(size, mu, s): - return st.logistic.rvs(loc=mu, scale=s, size=size) - - pymc3_random(pm.Logistic, {"mu": R, "s": Rplus}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_logitnormal(self): def ref_rand(size, mu, sigma): From d52ae504c19fdb31e2103481b47fcc957830eb58 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 3 May 2021 16:17:37 +0200 Subject: [PATCH 7/9] Refactor BetaBinomial --- pymc3/distributions/discrete.py | 74 +++++------------------- pymc3/tests/test_distributions.py | 6 +- pymc3/tests/test_distributions_random.py | 31 ++++------ 3 files changed, 27 insertions(+), 84 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 6faccb6f49..03672ac41e 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -19,6 +19,7 @@ from aesara.tensor.random.basic import ( RandomVariable, bernoulli, + betabinom, binomial, categorical, geometric, @@ -41,7 +42,7 @@ normal_lcdf, ) from pymc3.distributions.distribution import Discrete -from pymc3.math import log1mexp, logaddexp, logsumexp, sigmoid, tround +from pymc3.math import log1mexp, logaddexp, logsumexp, sigmoid __all__ = [ "Binomial", @@ -227,58 +228,16 @@ def BetaBinom(a, b, n, x): beta > 0. """ - def __init__(self, alpha, beta, n, *args, **kwargs): - super().__init__(*args, **kwargs) - self.alpha = alpha = at.as_tensor_variable(floatX(alpha)) - self.beta = beta = at.as_tensor_variable(floatX(beta)) - self.n = n = at.as_tensor_variable(intX(n)) - self.mode = at.cast(tround(alpha / (alpha + beta)), "int8") - - def _random(self, alpha, beta, n, size=None): - size = size or () - p = stats.beta.rvs(a=alpha, b=beta, size=size).flatten() - # Sometimes scipy.beta returns nan. Ugh. - while np.any(np.isnan(p)): - i = np.isnan(p) - p[i] = stats.beta.rvs(a=alpha, b=beta, size=np.sum(i)) - # Sigh... - _n, _p, _size = np.atleast_1d(n).flatten(), p.flatten(), p.shape[0] - - quotient, remainder = divmod(_p.shape[0], _n.shape[0]) - if remainder != 0: - raise TypeError( - "n has a bad size! Was cast to {}, must evenly divide {}".format( - _n.shape[0], _p.shape[0] - ) - ) - if quotient != 1: - _n = np.tile(_n, quotient) - samples = np.reshape(stats.binom.rvs(n=_n, p=_p, size=_size), size) - return samples - - def random(self, point=None, size=None): - r""" - Draw random values from BetaBinomial distribution. 
+    rv_op = betabinom

-        Parameters
-        ----------
-        point: dict, optional
-            Dict of variable values on which random values are to be
-            conditioned (uses default point if not specified).
-        size: int, optional
-            Desired size of random sample (returns one sample if not
-            specified).
-
-        Returns
-        -------
-        array
-        """
-        # alpha, beta, n = draw_values([self.alpha, self.beta, self.n], point=point, size=size)
-        # return generate_samples(
-        #     self._random, alpha=alpha, beta=beta, n=n, dist_shape=self.shape, size=size
-        # )
+    @classmethod
+    def dist(cls, alpha, beta, n, *args, **kwargs):
+        alpha = at.as_tensor_variable(floatX(alpha))
+        beta = at.as_tensor_variable(floatX(beta))
+        n = at.as_tensor_variable(intX(n))
+        return super().dist([n, alpha, beta], **kwargs)

-    def logp(self, value):
+    def logp(value, n, alpha, beta):
         r"""
         Calculate log-probability of BetaBinomial distribution
         at specified value.
@@ -292,9 +251,6 @@ def logp(self, value):
         -------
         TensorVariable
         """
-        alpha = self.alpha
-        beta = self.beta
-        n = self.n
         return bound(
             binomln(n, value) + betaln(value + alpha, n - value + beta) - betaln(alpha, beta),
             value >= 0,
@@ -303,7 +259,7 @@ def logp(self, value):
             beta > 0,
         )

-    def logcdf(self, value):
+    def logcdf(value, n, alpha, beta):
         """
         Compute the log of the cumulative distribution function for BetaBinomial distribution
         at the specified value.
@@ -323,15 +279,15 @@ def logcdf(self, value):
                 f"BetaBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object."
             )

-        alpha = self.alpha
-        beta = self.beta
-        n = self.n
         safe_lower = at.switch(at.lt(value, 0), value, 0)

         return bound(
             at.switch(
                 at.lt(value, n),
-                logsumexp(self.logp(at.arange(safe_lower, value + 1)), keepdims=False),
+                logsumexp(
+                    BetaBinomial.logp(at.arange(safe_lower, value + 1), n, alpha, beta),
+                    keepdims=False,
+                ),
                 0,
             ),
             0 <= value,
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 46a7a83e41..397d2ba6af 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1496,8 +1496,7 @@ def test_binomial(self):
             n_samples=10,
         )

-    # Too lazy to propagate decimal parameter through the whole chain of deps
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
+    @pytest.mark.xfail(reason="checkd tests have not been refactored")
     @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
     def test_beta_binomial_distribution(self):
         self.checkd(
@@ -1506,7 +1505,6 @@ def test_beta_binomial_distribution(self):
             {"alpha": Rplus, "beta": Rplus, "n": NatSmall},
         )

-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     @pytest.mark.skipif(
         condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0"
     )
@@ -1518,7 +1516,6 @@ def test_beta_binomial_logp(self):
             lambda value, alpha, beta, n: sp.betabinom.logpmf(value, a=alpha, b=beta, n=n),
         )

-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
     @pytest.mark.skipif(
         condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0"
     )
@@ -1531,7 +1528,6 @@ def test_beta_binomial_logcdf(self):
             lambda value, alpha, beta, n: sp.betabinom.logcdf(value, a=alpha, b=beta, n=n),
         )

-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     def test_beta_binomial_selfconsistency(self):
         self.check_selfconsistency_discrete_logcdf(
             BetaBinomial,
diff --git a/pymc3/tests/test_distributions_random.py
b/pymc3/tests/test_distributions_random.py index a2e9e0cefc..bccb70bf67 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -13,7 +13,6 @@ # limitations under the License. import functools import itertools -import sys from contextlib import ExitStack as does_not_raise from typing import Callable, List, Optional @@ -312,12 +311,6 @@ class TestLogitNormal(BaseTestCases.BaseTestCase): params = {"mu": 0.0, "sigma": 1.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestBetaBinomial(BaseTestCases.BaseTestCase): - distribution = pm.BetaBinomial - params = {"n": 5, "alpha": 1.0, "beta": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestConstant(BaseTestCases.BaseTestCase): distribution = pm.Constant @@ -893,6 +886,17 @@ def seeded_weibul_rng_fn(self): ] +class TestBetaBinomial(BaseTestDistribution): + pymc_dist = pm.BetaBinomial + pymc_dist_params = {"alpha": 2.0, "beta": 1.0, "n": 5} + expected_rv_op_params = {"n": 5, "alpha": 2.0, "beta": 1.0} + reference_dist_params = {"n": 5, "a": 2.0, "b": 1.0} + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_size", + ] + + class TestScalarParameterSamples(SeededTest): @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_bounded(self): @@ -1002,19 +1006,6 @@ def test_half_flat(self): with pytest.raises(ValueError): f.random(1) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") - @pytest.mark.xfail( - sys.platform.startswith("win"), - reason="Known issue: https://github.com/pymc-devs/pymc3/pull/4269", - ) - def test_beta_binomial(self): - pymc3_random_discrete( - pm.BetaBinomial, {"n": Nat, "alpha": Rplus, "beta": Rplus}, ref_rand=self._beta_bin - ) - - def _beta_bin(self, n, alpha, beta, size=None): - return st.binom.rvs(n, st.beta.rvs(a=alpha, b=beta, size=size)) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_discrete_uniform(self): def ref_rand(size, lower, upper): From fa4bdb85f52296acb17170064349c55413cda6a4 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 5 May 2021 11:04:43 +0200 Subject: [PATCH 8/9] Add skipif for betabinom depending on scipy version --- pymc3/tests/test_distributions_random.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index bccb70bf67..1a7aa185a4 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -25,6 +25,8 @@ import scipy.stats as st from numpy.testing import assert_almost_equal, assert_array_almost_equal +from packaging.version import parse +from scipy import __version__ as scipy_version from scipy.special import expit import pymc3 as pm @@ -54,6 +56,8 @@ product, ) +SCIPY_VERSION = parse(scipy_version) + def pymc3_random( dist, @@ -886,13 +890,19 @@ def seeded_weibul_rng_fn(self): ] +@pytest.mark.skipif( + condition=(SCIPY_VERSION < parse("1.4.0")), + reason="betabinom is new in Scipy 1.4.0", +) class TestBetaBinomial(BaseTestDistribution): pymc_dist = pm.BetaBinomial pymc_dist_params = {"alpha": 2.0, "beta": 1.0, "n": 5} expected_rv_op_params = {"n": 5, "alpha": 2.0, "beta": 1.0} reference_dist_params = {"n": 5, "a": 2.0, "b": 1.0} + reference_dist = seeded_scipy_distribution_builder("betabinom") tests_to_run = [ "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", "check_rv_size", ] From 
d9f0775cee957f344153d7f50c17ba5355c35151 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 11 May 2021 09:03:11 +0200 Subject: [PATCH 9/9] Rename transform_args to bound_args_indices --- pymc3/distributions/continuous.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index a300c9780e..9e03eb6d61 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -134,7 +134,8 @@ def circ_cont_transform(op): class BoundedContinuous(Continuous): """Base class for bounded continuous distributions""" - transform_args = None + # Indices of the arguments that define the lower and upper bounds of the distribution + bound_args_indices = None def __new__(cls, *args, **kwargs): transform = kwargs.get("transform", UNSET) @@ -144,13 +145,15 @@ def __new__(cls, *args, **kwargs): @classmethod def default_transform(cls): - if cls.transform_args is None: - raise ValueError(f"Must specify transform args for {cls.__name__} bounded distribution") + if cls.bound_args_indices is None: + raise ValueError( + f"Must specify bound_args_indices for {cls.__name__} bounded distribution" + ) def transform_params(rv_var): _, _, _, *args = rv_var.owner.inputs - lower = args[cls.transform_args[0]] - upper = args[cls.transform_args[1]] + lower = args[cls.bound_args_indices[0]] + upper = args[cls.bound_args_indices[1]] lower = at.as_tensor_variable(lower) if lower is not None else None upper = at.as_tensor_variable(upper) if upper is not None else None return lower, upper @@ -244,7 +247,7 @@ class Uniform(BoundedContinuous): Upper limit. """ rv_op = uniform - transform_args = [0, 1] # Lower, Upper + bound_args_indices = (0, 1) # Lower, Upper @classmethod def dist(cls, lower=0, upper=1, **kwargs): @@ -3339,7 +3342,7 @@ class Triangular(BoundedContinuous): """ rv_op = triangular - transform_args = [0, 2] # lower, upper + bound_args_indices = (0, 2) # lower, upper @classmethod def dist(cls, lower=0, upper=1, c=0.5, *args, **kwargs):
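
Note that bound_args_indices only tells the default transform which rv_op inputs carry the bounds; the interval transform itself is untouched by this patch. A plain-numpy sketch of that forward/backward mapping; interval_forward and interval_backward are names invented for this illustration, not the pymc3.distributions.transforms API:

import numpy as np

# Logit-style bijection between the open interval (lower, upper) and the real line
def interval_forward(x, lower, upper):
    return np.log(x - lower) - np.log(upper - x)

def interval_backward(y, lower, upper):
    return lower + (upper - lower) / (1 + np.exp(-y))

# Triangular declares bound_args_indices = (0, 2): of its rv_op inputs
# (lower, c, upper), positions 0 and 2 are the bounds; c needs no transform.
lower, upper = 0.0, 1.0
x = 0.25
y = interval_forward(x, lower, upper)
assert np.isclose(interval_backward(y, lower, upper), x)
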