@@ -23,16 +23,15 @@
 from aesara.tensor import TensorVariable
 from aesara.tensor.random.op import RandomVariable
 
-from pymc.aesaraf import change_rv_size, take_along_axis
+from pymc.aesaraf import change_rv_size
 from pymc.distributions.continuous import Normal, get_tau_sigma
 from pymc.distributions.dist_math import check_parameters
 from pymc.distributions.distribution import Discrete, Distribution, SymbolicDistribution
 from pymc.distributions.logprob import logp
 from pymc.distributions.shape_utils import to_tuple
-from pymc.math import logsumexp
 from pymc.util import check_dist_not_registered
 
-__all__ = ["Mixture", "NormalMixture", "MixtureSameFamily"]
+__all__ = ["Mixture", "NormalMixture"]
 
 
 def all_discrete(comp_dists):
@@ -468,235 +467,3 @@ def dist(cls, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), **kwargs):
         _, sigma = get_tau_sigma(tau=tau, sigma=sigma)
 
         return Mixture.dist(w, Normal.dist(mu, sigma=sigma, size=comp_shape), **kwargs)
-
-
-class MixtureSameFamily(Distribution):
-    R"""
-    Mixture Same Family log-likelihood
-    This distribution handles mixtures of multivariate distributions in a vectorized
-    manner. It is used over Mixture distribution when the mixture components are not
-    present on the last axis of components' distribution.
-
-    .. math::f(x \mid w, \theta) = \sum_{i = 1}^n w_i f_i(x \mid \theta_i)\textrm{ Along mixture\_axis}
-
-    ========  ============================================
-    Support   :math:`\textrm{support}(f)`
-    Mean      :math:`w\mu`
-    ========  ============================================
-
-    Parameters
-    ----------
-    w: array of floats
-        w >= 0 and w <= 1
-        the mixture weights
-    comp_dists: PyMC distribution (e.g. `pm.Multinomial.dist(...)`)
-        The `comp_dists` can be scalar or multidimensional distribution.
-        Assuming its shape to be - (i_0, ..., i_n, mixture_axis, i_n+1, ..., i_N),
-        the `mixture_axis` is consumed resulting in the shape of mixture as -
-        (i_0, ..., i_n, i_n+1, ..., i_N).
-    mixture_axis: int, default = -1
-        Axis representing the mixture components to be reduced in the mixture.
-
-    Notes
-    -----
-    The default behaviour resembles Mixture distribution wherein the last axis of component
-    distribution is reduced.
-    """
-
-    def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs):
-        self.w = at.as_tensor_variable(w)
-        if not isinstance(comp_dists, Distribution):
-            raise TypeError(
-                "The MixtureSameFamily distribution only accepts Distribution "
-                f"instances as its components. Got {type(comp_dists)} instead."
-            )
-        self.comp_dists = comp_dists
-        if mixture_axis < 0:
-            mixture_axis = len(comp_dists.shape) + mixture_axis
-            if mixture_axis < 0:
-                raise ValueError(
-                    "`mixture_axis` is supposed to be in shape of components' distribution. "
-                    f"Got {mixture_axis + len(comp_dists.shape)} axis instead out of the bounds."
-                )
-        comp_shape = to_tuple(comp_dists.shape)
-        self.shape = comp_shape[:mixture_axis] + comp_shape[mixture_axis + 1 :]
-        self.mixture_axis = mixture_axis
-        kwargs.setdefault("dtype", self.comp_dists.dtype)
-
-        # Compute the mode so we don't always have to pass a initval
-        defaults = kwargs.pop("defaults", [])
-        event_shape = self.comp_dists.shape[mixture_axis + 1 :]
-        _w = at.shape_padleft(
-            at.shape_padright(w, len(event_shape)),
-            len(self.comp_dists.shape) - w.ndim - len(event_shape),
-        )
-        mode = take_along_axis(
-            self.comp_dists.mode,
-            at.argmax(_w, keepdims=True),
-            axis=mixture_axis,
-        )
-        self.mode = mode[(..., 0) + (slice(None),) * len(event_shape)]
-
-        if not all_discrete(comp_dists):
-            mean = at.as_tensor_variable(self.comp_dists.mean)
-            self.mean = (_w * mean).sum(axis=mixture_axis)
-            if "mean" not in defaults:
-                defaults.append("mean")
-        defaults.append("mode")
-
-        super().__init__(defaults=defaults, *args, **kwargs)
-
-    def logp(self, value):
-        """
-        Calculate log-probability of defined ``MixtureSameFamily`` distribution at specified value.
-
-        Parameters
-        ----------
-        value : numeric
-            Value(s) for which log-probability is calculated. If the log probabilities for multiple
-            values are desired the values must be provided in a numpy array or Aesara tensor
-
-        Returns
-        -------
-        TensorVariable
-        """
-
-        comp_dists = self.comp_dists
-        w = self.w
-        mixture_axis = self.mixture_axis
-
-        event_shape = comp_dists.shape[mixture_axis + 1 :]
-
-        # To be able to broadcast the comp_dists.logp with w and value
-        # We first have to pad the shape of w to the right with ones
-        # so that it can broadcast with the event_shape.
-
-        w = at.shape_padright(w, len(event_shape))
-
-        # Second, we have to add the mixture_axis to the value tensor
-        # To insert the mixture axis at the correct location, we use the
-        # negative number index. This way, we can also handle situations
-        # in which, value is an observed value with more batch dimensions
-        # than the ones present in the comp_dists.
-        comp_dists_ndim = len(comp_dists.shape)
-
-        value = at.shape_padaxis(value, axis=mixture_axis - comp_dists_ndim)
-
-        comp_logp = comp_dists.logp(value)
-        return check_parameters(
-            logsumexp(at.log(w) + comp_logp, axis=mixture_axis, keepdims=False),
-            w >= 0,
-            w <= 1,
-            at.allclose(w.sum(axis=mixture_axis - comp_dists_ndim), 1),
-            broadcast_conditions=False,
-        )
-
-    def random(self, point=None, size=None):
-        """
-        Draw random values from defined ``MixtureSameFamily`` distribution.
-
-        Parameters
-        ----------
-        point : dict, optional
-            Dict of variable values on which random values are to be
-            conditioned (uses default point if not specified).
-        size : int, optional
-            Desired size of random sample (returns one sample if not
-            specified).
-
-        Returns
-        -------
-        array
-        """
-        # sample_shape = to_tuple(size)
-        # mixture_axis = self.mixture_axis
-        #
-        # # First we draw values for the mixture component weights
-        # (w,) = draw_values([self.w], point=point, size=size)
-        #
-        # # We now draw random choices from those weights.
-        # # However, we have to ensure that the number of choices has the
-        # # sample_shape present.
-        # w_shape = w.shape
-        # batch_shape = self.comp_dists.shape[: mixture_axis + 1]
-        # param_shape = np.broadcast(np.empty(w_shape), np.empty(batch_shape)).shape
-        # event_shape = self.comp_dists.shape[mixture_axis + 1 :]
-        #
-        # if np.asarray(self.shape).size != 0:
-        #     comp_dists_ndim = len(self.comp_dists.shape)
-        #
-        #     # If event_shape of both comp_dists and supplied shape matches,
-        #     # broadcast only batch_shape
-        #     # else broadcast the entire given shape with batch_shape.
-        #     if list(self.shape[mixture_axis - comp_dists_ndim + 1 :]) == list(event_shape):
-        #         dist_shape = np.broadcast(
-        #             np.empty(self.shape[:mixture_axis]), np.empty(param_shape[:mixture_axis])
-        #         ).shape
-        #     else:
-        #         dist_shape = np.broadcast(
-        #             np.empty(self.shape), np.empty(param_shape[:mixture_axis])
-        #         ).shape
-        # else:
-        #     dist_shape = param_shape[:mixture_axis]
-        #
-        # # Try to determine the size that must be used to get the mixture
-        # # components (i.e. get random choices using w).
-        # # 1. There must be size independent choices based on w.
-        # # 2. There must also be independent draws for each non singleton axis
-        # #    of w.
-        # # 3. There must also be independent draws for each dimension added by
-        # #    self.shape with respect to the w.ndim. These usually correspond to
-        # #    observed variables with batch shapes
-        # wsh = (1,) * (len(dist_shape) - len(w_shape) + 1) + w_shape[:mixture_axis]
-        # psh = (1,) * (len(dist_shape) - len(param_shape) + 1) + param_shape[:mixture_axis]
-        # w_sample_size = []
-        # # Loop through the dist_shape to get the conditions 2 and 3 first
-        # for i in range(len(dist_shape)):
-        #     if dist_shape[i] != psh[i] and wsh[i] == 1:
-        #         # self.shape[i] is a non singleton dimension (usually caused by
-        #         # observed data)
-        #         sh = dist_shape[i]
-        #     else:
-        #         sh = wsh[i]
-        #     w_sample_size.append(sh)
-        #
-        # if sample_shape is not None and w_sample_size[: len(sample_shape)] != sample_shape:
-        #     w_sample_size = sample_shape + tuple(w_sample_size)
-        #
-        # choices = random_choice(p=w, size=w_sample_size)
-        #
-        # # We now draw samples from the mixture components random method
-        # comp_samples = self.comp_dists.random(point=point, size=size)
-        # if comp_samples.shape[: len(sample_shape)] != sample_shape:
-        #     comp_samples = np.broadcast_to(
-        #         comp_samples,
-        #         shape=sample_shape + comp_samples.shape,
-        #     )
-        #
-        # # At this point the shapes of the arrays involved are:
-        # # comp_samples.shape = (sample_shape, batch_shape, mixture_axis, event_shape)
-        # # choices.shape = (sample_shape, batch_shape)
-        # #
-        # # To be able to take the choices along the mixture_axis of the
-        # # comp_samples, we have to add in dimensions to the right of the
-        # # choices array.
-        # # We also need to make sure that the batch_shapes of both the comp_samples
-        # # and choices broadcast with each other.
-        #
-        # choices = np.reshape(choices, choices.shape + (1,) * (1 + len(event_shape)))
-        #
-        # choices, comp_samples = get_broadcastable_dist_samples([choices, comp_samples], size=size)
-        #
-        # # We now take the choices of the mixture components along the mixture_axis
-        # # but we use the negative index representation to be able to handle the
-        # # sample_shape
-        # samples = np.take_along_axis(
-        #     comp_samples, choices, axis=mixture_axis - len(self.comp_dists.shape)
-        # )
-        #
-        # # The `samples` array still has the `mixture_axis`, so we must remove it:
-        # output = samples[(..., 0) + (slice(None),) * len(event_shape)]
-        # return output
-
-    def _distr_parameters_for_repr(self):
-        return []
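The removed `logp` marginalizes the mixture axis through the identity log p(x) = logsumexp_i(log w_i + log f_i(x)), after padding `w` and `value` so that both broadcast against the component axis. A minimal NumPy sketch of that reduction, assuming scalar Gaussian components; the weights, locations, and shapes below are illustrative only, not part of the PyMC API:

```python
import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

w = np.array([0.2, 0.3, 0.5])    # mixture weights, a simplex
mu = np.array([-1.0, 0.0, 1.0])  # one location per component

# Pad the observations with a trailing mixture axis so they broadcast
# against the components, mirroring at.shape_padaxis(value, ...) above.
value = np.array([[0.1], [2.0]])        # two observations, shape (2, 1)
comp_logp = norm.logpdf(value, loc=mu)  # per-component logp, shape (2, 3)

# Marginalize the mixture axis in log space.
mix_logp = logsumexp(np.log(w) + comp_logp, axis=-1)  # shape (2,)
```

The symbolic version wraps the same reduction in `check_parameters` so that invalid weights (negative, above one, or not summing to one) raise an error instead of returning a silently wrong density.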
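The commented-out `random` body performs ancestral sampling: draw a component index per sample from `w`, draw from every component, then select the chosen component along the mixture axis with `take_along_axis` and drop the now-singleton axis. A sketch of that selection under the simplest assumptions (a single batch of scalar Gaussian components; all names and shapes here are illustrative):

```python
import numpy as np

rng = np.random.default_rng(42)
w = np.array([0.2, 0.3, 0.5])
mu = np.array([-1.0, 0.0, 1.0])
n_draws = 1000

comp_samples = rng.normal(loc=mu, size=(n_draws, 3))  # one draw per component
choices = rng.choice(3, p=w, size=(n_draws, 1))       # component index per draw

# Pick each draw's component along the mixture axis, then remove that axis,
# as the `samples[(..., 0) + ...]` indexing does above.
samples = np.take_along_axis(comp_samples, choices, axis=-1)[..., 0]
```

Most of the shape bookkeeping in the commented-out version exists to make this same selection work when `w`, the batch dimensions, and `size` all have to broadcast against one another.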