From 8f4c93c90cd5f4ead47ad56df3afc24548027de6 Mon Sep 17 00:00:00 2001 From: Larry Dong Date: Thu, 17 Jun 2021 22:29:12 -0400 Subject: [PATCH 1/8] Attempt 1: beginning of DP submodule for GSoC project :smile: --- pymc3/dp/__init__.py | 14 ++++++++++++++ pymc3/dp/dp.py | 29 +++++++++++++++++++++++++++++ pymc3/dp/utils.py | 7 +++++++ 3 files changed, 50 insertions(+) create mode 100644 pymc3/dp/__init__.py create mode 100644 pymc3/dp/dp.py create mode 100644 pymc3/dp/utils.py diff --git a/pymc3/dp/__init__.py b/pymc3/dp/__init__.py new file mode 100644 index 0000000000..2083dd7ad6 --- /dev/null +++ b/pymc3/dp/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The PyMC Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/pymc3/dp/dp.py b/pymc3/dp/dp.py new file mode 100644 index 0000000000..21e50ce1f1 --- /dev/null +++ b/pymc3/dp/dp.py @@ -0,0 +1,29 @@ +# Copyright 2021 The PyMC Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import warnings + +import aesara.tensor as at +import numpy as np + +import pymc3 as pm + +from pymc3.distributions.distribution import Discrete, Distribution + +__all__ = [] + + +class Base(Distribution): + pass diff --git a/pymc3/dp/utils.py b/pymc3/dp/utils.py new file mode 100644 index 0000000000..4ae85bbb40 --- /dev/null +++ b/pymc3/dp/utils.py @@ -0,0 +1,7 @@ +def stick_breaking(betas): + ''' + betas is a K-dimensional vector consisting of iid draws from a Beta distribution + ''' + sticks = at.concatenate([[1], (1 - betas[:-1])]) + + return at.mul(betas, at.cumprod(sticks)) From a884007f60a727c17994900d356c033eb367ffa7 Mon Sep 17 00:00:00 2001 From: Larry Dong Date: Thu, 17 Jun 2021 22:39:17 -0400 Subject: [PATCH 2/8] Added Apache Copyright to utils --- pymc3/dp/utils.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pymc3/dp/utils.py b/pymc3/dp/utils.py index 4ae85bbb40..ee803d7351 100644 --- a/pymc3/dp/utils.py +++ b/pymc3/dp/utils.py @@ -1,3 +1,18 @@ +# Copyright 2021 The PyMC Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + def stick_breaking(betas): ''' betas is a K-dimensional vector consisting of iid draws from a Beta distribution ''' From ddcc020f2e63ebe4417523df7c013fca6079bb39 Mon Sep 17 00:00:00 2001 From: Larry Dong Date: Sat, 19 Jun 2021 18:32:16 -0400 Subject: [PATCH 3/8] 🍦 Trying to address refactoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pymc3/distributions/mixture.py | 40 ++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index c9b764a0ac..6319d864e9 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -13,6 +13,7 @@ # limitations under the License. from collections.abc import Iterable +from typing import List, Optional, Union import aesara import aesara.tensor as at @@ -38,6 +39,32 @@ def all_discrete(comp_dists): return all(isinstance(comp_dist, Discrete) for comp_dist in comp_dists) +class MixtureRV(Distribution): + name = "mixture" + ndims_supp = 0 + ndims_params = [1, 1] + _print_name = ("Mixture", "\\operatorname{Mixture") + + def __call__(self, w, comp_dist, *args, **kwargs): + return super().__call__(w, comp_dist, size=size, **kwargs) + + @classmethod + def rng_fn( + cls, + rng: np.random.RandomState, + w: Union[np.ndarray, float], + comp_dist: Union[Distribution, Iterable[Distribution]], + size: Optional[Union[List[int], int]] = None, + ) -> np.ndarray: + + component = rng.multinomial(n=1, pvals=w) + + return comp_dist[component].rv_op.rng_fn(rng) + + +mixture = MixtureRV() + + class Mixture(Distribution): R""" Mixture log-likelihood @@ -111,8 +138,10 @@ class Mixture(Distribution): # Each can be thought of as an independent scalar mixture of 5 components like = pm.Mixture('like', w=w, comp_dists = components, observed=data, shape=3) """ + rv_op = mixture - def __init__(self, w, comp_dists, *args, **kwargs): + @classmethod + def dist(cls, w, comp_dists, *args, **kwargs): # comp_dists type checking if not ( isinstance(comp_dists, Distribution) @@ -165,7 +194,7 @@ def __init__(self, w, comp_dists, *args, **kwargs): except (AttributeError, ValueError, IndexError): pass - super().__init__(shape, dtype, defaults=defaults, *args, **kwargs) + return super().__dist__(shape, dtype, defaults=defaults, *args, **kwargs) @property def comp_dists(self): @@ -400,7 +429,9 @@ def infer_comp_dist_shapes(self, point=None): # ) # return comp_dist_shapes, broadcast_shape - def logp(self, value): + def logp( + value: Union[float, np.ndarray, TensorVariable], + ): """ Calculate log-probability of defined Mixture distribution at specified value.
@@ -617,6 +648,7 @@ class NormalMixture(Mixture): pm.NormalMixture("y", w=weights, mu=μ, sigma=σ, observed=data) """ + @classmethod def __init__(self, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), *args, **kwargs): if sd is not None: sigma = sd @@ -625,7 +657,7 @@ def __init__(self, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), *args, * self.mu = mu = at.as_tensor_variable(mu) self.sigma = self.sd = sigma = at.as_tensor_variable(sigma) - super().__init__(w, Normal.dist(mu, sigma=sigma, shape=comp_shape), *args, **kwargs) + super().dist([w, Normal.dist(mu, sigma=sigma, shape=comp_shape)], *args, **kwargs) def _distr_parameters_for_repr(self): return ["w", "mu", "sigma"] From aefc6231b01055f9944025c0f5c73a02534e7f51 Mon Sep 17 00:00:00 2001 From: Larry Dong Date: Sat, 19 Jun 2021 23:59:07 -0400 Subject: [PATCH 4/8] Attempting to refactor Mixture Distributions --- pymc3/distributions/continuous.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index fad5b003b2..9f7f930473 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -2595,7 +2595,7 @@ class ChiSquared(PositiveContinuous): nu: float Degrees of freedom (nu > 0). """ - rv_op = chisquare + rv_op = chisquared @classmethod def dist(cls, nu, *args, **kwargs): From 976a6216507723482b5c71fa294719bb270a35a7 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Thu, 17 Jun 2021 19:54:48 +0200 Subject: [PATCH 5/8] Create dedicated environment YML for Windows The installation guide for Windows was recently updated after it was discovered that the combination of numba+scipy dependencies leads to the correct installation of BLAS dependencies. --- .github/workflows/windows.yml | 4 +-- conda-envs/windows-environment-dev-py38.yml | 37 +++++++++++++++++++++ scripts/generate_pip_deps_from_conda.py | 14 ++++++-- 3 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 conda-envs/windows-environment-dev-py38.yml diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index ab9d61d15a..c6a4806dc1 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -32,7 +32,7 @@ jobs: with: path: ~/conda_pkgs_dir key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('conda-envs/environment-dev-py38.yml') }} + hashFiles('conda-envs/windows-environment-dev-py38.yml') }} - name: Cache multiple paths uses: actions/cache@v2 env: @@ -49,7 +49,7 @@ with: activate-environment: pymc3-dev-py38 channel-priority: strict - environment-file: conda-envs/environment-dev-py38.yml + environment-file: conda-envs/windows-environment-dev-py38.yml use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
- name: Install-pymc3 run: | diff --git a/conda-envs/windows-environment-dev-py38.yml b/conda-envs/windows-environment-dev-py38.yml new file mode 100644 index 0000000000..9f085f7d47 --- /dev/null +++ b/conda-envs/windows-environment-dev-py38.yml @@ -0,0 +1,37 @@ +name: pymc3-dev-py38 +channels: +- conda-forge +- defaults +dependencies: + # base dependencies (see install guide for Windows) +- aesara>=2.0.9 +- arviz>=0.11.2 +- cachetools>=4.2.1 +- dill +- fastprogress>=0.2.0 +- h5py>=2.7 +- libpython +- mkl-service +- m2w64-toolchain +- numba +- numpy>=1.15.0 +- pandas>=0.24.0 +- pip +- python=3.8 +- python-graphviz +- scipy>1.4.1 +- typing-extensions>=3.7.4 +# Extra stuff for dev, testing and docs build +- ipython>=7.16 +- myst-nb +- nbsphinx>=0.4 +- numpydoc>=0.9 +- pre-commit>=2.8.0 +- pydata-sphinx-theme +- pytest-cov>=2.5 +- pytest>=3.0 +- recommonmark>=0.4 +- sphinx-autobuild>=0.7 +- sphinx-panels +- sphinx>=1.5 +- watermark diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 04a764d57e..0413754af6 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -41,7 +41,17 @@ import yaml -EXCLUDE = {"python", "libblas", "mkl-service", "python-graphviz"} +EXCLUDE = { + "pip", + "python", + "libblas", + "libpython", + "m2w64-toolchain", + "mkl-service", + "numba", + "scipy", + "python-graphviz", +} RENAME = {} @@ -117,7 +127,7 @@ def main(conda_fname, pip_fname): f"# This file is auto-generated by scripts/generate_pip_deps_from_conda.py, " "do not modify.\n# See that file for comments about the need/usage of each dependency.\n\n" ) - pip_content = header + "\n".join(pip_deps) + "\n" + pip_content = header + "\n".join(sorted(pip_deps)) + "\n" with open(pip_fname, "w") as pip_fd: pip_fd.write(pip_content) From b5138d64a5f3f0cf1ed97adb34117aee03198a56 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Sat, 19 Jun 2021 15:06:41 +0200 Subject: [PATCH 6/8] Reenable Windows test workflow --- .github/workflows/windows.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index c6a4806dc1..b5727829e7 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -7,7 +7,6 @@ on: jobs: pytest: - if: false strategy: matrix: os: [windows-latest] From a9eb893c2c8e9f9935ad46f02abeb81158b7d83d Mon Sep 17 00:00:00 2001 From: Larry Dong Date: Sun, 20 Jun 2021 23:29:19 -0400 Subject: [PATCH 7/8] First draft of refactoring mixture distributions --- pymc3/distributions/continuous.py | 2 +- pymc3/distributions/mixture.py | 268 ++---------------------------- pymc3/dp/__init__.py | 1 - pymc3/dp/utils.py | 6 +- 4 files changed, 17 insertions(+), 260 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 9f7f930473..fad5b003b2 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -2595,7 +2595,7 @@ class ChiSquared(PositiveContinuous): nu: float Degrees of freedom (nu > 0). 
""" - rv_op = chisquared + rv_op = chisquare @classmethod def dist(cls, nu, *args, **kwargs): diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index 6319d864e9..f72e8bd418 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -19,6 +19,9 @@ import aesara.tensor as at import numpy as np +from aesara.tensor.random.op import RandomVariable +from aesara.tensor.var import TensorVariable + from pymc3.aesaraf import _conversion_map, take_along_axis from pymc3.distributions.continuous import Normal, get_tau_sigma from pymc3.distributions.dist_math import bound @@ -39,15 +42,12 @@ def all_discrete(comp_dists): return all(isinstance(comp_dist, Discrete) for comp_dist in comp_dists) -class MixtureRV(Distribution): +class MixtureRV(RandomVariable): name = "mixture" - ndims_supp = 0 + ndim_supp = 0 ndims_params = [1, 1] _print_name = ("Mixture", "\\operatorname{Mixture") - def __call__(self, w, comp_dist, *args, **kwargs): - return super().__call__(w, comp_dist, size=size, **kwargs) - @classmethod def rng_fn( cls, @@ -60,7 +60,7 @@ def rng_fn( component = rng.multinomial(n=1, pvals=w) return comp_dist[component].rv_op.rng_fn(rng) - + mixture = MixtureRV() @@ -194,7 +194,7 @@ def dist(cls, w, comp_dists, *args, **kwargs): except (AttributeError, ValueError, IndexError): pass - return super().__dist__(shape, dtype, defaults=defaults, *args, **kwargs) + return super().__dist__([w, comp_dists], *args, **kwargs) @property def comp_dists(self): @@ -429,9 +429,7 @@ def infer_comp_dist_shapes(self, point=None): # ) # return comp_dist_shapes, broadcast_shape - def logp( - value: Union[float, np.ndarray, TensorVariable], - ): + def logp(value: Union[float, np.ndarray, TensorVariable]): """ Calculate log-probability of defined Mixture distribution at specified value. @@ -455,140 +453,6 @@ def logp( broadcast_conditions=False, ) - def random(self, point=None, size=None): - """ - Draw random values from defined Mixture distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # # Convert size to tuple - # size = to_tuple(size) - # # Draw mixture weights and infer the comp_dists shapes - # with _DrawValuesContext() as draw_context: - # # We first need to check w and comp_tmp shapes and re compute size - # w = draw_values([self.w], point=point, size=size)[0] - # comp_dist_shapes, broadcast_shape = self.infer_comp_dist_shapes(point=point) - # - # # When size is not None, it's hard to tell the w parameter shape - # if size is not None and w.shape[: len(size)] == size: - # w_shape = w.shape[len(size) :] - # else: - # w_shape = w.shape - # - # # Try to determine parameter shape and dist_shape - # if self.comp_is_distribution: - # param_shape = np.broadcast(np.empty(w_shape), np.empty(broadcast_shape)).shape - # else: - # param_shape = np.broadcast(np.empty(w_shape), np.empty(broadcast_shape + (1,))).shape - # if np.asarray(self.shape).size != 0: - # dist_shape = np.broadcast(np.empty(self.shape), np.empty(param_shape[:-1])).shape - # else: - # dist_shape = param_shape[:-1] - # - # # Try to determine the size that must be used to get the mixture - # # components (i.e. get random choices using w). - # # 1. There must be size independent choices based on w. - # # 2. 
There must also be independent draws for each non singleton axis - # # of w. - # # 3. There must also be independent draws for each dimension added by - # # self.shape with respect to the w.ndim. These usually correspond to - # # observed variables with batch shapes - # wsh = (1,) * (len(dist_shape) - len(w_shape) + 1) + w_shape[:-1] - # psh = (1,) * (len(dist_shape) - len(param_shape) + 1) + param_shape[:-1] - # w_sample_size = [] - # # Loop through the dist_shape to get the conditions 2 and 3 first - # for i in range(len(dist_shape)): - # if dist_shape[i] != psh[i] and wsh[i] == 1: - # # self.shape[i] is a non singleton dimension (usually caused by - # # observed data) - # sh = dist_shape[i] - # else: - # sh = wsh[i] - # w_sample_size.append(sh) - # if size is not None and w_sample_size[: len(size)] != size: - # w_sample_size = size + tuple(w_sample_size) - # # Broadcast w to the w_sample_size (add a singleton last axis for the - # # mixture components) - # w = broadcast_distribution_samples([w, np.empty(w_sample_size + (1,))], size=size)[0] - # - # # Semiflatten the mixture weights. The last axis is the number of - # # mixture mixture components, and the rest is all about size, - # # dist_shape and broadcasting - # w_ = np.reshape(w, (-1, w.shape[-1])) - # w_samples = random_choice(p=w_, size=None) # w's shape already includes size - # # Now we broadcast the chosen components to the dist_shape - # w_samples = np.reshape(w_samples, w.shape[:-1]) - # if size is not None and dist_shape[: len(size)] != size: - # w_samples = np.broadcast_to(w_samples, size + dist_shape) - # else: - # w_samples = np.broadcast_to(w_samples, dist_shape) - # - # # When size is not None, maybe dist_shape partially overlaps with size - # if size is not None: - # if size == dist_shape: - # size = None - # elif size[-len(dist_shape) :] == dist_shape: - # size = size[: len(size) - len(dist_shape)] - # - # # We get an integer _size instead of a tuple size for drawing the - # # mixture, then we just reshape the output - # if size is None: - # _size = None - # else: - # _size = int(np.prod(size)) - # - # # Compute the total size of the mixture's random call with size - # if _size is not None: - # output_size = int(_size * np.prod(dist_shape) * param_shape[-1]) - # else: - # output_size = int(np.prod(dist_shape) * param_shape[-1]) - # # Get the size we need for the mixture's random call - # if self.comp_is_distribution: - # mixture_size = int(output_size // np.prod(broadcast_shape)) - # else: - # mixture_size = int(output_size // (np.prod(broadcast_shape) * param_shape[-1])) - # if mixture_size == 1 and _size is None: - # mixture_size = None - # - # # Sample from the mixture - # with draw_context: - # mixed_samples = self._comp_samples( - # point=point, - # size=mixture_size, - # broadcast_shape=broadcast_shape, - # comp_dist_shapes=comp_dist_shapes, - # ) - # # Test that the mixture has the same number of "samples" as w - # if w_samples.size != (mixed_samples.size // w.shape[-1]): - # raise ValueError( - # "Inconsistent number of samples from the " - # "mixture and mixture weights. 
Drew {} mixture " - # "weights elements, and {} samples from the " - # "mixture components.".format(w_samples.size, mixed_samples.size // w.shape[-1]) - # ) - # # Semiflatten the mixture to be able to zip it with w_samples - # w_samples = w_samples.flatten() - # mixed_samples = np.reshape(mixed_samples, (-1, w.shape[-1])) - # # Select the samples from the mixture - # samples = np.array([mixed[choice] for choice, mixed in zip(w_samples, mixed_samples)]) - # # Reshape the samples to the correct output shape - # if size is None: - # samples = np.reshape(samples, dist_shape) - # else: - # samples = np.reshape(samples, size + dist_shape) - # return samples - def _distr_parameters_for_repr(self): return [] @@ -649,7 +513,7 @@ class NormalMixture(Mixture): """ @classmethod - def __init__(self, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), *args, **kwargs): + def dist(w, mu, sigma=None, tau=None, sd=None, comp_shape=(), *args, **kwargs): if sd is not None: sigma = sd _, sigma = get_tau_sigma(tau=tau, sigma=sigma) @@ -696,7 +560,8 @@ class MixtureSameFamily(Distribution): distribution is reduced. """ - def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs): + @classmethod + def __dist__(w, comp_dists, mixture_axis=-1, *args, **kwargs): self.w = at.as_tensor_variable(w) if not isinstance(comp_dists, Distribution): raise TypeError( @@ -737,9 +602,9 @@ def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs): defaults.append("mean") defaults.append("mode") - super().__init__(defaults=defaults, *args, **kwargs) + super().dist([w, comp_dists], defaults=defaults, *args, **kwargs) - def logp(self, value): + def logp(value): """ Calculate log-probability of defined ``MixtureSameFamily`` distribution at specified value. @@ -784,112 +649,5 @@ def logp(self, value): broadcast_conditions=False, ) - def random(self, point=None, size=None): - """ - Draw random values from defined ``MixtureSameFamily`` distribution. - - Parameters - ---------- - point : dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size : int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # sample_shape = to_tuple(size) - # mixture_axis = self.mixture_axis - # - # # First we draw values for the mixture component weights - # (w,) = draw_values([self.w], point=point, size=size) - # - # # We now draw random choices from those weights. - # # However, we have to ensure that the number of choices has the - # # sample_shape present. - # w_shape = w.shape - # batch_shape = self.comp_dists.shape[: mixture_axis + 1] - # param_shape = np.broadcast(np.empty(w_shape), np.empty(batch_shape)).shape - # event_shape = self.comp_dists.shape[mixture_axis + 1 :] - # - # if np.asarray(self.shape).size != 0: - # comp_dists_ndim = len(self.comp_dists.shape) - # - # # If event_shape of both comp_dists and supplied shape matches, - # # broadcast only batch_shape - # # else broadcast the entire given shape with batch_shape. - # if list(self.shape[mixture_axis - comp_dists_ndim + 1 :]) == list(event_shape): - # dist_shape = np.broadcast( - # np.empty(self.shape[:mixture_axis]), np.empty(param_shape[:mixture_axis]) - # ).shape - # else: - # dist_shape = np.broadcast( - # np.empty(self.shape), np.empty(param_shape[:mixture_axis]) - # ).shape - # else: - # dist_shape = param_shape[:mixture_axis] - # - # # Try to determine the size that must be used to get the mixture - # # components (i.e. 
get random choices using w). - # # 1. There must be size independent choices based on w. - # # 2. There must also be independent draws for each non singleton axis - # # of w. - # # 3. There must also be independent draws for each dimension added by - # # self.shape with respect to the w.ndim. These usually correspond to - # # observed variables with batch shapes - # wsh = (1,) * (len(dist_shape) - len(w_shape) + 1) + w_shape[:mixture_axis] - # psh = (1,) * (len(dist_shape) - len(param_shape) + 1) + param_shape[:mixture_axis] - # w_sample_size = [] - # # Loop through the dist_shape to get the conditions 2 and 3 first - # for i in range(len(dist_shape)): - # if dist_shape[i] != psh[i] and wsh[i] == 1: - # # self.shape[i] is a non singleton dimension (usually caused by - # # observed data) - # sh = dist_shape[i] - # else: - # sh = wsh[i] - # w_sample_size.append(sh) - # - # if sample_shape is not None and w_sample_size[: len(sample_shape)] != sample_shape: - # w_sample_size = sample_shape + tuple(w_sample_size) - # - # choices = random_choice(p=w, size=w_sample_size) - # - # # We now draw samples from the mixture components random method - # comp_samples = self.comp_dists.random(point=point, size=size) - # if comp_samples.shape[: len(sample_shape)] != sample_shape: - # comp_samples = np.broadcast_to( - # comp_samples, - # shape=sample_shape + comp_samples.shape, - # ) - # - # # At this point the shapes of the arrays involved are: - # # comp_samples.shape = (sample_shape, batch_shape, mixture_axis, event_shape) - # # choices.shape = (sample_shape, batch_shape) - # # - # # To be able to take the choices along the mixture_axis of the - # # comp_samples, we have to add in dimensions to the right of the - # # choices array. - # # We also need to make sure that the batch_shapes of both the comp_samples - # # and choices broadcast with each other. - # - # choices = np.reshape(choices, choices.shape + (1,) * (1 + len(event_shape))) - # - # choices, comp_samples = get_broadcastable_dist_samples([choices, comp_samples], size=size) - # - # # We now take the choices of the mixture components along the mixture_axis - # # but we use the negative index representation to be able to handle the - # # sample_shape - # samples = np.take_along_axis( - # comp_samples, choices, axis=mixture_axis - len(self.comp_dists.shape) - # ) - # - # # The `samples` array still has the `mixture_axis`, so we must remove it: - # output = samples[(..., 0) + (slice(None),) * len(event_shape)] - # return output - def _distr_parameters_for_repr(self): return [] diff --git a/pymc3/dp/__init__.py b/pymc3/dp/__init__.py index 2083dd7ad6..49436dcfde 100644 --- a/pymc3/dp/__init__.py +++ b/pymc3/dp/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- diff --git a/pymc3/dp/utils.py b/pymc3/dp/utils.py index ee803d7351..bb1ca9d7e9 100644 --- a/pymc3/dp/utils.py +++ b/pymc3/dp/utils.py @@ -14,9 +14,9 @@ def stick_breaking(betas): - ''' + """ betas is a K-dimensional vector consisting of iid draws from a Beta distribution - ''' + """ sticks = at.concatenate([[1], (1 - betas[:-1])]) - + return at.mul(betas, at.cumprod(sticks)) From db666c37ceac3c37345f77f64ff0b7090793897a Mon Sep 17 00:00:00 2001 From: Larry Dong Date: Mon, 21 Jun 2021 01:32:22 -0400 Subject: [PATCH 8/8] Updating process of Mixture refactoring - help wanted --- pymc3/distributions/continuous.py | 2 +- pymc3/distributions/mixture.py | 271 ++---------------------------- pymc3/dp/__init__.py | 14 -- pymc3/dp/dp.py | 29 ---- pymc3/dp/utils.py | 22 --- 5 files changed, 15 insertions(+), 323 deletions(-) delete mode 100644 pymc3/dp/__init__.py delete mode 100644 pymc3/dp/dp.py delete mode 100644 pymc3/dp/utils.py diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 9f7f930473..fad5b003b2 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -2595,7 +2595,7 @@ class ChiSquared(PositiveContinuous): nu: float Degrees of freedom (nu > 0). """ - rv_op = chisquared + rv_op = chisquare @classmethod def dist(cls, nu, *args, **kwargs): diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index 6319d864e9..da6610df0d 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -19,6 +19,9 @@ import aesara.tensor as at import numpy as np +from aesara.tensor.random.op import RandomVariable +from aesara.tensor.var import TensorVariable + from pymc3.aesaraf import _conversion_map, take_along_axis from pymc3.distributions.continuous import Normal, get_tau_sigma from pymc3.distributions.dist_math import bound @@ -39,15 +42,12 @@ def all_discrete(comp_dists): return all(isinstance(comp_dist, Discrete) for comp_dist in comp_dists) -class MixtureRV(Distribution): +class MixtureRV(RandomVariable): name = "mixture" - ndims_supp = 0 + ndim_supp = 0 ndims_params = [1, 1] _print_name = ("Mixture", "\\operatorname{Mixture") - def __call__(self, w, comp_dist, *args, **kwargs): - return super().__call__(w, comp_dist, size=size, **kwargs) - @classmethod def rng_fn( cls, @@ -60,7 +60,7 @@ def rng_fn( component = rng.multinomial(n=1, pvals=w) return comp_dist[component].rv_op.rng_fn(rng) - + mixture = MixtureRV() @@ -161,8 +161,7 @@ def dist(cls, w, comp_dists, *args, **kwargs): ) shape = kwargs.pop("shape", ()) - self.w = w = at.as_tensor_variable(w) - self.comp_dists = comp_dists + w = at.as_tensor_variable(w) defaults = kwargs.pop("defaults", []) @@ -194,7 +193,7 @@ def dist(cls, w, comp_dists, *args, **kwargs): except (AttributeError, ValueError, IndexError): pass - return super().__dist__(shape, dtype, defaults=defaults, *args, **kwargs) + return super().dist([w, comp_dists], *args, **kwargs) @property def comp_dists(self): @@ -429,9 +428,7 @@ def infer_comp_dist_shapes(self, point=None): # ) # return comp_dist_shapes, broadcast_shape - def logp( - value: Union[float, np.ndarray, TensorVariable], - ): + def logp(value: Union[float, np.ndarray, TensorVariable]): """ Calculate log-probability of defined Mixture distribution at specified value. @@ -455,140 +452,6 @@ def logp( broadcast_conditions=False, ) - def random(self, point=None, size=None): - """ - Draw random values from defined Mixture distribution. 
- - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # # Convert size to tuple - # size = to_tuple(size) - # # Draw mixture weights and infer the comp_dists shapes - # with _DrawValuesContext() as draw_context: - # # We first need to check w and comp_tmp shapes and re compute size - # w = draw_values([self.w], point=point, size=size)[0] - # comp_dist_shapes, broadcast_shape = self.infer_comp_dist_shapes(point=point) - # - # # When size is not None, it's hard to tell the w parameter shape - # if size is not None and w.shape[: len(size)] == size: - # w_shape = w.shape[len(size) :] - # else: - # w_shape = w.shape - # - # # Try to determine parameter shape and dist_shape - # if self.comp_is_distribution: - # param_shape = np.broadcast(np.empty(w_shape), np.empty(broadcast_shape)).shape - # else: - # param_shape = np.broadcast(np.empty(w_shape), np.empty(broadcast_shape + (1,))).shape - # if np.asarray(self.shape).size != 0: - # dist_shape = np.broadcast(np.empty(self.shape), np.empty(param_shape[:-1])).shape - # else: - # dist_shape = param_shape[:-1] - # - # # Try to determine the size that must be used to get the mixture - # # components (i.e. get random choices using w). - # # 1. There must be size independent choices based on w. - # # 2. There must also be independent draws for each non singleton axis - # # of w. - # # 3. There must also be independent draws for each dimension added by - # # self.shape with respect to the w.ndim. These usually correspond to - # # observed variables with batch shapes - # wsh = (1,) * (len(dist_shape) - len(w_shape) + 1) + w_shape[:-1] - # psh = (1,) * (len(dist_shape) - len(param_shape) + 1) + param_shape[:-1] - # w_sample_size = [] - # # Loop through the dist_shape to get the conditions 2 and 3 first - # for i in range(len(dist_shape)): - # if dist_shape[i] != psh[i] and wsh[i] == 1: - # # self.shape[i] is a non singleton dimension (usually caused by - # # observed data) - # sh = dist_shape[i] - # else: - # sh = wsh[i] - # w_sample_size.append(sh) - # if size is not None and w_sample_size[: len(size)] != size: - # w_sample_size = size + tuple(w_sample_size) - # # Broadcast w to the w_sample_size (add a singleton last axis for the - # # mixture components) - # w = broadcast_distribution_samples([w, np.empty(w_sample_size + (1,))], size=size)[0] - # - # # Semiflatten the mixture weights. 
The last axis is the number of - # # mixture mixture components, and the rest is all about size, - # # dist_shape and broadcasting - # w_ = np.reshape(w, (-1, w.shape[-1])) - # w_samples = random_choice(p=w_, size=None) # w's shape already includes size - # # Now we broadcast the chosen components to the dist_shape - # w_samples = np.reshape(w_samples, w.shape[:-1]) - # if size is not None and dist_shape[: len(size)] != size: - # w_samples = np.broadcast_to(w_samples, size + dist_shape) - # else: - # w_samples = np.broadcast_to(w_samples, dist_shape) - # - # # When size is not None, maybe dist_shape partially overlaps with size - # if size is not None: - # if size == dist_shape: - # size = None - # elif size[-len(dist_shape) :] == dist_shape: - # size = size[: len(size) - len(dist_shape)] - # - # # We get an integer _size instead of a tuple size for drawing the - # # mixture, then we just reshape the output - # if size is None: - # _size = None - # else: - # _size = int(np.prod(size)) - # - # # Compute the total size of the mixture's random call with size - # if _size is not None: - # output_size = int(_size * np.prod(dist_shape) * param_shape[-1]) - # else: - # output_size = int(np.prod(dist_shape) * param_shape[-1]) - # # Get the size we need for the mixture's random call - # if self.comp_is_distribution: - # mixture_size = int(output_size // np.prod(broadcast_shape)) - # else: - # mixture_size = int(output_size // (np.prod(broadcast_shape) * param_shape[-1])) - # if mixture_size == 1 and _size is None: - # mixture_size = None - # - # # Sample from the mixture - # with draw_context: - # mixed_samples = self._comp_samples( - # point=point, - # size=mixture_size, - # broadcast_shape=broadcast_shape, - # comp_dist_shapes=comp_dist_shapes, - # ) - # # Test that the mixture has the same number of "samples" as w - # if w_samples.size != (mixed_samples.size // w.shape[-1]): - # raise ValueError( - # "Inconsistent number of samples from the " - # "mixture and mixture weights. Drew {} mixture " - # "weights elements, and {} samples from the " - # "mixture components.".format(w_samples.size, mixed_samples.size // w.shape[-1]) - # ) - # # Semiflatten the mixture to be able to zip it with w_samples - # w_samples = w_samples.flatten() - # mixed_samples = np.reshape(mixed_samples, (-1, w.shape[-1])) - # # Select the samples from the mixture - # samples = np.array([mixed[choice] for choice, mixed in zip(w_samples, mixed_samples)]) - # # Reshape the samples to the correct output shape - # if size is None: - # samples = np.reshape(samples, dist_shape) - # else: - # samples = np.reshape(samples, size + dist_shape) - # return samples - def _distr_parameters_for_repr(self): return [] @@ -649,7 +512,7 @@ class NormalMixture(Mixture): """ @classmethod - def __init__(self, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), *args, **kwargs): + def dist(cls, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), *args, **kwargs): if sd is not None: sigma = sd _, sigma = get_tau_sigma(tau=tau, sigma=sigma) @@ -696,7 +559,8 @@ class MixtureSameFamily(Distribution): distribution is reduced. 
""" - def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs): + @classmethod + def __dist__(w, comp_dists, mixture_axis=-1, *args, **kwargs): self.w = at.as_tensor_variable(w) if not isinstance(comp_dists, Distribution): raise TypeError( @@ -737,9 +601,9 @@ def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs): defaults.append("mean") defaults.append("mode") - super().__init__(defaults=defaults, *args, **kwargs) + super().dist([w, comp_dists], defaults=defaults, *args, **kwargs) - def logp(self, value): + def logp(value): """ Calculate log-probability of defined ``MixtureSameFamily`` distribution at specified value. @@ -784,112 +648,5 @@ def logp(self, value): broadcast_conditions=False, ) - def random(self, point=None, size=None): - """ - Draw random values from defined ``MixtureSameFamily`` distribution. - - Parameters - ---------- - point : dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size : int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # sample_shape = to_tuple(size) - # mixture_axis = self.mixture_axis - # - # # First we draw values for the mixture component weights - # (w,) = draw_values([self.w], point=point, size=size) - # - # # We now draw random choices from those weights. - # # However, we have to ensure that the number of choices has the - # # sample_shape present. - # w_shape = w.shape - # batch_shape = self.comp_dists.shape[: mixture_axis + 1] - # param_shape = np.broadcast(np.empty(w_shape), np.empty(batch_shape)).shape - # event_shape = self.comp_dists.shape[mixture_axis + 1 :] - # - # if np.asarray(self.shape).size != 0: - # comp_dists_ndim = len(self.comp_dists.shape) - # - # # If event_shape of both comp_dists and supplied shape matches, - # # broadcast only batch_shape - # # else broadcast the entire given shape with batch_shape. - # if list(self.shape[mixture_axis - comp_dists_ndim + 1 :]) == list(event_shape): - # dist_shape = np.broadcast( - # np.empty(self.shape[:mixture_axis]), np.empty(param_shape[:mixture_axis]) - # ).shape - # else: - # dist_shape = np.broadcast( - # np.empty(self.shape), np.empty(param_shape[:mixture_axis]) - # ).shape - # else: - # dist_shape = param_shape[:mixture_axis] - # - # # Try to determine the size that must be used to get the mixture - # # components (i.e. get random choices using w). - # # 1. There must be size independent choices based on w. - # # 2. There must also be independent draws for each non singleton axis - # # of w. - # # 3. There must also be independent draws for each dimension added by - # # self.shape with respect to the w.ndim. 
These usually correspond to - # # observed variables with batch shapes - # wsh = (1,) * (len(dist_shape) - len(w_shape) + 1) + w_shape[:mixture_axis] - # psh = (1,) * (len(dist_shape) - len(param_shape) + 1) + param_shape[:mixture_axis] - # w_sample_size = [] - # # Loop through the dist_shape to get the conditions 2 and 3 first - # for i in range(len(dist_shape)): - # if dist_shape[i] != psh[i] and wsh[i] == 1: - # # self.shape[i] is a non singleton dimension (usually caused by - # # observed data) - # sh = dist_shape[i] - # else: - # sh = wsh[i] - # w_sample_size.append(sh) - # - # if sample_shape is not None and w_sample_size[: len(sample_shape)] != sample_shape: - # w_sample_size = sample_shape + tuple(w_sample_size) - # - # choices = random_choice(p=w, size=w_sample_size) - # - # # We now draw samples from the mixture components random method - # comp_samples = self.comp_dists.random(point=point, size=size) - # if comp_samples.shape[: len(sample_shape)] != sample_shape: - # comp_samples = np.broadcast_to( - # comp_samples, - # shape=sample_shape + comp_samples.shape, - # ) - # - # # At this point the shapes of the arrays involved are: - # # comp_samples.shape = (sample_shape, batch_shape, mixture_axis, event_shape) - # # choices.shape = (sample_shape, batch_shape) - # # - # # To be able to take the choices along the mixture_axis of the - # # comp_samples, we have to add in dimensions to the right of the - # # choices array. - # # We also need to make sure that the batch_shapes of both the comp_samples - # # and choices broadcast with each other. - # - # choices = np.reshape(choices, choices.shape + (1,) * (1 + len(event_shape))) - # - # choices, comp_samples = get_broadcastable_dist_samples([choices, comp_samples], size=size) - # - # # We now take the choices of the mixture components along the mixture_axis - # # but we use the negative index representation to be able to handle the - # # sample_shape - # samples = np.take_along_axis( - # comp_samples, choices, axis=mixture_axis - len(self.comp_dists.shape) - # ) - # - # # The `samples` array still has the `mixture_axis`, so we must remove it: - # output = samples[(..., 0) + (slice(None),) * len(event_shape)] - # return output - def _distr_parameters_for_repr(self): return [] diff --git a/pymc3/dp/__init__.py b/pymc3/dp/__init__.py deleted file mode 100644 index 2083dd7ad6..0000000000 --- a/pymc3/dp/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2021 The PyMC Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - diff --git a/pymc3/dp/dp.py b/pymc3/dp/dp.py deleted file mode 100644 index 21e50ce1f1..0000000000 --- a/pymc3/dp/dp.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2021 The PyMC Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import functools -import warnings - -import aesara.tensor as at -import numpy as np - -import pymc3 as pm - -from pymc3.distributions.distribution import Discrete, Distribution - -__all__ = [] - - -class Base(Distribution): - pass diff --git a/pymc3/dp/utils.py b/pymc3/dp/utils.py deleted file mode 100644 index ee803d7351..0000000000 --- a/pymc3/dp/utils.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2021 The PyMC Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -def stick_breaking(betas): - ''' - betas is a K-dimensional vector consisting of iid draws from a Beta distribution - ''' - sticks = at.concatenate([[1], (1 - betas[:-1])]) - - return at.mul(betas, at.cumprod(sticks))
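Editor's note on the stick-breaking helper: `pymc3/dp/utils.py` is added in PATCH 1 and deleted again in PATCH 8, and at no point does it import `aesara.tensor as at`, so `stick_breaking` would raise a NameError as committed. Below is a minimal self-contained sketch of the same construction with the missing import added; the Beta parameters and the truncation level K=10 are illustrative, not taken from the patches.

import aesara.tensor as at
import numpy as np


def stick_breaking(betas):
    """betas is a K-dimensional vector consisting of iid draws from a Beta distribution.

    Weight k is betas[k] * prod_{j < k} (1 - betas[j]): the length of the
    k-th piece broken off a unit-length stick.
    """
    # Shift the leftover-stick factors right by one position; the first
    # weight starts with the whole stick.
    sticks = at.concatenate([[1], (1 - betas[:-1])])
    return at.mul(betas, at.cumprod(sticks))


# Usage: with Beta(1, alpha) draws this is the truncated stick-breaking
# representation of a Dirichlet process (alpha=2.0, K=10 chosen here
# purely for illustration).
rng = np.random.default_rng(123)
betas = at.as_tensor_variable(rng.beta(1.0, 2.0, size=10))
weights = stick_breaking(betas).eval()
print(weights.sum())  # slightly below 1; approaches 1 as K grows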
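Editor's note on `MixtureRV.rng_fn`: in PATCH 3 (and still in PATCH 8), `rng.multinomial(n=1, pvals=w)` returns a one-hot array rather than a component index, and the selected component's `rng_fn` is called without its parameters or a size. The sketch below shows, in plain NumPy, the forward-sampling logic this method appears to be aiming for; the `component_rngs`/`component_params` structure is hypothetical and is not the PyMC3 or Aesara API.

from typing import Callable, Sequence

import numpy as np


def mixture_rng_fn(
    rng: np.random.Generator,
    w: np.ndarray,
    component_rngs: Sequence[Callable[..., float]],
    component_params: Sequence[tuple],
    size: int,
) -> np.ndarray:
    # Draw one component index per sample from the mixture weights
    # (equivalent to the argmax of a one-hot multinomial draw).
    components = rng.choice(len(w), p=w, size=size)
    # Sample each draw from its selected component.
    return np.array([component_rngs[k](*component_params[k]) for k in components])


# Usage: a two-component Gaussian mixture, 0.3 * N(-2, 1) + 0.7 * N(3, 0.5).
rng = np.random.default_rng(0)
draws = mixture_rng_fn(
    rng,
    w=np.array([0.3, 0.7]),
    component_rngs=[rng.normal, rng.normal],
    component_params=[(-2.0, 1.0), (3.0, 0.5)],
    size=1000,
)
print(draws.mean())  # close to 0.3 * (-2) + 0.7 * 3 = 1.5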