diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 44ce951bf0..6064e80af8 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -76,6 +76,7 @@ jobs:
           - |
             tests/distributions/test_timeseries.py
             tests/gp/test_cov.py
+            tests/gp/test_hsgp_approx.py
             tests/gp/test_gp.py
             tests/gp/test_mean.py
             tests/gp/test_util.py
diff --git a/docs/source/api/gp/implementations.rst b/docs/source/api/gp/implementations.rst
index 3b9f972845..03b59bbf9c 100644
--- a/docs/source/api/gp/implementations.rst
+++ b/docs/source/api/gp/implementations.rst
@@ -6,6 +6,7 @@ Implementations
 .. autosummary::
    :toctree: generated
 
+   HSGP
    Latent
    LatentKron
    Marginal
diff --git a/pymc/gp/__init__.py b/pymc/gp/__init__.py
index 25fbf7dcf1..99c8023398 100644
--- a/pymc/gp/__init__.py
+++ b/pymc/gp/__init__.py
@@ -22,3 +22,4 @@
     MarginalKron,
     MarginalSparse,
 )
+from pymc.gp.hsgp_approx import HSGP
diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index 1331a9e349..134c3f207e 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -12,14 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import numbers
 import warnings
 
+from collections import Counter
 from functools import reduce
-from numbers import Number
 from operator import add, mul
+from typing import Optional, Sequence
 
 import numpy as np
-import pytensor
 import pytensor.tensor as pt
 
 from pytensor.graph.basic import Variable
@@ -47,26 +48,10 @@
 ]
 
 
-class Covariance:
-    r"""
-    Base class for all kernels/covariance functions.
-
-    Parameters
-    ----------
-    input_dim: integer
-        The number of input dimensions, or columns of X (or Xs)
-        the kernel will operate on.
-    active_dims: List of integers
-        Indicate which dimension or column of X the covariance
-        function operates on.
+class BaseCovariance:
+    """
+    Base class for kernels/covariance functions.
     """
-
-    def __init__(self, input_dim, active_dims=None):
-        self.input_dim = input_dim
-        if active_dims is None:
-            self.active_dims = np.arange(input_dim)
-        else:
-            self.active_dims = np.asarray(active_dims, int)
 
     def __call__(self, X, Xs=None, diag=False):
         r"""
@@ -89,27 +74,14 @@ def __call__(self, X, Xs=None, diag=False):
     def diag(self, X):
         raise NotImplementedError
 
-    def full(self, X, Xs):
+    def full(self, X, Xs=None):
        raise NotImplementedError
 
-    def _slice(self, X, Xs):
-        xdims = X.shape[-1]
-        if isinstance(xdims, Variable):
-            xdims = xdims.eval()
-        if self.input_dim != xdims:
-            warnings.warn(
-                f"Only {self.input_dim} column(s) out of {xdims} are"
-                " being used to compute the covariance function. If this"
-                " is not intended, increase 'input_dim' parameter to"
-                " the number of columns to use. Ignore otherwise.",
-                UserWarning,
-            )
-        X = pt.as_tensor_variable(X[:, self.active_dims])
-        if Xs is not None:
-            Xs = pt.as_tensor_variable(Xs[:, self.active_dims])
-        return X, Xs
-
     def __add__(self, other):
+        # If it's a scalar, cast it as a Constant covariance.  This allows validation
+        # for power spectral density calculations.
+        if isinstance(other, numbers.Real):
+            other = Constant(c=other)
         return Add([self, other])
 
     def __mul__(self, other):
@@ -122,19 +94,10 @@ def __rmul__(self, other):
         return self.__mul__(other)
 
     def __pow__(self, other):
-        if (
-            isinstance(other, pytensor.compile.SharedVariable)
-            and other.get_value().squeeze().shape == ()
-        ):
-            other = pt.squeeze(other)
-            return Exponentiated(self, other)
-        elif isinstance(other, Number):
-            return Exponentiated(self, other)
-        elif np.asarray(other).squeeze().shape == ():
-            other = np.squeeze(other)
-            return Exponentiated(self, other)
-
-        raise ValueError("A covariance function can only be exponentiated by a scalar value")
+        other = pt.as_tensor_variable(other).squeeze()
+        if not other.ndim == 0:
+            raise ValueError("A covariance function can only be exponentiated by a scalar value")
+        return Exponentiated(self, other)
 
     def __array_wrap__(self, result):
         """
@@ -151,41 +114,126 @@ def __array_wrap__(self, result):
         A = np.zeros((r, c))
         for i in range(r):
             for j in range(c):
-                A[i, j] = result[i, j].factor_list[1]
+                r = result[i, j]._factor_list[1]
+                if isinstance(r, Constant):
+                    # Counteract the elemwise Add edgecase
+                    r = r.c
+                A[i, j] = r
         if isinstance(result[0][0], Add):
-            return result[0][0].factor_list[0] + A
+            return result[0][0]._factor_list[0] + A
         elif isinstance(result[0][0], Prod):
-            return result[0][0].factor_list[0] * A
+            return result[0][0]._factor_list[0] * A
         else:
             raise TypeError(
-                f"Unknown Covariance combination type {result[0][0]}. Known types are `Add` or `Prod`."
+                f"Unknown Covariance combination type {result[0][0]}. "
+                "Known types are `Add` or `Prod`."
             )
+
+
+class Covariance(BaseCovariance):
+    """
+    Base class for kernels/covariance functions with input_dim and active_dims, which excludes
+    kernels like `Constant` and `WhiteNoise`.
+
+    Parameters
+    ----------
+    input_dim: integer
+        The number of input dimensions, or columns of X (or Xs)
+        the kernel will operate on.
+    active_dims: List of integers
+        Indicate which dimension or column of X the covariance
+        function operates on.
+    """
+
+    def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]] = None):
+        self.input_dim = input_dim
+        if active_dims is None:
+            self.active_dims = np.arange(input_dim)
+        else:
+            self.active_dims = np.asarray(active_dims, int)
+
+        if max(self.active_dims) > self.input_dim:
+            raise ValueError("Values in `active_dims` can't be larger than `input_dim`.")
+
+    @property
+    def n_dims(self):
+        """The dimensionality of the input, as taken from the
+        `active_dims`.
+        """
+        # Evaluate lazily in case this changes.
+        return len(self.active_dims)
+
+    def _slice(self, X, Xs=None):
+        xdims = X.shape[-1]
+        if isinstance(xdims, Variable):
+            xdims = xdims.eval()
+        if self.input_dim != xdims:
+            warnings.warn(
+                f"Only {self.input_dim} column(s) out of {xdims} are"
+                " being used to compute the covariance function. If this"
+                " is not intended, increase 'input_dim' parameter to"
+                " the number of columns to use. Ignore otherwise.",
+                UserWarning,
+            )
+        X = pt.as_tensor_variable(X[:, self.active_dims])
+        if Xs is not None:
+            Xs = pt.as_tensor_variable(Xs[:, self.active_dims])
+        return X, Xs
 
 
 class Combination(Covariance):
     def __init__(self, factor_list):
-        input_dim = max(
-            factor.input_dim for factor in factor_list if isinstance(factor, Covariance)
+        """Use constituent factors to get input_dim and active_dims for the Combination covariance."""
+
+        # Check that all covariances in factor_list have the same input_dim
+        input_dims = {factor.input_dim for factor in factor_list if isinstance(factor, Covariance)}
+
+        if len(input_dims) != 1:
+            raise ValueError("All covariances must have the same `input_dim`.")
+        input_dim = input_dims.pop()
+
+        # Union all active_dims sets in factor_list for the combination covariance
+        active_dims = np.sort(
+            np.asarray(
+                list(
+                    set.union(
+                        *[
+                            set(factor.active_dims)
+                            for factor in factor_list
+                            if isinstance(factor, Covariance)
+                        ]
+                    )
+                ),
+                dtype=int,
+            )
         )
-        super().__init__(input_dim=input_dim)
-        self.factor_list = []
+
+        super().__init__(input_dim=input_dim, active_dims=active_dims)
+
+        # Set up the combination kernel, flattening out factor_list so that nested
+        # combinations of the same type collapse into a single list of factors.
+        self._factor_list = []
         for factor in factor_list:
             if isinstance(factor, self.__class__):
-                self.factor_list.extend(factor.factor_list)
+                self._factor_list.extend(factor._factor_list)
             else:
-                self.factor_list.append(factor)
+                self._factor_list.append(factor)
 
-    def merge_factors(self, X, Xs=None, diag=False):
+    def _merge_factors_cov(self, X, Xs=None, diag=False):
+        """Called to evaluate either all the sums or all the
+        products of kernels that are possible to evaluate.
+        """
         factor_list = []
-        for factor in self.factor_list:
+        for factor in self._factor_list:
             # make sure diag=True is handled properly
-            if isinstance(factor, Covariance):
+            if isinstance(factor, BaseCovariance):
                 factor_list.append(factor(X, Xs, diag))
+
             elif isinstance(factor, np.ndarray):
                 if np.ndim(factor) == 2 and diag:
                     factor_list.append(np.diag(factor))
                 else:
                     factor_list.append(factor)
+
             elif isinstance(
                 factor,
                 (
@@ -198,19 +246,75 @@ def merge_factors(self, X, Xs=None, diag=False):
                     factor_list.append(pt.diag(factor))
                 else:
                     factor_list.append(factor)
+
+            else:
+                factor_list.append(factor)
+
+        return factor_list
+
+    def _merge_factors_psd(self, omega):
+        """Called to evaluate the spectral densities of combination kernels when possible.
+
+        Implements a more restricted set of rules than `_merge_factors_cov` -- just additivity
+        of stationary covariances with defined power spectral densities and multiplication by
+        scalars.  Also, the active_dims for all covariances in the sum must be the same.
+        """
+        factor_list = []
+        for factor in self._factor_list:
+            if isinstance(factor, Covariance):
+                # Allow merging covariances for psd only if active_dims are the same
+                if set(self.active_dims) != set(factor.active_dims):
+                    raise ValueError(
+                        "For power spectral density calculations `active_dims` must be the same "
+                        "for all covariances in the sum."
+                    )
+
+                # If it's a covariance, try to calculate the psd
+                try:
+                    factor_list.append(factor.power_spectral_density(omega))
+
+                except (AttributeError, NotImplementedError) as e:
+                    if isinstance(factor, Stationary):
+                        raise NotImplementedError(
+                            f"No power spectral density method has been implemented for {factor}."
+                        ) from e
+
+                    else:
+                        raise ValueError(
+                            "Power spectral densities, `.power_spectral_density(omega)`, can only "
+                            f"be calculated for `Stationary` covariance functions.  {factor} is "
+                            "non-stationary."
+                        ) from e
+
+            else:
+                # Otherwise, defer the reduction to later
+                factor_list.append(factor)
+
+        return factor_list
 
 
 class Add(Combination):
     def __call__(self, X, Xs=None, diag=False):
-        return reduce(add, self.merge_factors(X, Xs, diag))
+        return reduce(add, self._merge_factors_cov(X, Xs, diag))
+
+    def power_spectral_density(self, omega):
+        return reduce(add, self._merge_factors_psd(omega))
 
 
 class Prod(Combination):
     def __call__(self, X, Xs=None, diag=False):
-        return reduce(mul, self.merge_factors(X, Xs, diag))
+        return reduce(mul, self._merge_factors_cov(X, Xs, diag))
+
+    def power_spectral_density(self, omega):
+        check = Counter([isinstance(factor, Covariance) for factor in self._factor_list])
+        if check.get(True) >= 2:
+            raise NotImplementedError(
+                "The power spectral density of products of covariance "
+                "functions is not implemented."
+            )
+
+        return reduce(mul, self._merge_factors_psd(omega))
 
 
 class Exponentiated(Covariance):
@@ -243,7 +347,7 @@ def __init__(self, factor_list):
         self.input_dims = [factor.input_dim for factor in factor_list]
         input_dim = sum(self.input_dims)
         super().__init__(input_dim=input_dim)
-        self.factor_list = factor_list
+        self._factor_list = factor_list
 
     def _split(self, X, Xs):
         indices = np.cumsum(self.input_dims)
@@ -256,11 +360,11 @@ def _split(self, X, Xs):
 
     def __call__(self, X, Xs=None, diag=False):
         X_split, Xs_split = self._split(X, Xs)
-        covs = [cov(x, xs, diag) for cov, x, xs in zip(self.factor_list, X_split, Xs_split)]
+        covs = [cov(x, xs, diag) for cov, x, xs in zip(self._factor_list, X_split, Xs_split)]
         return reduce(mul, covs)
 
 
-class Constant(Covariance):
+class Constant(BaseCovariance):
     r"""
     Constant valued covariance function.
 
@@ -270,7 +374,6 @@ class Constant(Covariance):
     """
 
     def __init__(self, c):
-        super().__init__(1, None)
         self.c = c
 
     def diag(self, X):
@@ -293,7 +396,6 @@ class WhiteNoise(Covariance):
     """
 
     def __init__(self, sigma):
-        super().__init__(1, None)
         self.sigma = sigma
 
     def diag(self, X):
@@ -408,6 +510,9 @@ def diag(self, X):
     def full(self, X, Xs=None):
         raise NotImplementedError
 
+    def power_spectral_density(self, omega):
+        raise NotImplementedError
+
 
 class Periodic(Stationary):
     r"""
@@ -451,12 +556,28 @@ class ExpQuad(Stationary):
     .. math::
 
        k(x, x') = \mathrm{exp}\left[ -\frac{(x - x')^2}{2 \ell^2} \right]
+
     """
 
     def full(self, X, Xs=None):
         X, Xs = self._slice(X, Xs)
         return pt.exp(-0.5 * self.square_dist(X, Xs))
 
+    def power_spectral_density(self, omega):
+        r"""
+        The power spectral density for the ExpQuad kernel is:
+
+        .. math::
+
+           S(\boldsymbol\omega) =
+               \left( \sqrt{2 \pi} \right)^D \prod_{i}^{D} \ell_i
+               \exp\left( -\frac{1}{2} \sum_{i}^{D} \ell_i^2 \omega_i^{2} \right)
+        """
+        ls = pt.ones(self.n_dims) * self.ls
+        c = pt.power(pt.sqrt(2.0 * np.pi), self.n_dims)
+        exp = pt.exp(-0.5 * pt.dot(pt.square(omega), pt.square(ls)))
+        return c * pt.prod(ls) * exp
+
 
 class RatQuad(Stationary):
     r"""
@@ -495,6 +616,30 @@ def full(self, X, Xs=None):
         r = self.euclidean_dist(X, Xs)
         return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * pt.square(r)) * pt.exp(-1.0 * np.sqrt(5.0) * r)
 
+    def power_spectral_density(self, omega):
+        r"""
+        The power spectral density for the Matern52 kernel is:
+
+        .. math::
+
+           S(\boldsymbol\omega) =
+               \frac{2^D \pi^{\frac{D}{2}} \Gamma\left(\frac{D+5}{2}\right) 5^{5/2}}
+                    {\frac{3}{4}\sqrt{\pi}}
+               \prod_{i=1}^{D}\ell_{i}
+               \left(5 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+5}{2}}
+        """
+        ls = pt.ones(self.n_dims) * self.ls
+        D52 = (self.n_dims + 5) / 2
+        num = (
+            pt.power(2, self.n_dims)
+            * pt.power(np.pi, self.n_dims / 2)
+            * pt.gamma(D52)
+            * pt.power(5, 5 / 2)
+        )
+        den = 0.75 * pt.sqrt(np.pi)
+        pow = pt.power(5.0 + pt.dot(pt.square(omega), pt.square(ls)), -1 * D52)
+        return (num / den) * pt.prod(ls) * pow
+
 
 class Matern32(Stationary):
     r"""
@@ -511,6 +656,30 @@ def full(self, X, Xs=None):
         r = self.euclidean_dist(X, Xs)
         return (1.0 + np.sqrt(3.0) * r) * pt.exp(-np.sqrt(3.0) * r)
 
+    def power_spectral_density(self, omega):
+        r"""
+        The power spectral density for the Matern32 kernel is:
+
+        .. math::
+
+           S(\boldsymbol\omega) =
+               \frac{2^D \pi^{D/2} \Gamma\left(\frac{D+3}{2}\right) 3^{3/2}}
+                    {\frac{1}{2}\sqrt{\pi}}
+               \prod_{i=1}^{D}\ell_{i}
+               \left(3 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+3}{2}}
+        """
+        ls = pt.ones(self.n_dims) * self.ls
+        D32 = (self.n_dims + 3) / 2
+        num = (
+            pt.power(2, self.n_dims)
+            * pt.power(np.pi, self.n_dims / 2)
+            * pt.gamma(D32)
+            * pt.power(3, 3 / 2)
+        )
+        den = 0.5 * pt.sqrt(np.pi)
+        pow = pt.power(3.0 + pt.dot(pt.square(omega), pt.square(ls)), -1 * D32)
+        return (num / den) * pt.prod(ls) * pow
+
 
 class Matern12(Stationary):
     r"""
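For orientation, a minimal sketch (not part of the diff) of what the `cov.py` changes enable once applied: scalar multiples and sums of stationary kernels keep a well-defined, composable power spectral density. The grid of frequencies below is illustrative.

    import numpy as np
    import pymc as pm

    # Angular frequencies at which to evaluate the density; shape (n, 1) for a 1-D kernel.
    omega = np.linspace(0.1, 2.0, 50)[:, None]

    # Add.power_spectral_density sums the per-factor densities; the scalar factors
    # are deferred and folded in by reduce(mul, ...) inside Prod.
    cov = 2 * pm.gp.cov.ExpQuad(1, ls=0.5) + pm.gp.cov.Matern52(1, ls=2.0)
    psd = cov.power_spectral_density(omega).eval()
    print(psd.shape)  # (50,)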
+ """ + m_star = int(np.prod(m)) + phi = tl.ones((Xs.shape[0], m_star)) + for d in range(len(m)): + c = 1.0 / tl.sqrt(L[d]) + term1 = tl.sqrt(eigvals[:, d]) + term2 = tl.tile(Xs[:, d][:, None], m_star) + L[d] + phi *= c * tl.sin(term1 * term2) + return phi + + +class HSGP(Base): + R""" + Hilbert Space Gaussian process approximation. + + The `gp.HSGP` class is an implementation of the Hilbert Space Gaussian process. It is a + reduced rank GP approximation that uses a fixed set of basis vectors whose coefficients are + random functions of a stationary covariance function's power spectral density. It's usage + is largely similar to `gp.Latent`. Like `gp.Latent`, it does not assume a Gaussian noise model + and can be used with any likelihood, or as a component anywhere within a model. Also like + `gp.Latent`, it has `prior` and `conditional` methods. It supports any sum of covariance + functions that implement a `power_spectral_density` method. + + For information on choosing appropriate `m`, `L`, and `c`, refer Ruitort-Mayol et. al. or to + the PyMC examples that use HSGP. + + To with with the HSGP in its "linearized" form, as a matrix of basis vectors and and vector of + coefficients, see the method `prior_linearized`. + + Parameters + ---------- + m: list + The number of basis vectors to use for each active dimension (covariance parameter + `active_dim`). + L: list + The boundary of the space for each `active_dim`. It is called the boundary condition. + Choose L such that the domain `[-L, L]` contains all points in the column of X given by the + `active_dim`. + c: float + The proportion extension factor. Used to construct L from X. Defined as `S = max|X|` such + that `X` is in `[-S, S]`. `L` is the calculated as `c * S`. One of `c` or `L` must be + provided. Further information can be found in Ruitort-Mayol et. al. + drop_first: bool + Default `False`. Sometimes the first basis vector is quite "flat" and very similar to + the intercept term. When there is an intercept in the model, ignoring the first basis + vector may improve sampling. + cov_func: None, 2D array, or instance of Covariance + The covariance function. Defaults to zero. + mean_func: None, instance of Mean + The mean function. Defaults to zero. + + Examples + -------- + .. code:: python + + # A three dimensional column vector of inputs. + X = np.random.rand(100, 3) + + with pm.Model() as model: + # Specify the covariance function. + # Three input dimensions, but we only want to use the last two. + cov_func = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 2]) + + # Specify the HSGP. + # Use 25 basis vectors across each active dimension for a total of 25 * 25 = 625. + # The value `c = 4` means the boundary of the approximation + # lies at four times the half width of the data. + # In this example the data lie between zero and one, + # so the boundaries occur at -1.5 and 2.5. The data, both for + # training and prediction should reside well within that boundary.. + gp = pm.gp.HSGP(m=[25, 25], c=4.0, cov_func=cov_func) + + # Place a GP prior over the function f. + f = gp.prior("f", X=X) + + ... + + # After fitting or sampling, specify the distribution + # at new points with .conditional + Xnew = np.linspace(-1, 2, 50)[:, None] + + with model: + fcond = gp.conditional("fcond", Xnew=Xnew) + + References + ---------- + - Ruitort-Mayol, G., and Anderson, M., and Solin, A., and Vehtari, A. (2022). Practical + Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming + + - Solin, A., Sarkka, S. 
(2019) Hilbert Space Methods for Reduced-Rank Gaussian Process + Regression. + """ + + def __init__( + self, + m: Sequence[int], + L: Optional[Sequence[float]] = None, + c: Optional[numbers.Real] = None, + drop_first: bool = False, + parameterization="noncentered", + *, + mean_func: Mean = Zero(), + cov_func: Covariance, + ): + arg_err_msg = ( + "`m` and L, if provided, must be sequences with one element per active " + "dimension of the kernel or covariance function." + ) + + if not isinstance(m, Sequence): + raise ValueError(arg_err_msg) + + if len(m) != cov_func.n_dims: + raise ValueError(arg_err_msg) + m = tuple(m) + + if (L is None and c is None) or (L is not None and c is not None): + raise ValueError("Provide one of `c` or `L`") + + if L is not None and (not isinstance(L, Sequence) or len(L) != cov_func.n_dims): + raise ValueError(arg_err_msg) + + if L is None and c is not None and c < 1.2: + warnings.warn("For an adequate approximation `c >= 1.2` is recommended.") + + parameterization = parameterization.lower().replace("-", "").replace("_", "") + if parameterization not in ["centered", "noncentered"]: + raise ValueError("`parameterization` must be either 'centered' or 'noncentered'.") + else: + self._parameterization = parameterization + + self._drop_first = drop_first + self._m = m + self._m_star = int(np.prod(self._m)) + self._L = L + self._c = c + + super().__init__(mean_func=mean_func, cov_func=cov_func) + + def __add__(self, other): + raise NotImplementedError("Additive HSGPs aren't supported.") + + @property + def L(self): + if self._L is None: + raise RuntimeError("Boundaries `L` required but still unset.") + return self._L + + @L.setter + def L(self, value): + self._L = pt.as_tensor_variable(value) + + def prior_linearized(self, Xs: TensorLike): + """Linearized version of the HSGP. Returns the Laplace eigenfunctions and the square root + of the power spectral density needed to create the GP. + + This function allows the user to bypass the GP interface and work directly with the basis + and coefficients directly. This format allows the user to create predictions using + `pm.set_data` similarly to a linear model. It also enables computational speed ups in + multi-GP models since they may share the same basis. The return values are the Laplace + eigenfunctions `phi`, and the square root of the power spectral density. + + Correct results when using `prior_linearized` in tandem with `pm.set_data` and + `pm.MutableData` require two conditions. First, one must specify `L` instead of `c` when + the GP is constructed. If not, a RuntimeError is raised. Second, the `Xs` needs to be + zero-centered, so it's mean must be subtracted. An example is given below. + + Parameters + ---------- + Xs: array-like + Function input values. Assumes they have been mean subtracted or centered at zero. + + Returns + ------- + phi: array-like + Either Numpy or PyTensor 2D array of the fixed basis vectors. There are n rows, one + per row of `Xs` and `prod(m)` columns, one for each basis vector. + sqrt_psd: array-like + Either a Numpy or PyTensor 1D array of the square roots of the power spectral + densities. + + Examples + -------- + .. code:: python + + # A one dimensional column vector of inputs. 
+ X = np.linspace(0, 10, 100)[:, None] + + with pm.Model() as model: + eta = pm.Exponential("eta", lam=1.0) + ell = pm.InverseGamma("ell", mu=5.0, sigma=5.0) + cov_func = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell) + + # m = [200] means 200 basis vectors for the first dimenison + # L = [10] means the approximation is valid from Xs = [-10, 10] + gp = pm.gp.HSGP(m=[200], L=[10], cov_func=cov_func) + + # Order is important. First calculate the mean, then make X a shared variable, + # then subtract the mean. When X is mutated later, the correct mean will be + # subtracted. + X_mean = np.mean(X, axis=0) + X = pm.MutableData("X", X) + Xs = X - X_mean + + # Pass the zero-subtracted Xs in to the GP + phi, sqrt_psd = gp.prior_linearized(Xs=Xs) + + # Specify standard normal prior in the coefficients. The number of which + # is given by the number of basis vectors, which is also saved in the GP object + # as m_star. + beta = pm.Normal("beta", size=gp.m_star) + + # The (non-centered) GP approximation is given by + f = pm.Deterministic("f", phi @ (beta * sqrt_psd)) + + ... + + + # Then it works just like a linear regression to predict on new data. + # First mutate the data X, + x_new = np.linspace(-10, 10, 100) + with model: + model.set_data("X", x_new[:, None]) + + # and then make predictions for the GP using posterior predictive sampling. + with model: + ppc = pm.sample_posterior_predictive(idata, var_names=["f"]) + """ + + # Index Xs using input_dim and active_dims of covariance function + Xs, _ = self.cov_func._slice(Xs) + + # If not provided, use Xs and c to set L + if self._L is None: + assert isinstance(self._c, (numbers.Real, np.ndarray, pt.TensorVariable)) + self.L = set_boundary(Xs, self._c) + else: + self.L = self._L + + eigvals = calc_eigenvalues(self.L, self._m, tl=pt) + phi = calc_eigenvectors(Xs, self.L, eigvals, self._m, tl=pt) + omega = pt.sqrt(eigvals) + psd = self.cov_func.power_spectral_density(omega) + + i = int(self._drop_first == True) + return phi[:, i:], pt.sqrt(psd[i:]) + + def prior(self, name: str, X: TensorLike, dims: Optional[str] = None): # type: ignore + R""" + Returns the (approximate) GP prior distribution evaluated over the input locations `X`. + For usage examples, refer to `pm.gp.Latent`. + + Parameters + ---------- + name: str + Name of the random variable + X: array-like + Function input values. + dims: None + Dimension name for the GP random variable. + """ + self._X_mean = pt.mean(X, axis=0) + phi, sqrt_psd = self.prior_linearized(X - self._X_mean) + + if self._parameterization == "noncentered": + self._beta = pm.Normal( + f"{name}_hsgp_coeffs_", size=self._m_star - int(self._drop_first) + ) + self._sqrt_psd = sqrt_psd + f = self.mean_func(X) + phi @ (self._beta * self._sqrt_psd) + + elif self._parameterization == "centered": + self._beta = pm.Normal(f"{name}_hsgp_coeffs_", sigma=sqrt_psd) + f = self.mean_func(X) + phi @ self._beta + + self.f = pm.Deterministic(name, f, dims=dims) + return self.f + + def _build_conditional(self, Xnew): + try: + beta, X_mean = self._beta, self._X_mean + + if self._parameterization == "noncentered": + sqrt_psd = self._sqrt_psd + + except AttributeError: + raise ValueError( + "Prior is not set, can't create a conditional. Call `.prior(name, X)` first." 
+ ) + + Xnew, _ = self.cov_func._slice(Xnew) + eigvals = calc_eigenvalues(self.L, self._m, tl=pt) + phi = calc_eigenvectors(Xnew - X_mean, self.L, eigvals, self._m, tl=pt) + i = int(self._drop_first == True) + + if self._parameterization == "noncentered": + return self.mean_func(Xnew) + phi[:, i:] @ (beta * sqrt_psd) + + elif self._parameterization == "centered": + return self.mean_func(Xnew) + phi[:, i:] @ beta + + def conditional(self, name: str, Xnew: TensorLike, dims: Optional[str] = None): # type: ignore + R""" + Returns the (approximate) conditional distribution evaluated over new input locations + `Xnew`. + + Parameters + ---------- + name + Name of the random variable + Xnew : array-like + Function input values. + dims: None + Dimension name for the GP random variable. + """ + fnew = self._build_conditional(Xnew) + return pm.Deterministic(name, fnew, dims=dims) diff --git a/tests/gp/test_cov.py b/tests/gp/test_cov.py index 148d3f0e7b..e671ba8fc3 100644 --- a/tests/gp/test_cov.py +++ b/tests/gp/test_cov.py @@ -181,6 +181,68 @@ def test_inv_rightprod(self): cov = M + pm.gp.cov.ExpQuad(1, 1.0) +class TestCovPSD: + def test_covpsd_add(self): + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov1 = 2 * pm.gp.cov.ExpQuad(1, 0.1) + cov2 = 5 * pm.gp.cov.ExpQuad(1, 1.0) + cov = cov1 + cov2 + psd1 = cov1.power_spectral_density(omega[:, None]).eval() + psd2 = cov2.power_spectral_density(omega[:, None]).eval() + psd = cov.power_spectral_density(omega[:, None]).eval() + npt.assert_allclose(psd, psd1 + psd2) + + def test_copsd_multiply(self): + # This could be implemented via convolution + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov1 = 2 * pm.gp.cov.ExpQuad(1, ls=1) + cov2 = pm.gp.cov.ExpQuad(1, ls=1) + + msg = "The power spectral density of products of covariance functions is not implemented" + with pytest.raises(NotImplementedError, match=msg): + psd = (cov1 * cov2).power_spectral_density(omega[:, None]).eval() + + def test_covpsd_nonstationary1(self): + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov = 2 * pm.gp.cov.Linear(1, c=5) + + msg = "can only be calculated for `Stationary` covariance functions." + with pytest.raises(ValueError, match=msg): + psd = cov.power_spectral_density(omega[:, None]).eval() + + def test_covpsd_nonstationary2(self): + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov = 2 * pm.gp.cov.ExpQuad(1, ls=1) + 10.0 + + # Even though this should error, this isnt the appropriate message. The actual problem + # is because the covariance function is non-stationary. The underlying bug is due to + # `Constant` covariances not having an input_dim. + msg = "All covariances must have the same `input_dim`." 
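The linearized form that `HSGP` builds on can be sketched directly. A minimal example, assuming the module above is applied; the shapes follow from `m` and the number of rows in `Xs`, and the numbers below (`m=[30]`, `L=[6.0]`) are illustrative:

    import numpy as np
    import pymc as pm

    X = np.linspace(0, 10, 100)[:, None]
    Xs = X - X.mean(axis=0)  # prior_linearized expects mean-subtracted inputs

    cov_func = pm.gp.cov.Matern52(1, ls=1.0)
    gp = pm.gp.HSGP(m=[30], L=[6.0], cov_func=cov_func)  # [-6, 6] covers Xs in [-5, 5]

    phi, sqrt_psd = gp.prior_linearized(Xs=Xs)
    print(phi.eval().shape)       # (100, 30): one column per basis vector
    print(sqrt_psd.eval().shape)  # (30,): one prior scale per coefficient

The GP itself is then recovered as the linear combination `phi @ (beta * sqrt_psd)` with standard normal `beta`, which is exactly what `prior` constructs in the non-centered parameterization.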
diff --git a/tests/gp/test_cov.py b/tests/gp/test_cov.py
index 148d3f0e7b..e671ba8fc3 100644
--- a/tests/gp/test_cov.py
+++ b/tests/gp/test_cov.py
@@ -181,6 +181,68 @@ def test_inv_rightprod(self):
             cov = M + pm.gp.cov.ExpQuad(1, 1.0)
 
 
+class TestCovPSD:
+    def test_covpsd_add(self):
+        L = 10.0
+        omega = np.pi * np.arange(1, 101) / (2 * L)
+        with pm.Model() as model:
+            cov1 = 2 * pm.gp.cov.ExpQuad(1, 0.1)
+            cov2 = 5 * pm.gp.cov.ExpQuad(1, 1.0)
+            cov = cov1 + cov2
+        psd1 = cov1.power_spectral_density(omega[:, None]).eval()
+        psd2 = cov2.power_spectral_density(omega[:, None]).eval()
+        psd = cov.power_spectral_density(omega[:, None]).eval()
+        npt.assert_allclose(psd, psd1 + psd2)
+
+    def test_covpsd_multiply(self):
+        # This could be implemented via convolution
+        L = 10.0
+        omega = np.pi * np.arange(1, 101) / (2 * L)
+        with pm.Model() as model:
+            cov1 = 2 * pm.gp.cov.ExpQuad(1, ls=1)
+            cov2 = pm.gp.cov.ExpQuad(1, ls=1)
+
+        msg = "The power spectral density of products of covariance functions is not implemented"
+        with pytest.raises(NotImplementedError, match=msg):
+            psd = (cov1 * cov2).power_spectral_density(omega[:, None]).eval()
+
+    def test_covpsd_nonstationary1(self):
+        L = 10.0
+        omega = np.pi * np.arange(1, 101) / (2 * L)
+        with pm.Model() as model:
+            cov = 2 * pm.gp.cov.Linear(1, c=5)
+
+        msg = "can only be calculated for `Stationary` covariance functions."
+        with pytest.raises(ValueError, match=msg):
+            psd = cov.power_spectral_density(omega[:, None]).eval()
+
+    def test_covpsd_nonstationary2(self):
+        L = 10.0
+        omega = np.pi * np.arange(1, 101) / (2 * L)
+        with pm.Model() as model:
+            cov = 2 * pm.gp.cov.ExpQuad(1, ls=1) + 10.0
+
+        # Even though this should error, this isn't the appropriate message.  The actual
+        # problem is that the covariance function is non-stationary.  The underlying bug
+        # is due to `Constant` covariances not having an input_dim.
+        msg = "All covariances must have the same `input_dim`."
+        with pytest.raises(ValueError, match=msg):
+            psd = cov.power_spectral_density(omega[:, None]).eval()
+
+    def test_covpsd_notimplemented(self):
+        class NewStationaryCov(pm.gp.cov.Stationary):
+            pass
+
+        L = 10.0
+        omega = np.pi * np.arange(1, 101) / (2 * L)
+        with pm.Model() as model:
+            cov = 2 * NewStationaryCov(1, ls=1)
+
+        msg = "No power spectral density method has been implemented"
+        with pytest.raises(NotImplementedError, match=msg):
+            psd = cov.power_spectral_density(omega[:, None]).eval()
+
+
 class TestCovExponentiation:
     def test_symexp_cov(self):
         X = np.linspace(0, 1, 10)[:, None]
@@ -228,7 +290,9 @@ def test_covexp_shared(self):
 
     def test_invalid_covexp(self):
         X = np.linspace(0, 1, 10)[:, None]
-        with pytest.raises(ValueError, match=r"can only be exponentiated by a scalar value"):
+        with pytest.raises(
+            ValueError, match=r"A covariance function can only be exponentiated by a scalar value"
+        ):
             with pm.Model() as model:
                 a = np.array([[1.0, 2.0]])
                 cov = pm.gp.cov.ExpQuad(1, 0.1) ** a
@@ -262,7 +326,7 @@ def test_multiops(self):
             + pm.gp.cov.ExpQuad(1, 0.1)
             + pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(1, 0.1)
         )
-        cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1)
+        cov2 = pm.gp.cov.ExpQuad(2, 0.1) * pm.gp.cov.ExpQuad(2, 0.1)
         cov = pm.gp.cov.Kron([cov1, cov2])
         K_true = kronecker(cov1(X1).eval(), cov2(X2).eval()).eval()
         K = cov(X).eval()
@@ -373,6 +437,17 @@ def test_inv_lengthscale(self):
         Kd = cov(X, diag=True).eval()
         npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
 
+    def test_psd(self):
+        # compare to simple 1d formula
+        X = np.linspace(0, 1, 10)[:, None]
+        omega = np.linspace(0, 2, 50)
+        ell = 2.0
+        true_1d_psd = np.sqrt(2 * np.pi * np.square(ell)) * np.exp(-0.5 * np.square(ell * omega))
+        test_1d_psd = (
+            pm.gp.cov.ExpQuad(1, ls=ell).power_spectral_density(omega[:, None]).flatten().eval()
+        )
+        npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5)
+
 
 class TestWhiteNoise:
     def test_1d(self):
@@ -449,6 +524,18 @@ def test_1d(self):
         Kd = cov(X, diag=True).eval()
         npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
 
+    def test_psd(self):
+        # compare to simple 1d formula
+        X = np.linspace(0, 1, 10)[:, None]
+        omega = np.linspace(0, 2, 50)
+        ell = 2.0
+        lamda = np.sqrt(5) / ell
+        true_1d_psd = (16.0 / 3.0) * np.power(lamda, 5) * np.power(lamda**2 + omega**2, -3)
+        test_1d_psd = (
+            pm.gp.cov.Matern52(1, ls=ell).power_spectral_density(omega[:, None]).flatten().eval()
+        )
+        npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5)
+
 
 class TestMatern32:
     def test_1d(self):
@@ -463,6 +550,18 @@ def test_1d(self):
         Kd = cov(X, diag=True).eval()
         npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
 
+    def test_psd(self):
+        # compare to simple 1d formula
+        X = np.linspace(0, 1, 10)[:, None]
+        omega = np.linspace(0, 2, 50)
+        ell = 2.0
+        lamda = np.sqrt(3) / ell
+        true_1d_psd = 4 * np.power(lamda, 3) * np.power(lamda**2 + omega**2, -2)
+        test_1d_psd = (
+            pm.gp.cov.Matern32(1, ls=ell).power_spectral_density(omega[:, None]).flatten().eval()
+        )
+        npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5)
+
 
 class TestMatern12:
     def test_1d(self):
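The closed forms used in the `test_psd` methods above are the D = 1 reductions of the general spectral densities added in `cov.py`; for ExpQuad, setting D = 1 gives S(ω) = √(2π) ℓ exp(−½ ℓ² ω²). A quick standalone check of that reduction (a sketch, assuming the diff is applied):

    import numpy as np
    import pymc as pm

    ell = 2.0
    omega = np.linspace(0, 2, 50)

    # D = 1 reduction of the ExpQuad PSD: sqrt(2*pi) * ell * exp(-0.5 * ell**2 * omega**2)
    closed_form = np.sqrt(2 * np.pi) * ell * np.exp(-0.5 * (ell * omega) ** 2)
    psd = pm.gp.cov.ExpQuad(1, ls=ell).power_spectral_density(omega[:, None]).eval().flatten()
    print(np.allclose(closed_form, psd, atol=1e-5))  # True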
diff --git a/tests/gp/test_hsgp_approx.py b/tests/gp/test_hsgp_approx.py
new file mode 100644
index 0000000000..b6f03a4acc
--- /dev/null
+++ b/tests/gp/test_hsgp_approx.py
@@ -0,0 +1,210 @@
+#   Copyright 2023 The PyMC Developers
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+import arviz as az
+import numpy as np
+import pytensor
+import pytensor.tensor as pt
+import pytest
+import scipy as sp
+
+from scipy.spatial import distance
+
+import pymc as pm
+
+
+def build_mmd_func(sample1, sample2):
+    """Build a PyTensor function that calculates the maximum mean discrepancy (MMD) statistic."""
+
+    assert sample1.shape[1] == sample2.shape[1]
+
+    s1 = pt.matrix(name="s1", shape=sample1.shape)
+    s2 = pt.matrix(name="s2", shape=sample2.shape)
+
+    X = np.concatenate((sample1, sample2), axis=0)
+    test_ell = np.median(distance.pdist(X)) / 2
+
+    K = pm.gp.cov.ExpQuad(sample1.shape[1], ls=test_ell)
+    Kxx = K(s1)
+    Kyy = K(s2)
+    Kxy = K(s1, s2)
+
+    n_x, n_y = s1.shape[0], s2.shape[0]
+    mmd = (
+        (pt.sum(Kxx) / (n_x * (n_x - 1)))
+        + (pt.sum(Kyy) / (n_y * (n_y - 1)))
+        - 2 * pt.sum(Kxy) / (n_x * n_y)
+    )
+
+    calc_mmd = pytensor.function(inputs=[s1, s2], outputs=mmd)
+    return calc_mmd
+
+
+def two_sample_test(sample1, sample2, n_sims=1000, alpha=0.05):
+    """Calculate a test whose null hypothesis is that two sets of samples were drawn from
+    the same distribution.
+
+    Largely taken from https://torchdrift.org/notebooks/note_on_mmd.html
+    """
+    # build function to calculate mmd
+    calc_mmd = build_mmd_func(sample1, sample2)
+
+    # simulate test statistic under null hypothesis
+    X = np.concatenate((sample1, sample2), axis=0)
+    half_N = int(X.shape[0] // 2)
+    ix = np.arange(half_N * 2)
+
+    h0 = []
+    for i in range(n_sims):
+        np.random.shuffle(ix)
+        X = X[ix, :]
+        h0.append(calc_mmd(X[:half_N, :], X[half_N:, :]))
+
+    h0 = np.asarray(h0)
+    critical_value = np.percentile(h0, 100 * (1 - alpha))
+    mmd = calc_mmd(sample1, sample2)
+    return h0, mmd, critical_value, mmd > critical_value
+
+
+class TestHSGP:
+    @pytest.fixture
+    def rng(self):
+        return np.random.RandomState(10)
+
+    @pytest.fixture
+    def data(self, rng):
+        # 1D dataset
+        X1 = np.linspace(-5, 5, 100)[:, None]
+
+        # 3D dataset
+        x1, x2, x3 = np.meshgrid(
+            np.linspace(0, 10, 5), np.linspace(20, 30, 5), np.linspace(10, 20, 5)
+        )
+        X2 = np.vstack([x1.flatten(), x2.flatten(), x3.flatten()]).T
+        return X1, X2
+
+    @pytest.fixture
+    def X1(self, data):
+        return data[0]
+
+    @pytest.fixture
+    def X2(self, data):
+        return data[1]
+
+    @pytest.fixture
+    def model(self):
+        return pm.Model()
+
+    @pytest.fixture
+    def cov_func(self):
+        return pm.gp.cov.ExpQuad(1, ls=1)
+
+    @pytest.fixture
+    def gp(self, cov_func):
+        gp = pm.gp.Latent(cov_func=cov_func)
+        return gp
+
+    def test_set_boundaries_1d(self, X1):
+        X1s = X1 - np.mean(X1, axis=0)
+        L = pm.gp.hsgp_approx.set_boundary(X1s, c=2).eval()
+        assert np.all(L == 10)
+
+    def test_set_boundaries_3d(self, X2):
+        X2s = X2 - np.mean(X2, axis=0)
+        L = pm.gp.hsgp_approx.set_boundary(X2s, c=2).eval()
+        assert np.all(L == 10)
+
+    def test_parametrization(self):
+        err_msg = "`m` and L, if provided, must be sequences with one element per active dimension"
+
+        with pytest.raises(ValueError, match=err_msg):
+            # m must be a list
+            cov_func = pm.gp.cov.ExpQuad(1, ls=0.1)
+            pm.gp.HSGP(m=500, c=2, cov_func=cov_func)
+
+        with pytest.raises(ValueError, match=err_msg):
+            # m must have same length as L
+            cov_func = pm.gp.cov.ExpQuad(2, ls=[1, 2])
+            pm.gp.HSGP(m=[500], L=[12, 12], cov_func=cov_func)
+
+        with pytest.raises(ValueError, match=err_msg):
+            # m must have same length as L, and match number of active dims of cov_func
+            cov_func = pm.gp.cov.ExpQuad(1, ls=0.1)
+            pm.gp.HSGP(m=[500], L=[12, 12], cov_func=cov_func)
+
+        # pass without error, cov_func has 2 active dimensions, c given as scalar
+        cov_func = pm.gp.cov.ExpQuad(3, ls=[1, 2], active_dims=[0, 2])
+        pm.gp.HSGP(m=[50, 50], c=2, cov_func=cov_func)
+
+        # pass without error, all have two dimensions
+        cov_func = pm.gp.cov.ExpQuad(2, ls=[1, 2])
+        pm.gp.HSGP(m=[50, 50], L=[12, 12], cov_func=cov_func)
+
+    @pytest.mark.parametrize("drop_first", [True, False])
+    def test_parametrization_drop_first(self, model, cov_func, X1, drop_first):
+        n_basis = 100
+        with model:
+            gp = pm.gp.HSGP(m=[n_basis], c=4.0, cov_func=cov_func, drop_first=drop_first)
+            gp.prior("f1", X1)
+
+            n_coeffs = model.f1_hsgp_coeffs_.type.shape[0]
+            if drop_first:
+                assert (
+                    n_coeffs == n_basis - 1
+                ), f"one basis vector should have been dropped, {n_coeffs}"
+            else:
+                assert n_coeffs == n_basis, "one was dropped when it shouldn't have been"
+
+    @pytest.mark.parametrize("parameterization", ["centered", "noncentered"])
+    def test_prior(self, model, cov_func, X1, parameterization):
+        """Compare HSGP prior to unapproximated GP prior, pm.gp.Latent.  Draw samples from
+        the prior and compare them using an MMD two sample test.  Tests both centered and
+        non-centered parameterizations.
+        """
+        with model:
+            hsgp = pm.gp.HSGP(m=[200], c=2.0, parameterization=parameterization, cov_func=cov_func)
+            f1 = hsgp.prior("f1", X=X1)
+
+            gp = pm.gp.Latent(cov_func=cov_func)
+            f2 = gp.prior("f2", X=X1)
+
+            idata = pm.sample_prior_predictive(samples=1000)
+
+        samples1 = az.extract(idata.prior["f1"])["f1"].values.T
+        samples2 = az.extract(idata.prior["f2"])["f2"].values.T
+
+        h0, mmd, critical_value, reject = two_sample_test(
+            samples1, samples2, n_sims=500, alpha=0.01
+        )
+        assert not reject, "H0 was rejected, even though HSGP and GP priors should match."
+
+    @pytest.mark.parametrize("parameterization", ["centered", "noncentered"])
+    def test_conditional(self, model, cov_func, X1, parameterization):
+        """Compare HSGP conditional to unapproximated GP prior, pm.gp.Latent.  Draw samples
+        from the prior and compare them using an MMD two sample test.  Tests both centered
+        and non-centered parameterizations.  The conditional should match the prior when no
+        data is observed.
+        """
+        with model:
+            hsgp = pm.gp.HSGP(m=[100], c=2.0, parameterization=parameterization, cov_func=cov_func)
+            f = hsgp.prior("f", X=X1)
+            fc = hsgp.conditional("fc", Xnew=X1)
+
+            idata = pm.sample_prior_predictive(samples=1000)
+
+        samples1 = az.extract(idata.prior["f"])["f"].values.T
+        samples2 = az.extract(idata.prior["fc"])["fc"].values.T
+
+        h0, mmd, critical_value, reject = two_sample_test(
+            samples1, samples2, n_sims=500, alpha=0.01
+        )
+        assert not reject, "H0 was rejected, even though HSGP prior and conditional should match."
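With `two_sample_test` as defined in the test module above, the MMD machinery can also be exercised on its own. A hedged sketch: two samples drawn from the same distribution should not reject H0, up to the chosen alpha level (`n_sims` is kept small here for speed):

    import numpy as np

    rng = np.random.default_rng(1)
    sample1 = rng.normal(size=(100, 2))
    sample2 = rng.normal(size=(100, 2))

    # Same distribution, so the observed MMD statistic should fall below the
    # critical value simulated under the null via permutation.
    h0, mmd, critical_value, reject = two_sample_test(sample1, sample2, n_sims=200, alpha=0.05)
    print(float(mmd), float(critical_value), bool(reject))  # reject is False ~95% of the time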