diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 44ce951bf0..6064e80af8 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -76,6 +76,7 @@ jobs:
           - |
             tests/distributions/test_timeseries.py
             tests/gp/test_cov.py
+            tests/gp/test_hsgp_approx.py
             tests/gp/test_gp.py
             tests/gp/test_mean.py
             tests/gp/test_util.py
diff --git a/docs/source/api/gp/implementations.rst b/docs/source/api/gp/implementations.rst
index 3b9f972845..03b59bbf9c 100644
--- a/docs/source/api/gp/implementations.rst
+++ b/docs/source/api/gp/implementations.rst
@@ -6,6 +6,7 @@ Implementations
 .. autosummary::
    :toctree: generated
 
+   HSGP
    Latent
    LatentKron
    Marginal
diff --git a/pymc/gp/__init__.py b/pymc/gp/__init__.py
index 25fbf7dcf1..99c8023398 100644
--- a/pymc/gp/__init__.py
+++ b/pymc/gp/__init__.py
@@ -22,3 +22,4 @@
     MarginalKron,
     MarginalSparse,
 )
+from pymc.gp.hsgp_approx import HSGP
diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index 1331a9e349..134c3f207e 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -12,14 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import numbers
 import warnings
 
+from collections import Counter
 from functools import reduce
-from numbers import Number
 from operator import add, mul
+from typing import Optional, Sequence
 
 import numpy as np
-import pytensor
 import pytensor.tensor as pt
 
 from pytensor.graph.basic import Variable
@@ -47,26 +48,10 @@
 ]
 
 
-class Covariance:
-    r"""
-    Base class for all kernels/covariance functions.
-
-    Parameters
-    ----------
-    input_dim: integer
-        The number of input dimensions, or columns of X (or Xs)
-        the kernel will operate on.
-    active_dims: List of integers
-        Indicate which dimension or column of X the covariance
-        function operates on.
+class BaseCovariance:
+    """
+    Base class for kernels/covariance functions.
     """
-
-    def __init__(self, input_dim, active_dims=None):
-        self.input_dim = input_dim
-        if active_dims is None:
-            self.active_dims = np.arange(input_dim)
-        else:
-            self.active_dims = np.asarray(active_dims, int)
 
     def __call__(self, X, Xs=None, diag=False):
         r"""
@@ -89,27 +74,14 @@ def __call__(self, X, Xs=None, diag=False):
     def diag(self, X):
         raise NotImplementedError
 
-    def full(self, X, Xs):
+    def full(self, X, Xs=None):
        raise NotImplementedError
 
-    def _slice(self, X, Xs):
-        xdims = X.shape[-1]
-        if isinstance(xdims, Variable):
-            xdims = xdims.eval()
-        if self.input_dim != xdims:
-            warnings.warn(
-                f"Only {self.input_dim} column(s) out of {xdims} are"
-                " being used to compute the covariance function. If this"
-                " is not intended, increase 'input_dim' parameter to"
-                " the number of columns to use. Ignore otherwise.",
-                UserWarning,
-            )
-        X = pt.as_tensor_variable(X[:, self.active_dims])
-        if Xs is not None:
-            Xs = pt.as_tensor_variable(Xs[:, self.active_dims])
-        return X, Xs
-
     def __add__(self, other):
+        # If it's a scalar, cast it as a Constant covariance.  This allows validation
+        # for power spectral density calculations.
+        if isinstance(other, numbers.Real):
+            other = Constant(c=other)
         return Add([self, other])
 
     def __mul__(self, other):
@@ -122,19 +94,10 @@ def __rmul__(self, other):
         return self.__mul__(other)
 
     def __pow__(self, other):
-        if (
-            isinstance(other, pytensor.compile.SharedVariable)
-            and other.get_value().squeeze().shape == ()
-        ):
-            other = pt.squeeze(other)
-            return Exponentiated(self, other)
-        elif isinstance(other, Number):
-            return Exponentiated(self, other)
-        elif np.asarray(other).squeeze().shape == ():
-            other = np.squeeze(other)
-            return Exponentiated(self, other)
-
-        raise ValueError("A covariance function can only be exponentiated by a scalar value")
+        other = pt.as_tensor_variable(other).squeeze()
+        if not other.ndim == 0:
+            raise ValueError("A covariance function can only be exponentiated by a scalar value")
+        return Exponentiated(self, other)
 
     def __array_wrap__(self, result):
         """
@@ -151,41 +114,126 @@ def __array_wrap__(self, result):
         A = np.zeros((r, c))
         for i in range(r):
             for j in range(c):
-                A[i, j] = result[i, j].factor_list[1]
+                r = result[i, j]._factor_list[1]
+                if isinstance(r, Constant):
+                    # Counteract the elemwise Add edgecase
+                    r = r.c
+                A[i, j] = r
         if isinstance(result[0][0], Add):
-            return result[0][0].factor_list[0] + A
+            return result[0][0]._factor_list[0] + A
         elif isinstance(result[0][0], Prod):
-            return result[0][0].factor_list[0] * A
+            return result[0][0]._factor_list[0] * A
         else:
             raise TypeError(
-                f"Unknown Covariance combination type {result[0][0]}. Known types are `Add` or `Prod`."
+                f"Unknown Covariance combination type {result[0][0]}. "
+                "Known types are `Add` or `Prod`."
             )
+
+
+class Covariance(BaseCovariance):
+    """
+    Base class for kernels/covariance functions with input_dim and active_dims, which excludes
+    kernels like `Constant` and `WhiteNoise`.
+
+    Parameters
+    ----------
+    input_dim: integer
+        The number of input dimensions, or columns of X (or Xs)
+        the kernel will operate on.
+    active_dims: List of integers
+        Indicate which dimension or column of X the covariance
+        function operates on.
+    """
+
+    def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]] = None):
+        self.input_dim = input_dim
+        if active_dims is None:
+            self.active_dims = np.arange(input_dim)
+        else:
+            self.active_dims = np.asarray(active_dims, int)
+
+        if max(self.active_dims) > self.input_dim:
+            raise ValueError("Values in `active_dims` can't be larger than `input_dim`.")
+
+    @property
+    def n_dims(self):
+        """The dimensionality of the input, as taken from the
+        `active_dims`.
+        """
+        # Evaluate lazily in case this changes.
+        return len(self.active_dims)
+
+    def _slice(self, X, Xs=None):
+        xdims = X.shape[-1]
+        if isinstance(xdims, Variable):
+            xdims = xdims.eval()
+        if self.input_dim != xdims:
+            warnings.warn(
+                f"Only {self.input_dim} column(s) out of {xdims} are"
+                " being used to compute the covariance function. If this"
+                " is not intended, increase 'input_dim' parameter to"
+                " the number of columns to use. Ignore otherwise.",
+                UserWarning,
+            )
+        X = pt.as_tensor_variable(X[:, self.active_dims])
+        if Xs is not None:
+            Xs = pt.as_tensor_variable(Xs[:, self.active_dims])
+        return X, Xs
 
 
 class Combination(Covariance):
     def __init__(self, factor_list):
-        input_dim = max(
-            factor.input_dim for factor in factor_list if isinstance(factor, Covariance)
+        """Use constituent factors to get input_dim and active_dims for the Combination covariance."""
+
+        # Check that all covariances in factor_list have the same input_dim
+        input_dims = {factor.input_dim for factor in factor_list if isinstance(factor, Covariance)}
+
+        if len(input_dims) != 1:
+            raise ValueError("All covariances must have the same `input_dim`.")
+        input_dim = input_dims.pop()
+
+        # Union all active_dims sets in factor_list for the combination covariance
+        active_dims = np.sort(
+            np.asarray(
+                list(
+                    set.union(
+                        *[
+                            set(factor.active_dims)
+                            for factor in factor_list
+                            if isinstance(factor, Covariance)
+                        ]
+                    )
+                ),
+                dtype=int,
+            )
         )
-        super().__init__(input_dim=input_dim)
-        self.factor_list = []
+
+        super().__init__(input_dim=input_dim, active_dims=active_dims)
+
+        # Set up the combination kernel, flattening out factor_list so that nested
+        # combinations of the same type collapse into a single list of factors.
+        self._factor_list = []
         for factor in factor_list:
             if isinstance(factor, self.__class__):
-                self.factor_list.extend(factor.factor_list)
+                self._factor_list.extend(factor._factor_list)
             else:
-                self.factor_list.append(factor)
+                self._factor_list.append(factor)
 
-    def merge_factors(self, X, Xs=None, diag=False):
+    def _merge_factors_cov(self, X, Xs=None, diag=False):
+        """Called to evaluate either all the sums or all the
+        products of kernels that are possible to evaluate.
+        """
         factor_list = []
-        for factor in self.factor_list:
+        for factor in self._factor_list:
             # make sure diag=True is handled properly
-            if isinstance(factor, Covariance):
+            if isinstance(factor, BaseCovariance):
                 factor_list.append(factor(X, Xs, diag))
+
             elif isinstance(factor, np.ndarray):
                 if np.ndim(factor) == 2 and diag:
                     factor_list.append(np.diag(factor))
                 else:
                     factor_list.append(factor)
+
             elif isinstance(
                 factor,
                 (
@@ -198,19 +246,75 @@ def merge_factors(self, X, Xs=None, diag=False):
                     factor_list.append(pt.diag(factor))
                 else:
                     factor_list.append(factor)
+
+            else:
+                factor_list.append(factor)
+
+        return factor_list
+
+    def _merge_factors_psd(self, omega):
+        """Called to evaluate the spectral densities of combination kernels when possible.
+
+        Implements a more restricted set of rules than `_merge_factors_cov` -- just additivity
+        of stationary covariances with defined power spectral densities and multiplication by
+        scalars.  Also, the active_dims for all covariances in the sum must be the same.
+        """
+        factor_list = []
+        for factor in self._factor_list:
+            if isinstance(factor, Covariance):
+                # Allow merging covariances for psd only if active_dims are the same
+                if set(self.active_dims) != set(factor.active_dims):
+                    raise ValueError(
+                        "For power spectral density calculations `active_dims` must be the same "
+                        "for all covariances in the sum."
+                    )
+
+                # If it's a covariance, try to calculate the psd
+                try:
+                    factor_list.append(factor.power_spectral_density(omega))
+
+                except (AttributeError, NotImplementedError) as e:
+                    if isinstance(factor, Stationary):
+                        raise NotImplementedError(
+                            f"No power spectral density method has been implemented for {factor}."
+                        ) from e
+
+                    else:
+                        raise ValueError(
+                            "Power spectral densities, `.power_spectral_density(omega)`, can only "
+                            f"be calculated for `Stationary` covariance functions.  {factor} is "
+                            "non-stationary."
+                        ) from e
+
+            else:
+                # Otherwise, defer the reduction to later
+                factor_list.append(factor)
+
+        return factor_list
 
 
 class Add(Combination):
     def __call__(self, X, Xs=None, diag=False):
-        return reduce(add, self.merge_factors(X, Xs, diag))
+        return reduce(add, self._merge_factors_cov(X, Xs, diag))
+
+    def power_spectral_density(self, omega):
+        return reduce(add, self._merge_factors_psd(omega))
 
 
 class Prod(Combination):
     def __call__(self, X, Xs=None, diag=False):
-        return reduce(mul, self.merge_factors(X, Xs, diag))
+        return reduce(mul, self._merge_factors_cov(X, Xs, diag))
+
+    def power_spectral_density(self, omega):
+        check = Counter([isinstance(factor, Covariance) for factor in self._factor_list])
+        if check.get(True) >= 2:
+            raise NotImplementedError(
+                "The power spectral density of products of covariance "
+                "functions is not implemented."
+            )
+
+        return reduce(mul, self._merge_factors_psd(omega))
 
 
 class Exponentiated(Covariance):
@@ -243,7 +347,7 @@ def __init__(self, factor_list):
         self.input_dims = [factor.input_dim for factor in factor_list]
         input_dim = sum(self.input_dims)
         super().__init__(input_dim=input_dim)
-        self.factor_list = factor_list
+        self._factor_list = factor_list
 
     def _split(self, X, Xs):
         indices = np.cumsum(self.input_dims)
@@ -256,11 +360,11 @@ def _split(self, X, Xs):
 
     def __call__(self, X, Xs=None, diag=False):
         X_split, Xs_split = self._split(X, Xs)
-        covs = [cov(x, xs, diag) for cov, x, xs in zip(self.factor_list, X_split, Xs_split)]
+        covs = [cov(x, xs, diag) for cov, x, xs in zip(self._factor_list, X_split, Xs_split)]
         return reduce(mul, covs)
 
 
-class Constant(Covariance):
+class Constant(BaseCovariance):
     r"""
     Constant valued covariance function.
 
@@ -270,7 +374,6 @@ class Constant(Covariance):
     """
 
     def __init__(self, c):
-        super().__init__(1, None)
         self.c = c
 
     def diag(self, X):
@@ -293,7 +396,6 @@ class WhiteNoise(Covariance):
     """
 
     def __init__(self, sigma):
-        super().__init__(1, None)
         self.sigma = sigma
 
     def diag(self, X):
@@ -408,6 +510,9 @@ def diag(self, X):
     def full(self, X, Xs=None):
         raise NotImplementedError
 
+    def power_spectral_density(self, omega):
+        raise NotImplementedError
+
 
 class Periodic(Stationary):
     r"""
@@ -451,12 +556,28 @@ class ExpQuad(Stationary):
     .. math::
 
        k(x, x') = \mathrm{exp}\left[ -\frac{(x - x')^2}{2 \ell^2} \right]
+
     """
 
     def full(self, X, Xs=None):
         X, Xs = self._slice(X, Xs)
         return pt.exp(-0.5 * self.square_dist(X, Xs))
 
+    def power_spectral_density(self, omega):
+        r"""
+        The power spectral density for the ExpQuad kernel is:
+
+        .. math::
+
+           S(\boldsymbol\omega) =
+               \left( \sqrt{2 \pi} \right)^D \prod_{i}^{D} \ell_i
+               \exp\left( -\frac{1}{2} \sum_{i}^{D} \ell_i^2 \omega_i^{2} \right)
+        """
+        ls = pt.ones(self.n_dims) * self.ls
+        c = pt.power(pt.sqrt(2.0 * np.pi), self.n_dims)
+        exp = pt.exp(-0.5 * pt.dot(pt.square(omega), pt.square(ls)))
+        return c * pt.prod(ls) * exp
+
 
 class RatQuad(Stationary):
     r"""
@@ -495,6 +616,30 @@ def full(self, X, Xs=None):
         r = self.euclidean_dist(X, Xs)
         return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * pt.square(r)) * pt.exp(-1.0 * np.sqrt(5.0) * r)
 
+    def power_spectral_density(self, omega):
+        r"""
+        The power spectral density for the Matern52 kernel is:
+
+        .. math::
+
+           S(\boldsymbol\omega) =
+               \frac{2^D \pi^{\frac{D}{2}} \Gamma\left(\frac{D+5}{2}\right) 5^{5/2}}
+                    {\frac{3}{4}\sqrt{\pi}}
+               \prod_{i=1}^{D}\ell_{i}
+               \left(5 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+5}{2}}
+        """
+        ls = pt.ones(self.n_dims) * self.ls
+        D52 = (self.n_dims + 5) / 2
+        num = (
+            pt.power(2, self.n_dims)
+            * pt.power(np.pi, self.n_dims / 2)
+            * pt.gamma(D52)
+            * pt.power(5, 5 / 2)
+        )
+        den = 0.75 * pt.sqrt(np.pi)
+        pow = pt.power(5.0 + pt.dot(pt.square(omega), pt.square(ls)), -1 * D52)
+        return (num / den) * pt.prod(ls) * pow
+
 
 class Matern32(Stationary):
     r"""
@@ -511,6 +656,30 @@ def full(self, X, Xs=None):
         r = self.euclidean_dist(X, Xs)
         return (1.0 + np.sqrt(3.0) * r) * pt.exp(-np.sqrt(3.0) * r)
 
+    def power_spectral_density(self, omega):
+        r"""
+        The power spectral density for the Matern32 kernel is:
+
+        .. math::
+
+           S(\boldsymbol\omega) =
+               \frac{2^D \pi^{D/2} \Gamma\left(\frac{D+3}{2}\right) 3^{3/2}}
+                    {\frac{1}{2}\sqrt{\pi}}
+               \prod_{i=1}^{D}\ell_{i}
+               \left(3 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+3}{2}}
+        """
+        ls = pt.ones(self.n_dims) * self.ls
+        D32 = (self.n_dims + 3) / 2
+        num = (
+            pt.power(2, self.n_dims)
+            * pt.power(np.pi, self.n_dims / 2)
+            * pt.gamma(D32)
+            * pt.power(3, 3 / 2)
+        )
+        den = 0.5 * pt.sqrt(np.pi)
+        pow = pt.power(3.0 + pt.dot(pt.square(omega), pt.square(ls)), -1 * D32)
+        return (num / den) * pt.prod(ls) * pow
+
 
 class Matern12(Stationary):
     r"""
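For orientation, a minimal sketch (not part of the diff) of what the `cov.py` changes enable once applied: scalar multiples and sums of stationary kernels keep a well-defined, composable power spectral density. The grid of frequencies below is illustrative.

    import numpy as np
    import pymc as pm

    # Angular frequencies at which to evaluate the density; shape (n, 1) for a 1-D kernel.
    omega = np.linspace(0.1, 2.0, 50)[:, None]

    # Add.power_spectral_density sums the per-factor densities; the scalar factors
    # are deferred and folded in by reduce(mul, ...) inside Prod.
    cov = 2 * pm.gp.cov.ExpQuad(1, ls=0.5) + pm.gp.cov.Matern52(1, ls=2.0)
    psd = cov.power_spectral_density(omega).eval()
    print(psd.shape)  # (50,)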
+ """ + m_star = int(np.prod(m)) + phi = tl.ones((Xs.shape[0], m_star)) + for d in range(len(m)): + c = 1.0 / tl.sqrt(L[d]) + term1 = tl.sqrt(eigvals[:, d]) + term2 = tl.tile(Xs[:, d][:, None], m_star) + L[d] + phi *= c * tl.sin(term1 * term2) + return phi + + +class HSGP(Base): + R""" + Hilbert Space Gaussian process approximation. + + The `gp.HSGP` class is an implementation of the Hilbert Space Gaussian process. It is a + reduced rank GP approximation that uses a fixed set of basis vectors whose coefficients are + random functions of a stationary covariance function's power spectral density. It's usage + is largely similar to `gp.Latent`. Like `gp.Latent`, it does not assume a Gaussian noise model + and can be used with any likelihood, or as a component anywhere within a model. Also like + `gp.Latent`, it has `prior` and `conditional` methods. It supports any sum of covariance + functions that implement a `power_spectral_density` method. + + For information on choosing appropriate `m`, `L`, and `c`, refer Ruitort-Mayol et. al. or to + the PyMC examples that use HSGP. + + To with with the HSGP in its "linearized" form, as a matrix of basis vectors and and vector of + coefficients, see the method `prior_linearized`. + + Parameters + ---------- + m: list + The number of basis vectors to use for each active dimension (covariance parameter + `active_dim`). + L: list + The boundary of the space for each `active_dim`. It is called the boundary condition. + Choose L such that the domain `[-L, L]` contains all points in the column of X given by the + `active_dim`. + c: float + The proportion extension factor. Used to construct L from X. Defined as `S = max|X|` such + that `X` is in `[-S, S]`. `L` is the calculated as `c * S`. One of `c` or `L` must be + provided. Further information can be found in Ruitort-Mayol et. al. + drop_first: bool + Default `False`. Sometimes the first basis vector is quite "flat" and very similar to + the intercept term. When there is an intercept in the model, ignoring the first basis + vector may improve sampling. + cov_func: None, 2D array, or instance of Covariance + The covariance function. Defaults to zero. + mean_func: None, instance of Mean + The mean function. Defaults to zero. + + Examples + -------- + .. code:: python + + # A three dimensional column vector of inputs. + X = np.random.rand(100, 3) + + with pm.Model() as model: + # Specify the covariance function. + # Three input dimensions, but we only want to use the last two. + cov_func = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 2]) + + # Specify the HSGP. + # Use 25 basis vectors across each active dimension for a total of 25 * 25 = 625. + # The value `c = 4` means the boundary of the approximation + # lies at four times the half width of the data. + # In this example the data lie between zero and one, + # so the boundaries occur at -1.5 and 2.5. The data, both for + # training and prediction should reside well within that boundary.. + gp = pm.gp.HSGP(m=[25, 25], c=4.0, cov_func=cov_func) + + # Place a GP prior over the function f. + f = gp.prior("f", X=X) + + ... + + # After fitting or sampling, specify the distribution + # at new points with .conditional + Xnew = np.linspace(-1, 2, 50)[:, None] + + with model: + fcond = gp.conditional("fcond", Xnew=Xnew) + + References + ---------- + - Ruitort-Mayol, G., and Anderson, M., and Solin, A., and Vehtari, A. (2022). Practical + Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming + + - Solin, A., Sarkka, S. 
(2019) Hilbert Space Methods for Reduced-Rank Gaussian Process + Regression. + """ + + def __init__( + self, + m: Sequence[int], + L: Optional[Sequence[float]] = None, + c: Optional[numbers.Real] = None, + drop_first: bool = False, + parameterization="noncentered", + *, + mean_func: Mean = Zero(), + cov_func: Covariance, + ): + arg_err_msg = ( + "`m` and L, if provided, must be sequences with one element per active " + "dimension of the kernel or covariance function." + ) + + if not isinstance(m, Sequence): + raise ValueError(arg_err_msg) + + if len(m) != cov_func.n_dims: + raise ValueError(arg_err_msg) + m = tuple(m) + + if (L is None and c is None) or (L is not None and c is not None): + raise ValueError("Provide one of `c` or `L`") + + if L is not None and (not isinstance(L, Sequence) or len(L) != cov_func.n_dims): + raise ValueError(arg_err_msg) + + if L is None and c is not None and c < 1.2: + warnings.warn("For an adequate approximation `c >= 1.2` is recommended.") + + parameterization = parameterization.lower().replace("-", "").replace("_", "") + if parameterization not in ["centered", "noncentered"]: + raise ValueError("`parameterization` must be either 'centered' or 'noncentered'.") + else: + self._parameterization = parameterization + + self._drop_first = drop_first + self._m = m + self._m_star = int(np.prod(self._m)) + self._L = L + self._c = c + + super().__init__(mean_func=mean_func, cov_func=cov_func) + + def __add__(self, other): + raise NotImplementedError("Additive HSGPs aren't supported.") + + @property + def L(self): + if self._L is None: + raise RuntimeError("Boundaries `L` required but still unset.") + return self._L + + @L.setter + def L(self, value): + self._L = pt.as_tensor_variable(value) + + def prior_linearized(self, Xs: TensorLike): + """Linearized version of the HSGP. Returns the Laplace eigenfunctions and the square root + of the power spectral density needed to create the GP. + + This function allows the user to bypass the GP interface and work directly with the basis + and coefficients directly. This format allows the user to create predictions using + `pm.set_data` similarly to a linear model. It also enables computational speed ups in + multi-GP models since they may share the same basis. The return values are the Laplace + eigenfunctions `phi`, and the square root of the power spectral density. + + Correct results when using `prior_linearized` in tandem with `pm.set_data` and + `pm.MutableData` require two conditions. First, one must specify `L` instead of `c` when + the GP is constructed. If not, a RuntimeError is raised. Second, the `Xs` needs to be + zero-centered, so it's mean must be subtracted. An example is given below. + + Parameters + ---------- + Xs: array-like + Function input values. Assumes they have been mean subtracted or centered at zero. + + Returns + ------- + phi: array-like + Either Numpy or PyTensor 2D array of the fixed basis vectors. There are n rows, one + per row of `Xs` and `prod(m)` columns, one for each basis vector. + sqrt_psd: array-like + Either a Numpy or PyTensor 1D array of the square roots of the power spectral + densities. + + Examples + -------- + .. code:: python + + # A one dimensional column vector of inputs. 
+ X = np.linspace(0, 10, 100)[:, None] + + with pm.Model() as model: + eta = pm.Exponential("eta", lam=1.0) + ell = pm.InverseGamma("ell", mu=5.0, sigma=5.0) + cov_func = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell) + + # m = [200] means 200 basis vectors for the first dimenison + # L = [10] means the approximation is valid from Xs = [-10, 10] + gp = pm.gp.HSGP(m=[200], L=[10], cov_func=cov_func) + + # Order is important. First calculate the mean, then make X a shared variable, + # then subtract the mean. When X is mutated later, the correct mean will be + # subtracted. + X_mean = np.mean(X, axis=0) + X = pm.MutableData("X", X) + Xs = X - X_mean + + # Pass the zero-subtracted Xs in to the GP + phi, sqrt_psd = gp.prior_linearized(Xs=Xs) + + # Specify standard normal prior in the coefficients. The number of which + # is given by the number of basis vectors, which is also saved in the GP object + # as m_star. + beta = pm.Normal("beta", size=gp.m_star) + + # The (non-centered) GP approximation is given by + f = pm.Deterministic("f", phi @ (beta * sqrt_psd)) + + ... + + + # Then it works just like a linear regression to predict on new data. + # First mutate the data X, + x_new = np.linspace(-10, 10, 100) + with model: + model.set_data("X", x_new[:, None]) + + # and then make predictions for the GP using posterior predictive sampling. + with model: + ppc = pm.sample_posterior_predictive(idata, var_names=["f"]) + """ + + # Index Xs using input_dim and active_dims of covariance function + Xs, _ = self.cov_func._slice(Xs) + + # If not provided, use Xs and c to set L + if self._L is None: + assert isinstance(self._c, (numbers.Real, np.ndarray, pt.TensorVariable)) + self.L = set_boundary(Xs, self._c) + else: + self.L = self._L + + eigvals = calc_eigenvalues(self.L, self._m, tl=pt) + phi = calc_eigenvectors(Xs, self.L, eigvals, self._m, tl=pt) + omega = pt.sqrt(eigvals) + psd = self.cov_func.power_spectral_density(omega) + + i = int(self._drop_first == True) + return phi[:, i:], pt.sqrt(psd[i:]) + + def prior(self, name: str, X: TensorLike, dims: Optional[str] = None): # type: ignore + R""" + Returns the (approximate) GP prior distribution evaluated over the input locations `X`. + For usage examples, refer to `pm.gp.Latent`. + + Parameters + ---------- + name: str + Name of the random variable + X: array-like + Function input values. + dims: None + Dimension name for the GP random variable. + """ + self._X_mean = pt.mean(X, axis=0) + phi, sqrt_psd = self.prior_linearized(X - self._X_mean) + + if self._parameterization == "noncentered": + self._beta = pm.Normal( + f"{name}_hsgp_coeffs_", size=self._m_star - int(self._drop_first) + ) + self._sqrt_psd = sqrt_psd + f = self.mean_func(X) + phi @ (self._beta * self._sqrt_psd) + + elif self._parameterization == "centered": + self._beta = pm.Normal(f"{name}_hsgp_coeffs_", sigma=sqrt_psd) + f = self.mean_func(X) + phi @ self._beta + + self.f = pm.Deterministic(name, f, dims=dims) + return self.f + + def _build_conditional(self, Xnew): + try: + beta, X_mean = self._beta, self._X_mean + + if self._parameterization == "noncentered": + sqrt_psd = self._sqrt_psd + + except AttributeError: + raise ValueError( + "Prior is not set, can't create a conditional. Call `.prior(name, X)` first." 
+ ) + + Xnew, _ = self.cov_func._slice(Xnew) + eigvals = calc_eigenvalues(self.L, self._m, tl=pt) + phi = calc_eigenvectors(Xnew - X_mean, self.L, eigvals, self._m, tl=pt) + i = int(self._drop_first == True) + + if self._parameterization == "noncentered": + return self.mean_func(Xnew) + phi[:, i:] @ (beta * sqrt_psd) + + elif self._parameterization == "centered": + return self.mean_func(Xnew) + phi[:, i:] @ beta + + def conditional(self, name: str, Xnew: TensorLike, dims: Optional[str] = None): # type: ignore + R""" + Returns the (approximate) conditional distribution evaluated over new input locations + `Xnew`. + + Parameters + ---------- + name + Name of the random variable + Xnew : array-like + Function input values. + dims: None + Dimension name for the GP random variable. + """ + fnew = self._build_conditional(Xnew) + return pm.Deterministic(name, fnew, dims=dims) diff --git a/tests/gp/test_cov.py b/tests/gp/test_cov.py index 148d3f0e7b..e671ba8fc3 100644 --- a/tests/gp/test_cov.py +++ b/tests/gp/test_cov.py @@ -181,6 +181,68 @@ def test_inv_rightprod(self): cov = M + pm.gp.cov.ExpQuad(1, 1.0) +class TestCovPSD: + def test_covpsd_add(self): + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov1 = 2 * pm.gp.cov.ExpQuad(1, 0.1) + cov2 = 5 * pm.gp.cov.ExpQuad(1, 1.0) + cov = cov1 + cov2 + psd1 = cov1.power_spectral_density(omega[:, None]).eval() + psd2 = cov2.power_spectral_density(omega[:, None]).eval() + psd = cov.power_spectral_density(omega[:, None]).eval() + npt.assert_allclose(psd, psd1 + psd2) + + def test_copsd_multiply(self): + # This could be implemented via convolution + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov1 = 2 * pm.gp.cov.ExpQuad(1, ls=1) + cov2 = pm.gp.cov.ExpQuad(1, ls=1) + + msg = "The power spectral density of products of covariance functions is not implemented" + with pytest.raises(NotImplementedError, match=msg): + psd = (cov1 * cov2).power_spectral_density(omega[:, None]).eval() + + def test_covpsd_nonstationary1(self): + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov = 2 * pm.gp.cov.Linear(1, c=5) + + msg = "can only be calculated for `Stationary` covariance functions." + with pytest.raises(ValueError, match=msg): + psd = cov.power_spectral_density(omega[:, None]).eval() + + def test_covpsd_nonstationary2(self): + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov = 2 * pm.gp.cov.ExpQuad(1, ls=1) + 10.0 + + # Even though this should error, this isnt the appropriate message. The actual problem + # is because the covariance function is non-stationary. The underlying bug is due to + # `Constant` covariances not having an input_dim. + msg = "All covariances must have the same `input_dim`." 
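The linearized form that `HSGP` builds on can be sketched directly. A minimal example, assuming the module above is applied; the shapes follow from `m` and the number of rows in `Xs`, and the numbers below (`m=[30]`, `L=[6.0]`) are illustrative:

    import numpy as np
    import pymc as pm

    X = np.linspace(0, 10, 100)[:, None]
    Xs = X - X.mean(axis=0)  # prior_linearized expects mean-subtracted inputs

    cov_func = pm.gp.cov.Matern52(1, ls=1.0)
    gp = pm.gp.HSGP(m=[30], L=[6.0], cov_func=cov_func)  # [-6, 6] covers Xs in [-5, 5]

    phi, sqrt_psd = gp.prior_linearized(Xs=Xs)
    print(phi.eval().shape)       # (100, 30): one column per basis vector
    print(sqrt_psd.eval().shape)  # (30,): one prior scale per coefficient

The GP itself is then recovered as the linear combination `phi @ (beta * sqrt_psd)` with standard normal `beta`, which is exactly what `prior` constructs in the non-centered parameterization.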
diff --git a/tests/gp/test_cov.py b/tests/gp/test_cov.py
index 148d3f0e7b..e671ba8fc3 100644
--- a/tests/gp/test_cov.py
+++ b/tests/gp/test_cov.py
@@ -181,6 +181,68 @@ def test_inv_rightprod(self):
             cov = M + pm.gp.cov.ExpQuad(1, 1.0)
 
 
+class TestCovPSD:
+    def test_covpsd_add(self):
+        L = 10.0
+        omega = np.pi * np.arange(1, 101) / (2 * L)
+        with pm.Model() as model:
+            cov1 = 2 * pm.gp.cov.ExpQuad(1, 0.1)
+            cov2 = 5 * pm.gp.cov.ExpQuad(1, 1.0)
+            cov = cov1 + cov2
+        psd1 = cov1.power_spectral_density(omega[:, None]).eval()
+        psd2 = cov2.power_spectral_density(omega[:, None]).eval()
+        psd = cov.power_spectral_density(omega[:, None]).eval()
+        npt.assert_allclose(psd, psd1 + psd2)
+
+    def test_covpsd_multiply(self):
+        # This could be implemented via convolution
+        L = 10.0
+        omega = np.pi * np.arange(1, 101) / (2 * L)
+        with pm.Model() as model:
+            cov1 = 2 * pm.gp.cov.ExpQuad(1, ls=1)
+            cov2 = pm.gp.cov.ExpQuad(1, ls=1)
+
+        msg = "The power spectral density of products of covariance functions is not implemented"
+        with pytest.raises(NotImplementedError, match=msg):
+            psd = (cov1 * cov2).power_spectral_density(omega[:, None]).eval()
+
+    def test_covpsd_nonstationary1(self):
+        L = 10.0
+        omega = np.pi * np.arange(1, 101) / (2 * L)
+        with pm.Model() as model:
+            cov = 2 * pm.gp.cov.Linear(1, c=5)
+
+        msg = "can only be calculated for `Stationary` covariance functions."
+        with pytest.raises(ValueError, match=msg):
+            psd = cov.power_spectral_density(omega[:, None]).eval()
+
+    def test_covpsd_nonstationary2(self):
+        L = 10.0
+        omega = np.pi * np.arange(1, 101) / (2 * L)
+        with pm.Model() as model:
+            cov = 2 * pm.gp.cov.ExpQuad(1, ls=1) + 10.0
+
+        # Even though this should error, this isn't the appropriate message.  The actual
+        # problem is that the covariance function is non-stationary.  The underlying bug
+        # is due to `Constant` covariances not having an input_dim.
+        msg = "All covariances must have the same `input_dim`."
+        with pytest.raises(ValueError, match=msg):
+            psd = cov.power_spectral_density(omega[:, None]).eval()
+
+    def test_covpsd_notimplemented(self):
+        class NewStationaryCov(pm.gp.cov.Stationary):
+            pass
+
+        L = 10.0
+        omega = np.pi * np.arange(1, 101) / (2 * L)
+        with pm.Model() as model:
+            cov = 2 * NewStationaryCov(1, ls=1)
+
+        msg = "No power spectral density method has been implemented"
+        with pytest.raises(NotImplementedError, match=msg):
+            psd = cov.power_spectral_density(omega[:, None]).eval()
+
+
 class TestCovExponentiation:
     def test_symexp_cov(self):
         X = np.linspace(0, 1, 10)[:, None]
@@ -228,7 +290,9 @@ def test_covexp_shared(self):
 
     def test_invalid_covexp(self):
         X = np.linspace(0, 1, 10)[:, None]
-        with pytest.raises(ValueError, match=r"can only be exponentiated by a scalar value"):
+        with pytest.raises(
+            ValueError, match=r"A covariance function can only be exponentiated by a scalar value"
+        ):
             with pm.Model() as model:
                 a = np.array([[1.0, 2.0]])
                 cov = pm.gp.cov.ExpQuad(1, 0.1) ** a
@@ -262,7 +326,7 @@ def test_multiops(self):
             + pm.gp.cov.ExpQuad(1, 0.1)
             + pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(1, 0.1)
         )
-        cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1)
+        cov2 = pm.gp.cov.ExpQuad(2, 0.1) * pm.gp.cov.ExpQuad(2, 0.1)
         cov = pm.gp.cov.Kron([cov1, cov2])
         K_true = kronecker(cov1(X1).eval(), cov2(X2).eval()).eval()
         K = cov(X).eval()
@@ -373,6 +437,17 @@ def test_inv_lengthscale(self):
         Kd = cov(X, diag=True).eval()
         npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
 
+    def test_psd(self):
+        # compare to simple 1d formula
+        X = np.linspace(0, 1, 10)[:, None]
+        omega = np.linspace(0, 2, 50)
+        ell = 2.0
+        true_1d_psd = np.sqrt(2 * np.pi * np.square(ell)) * np.exp(-0.5 * np.square(ell * omega))
+        test_1d_psd = (
+            pm.gp.cov.ExpQuad(1, ls=ell).power_spectral_density(omega[:, None]).flatten().eval()
+        )
+        npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5)
+
 
 class TestWhiteNoise:
     def test_1d(self):
@@ -449,6 +524,18 @@ def test_1d(self):
         Kd = cov(X, diag=True).eval()
         npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
 
+    def test_psd(self):
+        # compare to simple 1d formula
+        X = np.linspace(0, 1, 10)[:, None]
+        omega = np.linspace(0, 2, 50)
+        ell = 2.0
+        lamda = np.sqrt(5) / ell
+        true_1d_psd = (16.0 / 3.0) * np.power(lamda, 5) * np.power(lamda**2 + omega**2, -3)
+        test_1d_psd = (
+            pm.gp.cov.Matern52(1, ls=ell).power_spectral_density(omega[:, None]).flatten().eval()
+        )
+        npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5)
+
 
 class TestMatern32:
     def test_1d(self):
@@ -463,6 +550,18 @@ def test_1d(self):
         Kd = cov(X, diag=True).eval()
         npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
 
+    def test_psd(self):
+        # compare to simple 1d formula
+        X = np.linspace(0, 1, 10)[:, None]
+        omega = np.linspace(0, 2, 50)
+        ell = 2.0
+        lamda = np.sqrt(3) / ell
+        true_1d_psd = 4 * np.power(lamda, 3) * np.power(lamda**2 + omega**2, -2)
+        test_1d_psd = (
+            pm.gp.cov.Matern32(1, ls=ell).power_spectral_density(omega[:, None]).flatten().eval()
+        )
+        npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5)
+
 
 class TestMatern12:
     def test_1d(self):
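The closed forms used in the `test_psd` methods above are the D = 1 reductions of the general spectral densities added in `cov.py`; for ExpQuad, setting D = 1 gives S(ω) = √(2π) ℓ exp(−½ ℓ² ω²). A quick standalone check of that reduction (a sketch, assuming the diff is applied):

    import numpy as np
    import pymc as pm

    ell = 2.0
    omega = np.linspace(0, 2, 50)

    # D = 1 reduction of the ExpQuad PSD: sqrt(2*pi) * ell * exp(-0.5 * ell**2 * omega**2)
    closed_form = np.sqrt(2 * np.pi) * ell * np.exp(-0.5 * (ell * omega) ** 2)
    psd = pm.gp.cov.ExpQuad(1, ls=ell).power_spectral_density(omega[:, None]).eval().flatten()
    print(np.allclose(closed_form, psd, atol=1e-5))  # True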
diff --git a/tests/gp/test_hsgp_approx.py b/tests/gp/test_hsgp_approx.py
new file mode 100644
index 0000000000..b6f03a4acc
--- /dev/null
+++ b/tests/gp/test_hsgp_approx.py
@@ -0,0 +1,210 @@
+#   Copyright 2023 The PyMC Developers
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+import arviz as az
+import numpy as np
+import pytensor
+import pytensor.tensor as pt
+import pytest
+import scipy as sp
+
+from scipy.spatial import distance
+
+import pymc as pm
+
+
+def build_mmd_func(sample1, sample2):
+    """Build a PyTensor function that calculates the maximum mean discrepancy (MMD) statistic."""
+
+    assert sample1.shape[1] == sample2.shape[1]
+
+    s1 = pt.matrix(name="s1", shape=sample1.shape)
+    s2 = pt.matrix(name="s2", shape=sample2.shape)
+
+    X = np.concatenate((sample1, sample2), axis=0)
+    test_ell = np.median(distance.pdist(X)) / 2
+
+    K = pm.gp.cov.ExpQuad(sample1.shape[1], ls=test_ell)
+    Kxx = K(s1)
+    Kyy = K(s2)
+    Kxy = K(s1, s2)
+
+    n_x, n_y = s1.shape[0], s2.shape[0]
+    mmd = (
+        (pt.sum(Kxx) / (n_x * (n_x - 1)))
+        + (pt.sum(Kyy) / (n_y * (n_y - 1)))
+        - 2 * pt.sum(Kxy) / (n_x * n_y)
+    )
+
+    calc_mmd = pytensor.function(inputs=[s1, s2], outputs=mmd)
+    return calc_mmd
+
+
+def two_sample_test(sample1, sample2, n_sims=1000, alpha=0.05):
+    """Calculate a test whose null hypothesis is that two sets of samples were drawn from
+    the same distribution.
+
+    Largely taken from https://torchdrift.org/notebooks/note_on_mmd.html
+    """
+    # build function to calculate mmd
+    calc_mmd = build_mmd_func(sample1, sample2)
+
+    # simulate test statistic under null hypothesis
+    X = np.concatenate((sample1, sample2), axis=0)
+    half_N = int(X.shape[0] // 2)
+    ix = np.arange(half_N * 2)
+
+    h0 = []
+    for i in range(n_sims):
+        np.random.shuffle(ix)
+        X = X[ix, :]
+        h0.append(calc_mmd(X[:half_N, :], X[half_N:, :]))
+
+    h0 = np.asarray(h0)
+    critical_value = np.percentile(h0, 100 * (1 - alpha))
+    mmd = calc_mmd(sample1, sample2)
+    return h0, mmd, critical_value, mmd > critical_value
+
+
+class TestHSGP:
+    @pytest.fixture
+    def rng(self):
+        return np.random.RandomState(10)
+
+    @pytest.fixture
+    def data(self, rng):
+        # 1D dataset
+        X1 = np.linspace(-5, 5, 100)[:, None]
+
+        # 3D dataset
+        x1, x2, x3 = np.meshgrid(
+            np.linspace(0, 10, 5), np.linspace(20, 30, 5), np.linspace(10, 20, 5)
+        )
+        X2 = np.vstack([x1.flatten(), x2.flatten(), x3.flatten()]).T
+        return X1, X2
+
+    @pytest.fixture
+    def X1(self, data):
+        return data[0]
+
+    @pytest.fixture
+    def X2(self, data):
+        return data[1]
+
+    @pytest.fixture
+    def model(self):
+        return pm.Model()
+
+    @pytest.fixture
+    def cov_func(self):
+        return pm.gp.cov.ExpQuad(1, ls=1)
+
+    @pytest.fixture
+    def gp(self, cov_func):
+        gp = pm.gp.Latent(cov_func=cov_func)
+        return gp
+
+    def test_set_boundaries_1d(self, X1):
+        X1s = X1 - np.mean(X1, axis=0)
+        L = pm.gp.hsgp_approx.set_boundary(X1s, c=2).eval()
+        assert np.all(L == 10)
+
+    def test_set_boundaries_3d(self, X2):
+        X2s = X2 - np.mean(X2, axis=0)
+        L = pm.gp.hsgp_approx.set_boundary(X2s, c=2).eval()
+        assert np.all(L == 10)
+
+    def test_parametrization(self):
+        err_msg = "`m` and L, if provided, must be sequences with one element per active dimension"
+
+        with pytest.raises(ValueError, match=err_msg):
+            # m must be a list
+            cov_func = pm.gp.cov.ExpQuad(1, ls=0.1)
+            pm.gp.HSGP(m=500, c=2, cov_func=cov_func)
+
+        with pytest.raises(ValueError, match=err_msg):
+            # m must have same length as L
+            cov_func = pm.gp.cov.ExpQuad(2, ls=[1, 2])
+            pm.gp.HSGP(m=[500], L=[12, 12], cov_func=cov_func)
+
+        with pytest.raises(ValueError, match=err_msg):
+            # m must have same length as L, and match number of active dims of cov_func
+            cov_func = pm.gp.cov.ExpQuad(1, ls=0.1)
+            pm.gp.HSGP(m=[500], L=[12, 12], cov_func=cov_func)
+
+        # pass without error, cov_func has 2 active dimensions, c given as scalar
+        cov_func = pm.gp.cov.ExpQuad(3, ls=[1, 2], active_dims=[0, 2])
+        pm.gp.HSGP(m=[50, 50], c=2, cov_func=cov_func)
+
+        # pass without error, all have two dimensions
+        cov_func = pm.gp.cov.ExpQuad(2, ls=[1, 2])
+        pm.gp.HSGP(m=[50, 50], L=[12, 12], cov_func=cov_func)
+
+    @pytest.mark.parametrize("drop_first", [True, False])
+    def test_parametrization_drop_first(self, model, cov_func, X1, drop_first):
+        n_basis = 100
+        with model:
+            gp = pm.gp.HSGP(m=[n_basis], c=4.0, cov_func=cov_func, drop_first=drop_first)
+            gp.prior("f1", X1)
+
+            n_coeffs = model.f1_hsgp_coeffs_.type.shape[0]
+            if drop_first:
+                assert (
+                    n_coeffs == n_basis - 1
+                ), f"one basis vector should have been dropped, {n_coeffs}"
+            else:
+                assert n_coeffs == n_basis, "one was dropped when it shouldn't have been"
+
+    @pytest.mark.parametrize("parameterization", ["centered", "noncentered"])
+    def test_prior(self, model, cov_func, X1, parameterization):
+        """Compare HSGP prior to unapproximated GP prior, pm.gp.Latent.  Draw samples from
+        the prior and compare them using an MMD two sample test.  Tests both centered and
+        non-centered parameterizations.
+        """
+        with model:
+            hsgp = pm.gp.HSGP(m=[200], c=2.0, parameterization=parameterization, cov_func=cov_func)
+            f1 = hsgp.prior("f1", X=X1)
+
+            gp = pm.gp.Latent(cov_func=cov_func)
+            f2 = gp.prior("f2", X=X1)
+
+            idata = pm.sample_prior_predictive(samples=1000)
+
+        samples1 = az.extract(idata.prior["f1"])["f1"].values.T
+        samples2 = az.extract(idata.prior["f2"])["f2"].values.T
+
+        h0, mmd, critical_value, reject = two_sample_test(
+            samples1, samples2, n_sims=500, alpha=0.01
+        )
+        assert not reject, "H0 was rejected, even though HSGP and GP priors should match."
+
+    @pytest.mark.parametrize("parameterization", ["centered", "noncentered"])
+    def test_conditional(self, model, cov_func, X1, parameterization):
+        """Compare HSGP conditional to unapproximated GP prior, pm.gp.Latent.  Draw samples
+        from the prior and compare them using an MMD two sample test.  Tests both centered
+        and non-centered parameterizations.  The conditional should match the prior when no
+        data is observed.
+        """
+        with model:
+            hsgp = pm.gp.HSGP(m=[100], c=2.0, parameterization=parameterization, cov_func=cov_func)
+            f = hsgp.prior("f", X=X1)
+            fc = hsgp.conditional("fc", Xnew=X1)
+
+            idata = pm.sample_prior_predictive(samples=1000)
+
+        samples1 = az.extract(idata.prior["f"])["f"].values.T
+        samples2 = az.extract(idata.prior["fc"])["fc"].values.T
+
+        h0, mmd, critical_value, reject = two_sample_test(
+            samples1, samples2, n_sims=500, alpha=0.01
+        )
+        assert not reject, "H0 was rejected, even though HSGP prior and conditional should match."
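With `two_sample_test` as defined in the test module above, the MMD machinery can also be exercised on its own. A hedged sketch: two samples drawn from the same distribution should not reject H0, up to the chosen alpha level (`n_sims` is kept small here for speed):

    import numpy as np

    rng = np.random.default_rng(1)
    sample1 = rng.normal(size=(100, 2))
    sample2 = rng.normal(size=(100, 2))

    # Same distribution, so the observed MMD statistic should fall below the
    # critical value simulated under the null via permutation.
    h0, mmd, critical_value, reject = two_sample_test(sample1, sample2, n_sims=200, alpha=0.05)
    print(float(mmd), float(critical_value), bool(reject))  # reject is False ~95% of the time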