diff --git a/conda-envs/environment-dev.yml b/conda-envs/environment-dev.yml index 66595e8182..dce14b50ba 100644 --- a/conda-envs/environment-dev.yml +++ b/conda-envs/environment-dev.yml @@ -14,7 +14,7 @@ dependencies: - numpy>=1.15.0 - pandas>=0.24.0 - pip -- pytensor>=2.12.0,<2.13 +- pytensor>=2.14.1,<2.15 - python-graphviz - networkx - scipy>=1.4.1 diff --git a/conda-envs/environment-docs.yml b/conda-envs/environment-docs.yml index b1c2b4ee7f..b64e179251 100644 --- a/conda-envs/environment-docs.yml +++ b/conda-envs/environment-docs.yml @@ -12,7 +12,7 @@ dependencies: - numpy>=1.15.0 - pandas>=0.24.0 - pip -- pytensor>=2.12.0,<2.13 +- pytensor>=2.14.1,<2.15 - python-graphviz - scipy>=1.4.1 - typing-extensions>=3.7.4 diff --git a/conda-envs/environment-test.yml b/conda-envs/environment-test.yml index 4ce5e06a6d..7186209e76 100644 --- a/conda-envs/environment-test.yml +++ b/conda-envs/environment-test.yml @@ -17,7 +17,7 @@ dependencies: - numpy>=1.15.0 - pandas>=0.24.0 - pip -- pytensor>=2.12.0,<2.13 +- pytensor>=2.14.1,<2.15 - python-graphviz - networkx - scipy>=1.4.1 diff --git a/conda-envs/windows-environment-dev.yml b/conda-envs/windows-environment-dev.yml index 86d1830d70..5f48132823 100644 --- a/conda-envs/windows-environment-dev.yml +++ b/conda-envs/windows-environment-dev.yml @@ -14,7 +14,7 @@ dependencies: - numpy>=1.15.0 - pandas>=0.24.0 - pip -- pytensor>=2.12.0,<2.13 +- pytensor>=2.14.1,<2.15 - python-graphviz - networkx - scipy>=1.4.1 diff --git a/conda-envs/windows-environment-test.yml b/conda-envs/windows-environment-test.yml index 1595305836..27dae190e1 100644 --- a/conda-envs/windows-environment-test.yml +++ b/conda-envs/windows-environment-test.yml @@ -17,7 +17,7 @@ dependencies: - numpy>=1.15.0 - pandas>=0.24.0 - pip -- pytensor>=2.12.0,<2.13 +- pytensor>=2.14.1,<2.15 - python-graphviz - networkx - scipy>=1.4.1 diff --git a/docs/source/contributing/implementing_distribution.md b/docs/source/contributing/implementing_distribution.md index 85347622cd..be4df863be 100644 --- a/docs/source/contributing/implementing_distribution.md +++ b/docs/source/contributing/implementing_distribution.md @@ -1,11 +1,11 @@ (implementing-a-distribution)= # Implementing a RandomVariable Distribution -This guide provides an overview on how to implement a distribution for PyMC version `>=4.0.0`. +This guide provides an overview on how to implement a distribution for PyMC. It is designed for developers who wish to add a new distribution to the library. -Users will not be aware of all this complexity and should instead make use of helper methods such as `~pymc.DensityDist`. +Users will not be aware of all this complexity and should instead make use of helper methods such as `~pymc.CustomDist`. -PyMC {class}`~pymc.Distribution` builds on top of PyTensor's {class}`~pytensor.tensor.random.op.RandomVariable`, and implements `logp`, `logcdf` and `moment` methods as well as other initialization and validation helpers. +PyMC {class}`~pymc.Distribution` builds on top of PyTensor's {class}`~pytensor.tensor.random.op.RandomVariable`, and implements `logp`, `logcdf`, `icdf` and `moment` methods as well as other initialization and validation helpers. Most notably `shape/dims/observed` kwargs, alternative parametrizations, and default `transform`. Here is a summary check-list of the steps needed to implement a new distribution. @@ -14,14 +14,14 @@ Each section will be expanded below: 1. Creating a new `RandomVariable` `Op` 1. Implementing the corresponding `Distribution` class 1. 
Adding tests for the new `RandomVariable` -1. Adding tests for `logp` / `logcdf` and `moment` methods +1. Adding tests for `logp` / `logcdf` / `icdf` and `moment` methods 1. Documenting the new `Distribution`. This guide does not attempt to explain the rationale behind the `Distributions` current implementation, and details are provided only insofar as they help to implement new "standard" distributions. ## 1. Creating a new `RandomVariable` `Op` -{class}`~pytensor.tensor.random.op.RandomVariable` are responsible for implementing the random sampling methods, which in version 3 of PyMC used to be one of the standard `Distribution` methods, alongside `logp` and `logcdf`. +{class}`~pytensor.tensor.random.op.RandomVariable` are responsible for implementing the random sampling methods. The `RandomVariable` is also responsible for parameter broadcasting and shape inference. Before creating a new `RandomVariable` make sure that it is not already offered in the {mod}`NumPy library `. @@ -68,8 +68,7 @@ class BlahRV(RandomVariable): # This is the Python code that produces samples. Its signature will always # start with a NumPy `RandomState` object, then the distribution # parameters, and, finally, the size. - # - # This is effectively the PyMC >=4.0 replacement for `Distribution.random`. + @classmethod def rng_fn( cls, @@ -87,19 +86,20 @@ blah = BlahRV() Some important things to keep in mind: -1. Everything inside the `rng_fn` method is pure Python code (as are the inputs) and should not make use of other `PyTensor` symbolic ops. The random method should make use of the `rng` which is a NumPy {class}`~numpy.random.RandomState`, so that samples are reproducible. +1. Everything inside the `rng_fn` method is pure Python code (as are the inputs) and should __not__ make use of other `PyTensor` symbolic ops. The random method should make use of the `rng` which is a NumPy {class}`~numpy.random.RandomGenerator`, so that samples are reproducible. 1. Non-default `RandomVariable` dimensions will end up in the `rng_fn` via the `size` kwarg. The `rng_fn` will have to take this into consideration for correct output. `size` is the specification used by NumPy and SciPy and works like PyMC `shape` for univariate distributions, but is different for multivariate distributions. For multivariate distributions the __`size` excludes the `ndim_supp` support dimensions__, whereas the __`shape` of the resulting `TensorVariable` or `ndarray` includes the support dimensions__. For more context check {ref}`The dimensionality notebook `. -1. `PyTensor` tries to infer the output shape of the `RandomVariable` (given a user-specified size) by introspection of the `ndim_supp` and `ndim_params` attributes. However, the default method may not work for more complex distributions. In that case, custom `_supp_shape_from_params` (and less probably, `_infer_shape`) should also be implemented in the new `RandomVariable` class. One simple example is seen in the {class}`~pymc.DirichletMultinomialRV` where it was necessary to specify the `rep_param_idx` so that the `default_supp_shape_from_params` helper method can do its job. In more complex cases, it may not suffice to use this default helper. This could happen for instance if the argument values determined the support shape of the distribution, as happens in the `~pymc.distributions.multivarite._LKJCholeskyCovRV`. +1. `PyTensor` can automatically infer the output shape of univariate `RandomVariable`s (`ndim_supp=0`). 
For multivariate distributions (`ndim_supp>=1`), the method `_supp_shape_from_params` must be implemented in the new `RandomVariable` class. This method returns the support dimensionality of an RV given its parameters. In some cases this can be derived from the shape of one of its parameters, in which case the helper {func}`pytensor.tensor.random.utils.supp_shape_from_ref_param_shape` can be used, as is done in {class}`~pymc.DirichletMultinomialRV` (a short sketch is included further below). In other cases the argument values (and not their shapes) may determine the support shape of the distribution, as happens in the `~pymc.distributions.multivariate._LKJCholeskyCovRV`. In simpler cases it may be constant. 1. It's okay to use the `rng_fn` `classmethods` of other PyTensor and PyMC `RandomVariables` inside the new `rng_fn`. For example, if you are implementing a negative HalfNormal `RandomVariable`, your `rng_fn` can simply return `- halfnormal.rng_fn(rng, scale, size)`. *Note: In addition to `size`, the PyMC API also provides `shape`, `dims` and `observed` as alternatives to define a distribution dimensionality, but this is taken care of by {class}`~pymc.Distribution`, and should not require any extra changes.* -For a quick test that your new `RandomVariable` `Op` is working, you can call the `Op` with the necessary parameters and then call `eval()` on the returned object: +For a quick test that your new `RandomVariable` `Op` is working, you can call the `Op` with the necessary parameters and then call {func}`~pymc.draw` on the returned object: ```python # blah = pytensor.tensor.random.uniform in this example -blah([0, 0], [1, 2], size=(10, 2)).eval() +# multiple calls with the same seed should return the same values +pm.draw(blah([0, 0], [1, 2], size=(10, 2)), random_seed=1) # array([[0.83674527, 0.76593773], # [0.00958496, 1.85742402], @@ -117,10 +117,10 @@ blah([0, 0], [1, 2], size=(10, 2)).eval() ## 2. Inheriting from a PyMC base `Distribution` class After implementing the new `RandomVariable` `Op`, it's time to make use of it in a new PyMC {class}`~pymc.Distribution`. -PyMC `>=4.0.0` works in a very {term}`functional ` way, and the `distribution` classes are there mostly to facilitate porting the `PyMC3` v3.x code to PyMC `>=4.0.0`, add PyMC API features and keep related methods organized together. +PyMC works in a very {term}`functional ` way, and the `distribution` classes are there mostly to add PyMC API features and keep related methods organized together. In practice, they take care of: 1. Linking ({term}`Dispatching`) an `rv_op` class with the corresponding `moment`, `logp`, `logcdf` and `icdf` methods. 1. Defining a standard transformation (for continuous distributions) that converts a bounded variable domain (e.g., positive line) to an unbounded domain (i.e., the real line), which many samplers prefer. 1. Validating the parametrization of a distribution and converting non-symbolic inputs (i.e., numeric literals or NumPy arrays) to symbolic variables. 1. Converting multiple alternative parametrizations to the standard parametrization that the `RandomVariable` is defined in terms of.
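Returning to the shape-inference note from the previous section, here is a minimal sketch of a vector-valued (`ndim_supp=1`) `RandomVariable` that derives its support shape from the trailing dimension of its first parameter via the `supp_shape_from_ref_param_shape` helper. The class name and parameters (`BlahVecRV`, `mu`, `scale`) are hypothetical and only illustrate the pattern used by the real distributions touched in this diff:

```python
from pytensor.tensor.random.op import RandomVariable
from pytensor.tensor.random.utils import supp_shape_from_ref_param_shape


class BlahVecRV(RandomVariable):
    name = "blahvec"
    ndim_supp = 1  # one support dimension (vector-valued draws)
    ndims_params = [1, 0]  # mu is a vector, scale is a scalar
    dtype = "floatX"
    _print_name = ("BlahVec", "\\operatorname{BlahVec}")

    def _supp_shape_from_params(self, dist_params, param_shapes=None):
        # The support shape is the trailing dimension of `mu` (parameter index 0)
        return supp_shape_from_ref_param_shape(
            ndim_supp=self.ndim_supp,
            dist_params=dist_params,
            param_shapes=param_shapes,
            ref_param_idx=0,
        )

    @classmethod
    def rng_fn(cls, rng, mu, scale, size):
        # `size` excludes the support dimension, so append it before sampling
        if size is not None:
            size = tuple(size) + (mu.shape[-1],)
        return rng.normal(loc=mu, scale=scale, size=size)


blahvec = BlahVecRV()
# pm.draw(blahvec([0.0, 1.0, 2.0], 1.0, size=(10,)), random_seed=1)
# should return an array of shape (10, 3)
```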
@@ -130,7 +130,6 @@ Here is how the example continues: ```python import pytensor.tensor as pt -from pymc.pytensorf import floatX, intX from pymc.distributions.continuous import PositiveContinuous from pymc.distributions.dist_math import check_parameters from pymc.distributions.shape_utils import rv_size_is_none @@ -146,12 +145,12 @@ class Blah(PositiveContinuous): # We pass the standard parametrizations to super().dist @classmethod def dist(cls, param1, param2=None, alt_param2=None, **kwargs): - param1 = pt.as_tensor_variable(intX(param1)) + param1 = pt.as_tensor_variable(param1) if param2 is not None and alt_param2 is not None: raise ValueError("Only one of param2 and alt_param2 is allowed.") if alt_param2 is not None: param2 = 1 / alt_param2 - param2 = pt.as_tensor_variable(floatX(param2)) + param2 = pt.as_tensor_variable(param2) # The first value-only argument should be a list of the parameters that # the rv_op needs in order to be instantiated @@ -183,7 +182,7 @@ class Blah(PositiveContinuous): # Whenever a bound is invalidated, the returned expression raises an error # with the message defined in the optional `msg` keyword argument. return check_parameters( - logp_expression, + bounded_logp_expression, param2 >= 0, msg="param2 >= 0", ) @@ -193,15 +192,18 @@ class Blah(PositiveContinuous): def logcdf(value, param1, param2): ... + def icdf(value, param1, param2): + ... + ``` Some notes: 1. A distribution should at the very least inherit from {class}`~pymc.Discrete` or {class}`~pymc.Continuous`. For the latter, more specific subclasses exist: `PositiveContinuous`, `UnitContinuous`, `BoundedContinuous`, `CircularContinuous`, `SimplexContinuous`, which specify default transformations for the variables. If you need to specify a one-time custom transform you can also create a `_default_transform` dispatch function as is done for the {class}`~pymc.distributions.multivariate.LKJCholeskyCov`. 1. If a distribution does not have a corresponding `rng_fn` implementation, a `RandomVariable` should still be created to raise a `NotImplementedError`. This is, for example, the case in {class}`~pymc.distributions.continuous.Flat`. In this case it will be necessary to provide a `moment` method, because without a `rng_fn`, PyMC can't fall back to a random draw to use as an initial point for MCMC. -1. As mentioned above, PyMC `>=4.0.0` works in a very {term}`functional ` way, and all the information that is needed in the `logp` and `logcdf` methods is expected to be "carried" via the `RandomVariable` inputs. You may pass numerical arguments that are not strictly needed for the `rng_fn` method but are used in the `logp` and `logcdf` methods. Just keep in mind whether this affects the correct shape inference behavior of the `RandomVariable`. If specialized non-numeric information is needed you might need to define your custom`_logp` and `_logcdf` {term}`Dispatching` functions, but this should be done as a last resort. -1. The `logcdf` method is not a requirement, but it's a nice plus! -1. Currently, only one moment is supported in the `moment` method, and probably the "higher-order" one is the most useful (that is `mean` > `median` > `mode`)... You might need to truncate the moment if you are dealing with a discrete distribution. +1. As mentioned above, PyMC works in a very {term}`functional ` way, and all the information that is needed in the `logp`, `logcdf`, `icdf` and `moment` methods is expected to be "carried" via the `RandomVariable` inputs. 
You may pass numerical arguments that are not strictly needed for the `rng_fn` method but are used in those methods. Just keep in mind whether this affects the correct shape inference behavior of the `RandomVariable`. +1. The `logcdf` and `icdf` methods are not a requirement, but they are a nice plus! +1. Currently, only one moment is supported in the `moment` method, and probably the "higher-order" one is the most useful (that is `mean` > `median` > `mode`)... You might need to truncate the moment if you are dealing with a discrete distribution. `moment` should return a valid point for the random variable (i.e., it always has non-zero probability when evaluated at that point). 1. When creating the `moment` method, be careful with `size != None` and broadcast properly also based on parameters that are not necessarily used to calculate the moment. For example, the `sigma` in `pm.Normal.dist(mu=0, sigma=np.arange(1, 6))` is irrelevant for the moment, but may nevertheless inform about the shape. In this case, the `moment` should return `[mu, mu, mu, mu, mu]`. For a quick check that things are working you can try the following: @@ -215,7 +217,7 @@ from pymc.distributions.distribution import moment blah = pm.blah.dist(mu=0, sigma=1) # Test that the returned blah_op is still working fine -blah.eval() +pm.draw(blah, random_seed=1) # array(-1.01397228) # Test the moment method @@ -306,10 +308,10 @@ Finally, when your `rng_fn` is doing something more than just calling a NumPy or You can find an example in {class}`~tests.distributions.test_continuous.TestWeibull`, whose `rng_fn` returns `beta * np.random.weibull(alpha, size=size)`. -## 4. Adding tests for the `logp` / `logcdf` methods +## 4. Adding tests for the `logp` / `logcdf` / `icdf` methods -Tests for the `logp` and `logcdf` mostly make use of the helpers `check_logp`, `check_logcdf`, and -`check_selfconsistency_discrete_logcdf` implemented in `~tests.distributions.util` +Tests for the `logp`, `logcdf` and `icdf` mostly make use of the helpers `check_logp`, `check_logcdf`, `check_icdf` and +`check_selfconsistency_discrete_logcdf` implemented in `~testing` ```python diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 5c74080ace..8056fbd6bb 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -34,6 +34,7 @@ from pytensor.tensor.random.op import RandomVariable from pytensor.tensor.random.rewriting import local_subtensor_rv_lift from pytensor.tensor.random.utils import normalize_size_param +from pytensor.tensor.rewriting.shape import ShapeFeature from pytensor.tensor.var import TensorVariable from typing_extensions import TypeAlias @@ -54,7 +55,12 @@ from pymc.logprob.rewriting import logprob_rewrites_db from pymc.model import new_or_existing_block_model_access from pymc.printing import str_for_dist -from pymc.pytensorf import collect_default_updates, convert_observed_data, floatX +from pymc.pytensorf import ( + collect_default_updates, + constant_fold, + convert_observed_data, + floatX, +) from pymc.util import UNSET, _add_future_warning_tag from pymc.vartypes import continuous_types, string_types @@ -482,6 +488,11 @@ def dist( class_name: str = "CustomDist", **kwargs, ): + if ndim_supp > 0: + raise NotImplementedError( + "CustomDist with ndim_supp > 0 and without a `dist` function are not supported."
+ ) + dist_params = [as_tensor_variable(param) for param in dist_params] # Assume scalar ndims_params @@ -1229,15 +1240,12 @@ def create_partial_observed_rv( can_rewrite = True if can_rewrite: - # Rewrite doesn't work with boolean masks. Should be fixed after https://github.com/pymc-devs/pytensor/pull/329 - mask, antimask = mask.nonzero(), antimask.nonzero() - masked_rv = rv[mask] - fgraph = FunctionGraph(outputs=[masked_rv], clone=False) + fgraph = FunctionGraph(outputs=[masked_rv], clone=False, features=[ShapeFeature()]) [unobserved_rv] = local_subtensor_rv_lift.transform(fgraph, fgraph.outputs[0].owner) antimasked_rv = rv[antimask] - fgraph = FunctionGraph(outputs=[antimasked_rv], clone=False) + fgraph = FunctionGraph(outputs=[antimasked_rv], clone=False, features=[ShapeFeature()]) [observed_rv] = local_subtensor_rv_lift.transform(fgraph, fgraph.outputs[0].owner) # Make a clone of the observedRV, with a distinct rng so that observed and @@ -1270,7 +1278,7 @@ def partial_observed_rv_logprob(op, values, dist, mask, **kwargs): # For the logp, simply join the values [obs_value, unobs_value] = values antimask = ~mask - joined_value = pt.empty_like(dist) + joined_value = pt.empty(constant_fold([dist.shape])[0]) joined_value = pt.set_subtensor(joined_value[mask], unobs_value) joined_value = pt.set_subtensor(joined_value[antimask], obs_value) joined_logp = logp(dist, joined_value) diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 2abe668b74..2aa3c889e3 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -32,8 +32,11 @@ from pytensor.tensor import TensorConstant, gammaln, sigmoid from pytensor.tensor.nlinalg import det, eigh, matrix_inverse, trace from pytensor.tensor.random.basic import dirichlet, multinomial, multivariate_normal -from pytensor.tensor.random.op import RandomVariable, default_supp_shape_from_params -from pytensor.tensor.random.utils import broadcast_params +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.random.utils import ( + broadcast_params, + supp_shape_from_ref_param_shape, +) from pytensor.tensor.slinalg import Cholesky, SolveTriangular from pytensor.tensor.type import TensorType from scipy import linalg, stats @@ -320,9 +323,12 @@ def __call__(self, nu, mu=None, cov=None, size=None, **kwargs): cov = np.array([[1.0]], dtype=dtype) return super().__call__(nu, mu, cov, size=size, **kwargs) - def _supp_shape_from_params(self, dist_params, rep_param_idx=1, param_shapes=None): - return default_supp_shape_from_params( - self.ndim_supp, dist_params, rep_param_idx, param_shapes + def _supp_shape_from_params(self, dist_params, param_shapes=None): + return supp_shape_from_ref_param_shape( + ndim_supp=self.ndim_supp, + dist_params=dist_params, + param_shapes=param_shapes, + ref_param_idx=1, ) @classmethod @@ -611,9 +617,12 @@ class DirichletMultinomialRV(RandomVariable): dtype = "int64" _print_name = ("DirichletMN", "\\operatorname{DirichletMN}") - def _supp_shape_from_params(self, dist_params, rep_param_idx=1, param_shapes=None): - return default_supp_shape_from_params( - self.ndim_supp, dist_params, rep_param_idx, param_shapes + def _supp_shape_from_params(self, dist_params, param_shapes=None): + return supp_shape_from_ref_param_shape( + ndim_supp=self.ndim_supp, + dist_params=dist_params, + param_shapes=param_shapes, + ref_param_idx=1, ) @classmethod @@ -893,9 +902,14 @@ class WishartRV(RandomVariable): dtype = "floatX" _print_name = ("Wishart", 
"\\operatorname{Wishart}") - def _supp_shape_from_params(self, dist_params, rep_param_idx=1, param_shapes=None): + def _supp_shape_from_params(self, dist_params, param_shapes=None): # The shape of second parameter `V` defines the shape of the output. - return dist_params[1].shape[-2:] + return supp_shape_from_ref_param_shape( + ndim_supp=self.ndim_supp, + dist_params=dist_params, + param_shapes=param_shapes, + ref_param_idx=1, + ) @classmethod def rng_fn(cls, rng, nu, V, size): @@ -1639,9 +1653,13 @@ class MatrixNormalRV(RandomVariable): dtype = "floatX" _print_name = ("MatrixNormal", "\\operatorname{MatrixNormal}") - def _infer_shape(self, size, dist_params, param_shapes=None): - shape = tuple(size) + tuple(dist_params[0].shape[-2:]) - return shape + def _supp_shape_from_params(self, dist_params, param_shapes=None): + return supp_shape_from_ref_param_shape( + ndim_supp=self.ndim_supp, + dist_params=dist_params, + param_shapes=param_shapes, + ref_param_idx=0, + ) @classmethod def rng_fn(cls, rng, mu, rowchol, colchol, size=None): @@ -1858,6 +1876,14 @@ class KroneckerNormalRV(RandomVariable): dtype = "floatX" _print_name = ("KroneckerNormal", "\\operatorname{KroneckerNormal}") + def _supp_shape_from_params(self, dist_params, param_shapes=None): + return supp_shape_from_ref_param_shape( + ndim_supp=self.ndim_supp, + dist_params=dist_params, + param_shapes=param_shapes, + ref_param_idx=0, + ) + def rng_fn(self, rng, mu, sigma, *covs, size=None): size = size if size else covs[-1] covs = covs[:-1] if covs[-1] == size else covs @@ -2069,9 +2095,13 @@ def make_node(self, rng, size, dtype, mu, W, alpha, tau): return super().make_node(rng, size, dtype, mu, W, alpha, tau) - def _infer_shape(self, size, dist_params, param_shapes=None): - shape = tuple(size) + (dist_params[0].shape[-1],) - return shape + def _supp_shape_from_params(self, dist_params, param_shapes=None): + return supp_shape_from_ref_param_shape( + ndim_supp=self.ndim_supp, + dist_params=dist_params, + param_shapes=param_shapes, + ref_param_idx=0, + ) @classmethod def rng_fn(cls, rng: np.random.RandomState, mu, W, alpha, tau, size): @@ -2242,7 +2272,7 @@ def make_node(self, rng, size, dtype, alpha, K): return super().make_node(rng, size, dtype, alpha, K) - def _supp_shape_from_params(self, dist_params, **kwargs): + def _supp_shape_from_params(self, dist_params, param_shapes): K = dist_params[1] return (K + 1,) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 9fc5e62aff..8a6ec534ca 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -183,7 +183,10 @@ def n_dims(self) -> int: def _slice(self, X, Xs=None): xdims = X.shape[-1] if isinstance(xdims, Variable): - xdims = xdims.eval() + # Circular dependency + from pymc.pytensorf import constant_fold + + [xdims] = constant_fold([xdims]) if self.input_dim != xdims: warnings.warn( f"Only {self.input_dim} column(s) out of {xdims} are" diff --git a/pymc/logprob/mixture.py b/pymc/logprob/mixture.py index 267670a835..daa16f520e 100644 --- a/pymc/logprob/mixture.py +++ b/pymc/logprob/mixture.py @@ -42,7 +42,7 @@ from pytensor.graph.basic import Apply, Constant, Variable from pytensor.graph.fg import FunctionGraph from pytensor.graph.op import Op, compute_test_value -from pytensor.graph.rewriting.basic import node_rewriter, pre_greedy_node_rewriter +from pytensor.graph.rewriting.basic import EquilibriumGraphRewriter, node_rewriter from pytensor.ifelse import IfElse, ifelse from pytensor.scalar import Switch from pytensor.scalar import switch as scalar_switch @@ -52,6 +52,7 @@ local_rv_size_lift, 
local_subtensor_rv_lift, ) +from pytensor.tensor.rewriting.shape import ShapeFeature from pytensor.tensor.shape import shape_tuple from pytensor.tensor.subtensor import ( AdvancedSubtensor, @@ -77,7 +78,6 @@ measurable_ir_rewrites_db, subtensor_ops, ) -from pymc.logprob.tensor import naive_bcast_rv_lift from pymc.logprob.utils import check_potential_measurability @@ -203,21 +203,17 @@ def expand_indices( return cast(Tuple[TensorVariable], tuple(pt.broadcast_arrays(*adv_indices))) -def rv_pull_down(x: TensorVariable, dont_touch_vars=None) -> TensorVariable: +def rv_pull_down(x: TensorVariable) -> TensorVariable: """Pull a ``RandomVariable`` ``Op`` down through a graph, when possible.""" - fgraph = FunctionGraph(outputs=dont_touch_vars or [], clone=False) - - return pre_greedy_node_rewriter( - fgraph, - [ - local_rv_size_lift, - local_dimshuffle_rv_lift, - local_subtensor_rv_lift, - naive_bcast_rv_lift, - local_lift_DiracDelta, - ], - x, - ) + fgraph = FunctionGraph(outputs=[x], clone=False, features=[ShapeFeature()]) + rewrites = [ + local_rv_size_lift, + local_dimshuffle_rv_lift, + local_subtensor_rv_lift, + local_lift_DiracDelta, + ] + EquilibriumGraphRewriter(rewrites, max_use_ratio=100).rewrite(fgraph) + return fgraph.outputs[0] class MixtureRV(Op): diff --git a/pymc/logprob/order.py b/pymc/logprob/order.py index 4033bf674c..be8d688d80 100644 --- a/pymc/logprob/order.py +++ b/pymc/logprob/order.py @@ -52,6 +52,7 @@ _logprob_helper, ) from pymc.logprob.rewriting import measurable_ir_rewrites_db +from pymc.pytensorf import constant_fold class MeasurableMax(Max): @@ -120,7 +121,7 @@ def max_logprob(op, values, base_rv, **kwargs): logprob = _logprob_helper(base_rv, value) logcdf = _logcdf_helper(base_rv, value) - n = base_rv.size + [n] = constant_fold([base_rv.size]) logprob = (n - 1) * logcdf + logprob + pt.math.log(n) diff --git a/pymc/math.py b/pymc/math.py index 2f8520b0ec..0cb86095ee 100644 --- a/pymc/math.py +++ b/pymc/math.py @@ -75,13 +75,14 @@ where, zeros_like, ) +from pytensor.tensor.special import log_softmax, softmax try: from pytensor.tensor.basic import extract_diag except ImportError: from pytensor.tensor.nlinalg import extract_diag -from pytensor.tensor.nlinalg import det, matrix_dot, matrix_inverse, trace +from pytensor.tensor.nlinalg import matrix_inverse from scipy.linalg import block_diag as scipy_block_diag from pymc.pytensorf import floatX, ix_, largest_common_dtype @@ -267,31 +268,7 @@ def logdiffexp_numpy(a, b): return a + log1mexp_numpy(b - a, negative_input=True) -def invlogit(x, eps=None): - """The inverse of the logit function, 1 / (1 + exp(-x)).""" - if eps is not None: - warnings.warn( - "pymc.math.invlogit no longer supports the ``eps`` argument and it will be ignored.", - FutureWarning, - stacklevel=2, - ) - return pt.sigmoid(x) - - -def softmax(x, axis=None): - # Ignore vector case UserWarning issued by PyTensor. This can be removed once PyTensor - # drops that warning - with warnings.catch_warnings(): - warnings.simplefilter("ignore", UserWarning) - return pt.special.softmax(x, axis=axis) - - -def log_softmax(x, axis=None): - # Ignore vector case UserWarning issued by PyTensor. 
This can be removed once PyTensor - # drops that warning - with warnings.catch_warnings(): - warnings.simplefilter("ignore", UserWarning) - return pt.special.log_softmax(x, axis=axis) +invlogit = sigmoid def logbern(log_p): diff --git a/pymc/model_graph.py b/pymc/model_graph.py index 43e7d1f728..1f3e6d1b89 100644 --- a/pymc/model_graph.py +++ b/pymc/model_graph.py @@ -23,6 +23,7 @@ from pytensor.scalar.basic import Cast from pytensor.tensor.elemwise import Elemwise from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.shape import Shape from pytensor.tensor.var import TensorConstant, TensorVariable import pymc as pm @@ -55,6 +56,9 @@ def get_parent_names(self, var: TensorVariable) -> Set[VarName]: def _filter_non_parameter_inputs(var): node = var.owner + if isinstance(node.op, Shape): + # Don't show shape-related dependencies + return [] if isinstance(node.op, RandomVariable): # Filter out rng, dtype and size parameters or RandomVariable nodes return node.inputs[3:] diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py index 04ed1bdad2..066ba233c5 100644 --- a/pymc/pytensorf.py +++ b/pymc/pytensorf.py @@ -87,7 +87,6 @@ "generator", "convert_observed_data", "compile_pymc", - "constant_fold", ] diff --git a/pymc/variational/approximations.py b/pymc/variational/approximations.py index d00d893ff5..d271e80448 100644 --- a/pymc/variational/approximations.py +++ b/pymc/variational/approximations.py @@ -19,6 +19,7 @@ from arviz import InferenceData from pytensor import tensor as pt from pytensor.graph.basic import Variable +from pytensor.graph.replace import graph_replace from pytensor.tensor.var import TensorVariable import pymc as pm @@ -390,7 +391,7 @@ def evaluate_over_trace(self, node): node = self.to_flat_input(node) def sample(post, *_): - return pytensor.clone_replace(node, {self.input: post}) + return graph_replace(node, {self.input: post}, strict=False) nodes, _ = pytensor.scan( sample, self.histogram, non_sequences=_known_scan_ignored_inputs(makeiter(node)) diff --git a/pymc/variational/opvi.py b/pymc/variational/opvi.py index 7280105d03..f81a6d6a23 100644 --- a/pymc/variational/opvi.py +++ b/pymc/variational/opvi.py @@ -59,6 +59,8 @@ import xarray from pytensor.graph.basic import Variable +from pytensor.graph.replace import graph_replace +from pytensor.tensor.shape import unbroadcast import pymc as pm @@ -1002,7 +1004,7 @@ def set_size_and_deterministic( """ flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements) - node_out = pytensor.clone_replace(node, flat2rand) + node_out = graph_replace(node, flat2rand, strict=False) assert not ( set(makeiter(self.input)) & set(pytensor.graph.graph_inputs(makeiter(node_out))) ) @@ -1012,7 +1014,7 @@ def set_size_and_deterministic( def to_flat_input(self, node): """*Dev* - replace vars with flattened view stored in `self.inputs`""" - return pytensor.clone_replace(node, self.replacements) + return graph_replace(node, self.replacements, strict=False) def symbolic_sample_over_posterior(self, node): """*Dev* - performs sampling of node applying independent samples from posterior each time. 
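The recurring change in these variational modules replaces `pytensor.clone_replace` with `graph_replace`. As a minimal, self-contained sketch of the call pattern being adopted (toy variables only; this assumes `strict=False` makes `graph_replace` ignore replacement keys that do not occur in the graph rather than raising):

```python
import pytensor
import pytensor.tensor as pt

from pytensor.graph.replace import graph_replace

x = pt.scalar("x")
unused = pt.scalar("unused")
out = pt.exp(x) + 1

# Swap `x` for a squared stand-in; the `unused` entry is only tolerated
# because strict=False, otherwise unapplied replacements raise an error.
x_new = pt.scalar("x_new")
new_out = graph_replace(out, {x: x_new**2, unused: x_new}, strict=False)

fn = pytensor.function([x_new], new_out)
print(fn(2.0))  # exp(4) + 1, roughly 55.6
```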
@@ -1023,7 +1025,7 @@ def symbolic_sample_over_posterior(self, node): random = pt.specify_shape(random, self.symbolic_initial.type.shape) def sample(post, *_): - return pytensor.clone_replace(node, {self.input: post}) + return graph_replace(node, {self.input: post}, strict=False) nodes, _ = pytensor.scan( sample, random, non_sequences=_known_scan_ignored_inputs(makeiter(random)) @@ -1038,7 +1040,7 @@ def symbolic_single_sample(self, node): """ node = self.to_flat_input(node) random = self.symbolic_random.astype(self.symbolic_initial.dtype) - return pytensor.clone_replace(node, {self.input: random[0]}) + return graph_replace(node, {self.input: random[0]}, strict=False) def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): """*Dev* - creates correct replacements for initial depending on @@ -1059,8 +1061,15 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) """ initial = self._new_initial(s, d, more_replacements) initial = pt.specify_shape(initial, self.symbolic_initial.type.shape) + # The static shape of initial may be more precise than self.symbolic_initial, + # and reveal previously unknown broadcastable dimensions. We have to mask those again. + if initial.type.broadcastable != self.symbolic_initial.type.broadcastable: + unbroadcast_axes = ( + i for i, b in enumerate(self.symbolic_initial.type.broadcastable) if not b + ) + initial = unbroadcast(initial, *unbroadcast_axes) if more_replacements: - initial = pytensor.clone_replace(initial, more_replacements) + initial = graph_replace(initial, more_replacements, strict=False) return {self.symbolic_initial: initial} @node_property @@ -1394,8 +1403,8 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): _node = node optimizations = self.get_optimization_replacements(s, d) flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements) - node = pytensor.clone_replace(node, optimizations) - node = pytensor.clone_replace(node, flat2rand) + node = graph_replace(node, optimizations, strict=False) + node = graph_replace(node, flat2rand, strict=False) assert not (set(self.symbolic_randoms) & set(pytensor.graph.graph_inputs(makeiter(node)))) try_to_set_test_value(_node, node, s) return node @@ -1403,8 +1412,8 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): def to_flat_input(self, node, more_replacements=None): """*Dev* - replace vars with flattened view stored in `self.inputs`""" more_replacements = more_replacements or {} - node = pytensor.clone_replace(node, more_replacements) - return pytensor.clone_replace(node, self.replacements) + node = graph_replace(node, more_replacements, strict=False) + return graph_replace(node, self.replacements, strict=False) def symbolic_sample_over_posterior(self, node, more_replacements=None): """*Dev* - performs sampling of node applying independent samples from posterior each time. 
@@ -1413,7 +1422,7 @@ def symbolic_sample_over_posterior(self, node, more_replacements=None): node = self.to_flat_input(node) def sample(*post): - return pytensor.clone_replace(node, dict(zip(self.inputs, post))) + return graph_replace(node, dict(zip(self.inputs, post)), strict=False) nodes, _ = pytensor.scan( sample, self.symbolic_randoms, non_sequences=_known_scan_ignored_inputs(makeiter(node)) @@ -1429,7 +1438,7 @@ def symbolic_single_sample(self, node, more_replacements=None): node = self.to_flat_input(node, more_replacements=more_replacements) post = [v[0] for v in self.symbolic_randoms] inp = self.inputs - return pytensor.clone_replace(node, dict(zip(inp, post))) + return graph_replace(node, dict(zip(inp, post)), strict=False) def get_optimization_replacements(self, s, d): """*Dev* - optimizations for logP. If sample size is static and equal to 1: @@ -1463,7 +1472,7 @@ def sample_node(self, node, size=None, deterministic=False, more_replacements=No """ node_in = node if more_replacements: - node = pytensor.clone_replace(node, more_replacements) + node = graph_replace(node, more_replacements, strict=False) if not isinstance(node, (list, tuple)): node = [node] node = self.model.replace_rvs_by_values(node) diff --git a/pymc/variational/stein.py b/pymc/variational/stein.py index 1f8c034f80..d48b6f4710 100644 --- a/pymc/variational/stein.py +++ b/pymc/variational/stein.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytensor import pytensor.tensor as pt +from pytensor.graph.replace import graph_replace + from pymc.pytensorf import floatX from pymc.util import WithMemoization, locally_cachedmethod from pymc.variational.opvi import node_property @@ -85,9 +86,10 @@ def dxkxy(self): def logp_norm(self): sized_symbolic_logp = self.approx.sized_symbolic_logp if self.use_histogram: - sized_symbolic_logp = pytensor.clone_replace( + sized_symbolic_logp = graph_replace( sized_symbolic_logp, dict(zip(self.approx.symbolic_randoms, self.approx.collect("histogram"))), + strict=False, ) return sized_symbolic_logp / self.approx.symbolic_normalizing_constant diff --git a/requirements-dev.txt b/requirements-dev.txt index 6032958e14..3ca57176a3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -18,7 +18,7 @@ numpydoc pandas>=0.24.0 polyagamma pre-commit>=2.8.0 -pytensor>=2.12.0,<2.13 +pytensor>=2.14.1,<2.15 pytest-cov>=2.5 pytest>=3.0 scipy>=1.4.1 diff --git a/requirements.txt b/requirements.txt index 82d327e62a..04b732c060 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,6 @@ cloudpickle fastprogress>=0.2.0 numpy>=1.15.0 pandas>=0.24.0 -pytensor>=2.12.0,<2.13 +pytensor>=2.14.1,<2.15 scipy>=1.4.1 typing-extensions>=3.7.4 diff --git a/tests/conftest.py b/tests/conftest.py index 97b3207cb4..b6e4285a8f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import warnings import numpy as np import pytensor @@ -45,3 +46,10 @@ def strict_float32(): def seeded_test(): # TODO: use this instead of SeededTest np.random.seed(42) + + +@pytest.fixture +def fail_on_warning(): + with warnings.catch_warnings(): + warnings.simplefilter("error") + yield diff --git a/tests/distributions/test_dist_math.py b/tests/distributions/test_dist_math.py index f0b6d8f87f..2ccab76b1f 100644 --- a/tests/distributions/test_dist_math.py +++ b/tests/distributions/test_dist_math.py @@ -224,7 +224,7 @@ def check_vals(fn1, fn2, *args): def test_multigamma(): - x = pt.vector("x") + x = pt.vector("x", shape=(1,)) p = pt.scalar("p") xvals = [np.array([v], dtype=config.floatX) for v in [0.1, 2, 5, 10, 50, 100]] diff --git a/tests/distributions/test_distribution.py b/tests/distributions/test_distribution.py index 99a89dbace..9bdfdb2053 100644 --- a/tests/distributions/test_distribution.py +++ b/tests/distributions/test_distribution.py @@ -217,6 +217,10 @@ def test_custom_dist_without_random(self): with pytest.raises(NotImplementedError): pm.sample_posterior_predictive(idata, model=model) + @pytest.mark.xfail( + NotImplementedError, + reason="Support shape of multivariate CustomDist cannot be inferred. See https://github.com/pymc-devs/pytensor/pull/388", + ) @pytest.mark.parametrize("size", [(), (3,), (3, 2)], ids=str) def test_custom_dist_with_random_multivariate(self, size): supp_shape = 5 @@ -264,6 +268,10 @@ def test_custom_dist_old_api_error(self): ): CustomDist("a", lambda x: x) + @pytest.mark.xfail( + NotImplementedError, + reason="Support shape of multivariate CustomDist cannot be inferred. See https://github.com/pymc-devs/pytensor/pull/388", + ) @pytest.mark.parametrize("size", [None, (), (2,)], ids=str) def test_custom_dist_multivariate_logp(self, size): supp_shape = 5 @@ -314,6 +322,10 @@ def density_moment(rv, size, mu): assert evaled_moment.shape == to_tuple(size) assert np.all(evaled_moment == mu_val) + @pytest.mark.xfail( + NotImplementedError, + reason="Support shape of multivariate CustomDist cannot be inferred. See https://github.com/pymc-devs/pytensor/pull/388", + ) @pytest.mark.parametrize("size", [(), (2,), (3, 2)], ids=str) def test_custom_dist_custom_moment_multivariate(self, size): def density_moment(rv, size, mu): @@ -328,6 +340,10 @@ def density_moment(rv, size, mu): assert evaled_moment.shape == to_tuple(size) + (5,) assert np.all(evaled_moment == mu_val) + @pytest.mark.xfail( + NotImplementedError, + reason="Support shape of multivariate CustomDist cannot be inferred. See https://github.com/pymc-devs/pytensor/pull/388", + ) @pytest.mark.parametrize( "with_random, size", [ diff --git a/tests/logprob/test_mixture.py b/tests/logprob/test_mixture.py index 53e2096b55..ef46b2cf27 100644 --- a/tests/logprob/test_mixture.py +++ b/tests/logprob/test_mixture.py @@ -1119,6 +1119,7 @@ def test_ifelse_mixture_shared_component(): ) +@pytest.mark.xfail(reason="Relied on rewrite-case that is no longer supported by PyTensor") def test_joint_logprob_subtensor(): """Make sure we can compute a joint log-probability for ``Y[I]`` where ``Y`` and ``I`` are random variables.""" @@ -1137,6 +1138,10 @@ def test_joint_logprob_subtensor(): I_rv = pt.random.bernoulli(p, size=size, rng=rng) I_rv.name = "I" + # The rewrite for lifting subtensored RVs refuses to work with advanced + # indexing as it could lead to repeated draws. 
+ # TODO: Re-enable rewrite for cases where this is not a concern + # (e.g., at least one of the advanced indexes has non-repeating values) A_idx = A_rv[I_rv, pt.ogrid[A_rv.shape[-1] :]] assert isinstance(A_idx.owner.op, (Subtensor, AdvancedSubtensor, AdvancedSubtensor1)) diff --git a/tests/test_math.py b/tests/test_math.py index c5aed9246c..ff9c8708c0 100644 --- a/tests/test_math.py +++ b/tests/test_math.py @@ -262,37 +262,3 @@ def test_expand_packed_triangular(): assert np.all(expand_upper.eval({packed: upper_packed}) == upper) assert np.all(expand_diag_lower.eval({packed: lower_packed}) == floatX(np.diag(vals))) assert np.all(expand_diag_upper.eval({packed: upper_packed}) == floatX(np.diag(vals))) - - -def test_invlogit_deprecation_warning(): - with pytest.warns( - FutureWarning, - match="pymc.math.invlogit no longer supports the", - ): - res = invlogit(np.array(-750.0), 1e-5).eval() - - with warnings.catch_warnings(): - warnings.simplefilter("error") - res_zero_eps = invlogit(np.array(-750.0)).eval() - - assert np.isclose(res, res_zero_eps) - - -@pytest.mark.parametrize( - "pytensor_function, pymc_wrapper", - [ - (pt.special.softmax, softmax), - (pt.special.log_softmax, log_softmax), - ], -) -def test_softmax_logsoftmax_no_warnings(pytensor_function, pymc_wrapper): - """Test that wrappers for pytensor functions do not issue Warnings""" - - vector = pt.vector("vector") - with pytest.warns(Warning) as record: - pytensor_function(vector) - assert {w.category for w in record.list} == {UserWarning, FutureWarning} - - with warnings.catch_warnings(): - warnings.simplefilter("error") - pymc_wrapper(vector) diff --git a/tests/test_model.py b/tests/test_model.py index f4d2bbe78d..c4d04918a0 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -963,7 +963,7 @@ def test_set_data_constant_shape_error(): pmodel.add_coord("weekday", length=x.shape[0]) pm.MutableData("y", np.arange(7), dims="weekday") - msg = "because the dimension length is tied to a TensorConstant" + msg = "because the dimension was initialized from 'x'" with pytest.raises(ShapeError, match=msg): pmodel.set_data("y", np.arange(10)) diff --git a/tests/variational/test_inference.py b/tests/variational/test_inference.py index f23d7cc4f7..b38c94740a 100644 --- a/tests/variational/test_inference.py +++ b/tests/variational/test_inference.py @@ -15,6 +15,8 @@ import io import operator +from contextlib import nullcontext + import cloudpickle import numpy as np import pytensor @@ -29,7 +31,7 @@ from pymc.variational.opvi import NotImplementedInference from tests import models -pytestmark = pytest.mark.usefixtures("strict_float32", "seeded_test") +pytestmark = pytest.mark.usefixtures("strict_float32", "seeded_test", "fail_on_warning") @pytest.mark.parametrize("score", [True, False]) @@ -157,7 +159,16 @@ def fit_kwargs(inference, use_minibatch): def test_fit_oo(inference, fit_kwargs, simple_model_data): - trace = inference.fit(**fit_kwargs).sample(10000) + # Minibatch data can't be extracted into the `observed_data` group in the final InferenceData + if getattr(simple_model_data["data"], "name", "").startswith("minibatch"): + warn_ctxt = pytest.warns( + UserWarning, match="Could not extract data from symbolic observation" + ) + else: + warn_ctxt = nullcontext() + + with warn_ctxt: + trace = inference.fit(**fit_kwargs).sample(10000) mu_post = simple_model_data["mu_post"] d = simple_model_data["d"] np.testing.assert_allclose(np.mean(trace.posterior["mu"]), mu_post, rtol=0.05) @@ -180,11 +191,21 @@ def test_fit_start(inference_spec, 
simple_model): kw = {"start": {"mu": mu_init}} if has_start_sigma: kw.update({"start_sigma": {"mu": mu_sigma_init}}) - with simple_model: inference = inference_spec(**kw) + + # Minibatch data can't be extracted into the `observed_data` group in the final InferenceData + [observed_value] = [simple_model.rvs_to_values[obs] for obs in simple_model.observed_RVs] + if observed_value.name.startswith("minibatch"): + warn_ctxt = pytest.warns( + UserWarning, match="Could not extract data from symbolic observation" + ) + else: + warn_ctxt = nullcontext() + try: - trace = inference.fit(n=0).sample(10000) + with warn_ctxt: + trace = inference.fit(n=0).sample(10000) except NotImplementedInference as e: pytest.skip(str(e)) np.testing.assert_allclose(np.mean(trace.posterior["mu"]), mu_init, rtol=0.05)
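For completeness, the `fail_on_warning` fixture added in `tests/conftest.py` above is opted into per module, as this test file does through `pytestmark`. A minimal, hypothetical usage sketch (the test name and warning text are illustrative only):

```python
import warnings

import pytest

# Opt the whole module in, mirroring tests/variational/test_inference.py above
pytestmark = pytest.mark.usefixtures("fail_on_warning")


def test_module_is_warning_free():
    # With the fixture active, any unexpected warning is promoted to an error
    # and fails the test; expected warnings are still asserted explicitly.
    with pytest.warns(UserWarning, match="expected"):
        warnings.warn("expected", UserWarning)
```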