Enable prior_predictive to return transformed values

ricardoV94 · ricardoV94 · commit 687f044bfcf3 · 2021-06-15T15:46:11.000+02:00
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -7,7 +7,8 @@
 - The GLM submodule has been removed, please use [Bambi](https://bambinos.github.io/bambi/) instead.
 - The `Distribution` keyword argument `testval` has been deprecated in favor of `initval`.
 - `pm.sample` now returns results as `InferenceData` instead of `MultiTrace` by default (see [#4744](https://github.com/pymc-devs/pymc3/pull/4744)).
-- ...
+- `pm.sample_prior_predictive` no longer returns transformed variable values by default. Pass them by name in `var_names` if you want to obtain these draws (see [#4769](https://github.com/pymc-devs/pymc3/pull/4769)).
+- ...
 
 ### New Features
 - The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models.
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
@@ -1943,7 +1943,8 @@ def sample_prior_predictive(
     model : Model (optional if in ``with`` context)
     var_names : Iterable[str]
         A list of names of variables for which to compute the posterior predictive
-        samples. Defaults to both observed and unobserved RVs.
+        samples. Defaults to both observed and unobserved RVs. Transformed values
+        are not included unless explicitly listed by name in ``var_names``.
     random_seed : int
         Seed for the random number generator.
     mode:
@@ -1983,8 +1984,26 @@ def sample_prior_predictive(
         )
 
     names = get_default_varnames(vars_, include_transformed=False)
-
     vars_to_sample = [model[name] for name in names]
+
+    # Any variables from var_names that are missing must be transformed variables.
+    # Misspelled variables would have raised a KeyError above.
+    missing_names = vars_.difference(names)
+    for name in missing_names:
+        transformed_value_var = model[name]
+        rv_var = model.values_to_rvs[transformed_value_var]
+        transform = transformed_value_var.tag.transform
+        transformed_rv_var = transform.forward(rv_var, rv_var)
+
+        names.append(name)
+        vars_to_sample.append(transformed_rv_var)
+
+        # If the user asked for the transformed variable in var_names, but not the
+        # original RV, we add it manually here
+        if rv_var.name not in names:
+            names.append(rv_var.name)
+            vars_to_sample.append(rv_var)
+
     inputs = [i for i in inputvars(vars_to_sample) if not isinstance(i, SharedVariable)]
 
     sampler_fn = compile_rv_inplace(
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
@@ -1076,6 +1076,43 @@ def test_potentials_warning(self):
             with pytest.warns(UserWarning, match=warning_msg):
                 pm.sample_prior_predictive(samples=5)
 
+    def test_transformed_vars(self):
+        # sample_prior_predictive must return draws of transformed variables when
+        # their names are passed explicitly in `var_names`.
+
+        def ub_interval_forward(x, ub):
+            # Forward interval transform for bounds (0, ub): log(x - 0) - log(ub - x)
+            return np.log(x - 0) - np.log(ub - x)
+
+        with pm.Model(rng_seeder=123) as model:
+            ub = pm.HalfNormal("ub", 10)
+            x = pm.Uniform("x", 0, ub)
+
+            prior = pm.sample_prior_predictive(
+                var_names=["ub", "ub_log__", "x", "x_interval__"],
+                samples=10,
+            )
+
+        # Transformed draws must equal the forward transform applied to the RV draws
+        assert np.allclose(prior["ub_log__"], np.log(prior["ub"]))
+        assert np.allclose(
+            prior["x_interval__"],
+            ub_interval_forward(prior["x"], prior["ub"]),
+        )
+
+        # Same seed, transformed names only: RVs must be omitted and draws must match
+        with pm.Model(rng_seeder=123) as model_transformed_only:
+            ub = pm.HalfNormal("ub", 10)
+            x = pm.Uniform("x", 0, ub)
+
+            prior_transformed_only = pm.sample_prior_predictive(
+                var_names=["ub_log__", "x_interval__"],
+                samples=10,
+            )
+        assert "ub" not in prior_transformed_only and "x" not in prior_transformed_only
+        assert np.allclose(prior["ub_log__"], prior_transformed_only["ub_log__"])
+        assert np.allclose(prior["x_interval__"], prior_transformed_only["x_interval__"])
+
 
 class TestSamplePosteriorPredictive:
     def test_point_list_arg_bug_spp(self, point_list_arg_bug_fixture):