Handle value mappings for Scans with taps and initial values

brandonwillard · brandonwillard · commit 4d27b626dc86 · 2021-09-22T00:31:00.000-05:00
diff --git a/aeppl/scan.py b/aeppl/scan.py
@@ -297,7 +297,9 @@ def create_inner_out_logp(
 def find_measurable_scans(fgraph, node):
     r"""Finds `Scan`\s for which a `logprob` can be computed.
 
-    This will convert said `Scan`\s into `MeasurableScan`\s.
+    This will convert said `Scan`\s into `MeasurableScan`\s.  It also updates
+    random variable and value variable mappings that have been specified for
+    parts of a `Scan`'s outputs (e.g. everything except the initial values).
     """
 
     if not isinstance(node.op, Scan):
@@ -306,6 +308,11 @@ def find_measurable_scans(fgraph, node):
     if isinstance(node.op, MeasurableScan):
         return
 
+    rv_map_feature = getattr(fgraph, "preserve_rv_mappings", None)
+
+    if rv_map_feature is None:
+        return None  # pragma: no cover
+
     curr_scanargs = ScanArgs.from_node(node)
 
     # Find the un-output `MeasurableVariable`s created in the inner-graph
@@ -328,6 +335,71 @@ def find_measurable_scans(fgraph, node):
                 # TODO: Why can't we make this a `MeasurableScan`?
                 return None
 
+    if not any(out in rv_map_feature.rv_values for out in node.outputs):
+        # We need to remap user inputs that have been specified in terms of
+        # `Subtensor`s of this `Scan` node's outputs.
+        #
+        # For example, the output that the user got was something like
+        # `out[1:]` for `outputs_info = [{"initial": x0, "taps": [-1]}]`, so
+        # they likely passed `{out[1:]: x_1T_vv}` to `joint_logprob`.
+        # Since `out[1:]` isn't really the output of a `Scan`, but a
+        # `Subtensor` of the output `out` of a `Scan`, we need to account for
+        # that.
+
+        from aesara.tensor.subtensor import Subtensor, indices_from_subtensor
+
+        # Get any `Subtensor` outputs that have been applied to outputs of this
+        # `Scan` (and get the corresponding indices of the outputs from this
+        # `Scan`)
+        output_clients: List[Tuple[Variable, int]] = sum(
+            [
+                [
+                    # This is expected to work for `Subtensor` `Op`s,
+                    # because they only ever have one output
+                    (cl.default_output(), i)
+                    for cl, _ in fgraph.get_clients(out)
+                    if isinstance(cl.op, Subtensor)
+                ]
+                for i, out in enumerate(node.outputs)
+            ],
+            [],
+        )
+
+        # The second items in these tuples are the value variables mapped to
+        # the *user-specified* measurable variables (i.e. the first items) that
+        # are `Subtensor`s of the outputs of this `Scan`.  The third items are
+        # the indices of the corresponding outputs of this `Scan` node.
+        indirect_rv_vars = [
+            (out, rv_map_feature.rv_values[out], out_idx)
+            for out, out_idx in output_clients
+            if out in rv_map_feature.rv_values
+        ]
+
+        if not indirect_rv_vars:
+            return None
+
+        # We're going to replace the user's random variable/value variable mappings
+        # with ones that map directly to outputs of this `Scan`.
+        for rv_var, val_var, out_idx in indirect_rv_vars:
+
+            # The full/un-`Subtensor`ed `Scan` output that we need to use
+            full_out = node.outputs[out_idx]
+
+            assert rv_var.owner.inputs[0] == full_out
+
+            # A new value variable that spans the full output
+            new_val_var = full_out.clone()
+            # Set the region of this new value variable selected by the user's
+            # `Subtensor` indices to the user-specified value variable
+            subtensor_indices = indices_from_subtensor(
+                rv_var.owner.inputs[1:], rv_var.owner.op.idx_list
+            )
+            new_val_var = at.set_subtensor(new_val_var[subtensor_indices], val_var)
+
+            # Replace the mapping
+            del rv_map_feature.rv_values[rv_var]
+            rv_map_feature.rv_values[full_out] = new_val_var
+
     op = MeasurableScan(
         curr_scanargs.inner_inputs, curr_scanargs.inner_outputs, curr_scanargs.info
     )
diff --git a/tests/test_scan.py b/tests/test_scan.py
@@ -341,3 +341,34 @@ def scan_fn(mus_t, sigma_t, Y_t_val, S_t_val, Gamma_t):
     y_logp_ref_val = y_logp_ref.eval(test_point)
 
     assert np.allclose(y_logp_val, y_logp_ref_val)
+
+
+def test_initial_values():
+    srng = at.random.RandomStream()
+
+    S_0_rv = srng.categorical(np.array([0.5, 0.5]), name="S_0")
+
+    s_0_vv = S_0_rv.clone()
+    s_0_vv.name = "s_0"
+
+    def step_fn(S_tm1):
+        S_t = srng.categorical(np.array([0.5, 0.5]), name="S_t")
+        return S_t
+
+    S_1T_rv, _ = aesara.scan(
+        fn=step_fn,
+        outputs_info=[{"initial": S_0_rv, "taps": [-1]}],
+        strict=True,
+        n_steps=10,
+        name="S_0T",
+    )
+
+    S_1T_rv.name = "S_1T"
+    s_1T_vv = S_1T_rv.clone()
+    s_1T_vv.name = "s_1T"
+
+    S_0T_logp = joint_logprob({S_1T_rv: s_1T_vv, S_0_rv: s_0_vv})
+
+    assert S_0T_logp
+
+    raise AssertionError("Not finished")