Add functional vectorize helper to pytensor.tensor module

ricardoV94 · ricardoV94 · commit 230a80805220 · 2023-11-15T21:39:47.000Z
diff --git a/pytensor/tensor/__init__.py b/pytensor/tensor/__init__.py
@@ -148,6 +148,7 @@ def _get_vector_length_Constant(op: Union[Op, Variable], var: Constant) -> int:
 from pytensor.tensor.type import *  # noqa
 from pytensor.tensor.type_other import *  # noqa
 from pytensor.tensor.variable import TensorConstant, TensorVariable  # noqa
+from pytensor.tensor.functional import vectorize  # noqa
 
 # Allow accessing numpy constants from pytensor.tensor
 from numpy import e, euler_gamma, inf, infty, nan, newaxis, pi  # noqa
diff --git a/pytensor/tensor/blockwise.py b/pytensor/tensor/blockwise.py
@@ -1,4 +1,3 @@
-import re
 from collections.abc import Sequence
 from typing import Any, Optional, cast
 
@@ -13,49 +12,14 @@
 from pytensor.tensor import as_tensor_variable
 from pytensor.tensor.shape import shape_padleft
 from pytensor.tensor.type import continuous_dtypes, discrete_dtypes, tensor
-from pytensor.tensor.utils import broadcast_static_dim_lengths, import_func_from_string
+from pytensor.tensor.utils import (
+    _parse_gufunc_signature,
+    broadcast_static_dim_lengths,
+    import_func_from_string,
+)
 from pytensor.tensor.variable import TensorVariable
 
 
-# TODO: Implement vectorize helper to batch whole graphs (similar to what Blockwise does for the grad)
-
-# Copied verbatim from numpy.lib.function_base
-# https://github.com/numpy/numpy/blob/f2db090eb95b87d48a3318c9a3f9d38b67b0543c/numpy/lib/function_base.py#L1999-L2029
-_DIMENSION_NAME = r"\w+"
-_CORE_DIMENSION_LIST = "(?:{0:}(?:,{0:})*)?".format(_DIMENSION_NAME)
-_ARGUMENT = rf"\({_CORE_DIMENSION_LIST}\)"
-_ARGUMENT_LIST = "{0:}(?:,{0:})*".format(_ARGUMENT)
-_SIGNATURE = "^{0:}->{0:}$".format(_ARGUMENT_LIST)
-
-
-def _parse_gufunc_signature(signature):
-    """
-    Parse string signatures for a generalized universal function.
-
-    Arguments
-    ---------
-    signature : string
-        Generalized universal function signature, e.g., ``(m,n),(n,p)->(m,p)``
-        for ``np.matmul``.
-
-    Returns
-    -------
-    Tuple of input and output core dimensions parsed from the signature, each
-    of the form List[Tuple[str, ...]].
-    """
-    signature = re.sub(r"\s+", "", signature)
-
-    if not re.match(_SIGNATURE, signature):
-        raise ValueError(f"not a valid gufunc signature: {signature}")
-    return tuple(
-        [
-            tuple(re.findall(_DIMENSION_NAME, arg))
-            for arg in re.findall(_ARGUMENT, arg_list)
-        ]
-        for arg_list in signature.split("->")
-    )
-
-
 def safe_signature(
     core_inputs: Sequence[Variable],
     core_outputs: Sequence[Variable],
diff --git a/pytensor/tensor/functional.py b/pytensor/tensor/functional.py
@@ -0,0 +1,125 @@
+from typing import Callable, Optional
+
+from pytensor.graph import vectorize_graph
+from pytensor.tensor import TensorVariable
+from pytensor.tensor.utils import _parse_gufunc_signature
+
+
+def vectorize(func: Callable, signature: Optional[str] = None) -> Callable:
+    """Create a vectorized version of a Python function that takes and returns TensorVariables.
+
+    Similar to `numpy.vectorize`; see its docstring for more details.
+
+    Parameters
+    ----------
+    func: Callable
+        Function that creates the desired outputs from TensorVariable inputs with the core dimensions.
+    signature: str, optional
+        Generalized universal function signature, e.g., ``(m,n),(n)->(m)`` for vectorized matrix-vector multiplication.
+        If not provided, every input is assumed to be a scalar (no core dimensions). Unlike numpy, the outputs
+        can have arbitrary shapes when the signature is not provided.
+
+    Returns
+    -------
+    vectorized_func: Callable
+        Callable that takes TensorVariables with any number of batch dimensions on the left
+        and returns variables whose graphs correspond to the vectorized expressions of func.
+
+    Notes
+    -----
+    Unlike numpy.vectorize, the equality of core dimensions implied by the signature is not explicitly asserted.
+
+    To vectorize an existing graph, use `pytensor.graph.replace.vectorize_graph` instead.
+
+
+    Examples
+    --------
+    .. code-block:: python
+
+        import pytensor
+        import pytensor.tensor as pt
+
+        def func(x):
+            return pt.exp(x) / pt.sum(pt.exp(x))
+
+        vec_func = pt.vectorize(func, signature="(a)->(a)")
+
+        x = pt.matrix("x")
+        y = vec_func(x)
+
+        fn = pytensor.function([x], y)
+        fn([[0, 1, 2], [2, 1, 0]])
+        # array([[0.09003057, 0.24472847, 0.66524096],
+        #        [0.66524096, 0.24472847, 0.09003057]])
+
+
+    .. code-block:: python
+
+        import pytensor
+        import pytensor.tensor as pt
+
+        def func(x):
+            return x[0], x[-1]
+
+        vec_func = pt.vectorize(func, signature="(a)->(),()")
+
+        x = pt.matrix("x")
+        y1, y2 = vec_func(x)
+
+        fn = pytensor.function([x], [y1, y2])
+        fn([[-10, 0, 10], [-11, 0, 11]])
+        # [array([-10., -11.]), array([10., 11.])]
+
+    """
+
+    def inner(*inputs):
+        if signature is None:
+            # Assume all inputs are scalar
+            inputs_sig = [()] * len(inputs)
+        else:
+            inputs_sig, outputs_sig = _parse_gufunc_signature(signature)
+            if len(inputs) != len(inputs_sig):
+                raise ValueError(
+                    f"Number of inputs does not match signature: {signature}"
+                )
+
+        # Create dummy core inputs by stripping the batched dimensions of inputs
+        core_inputs = []
+        for input, input_sig in zip(inputs, inputs_sig):
+            if not isinstance(input, TensorVariable):
+                raise TypeError(
+                    f"Inputs to vectorize function must be TensorVariable, got {type(input)}"
+                )
+
+            if input.ndim < len(input_sig):
+                raise ValueError(
+                    f"Input {input} has less dimensions than signature {input_sig}"
+                )
+            if len(input_sig):
+                core_shape = input.type.shape[-len(input_sig) :]
+            else:
+                core_shape = ()
+
+            core_input = input.type.clone(shape=core_shape)(name=input.name)
+            core_inputs.append(core_input)
+
+        # Call function on dummy core inputs
+        core_outputs = func(*core_inputs)
+        if core_outputs is None:
+            raise ValueError("vectorize function returned no outputs")
+
+        if signature is not None:
+            if isinstance(core_outputs, (list, tuple)):
+                n_core_outputs = len(core_outputs)
+            else:
+                n_core_outputs = 1
+            if n_core_outputs != len(outputs_sig):
+                raise ValueError(
+                    f"Number of outputs does not match signature: {signature}"
+                )
+
+        # Vectorize graph by replacing dummy core inputs by original inputs
+        outputs = vectorize_graph(core_outputs, replace=dict(zip(core_inputs, inputs)))
+        return outputs
+
+    return inner
diff --git a/pytensor/tensor/utils.py b/pytensor/tensor/utils.py
@@ -1,3 +1,4 @@
+import re
 from collections.abc import Sequence
 from typing import Union
 
@@ -161,3 +162,40 @@ def broadcast_static_dim_lengths(
     if len(dim_lengths_set) > 1:
         raise ValueError
     return tuple(dim_lengths_set)[0]
+
+
+# Copied verbatim from numpy.lib.function_base
+# https://github.com/numpy/numpy/blob/f2db090eb95b87d48a3318c9a3f9d38b67b0543c/numpy/lib/function_base.py#L1999-L2029
+_DIMENSION_NAME = r"\w+"
+_CORE_DIMENSION_LIST = "(?:{0:}(?:,{0:})*)?".format(_DIMENSION_NAME)
+_ARGUMENT = rf"\({_CORE_DIMENSION_LIST}\)"
+_ARGUMENT_LIST = "{0:}(?:,{0:})*".format(_ARGUMENT)
+_SIGNATURE = "^{0:}->{0:}$".format(_ARGUMENT_LIST)
+
+
+def _parse_gufunc_signature(signature):
+    """
+    Parse string signatures for a generalized universal function.
+
+    Arguments
+    ---------
+    signature : string
+        Generalized universal function signature, e.g., ``(m,n),(n,p)->(m,p)``
+        for ``np.matmul``.
+
+    Returns
+    -------
+    Tuple of input and output core dimensions parsed from the signature, each
+    of the form List[Tuple[str, ...]].
+    """
+    signature = re.sub(r"\s+", "", signature)
+
+    if not re.match(_SIGNATURE, signature):
+        raise ValueError(f"not a valid gufunc signature: {signature}")
+    return tuple(
+        [
+            tuple(re.findall(_DIMENSION_NAME, arg))
+            for arg in re.findall(_ARGUMENT, arg_list)
+        ]
+        for arg_list in signature.split("->")
+    )
diff --git a/tests/tensor/test_blockwise.py b/tests/tensor/test_blockwise.py
@@ -10,9 +10,10 @@
 from pytensor.graph import Apply, Op
 from pytensor.graph.replace import vectorize_node
 from pytensor.tensor import diagonal, log, tensor
-from pytensor.tensor.blockwise import Blockwise, _parse_gufunc_signature
+from pytensor.tensor.blockwise import Blockwise
 from pytensor.tensor.nlinalg import MatrixInverse
 from pytensor.tensor.slinalg import Cholesky, Solve, cholesky, solve_triangular
+from pytensor.tensor.utils import _parse_gufunc_signature
 
 
 def test_vectorize_blockwise():
diff --git a/tests/tensor/test_functional.py b/tests/tensor/test_functional.py
@@ -0,0 +1,81 @@
+import numpy as np
+import pytest
+
+from pytensor.graph.basic import equal_computations
+from pytensor.tensor import full, tensor
+from pytensor.tensor.functional import vectorize
+from pytensor.tensor.random.type import RandomGeneratorType
+
+
+class TestVectorize:
+    def test_vectorize_no_signature(self):
+        """Unlike numpy, outputs of vectorize without a signature are not assumed to be scalar."""
+
+        def func(x):
+            return full((5, 3), x)
+
+        vec_func = vectorize(func)
+
+        x = tensor("x", shape=(4,), dtype="float64")
+        out = vec_func(x)
+
+        assert out.type.ndim == 3
+        test_x = np.array([1, 2, 3, 4])
+        np.testing.assert_allclose(
+            out.eval({x: test_x}), np.full((len(test_x), 5, 3), test_x[:, None, None])
+        )
+
+    def test_vectorize_outer_product(self):
+        def func(x, y):
+            return x[:, None] * y[None, :]
+
+        vec_func = vectorize(func, signature="(a),(b)->(a,b)")
+
+        x = tensor("x", shape=(2, 3, 5))
+        y = tensor("y", shape=(2, 3, 7))
+        out = vec_func(x, y)
+
+        assert out.type.shape == (2, 3, 5, 7)
+        assert equal_computations([out], [x[..., :, None] * y[..., None, :]])
+
+    def test_vectorize_outer_inner_product(self):
+        def func(x, y):
+            return x[:, None] * y[None, :], (x * y).sum()
+
+        vec_func = vectorize(func, signature="(a),(b)->(a,b),()")
+
+        x = tensor("x", shape=(2, 3, 5))
+        y = tensor("y", shape=(2, 3, 5))
+        outer, inner = vec_func(x, y)
+
+        assert outer.type.shape == (2, 3, 5, 5)
+        assert inner.type.shape == (2, 3)
+        assert equal_computations([outer], [x[..., :, None] * y[..., None, :]])
+        assert equal_computations([inner], [(x * y).sum(axis=-1)])
+
+    def test_errors(self):
+        def func(x, y):
+            return x + y, x - y
+
+        x = tensor("x", shape=(5,))
+        y = tensor("y", shape=())
+
+        with pytest.raises(ValueError, match="Number of inputs"):
+            vectorize(func, signature="(),()->()")(x)
+
+        with pytest.raises(ValueError, match="Number of outputs"):
+            vectorize(func, signature="(),()->()")(x, y)
+
+        with pytest.raises(ValueError, match="Input y has less dimensions"):
+            vectorize(func, signature="(a),(a)->(a),(a)")(x, y)
+
+        bad_input = RandomGeneratorType()
+
+        with pytest.raises(TypeError, match="must be TensorVariable"):
+            vectorize(func)(bad_input, x)
+
+        def bad_func(x, y):
+            x + y
+
+        with pytest.raises(ValueError, match="no outputs"):
+            vectorize(bad_func)(x, y)