Commit 22c4c0f

Copilot and justinchuby committed
Re-implement aten_bilinear using MatMul and Transpose operations instead of Einsum
Co-authored-by: justinchuby <[email protected]>
1 parent 38bd90b commit 22c4c0f
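
The rewrite relies on a standard factoring of the bilinear contraction. The Einsum equation removed below, "...i,oij,...j->...o", computes

    output[..., o] = sum over (i, j) of input1[..., i] * weight[o, i, j] * input2[..., j]

which can equivalently be evaluated as an outer product of input1 and input2 over (i, j), a flatten of (i, j) into a single axis, and one MatMul against the weight reshaped to [out_features, in1_features * in2_features] and transposed.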

File tree

  • onnxscript/function_libs/torch_lib/ops/core.py

1 file changed: +47, -4 lines

onnxscript/function_libs/torch_lib/ops/core.py

Lines changed: 47 additions & 4 deletions
@@ -1205,10 +1205,53 @@ def aten_bilinear(
     # bias shape: (out_features) - optional
     # output shape: (..., out_features)

-    # Use Einsum to compute the bilinear transformation
-    # "...i,oij,...j->...o" means:
-    # - input1[..., i] * weight[o, i, j] * input2[..., j] -> output[..., o]
-    result = op.Einsum(input1, weight, input2, equation="...i,oij,...j->...o")
+    # Decompose bilinear into MatMul operations:
+    # 1. Create outer product of input1 and input2
+    # 2. Reshape to flatten feature dimensions
+    # 3. Use MatMul with reshaped weight
+
+    # Get shapes for reshaping
+    input1_shape = op.Shape(input1)
+    weight_shape = op.Shape(weight)
+
+    # Get dimensions
+    out_features = op.Gather(weight_shape, 0, axis=0)
+    in1_features = op.Gather(weight_shape, 1, axis=0)
+    in2_features = op.Gather(weight_shape, 2, axis=0)
+
+    # Get batch dimensions (everything except the last dimension)
+    input1_rank = Rank(input1)
+    batch_dims = op.Slice(input1_shape, [0], [input1_rank - 1])
+    batch_size = op.ReduceProd(batch_dims, keepdims=False)
+
+    # Create outer product: input1[..., i] * input2[..., j] -> [..., i, j]
+    # Reshape inputs to [batch_size, features] for easier handling
+    input1_2d = op.Reshape(input1, op.Concat([batch_size], [in1_features], axis=0))
+    input2_2d = op.Reshape(input2, op.Concat([batch_size], [in2_features], axis=0))
+
+    # Create outer product using unsqueeze and broadcasting
+    input1_expanded = op.Unsqueeze(input1_2d, axes=[2])  # [batch_size, in1_features, 1]
+    input2_expanded = op.Unsqueeze(input2_2d, axes=[1])  # [batch_size, 1, in2_features]
+
+    # Outer product via broadcasting multiplication
+    outer_product = op.Mul(input1_expanded, input2_expanded)  # [batch_size, in1_features, in2_features]
+
+    # Flatten the feature dimensions
+    features_total = op.Mul(in1_features, in2_features)
+    outer_flat = op.Reshape(outer_product, op.Concat([batch_size], [features_total], axis=0))
+
+    # Reshape weight to 2D: [out_features, in1_features * in2_features]
+    weight_2d = op.Reshape(weight, op.Concat([out_features], [features_total], axis=0))
+
+    # Transpose weight for MatMul: [in1_features * in2_features, out_features]
+    weight_t = op.Transpose(weight_2d, perm=[1, 0])
+
+    # Matrix multiplication: [batch_size, out_features]
+    result = op.MatMul(outer_flat, weight_t)
+
+    # Reshape back to original batch dimensions + out_features
+    output_shape = op.Concat(batch_dims, [out_features], axis=0)
+    result = op.Reshape(result, output_shape)

     # Add bias if provided
     if bias is not None:
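
To make the equivalence concrete, here is a minimal numpy sketch (not the torch_lib code) that checks the outer-product + MatMul pipeline above against the Einsum equation it replaces. Variable names mirror the diff, numpy operations stand in for the corresponding ONNX ops, and the bias step is omitted since the `if bias is not None:` branch is unchanged.

    # Minimal numpy check of the decomposition (illustration only, not part of the commit)
    import numpy as np

    rng = np.random.default_rng(0)
    batch_dims = (2, 3)  # arbitrary leading batch shape
    in1_features, in2_features, out_features = 4, 5, 6

    input1 = rng.standard_normal((*batch_dims, in1_features))
    input2 = rng.standard_normal((*batch_dims, in2_features))
    weight = rng.standard_normal((out_features, in1_features, in2_features))

    # Removed formulation: Einsum "...i,oij,...j->...o"
    expected = np.einsum("...i,oij,...j->...o", input1, weight, input2)

    # New formulation, step by step as in the diff
    batch_size = int(np.prod(batch_dims))
    input1_2d = input1.reshape(batch_size, in1_features)   # op.Reshape
    input2_2d = input2.reshape(batch_size, in2_features)   # op.Reshape
    outer = input1_2d[:, :, None] * input2_2d[:, None, :]  # op.Unsqueeze + op.Mul
    outer_flat = outer.reshape(batch_size, in1_features * in2_features)
    weight_2d = weight.reshape(out_features, in1_features * in2_features)
    result = outer_flat @ weight_2d.T                      # op.Transpose + op.MatMul
    result = result.reshape(*batch_dims, out_features)     # op.Reshape back

    np.testing.assert_allclose(result, expected, rtol=1e-6)
    print("MatMul decomposition matches Einsum")

A plausible motivation for the change, though the commit message does not state it explicitly, is that MatMul and Transpose are more uniformly supported and better optimized across ONNX backends than Einsum.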

0 commit comments