From 3b4c5a3e188408cb9ddf39a586428051f6973c99 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 19 Jun 2025 17:11:31 +0000
Subject: [PATCH 1/5] Initial plan for implementing aten_feature_dropout


From 658f966657b77942ffd372c576f900d514fbc657 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 19 Jun 2025 17:27:02 +0000
Subject: [PATCH 2/5] Implement aten_feature_dropout with proper torch_op
 decorator

Co-authored-by: justinchuby <11205048+justinchuby@users.noreply.github.com>
---
 .../function_libs/torch_lib/ops/core.py       | 61 ++++++++++++++++++-
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/onnxscript/function_libs/torch_lib/ops/core.py b/onnxscript/function_libs/torch_lib/ops/core.py
index 05e2cd925..450d21312 100644
--- a/onnxscript/function_libs/torch_lib/ops/core.py
+++ b/onnxscript/function_libs/torch_lib/ops/core.py
@@ -3538,10 +3538,67 @@ def aten_feature_alpha_dropout(input: TensorType, p: float, train: bool) -> Tens
     raise NotImplementedError()
 
 
-def aten_feature_dropout(input: TensorType, p: float, train: bool) -> TensorType:
+@torch_op("aten::feature_dropout", trace_only=True)
+def aten_feature_dropout(input: TFloat, p: FLOAT, train: BOOL) -> TFloat:
     """feature_dropout(Tensor input, float p, bool train) -> Tensor"""
 
-    raise NotImplementedError()
+    # Feature dropout applies dropout to entire feature maps/channels
+    # rather than individual elements
+    
+    # Use ONNX operations to handle control flow
+    # In inference mode or when p=0, return input unchanged
+    should_dropout = op.And(train, p > 0.0)
+    
+    # Get input shape
+    input_shape = op.Shape(input)
+    ndim = op.Size(input_shape)
+    
+    # Create mask shape for feature dropout
+    # For 2D tensors [N, C]: mask shape is [N, C]
+    # For higher dim tensors [N, C, ...]: mask shape is [N, C, 1, 1, ...]
+    batch_size = op.Gather(input_shape, [0])
+    channel_size = op.Gather(input_shape, [1])
+    
+    # Create the appropriate mask shape based on tensor dimensions
+    is_2d = op.Equal(ndim, 2)
+    
+    # For 2D case, mask_shape = [N, C]
+    mask_shape_2d = op.Concat(batch_size, channel_size, axis=0)
+    
+    # For higher dimensions, mask_shape = [N, C, 1, 1, ...]
+    spatial_dims_count = op.Sub(ndim, 2)
+    ones_for_spatial = op.ConstantOfShape(
+        op.Reshape(spatial_dims_count, [1]), 
+        value=1
+    )
+    mask_shape_nd = op.Concat(batch_size, channel_size, ones_for_spatial, axis=0)
+    
+    # Select appropriate mask shape
+    mask_shape = op.Where(is_2d, mask_shape_2d, mask_shape_nd)
+    
+    # Generate random uniform values between 0 and 1
+    random_vals = op.RandomUniformLike(
+        op.ConstantOfShape(mask_shape, value=0.0),
+        dtype=1,  # float32
+        low=0.0,
+        high=1.0
+    )
+    
+    # Create binary mask: 1 where random_vals >= p, 0 otherwise
+    mask = op.Cast(random_vals >= p, to=input.dtype)
+    
+    # Scale by 1/(1-p) to maintain expected value
+    scale = op.Div(1.0, op.Sub(1.0, p))
+    scaled_mask = op.Mul(mask, scale)
+    
+    # Apply dropout only if we should dropout, otherwise use all-ones mask
+    ones_mask = op.ConstantOfShape(mask_shape, value=1.0)
+    final_mask = op.Where(should_dropout, scaled_mask, ones_mask)
+    
+    # Apply mask to input (broadcasting will handle different shapes)
+    result = op.Mul(input, final_mask)
+    
+    return result
 
 
 @torch_op(("aten::fill.Tensor", "aten::fill.Scalar"))

From 29f19680a42bd16a3b8bdafd99c196bcba0d3ff7 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 19 Jun 2025 17:44:31 +0000
Subject: [PATCH 3/5] Refactor aten_feature_dropout to use op.Dropout for
 cleaner implementation

Co-authored-by: justinchuby <11205048+justinchuby@users.noreply.github.com>
---
 .../function_libs/torch_lib/ops/core.py       | 28 ++++---------------
 1 file changed, 5 insertions(+), 23 deletions(-)

diff --git a/onnxscript/function_libs/torch_lib/ops/core.py b/onnxscript/function_libs/torch_lib/ops/core.py
index 450d21312..283ab75af 100644
--- a/onnxscript/function_libs/torch_lib/ops/core.py
+++ b/onnxscript/function_libs/torch_lib/ops/core.py
@@ -3545,10 +3545,6 @@ def aten_feature_dropout(input: TFloat, p: FLOAT, train: BOOL) -> TFloat:
     # Feature dropout applies dropout to entire feature maps/channels
     # rather than individual elements
     
-    # Use ONNX operations to handle control flow
-    # In inference mode or when p=0, return input unchanged
-    should_dropout = op.And(train, p > 0.0)
-    
     # Get input shape
     input_shape = op.Shape(input)
     ndim = op.Size(input_shape)
@@ -3576,27 +3572,13 @@ def aten_feature_dropout(input: TFloat, p: FLOAT, train: BOOL) -> TFloat:
     # Select appropriate mask shape
     mask_shape = op.Where(is_2d, mask_shape_2d, mask_shape_nd)
     
-    # Generate random uniform values between 0 and 1
-    random_vals = op.RandomUniformLike(
-        op.ConstantOfShape(mask_shape, value=0.0),
-        dtype=1,  # float32
-        low=0.0,
-        high=1.0
-    )
-    
-    # Create binary mask: 1 where random_vals >= p, 0 otherwise
-    mask = op.Cast(random_vals >= p, to=input.dtype)
-    
-    # Scale by 1/(1-p) to maintain expected value
-    scale = op.Div(1.0, op.Sub(1.0, p))
-    scaled_mask = op.Mul(mask, scale)
-    
-    # Apply dropout only if we should dropout, otherwise use all-ones mask
-    ones_mask = op.ConstantOfShape(mask_shape, value=1.0)
-    final_mask = op.Where(should_dropout, scaled_mask, ones_mask)
+    # Create a dummy tensor of ones with the mask shape and apply dropout to it
+    # This leverages op.Dropout to handle training mode, scaling, and random generation
+    dummy_tensor = op.ConstantOfShape(mask_shape, value=1.0)
+    mask, _ = op.Dropout(dummy_tensor, p, train)
     
     # Apply mask to input (broadcasting will handle different shapes)
-    result = op.Mul(input, final_mask)
+    result = op.Mul(input, mask)
     
     return result
 

From af8d6223560ece10d6eb9c61eb8f3285ed18d212 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 19 Jun 2025 17:48:53 +0000
Subject: [PATCH 4/5] Add early return for p==0 or not train in
 aten_feature_dropout

Co-authored-by: justinchuby <11205048+justinchuby@users.noreply.github.com>
---
 onnxscript/function_libs/torch_lib/ops/core.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/onnxscript/function_libs/torch_lib/ops/core.py b/onnxscript/function_libs/torch_lib/ops/core.py
index 283ab75af..fb99f70a6 100644
--- a/onnxscript/function_libs/torch_lib/ops/core.py
+++ b/onnxscript/function_libs/torch_lib/ops/core.py
@@ -3545,6 +3545,9 @@ def aten_feature_dropout(input: TFloat, p: FLOAT, train: BOOL) -> TFloat:
     # Feature dropout applies dropout to entire feature maps/channels
     # rather than individual elements
     
+    if p == 0 or not train:
+        return input
+    
     # Get input shape
     input_shape = op.Shape(input)
     ndim = op.Size(input_shape)

From 96f78cdcfe2f489184a329c1045c241f6f2dcf4b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 19 Jun 2025 17:57:52 +0000
Subject: [PATCH 5/5] Refactor feature_dropout to use op.Shape with start/end
 parameters instead of op.Gather

Co-authored-by: justinchuby <11205048+justinchuby@users.noreply.github.com>
---
 onnxscript/function_libs/torch_lib/ops/core.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/onnxscript/function_libs/torch_lib/ops/core.py b/onnxscript/function_libs/torch_lib/ops/core.py
index fb99f70a6..e74d68c38 100644
--- a/onnxscript/function_libs/torch_lib/ops/core.py
+++ b/onnxscript/function_libs/torch_lib/ops/core.py
@@ -3548,15 +3548,14 @@ def aten_feature_dropout(input: TFloat, p: FLOAT, train: BOOL) -> TFloat:
     if p == 0 or not train:
         return input
     
-    # Get input shape
-    input_shape = op.Shape(input)
-    ndim = op.Size(input_shape)
+    # Get the number of input dimensions (rank)
+    ndim = op.Size(op.Shape(input))
     
     # Create mask shape for feature dropout
     # For 2D tensors [N, C]: mask shape is [N, C]
     # For higher dim tensors [N, C, ...]: mask shape is [N, C, 1, 1, ...]
-    batch_size = op.Gather(input_shape, [0])
-    channel_size = op.Gather(input_shape, [1])
+    batch_size = op.Shape(input, start=0, end=1)
+    channel_size = op.Shape(input, start=1, end=2)
     
     # Create the appropriate mask shape based on tensor dimensions
     is_2d = op.Equal(ndim, 2)