[ENH] Consistent 3D output for single-target point predictions in TimeXer v1. (#1936)

PranavBhatP · web-flow · commit 526d1d830700 · 2025-10-19T07:52:51.000+05:30
#### Reference Issues/PRs  #### What does this implement/fix? Explain your changes.  Small change in the code to provide a 3d output tensor for point predictions with 3rd dimension set to 1. With this change the output contract for `TimeXer` is - Point predictions: `(batch_size, predictions, 1)` where the 3rd dimension indicates a single target. - Quantile predicitons: `(batch_size, predictions, num_quantiles)` where the 3rd dimension indicates the number of quantiles for which the output is generated. #### What should a reviewer concentrate their feedback on?  #### PR checklist  - [x] The PR title starts with either [ENH], [MNT], [DOC], or [BUG]. [BUG] - bugfix, [MNT] - CI, test framework, [ENH] - adding or improving code, [DOC] - writing or improving documentation or docstrings. - [ ] Added/modified tests - [x] Used pre-commit hooks when committing to ensure that code is compliant with hooks. Install hooks with `pre-commit install`. To run hooks independent of commit, execute `pre-commit run --all-files`
diff --git a/pytorch_forecasting/models/timexer/_timexer.py b/pytorch_forecasting/models/timexer/_timexer.py
@@ -214,8 +214,13 @@ def __init__(
         if enc_in is None:
             self.enc_in = len(self.reals)
 
-        self.n_quantiles = None
+        # NOTE: assume point prediction as default here,
+        # with single median quantile being the point prediction.
+        # hence self.n_quantiles = 1 for point predictions.
+        self.n_quantiles = 1
 
+        # set n_quantiles to the length of the quantiles list passed
+        # into the "quantiles" parameter when QuantileLoss is used.
         if isinstance(loss, QuantileLoss):
             self.n_quantiles = len(loss.quantiles)
 
@@ -353,10 +358,7 @@ def _forecast(self, x: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
         enc_out = enc_out.permute(0, 1, 3, 2)
 
         dec_out = self.head(enc_out)
-        if self.n_quantiles is not None:
-            dec_out = dec_out.permute(0, 2, 1, 3)
-        else:
-            dec_out = dec_out.permute(0, 2, 1)
+        dec_out = dec_out.permute(0, 2, 1, 3)
 
         return dec_out
 
@@ -395,10 +397,7 @@ def _forecast_multi(self, x: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]
         enc_out = enc_out.permute(0, 1, 3, 2)
 
         dec_out = self.head(enc_out)
-        if self.n_quantiles is not None:
-            dec_out = dec_out.permute(0, 2, 1, 3)
-        else:
-            dec_out = dec_out.permute(0, 2, 1)
+        dec_out = dec_out.permute(0, 2, 1, 3)
 
         return dec_out
 
@@ -470,25 +469,15 @@ def forward(self, x: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
             if prediction.size(2) != len(target_positions):
                 prediction = prediction[:, :, : len(target_positions)]
 
-            # In the case of a single target, the result will be a torch.Tensor
-            # with shape (batch_size, prediction_length)
-            # In the case of multiple targets, the result will be a list of "n_targets"
-            # tensors with shape (batch_size, prediction_length)
-            # If quantile predictions are used, the result will have an additional
-            # dimension for quantiles, resulting in a shape of
-            # (batch_size, prediction_length, n_quantiles)
-            if self.n_quantiles is not None:
-                # quantile predictions.
-                if len(target_indices) == 1:
-                    prediction = prediction[..., 0, :]
-                else:
-                    prediction = [prediction[..., i, :] for i in target_indices]
+            # output format is (batch_size, prediction_length, n_quantiles)
+            # in case of quantile loss, the output n_quantiles = self.n_quantiles
+            # which is the length of a list of float. In case of MAE, MSE, etc.
+            # n_quantiles = 1 and it mimics the behavior of a point prediction.
+            # for multi-target forecasting, the output is a list of tensors.
+            if len(target_positions) == 1:
+                prediction = prediction[..., 0, :]
             else:
-                # point predictions.
-                if len(target_indices) == 1:
-                    prediction = prediction[..., 0]
-                else:
-                    prediction = [prediction[..., i] for i in target_indices]
+                prediction = [prediction[..., i, :] for i in target_indices]
             prediction = self.transform_output(
                 prediction=prediction, target_scale=x["target_scale"]
             )
diff --git a/pytorch_forecasting/models/timexer/_timexer_pkg.py b/pytorch_forecasting/models/timexer/_timexer_pkg.py
@@ -17,7 +17,6 @@ class TimeXer_pkg(_BasePtForecaster):
         "capability:pred_int": True,
         "capability:flexible_history_length": True,
         "capability:cold_start": False,
-        "tests:skip_by_name": "test_integration",
     }
 
     @classmethod
diff --git a/pytorch_forecasting/models/timexer/sub_modules.py b/pytorch_forecasting/models/timexer/sub_modules.py
@@ -183,29 +183,24 @@ class FlattenHead(nn.Module):
         nf (int): Number of features in the last layer.
         target_window (int): Target window size.
         head_dropout (float): Dropout rate for the head. Defaults to 0.
-        n_quantiles (int, optional): Number of quantiles. Defaults to None."""
+        n_quantiles (int, optional): Number of quantiles. Defaults to 1."""
 
-    def __init__(self, n_vars, nf, target_window, head_dropout=0, n_quantiles=None):
+    def __init__(self, n_vars, nf, target_window, head_dropout=0, n_quantiles=1):
         super().__init__()
         self.n_vars = n_vars
         self.flatten = nn.Flatten(start_dim=-2)
-        self.linear = nn.Linear(nf, target_window)
         self.n_quantiles = n_quantiles
 
-        if self.n_quantiles is not None:
-            self.linear = nn.Linear(nf, target_window * n_quantiles)
-        else:
-            self.linear = nn.Linear(nf, target_window)
+        self.linear = nn.Linear(nf, target_window * n_quantiles)
         self.dropout = nn.Dropout(head_dropout)
 
     def forward(self, x):
         x = self.flatten(x)
         x = self.linear(x)
         x = self.dropout(x)
 
-        if self.n_quantiles is not None:
-            batch_size, n_vars = x.shape[0], x.shape[1]
-            x = x.reshape(batch_size, n_vars, -1, self.n_quantiles)
+        batch_size, n_vars = x.shape[0], x.shape[1]
+        x = x.reshape(batch_size, n_vars, -1, self.n_quantiles)
         return x
 
 

Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,6 @@ class TimeXer_pkg(_BasePtForecaster):`
`17`	`17`	`"capability:pred_int": True,`
`18`	`18`	`"capability:flexible_history_length": True,`
`19`	`19`	`"capability:cold_start": False,`
`20`		`- "tests:skip_by_name": "test_integration",`
`21`	`20`	`}`
`22`	`21`
`23`	`22`	`@classmethod`