
Commit 47e607a

fix RuntimeError: expected scalar type float but found double in core.py masked_(min|max)

raised by "min": lambda x, mask, dim: torch.where(mask, x, float("inf")).min(dim=dim)[0] (and the matching "max" lambda) when x is a double tensor
1 parent 7dbaf04 commit 47e607a
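
For context, a minimal reproduction of the error (a sketch assuming the PyTorch version this commit targets, where torch.where does not type-promote a Python float scalar against a float64 tensor; newer releases may promote silently):

import torch

x = torch.randn(2, 3, dtype=torch.float64)  # double tensor, e.g. fresh from numpy
mask = torch.tensor([[True, False, True], [True, True, True]])

# the scalar inf is wrapped as a float32 tensor, so combining it with the
# float64 input raises: RuntimeError: expected scalar type float but found double
torch.where(mask, x, float("inf")).min(dim=0)[0]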

File tree

3 files changed, +47 -24 lines


aviary/core.py

Lines changed: 34 additions & 12 deletions
@@ -12,7 +12,7 @@
 import torch
 import torch.nn as nn
 from sklearn.metrics import f1_score
-from torch import Tensor
+from torch import BoolTensor, Tensor
 from torch.nn.functional import softmax
 from torch.utils.data import DataLoader
 from torch.utils.tensorboard import SummaryWriter
@@ -577,22 +577,20 @@ def np_one_hot(targets: np.ndarray, n_classes: int = None) -> np.ndarray:
     return np.eye(n_classes)[targets]
 
 
-def masked_std(
-    x: torch.Tensor, mask: torch.BoolTensor, dim: int = 0, eps: float = 1e-12
-) -> torch.Tensor:
+def masked_std(x: Tensor, mask: BoolTensor, dim: int = 0, eps: float = 1e-12) -> Tensor:
     """Compute the standard deviation of a tensor, ignoring masked values.
 
     Args:
-        x (torch.Tensor): Tensor to compute standard deviation of.
-        mask (torch.BoolTensor): Same shape as x with True where x is valid and False
+        x (Tensor): Tensor to compute standard deviation of.
+        mask (BoolTensor): Same shape as x with True where x is valid and False
             where x should be masked. Mask should not be all False in any column of
             dimension dim to avoid NaNs.
         dim (int, optional): Dimension to take std of. Defaults to 0.
         eps (float, optional): Small positive number to ensure std is differentiable.
             Defaults to 1e-12.
 
     Returns:
-        torch.Tensor: Same shape as x, except dimension dim reduced.
+        Tensor: Same shape as x, except dimension dim reduced.
     """
     mean = masked_mean(x, mask, dim=dim)
     squared_diff = (x - mean.unsqueeze(dim=dim)) ** 2
@@ -601,18 +599,42 @@ def masked_std(
     return std
 
 
-def masked_mean(x: torch.Tensor, mask: torch.BoolTensor, dim: int = 0) -> torch.Tensor:
+def masked_mean(x: Tensor, mask: BoolTensor, dim: int = 0) -> Tensor:
     """Compute the mean of a tensor, ignoring masked values.
 
     Args:
-        x (torch.Tensor): Tensor to compute standard deviation of.
-        mask (torch.BoolTensor): Same shape as x with True where x is valid and False
+        x (Tensor): Tensor to compute mean of.
+        mask (BoolTensor): Same shape as x with True where x is valid and False
             where x should be masked. Mask should not be all False in any column of
             dimension dim to avoid NaNs from zero division.
         dim (int, optional): Dimension to take mean of. Defaults to 0.
 
     Returns:
-        torch.Tensor: Same shape as x, except dimension dim reduced.
+        Tensor: Same shape as x, except dimension dim reduced.
     """
-    x_nan = x.masked_fill(~mask, float("nan"))
+    # for safety, we could add this assert but might impact performance
+    # assert (
+    #     mask.sum(dim=dim).ne(0).all()
+    # ), "mask should not be all False in any column, causes zero division"
+    x_nan = x.float().masked_fill(~mask, float("nan"))
     return x_nan.nanmean(dim=dim)
+
+
+def masked_max(x: Tensor, mask: BoolTensor, dim: int = 0) -> Tensor:
+    """Compute the max of a tensor along dimension dim, ignoring values at indices where
+    mask is False. See masked_mean docstring for Args details.
+    """
+    # replace padded values with +/-inf to make sure min()/max() ignore them
+    x_inf = x.float().masked_fill(~mask, float("-inf"))
+    # 1st ret val = max, 2nd ret val = max indices
+    x_max, _ = x_inf.max(dim=dim)
+    return x_max
+
+
+def masked_min(x: Tensor, mask: BoolTensor, dim: int = 0) -> Tensor:
+    """Compute the min of a tensor along dimension dim, ignoring values at indices where
+    mask is False. See masked_mean docstring for Args details.
+    """
+    x_inf = x.float().masked_fill(~mask, float("inf"))
+    x_min, _ = x_inf.min(dim=dim)
+    return x_min
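
A short usage sketch of the new helpers (values are illustrative). Note that the internal .float() call casts inputs to float32, which is what sidesteps the float/double mismatch, at the cost of always returning single precision:

import torch
from aviary.core import masked_max, masked_mean, masked_min

x = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=torch.float64)
mask = torch.tensor([[True, True, False], [True, False, False]])

masked_mean(x, mask, dim=1)  # tensor([1.5000, 4.0000])
masked_min(x, mask, dim=1)  # tensor([1., 4.])
masked_max(x, mask, dim=1)  # tensor([2., 4.])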

aviary/wrenformer/model.py

Lines changed: 5 additions & 7 deletions
@@ -7,7 +7,7 @@
 import torch.nn.functional as F
 from torch import BoolTensor, Tensor
 
-from aviary.core import BaseModelClass, masked_mean, masked_std
+from aviary.core import BaseModelClass, masked_max, masked_mean, masked_min, masked_std
 from aviary.networks import ResidualNetwork
 
 
@@ -142,13 +142,11 @@ def forward(  # type: ignore
         return tuple(output_nn(predictions) for output_nn in self.output_nns)
 
 
-# using all at once we call this S2M3 aggregation
+# map aggregation types to functions
 aggregators: dict[str, Callable[[Tensor, BoolTensor, int], Tensor]] = {
     "mean": masked_mean,
-    "sum": lambda x, mask, dim: (x * mask).sum(dim=dim),
     "std": masked_std,
-    # replace padded values with +/-inf to make sure min()/max() ignore them
-    "min": lambda x, mask, dim: torch.where(mask, x, float("inf")).min(dim=dim)[0],
-    # 1st ret val = max, 2nd ret val = max indices
-    "max": lambda x, mask, dim: torch.where(mask, x, float("-inf")).max(dim=dim)[0],
+    "max": masked_max,
+    "min": masked_min,
+    "sum": lambda x, mask, dim: (x * mask).sum(dim=dim),
 }
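
A sketch of how the mapping can be applied, e.g. pooling per-token embeddings over the sequence dimension under a padding mask (shapes and variable names here are illustrative, not taken from the model code; the import assumes aggregators is defined at module level as the diff suggests):

import torch

from aviary.wrenformer.model import aggregators

embeddings = torch.randn(5, 2, 8)  # (seq_len, batch, d_model)
mask = torch.ones(5, 2, 8, dtype=torch.bool)
mask[3:, 0] = False  # first sequence in the batch has only 3 valid tokens

# concatenate the selected aggregations along the feature dimension
pooled = torch.cat(
    [aggregators[name](embeddings, mask, 0) for name in ("mean", "std", "min", "max")],
    dim=-1,
)  # -> (batch, 4 * d_model)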

examples/wrenformer.py

Lines changed: 8 additions & 5 deletions
@@ -166,12 +166,15 @@ def run_wrenformer(
     # the element type (usually 200-dim matscholar embeddings) and Wyckoff position (see
     # 'bra-alg-off.json') + 1 for the weight of that element/Wyckoff position in the
     # material's composition
-    n_features = features[0].shape[-1]
-    assert n_features in (200 + 1, 200 + 1 + 444)  # Roost and Wren embedding size resp.
+    embedding_len = features[0].shape[-1]
+    assert embedding_len in (
+        200 + 1,
+        200 + 1 + 444,
+    )  # Roost and Wren embedding size resp.
 
     model = Wrenformer(
         n_targets=[1 if task_type == reg_key else 2],
-        n_features=n_features,
+        n_features=embedding_len,
         task_dict={target_col: task_type},  # e.g. {'exfoliation_en': 'regression'}
         n_attn_layers=n_attn_layers,
         robust=robust,
@@ -201,14 +204,14 @@ def run_wrenformer(
         "target": target_col,
         "warmup_steps": warmup_steps,
         "robust": robust,
-        "n_features": n_features,  # embedding size
+        "embedding_len": embedding_len,
         "losses": str(loss_dict),
         "training_samples": len(train_df),
         "test_samples": len(test_df),
         "trainable_params": model.num_params,
         "swa_start": swa_start,
         "timestamp": timestamp,
-        "embedding_aggregations": embedding_aggregations,
+        "embedding_aggregations": ",".join(embedding_aggregations),
         **(run_params or {}),
     }
 
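Joining the aggregation names into a flat string keeps the logged run param a primitive value rather than a Python list (the motivation is an assumption; the commit does not state it). Assuming a hypothetical default:

embedding_aggregations = ("mean", "std", "min", "max")  # hypothetical value
",".join(embedding_aggregations)  # -> 'mean,std,min,max'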
