fix dp reduction test (#6404)

awaelchli · web-flow · commit e1f5eacab986 · 2021-03-08T18:11:20.000Z
* fix

* update

* fix

* move the class outside
diff --git a/pytorch_lightning/plugins/training_type/dp.py b/pytorch_lightning/plugins/training_type/dp.py
@@ -49,9 +49,9 @@ def reduce(self, tensor, *args, **kwargs):
 
         else:
 
-            def _reduce(tensor: torch.Tensor):
-                dtype_tensor = tensor.dtype
-                return tensor.float().mean().type(dtype_tensor)
+            def _reduce(t: torch.Tensor):
+                dtype_tensor = t.dtype
+                return t.float().mean().type(dtype_tensor)
 
             tensor = apply_to_collection(tensor, torch.Tensor, _reduce)
 
diff --git a/tests/accelerators/test_dp.py b/tests/accelerators/test_dp.py
@@ -13,13 +13,14 @@
 # limitations under the License.
 import torch
 import torch.nn.functional as F
+from torch.utils.data import DataLoader
 
 import pytorch_lightning as pl
 import tests.helpers.pipelines as tpipes
 import tests.helpers.utils as tutils
 from pytorch_lightning.callbacks import EarlyStopping
 from pytorch_lightning.core import memory
-from tests.helpers import BoringModel
+from tests.helpers import BoringModel, RandomDataset
 from tests.helpers.datamodules import ClassifDataModule
 from tests.helpers.runif import RunIf
 from tests.helpers.simple_models import ClassificationModel
@@ -125,19 +126,58 @@ def test_dp_test(tmpdir):
     assert torch.all(torch.eq(old_weights, new_weights))
 
 
+class ReductionTestModel(BoringModel):
+
+    def train_dataloader(self):
+        return DataLoader(RandomDataset(32, 64), batch_size=2)
+
+    def val_dataloader(self):
+        return DataLoader(RandomDataset(32, 64), batch_size=2)
+
+    def test_dataloader(self):
+        return DataLoader(RandomDataset(32, 64), batch_size=2)
+
+    def add_outputs(self, output, device):
+        output.update({
+            "reduce_int": torch.tensor(device.index, dtype=torch.int, device=device),
+            "reduce_float": torch.tensor(device.index, dtype=torch.float, device=device),
+        })
+
+    def training_step(self, batch, batch_idx):
+        output = super().training_step(batch, batch_idx)
+        self.add_outputs(output, batch.device)
+        return output
+
+    def validation_step(self, batch, batch_idx):
+        output = super().validation_step(batch, batch_idx)
+        self.add_outputs(output, batch.device)
+        return output
+
+    def test_step(self, batch, batch_idx):
+        output = super().test_step(batch, batch_idx)
+        self.add_outputs(output, batch.device)
+        return output
+
+    def training_epoch_end(self, outputs):
+        assert outputs[0]["loss"].shape == torch.Size([])
+        assert outputs[0]["reduce_int"].item() == 0  # float mean of [0, 1] is 0.5; cast back to int gives 0
+        assert outputs[0]["reduce_float"].item() == 0.5  # mean([0., 1.]) = 0.5
+
+
 @RunIf(min_gpus=2)
 def test_dp_training_step_dict(tmpdir):
-    """
-    This test verify dp properly reduce dictionaries
-    """
-
-    model = BoringModel()
+    """ This test verifies that dp properly reduces dictionaries """
+    model = ReductionTestModel()
     model.training_step_end = None
+    model.validation_step_end = None
+    model.test_step_end = None
+
     trainer = pl.Trainer(
         default_root_dir=tmpdir,
         max_epochs=1,
-        limit_train_batches=2,
-        limit_val_batches=0,
+        limit_train_batches=1,
+        limit_val_batches=1,
+        limit_test_batches=1,
         gpus=2,
         accelerator='dp',
     )