
Commit fd5cb7f

Authored by tchaton, carmocca, ananthsub, and Borda
Add PyTorch 1.8 Profiler 5/5 (#6618)
* Refactor profilers
* Update PassThrough
* WIP - This is broken and will change
* Update pytorch_lightning/profiler/pytorch.py (Co-authored-by: thomas chaton <[email protected]>)
* resolve tests
* resolve tests
* find output
* try something
* update
* add support for test and predict
* update
* update
* use getattr
* test
* test
* update
* tests
* update
* update
* update
* update
* update
* remove file
* update
* update
* update
* update
* update
* test
* update#
* update
* update tests
* update
* add suport for 1.8
* rename records
* add support for 1.8
* update
* resolve flake8
* resolve test
* Refactor basic profilers
* Fixes
* Unused import
* Introduce setup
* Profile on all ranks. Print to stdout on 0
* Introduce dirpath + filename
* CHANGELOG
* Add tests. Address comments
* add `on_run_stage_setup`
* add on_run_stage_setup function
* update
* add test for RegisterRecordFunction
* update lightnng flow direction
* move variable to private
* remove trace
* Undo code that should be in 3/4
* Multi-stage multi-rank
* 2/5 changes
* Pass stage in __del__
* Remove TODOs
* Describe on_evaluation_end. Add tests
* Typo
* Address comments
* deepcopy tests
* Advanced teardown
* Fix teardown test
* Fix tests
* Minor change
* Update CHANGELOG.md
* Fix test
* Quick fixes
* Fix 6522
* resolve ddp tests
* resolve tests
* resolve some tests
* update tests
* resolve tests
* update
* resolve tests
* resolve some tests
* Missed fixes from 3/5
* Fixes
* resolve some tests
* resolve test for 1.7.1
* Broken refactor
* Missed stage
* Minor changes
* resolve tests
* Update CHANGELOG
* resolve bug
* remove print
* Typo
* Cleanup
* resolve ddp test
* remove barrier
* update profiler
* update
* Smaller model
* update
* resolve tests
* update
* Minor changes. CHANGELOG
* Minimize diff
* update to 1.8.1
* RunIf. Extra code. Check segfault
* resolve tests
* Typo. Bad merge
* Fixing a bad merge
* replace for kineto
* Update pytorch_lightning/profiler/pytorch.py (Co-authored-by: ananthsub <[email protected]>)
* Update pytorch_lightning/profiler/pytorch.py (Co-authored-by: ananthsub <[email protected]>)
* Minor changes
* Bad merge
* Use lists for flexibility
* Use sets
* predict_step
* Ananth's suggestion
* update
* Docs
* Update pl_examples/basic_examples/profiler_example.py (Co-authored-by: Jirka Borovec <[email protected]>)
* update example
* update example

Co-authored-by: Carlos Mocholi <[email protected]>
Co-authored-by: ananthsub <[email protected]>
Co-authored-by: Jirka Borovec <[email protected]>
1 parent 51b10f7 commit fd5cb7f

File tree

12 files changed: +399 -96 lines


CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -55,6 +55,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added support for including module names for forward in the autograd trace of `PyTorchProfiler` ([#6349](https://github.com/PyTorchLightning/pytorch-lightning/pull/6349))


+- Added support for the PyTorch 1.8.1 autograd profiler ([#6618](https://github.com/PyTorchLightning/pytorch-lightning/pull/6618))
+
+
 - Added `outputs` parameter to callback's `on_validation_epoch_end` & `on_test_epoch_end` hooks ([#6120](https://github.com/PyTorchLightning/pytorch-lightning/pull/6120))

pl_examples/basic_examples/profiler_example.py

Lines changed: 102 additions & 0 deletions

@@ -0,0 +1,102 @@

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script will generate 2 traces: one for `training_step` and one for `validation_step`.
The traces can be visualized in 2 ways:
* With Chrome:
    1. Open Chrome and copy/paste this url: `chrome://tracing/`.
    2. Once tracing opens, click on `Load` at the top-right and load one of the generated traces.
* With PyTorch Tensorboard Profiler (Instructions are here: https://github.com/pytorch/kineto/tree/master/tb_plugin)
    1. pip install tensorboard torch-tb-profiler
    2. tensorboard --logdir={FOLDER}
"""

import sys
from argparse import ArgumentParser

import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as T

from pl_examples import cli_lightning_logo
from pytorch_lightning import LightningDataModule, LightningModule, Trainer

DEFAULT_CMD_LINE = (
    "--max_epochs",
    "1",
    "--limit_train_batches",
    "15",
    "--limit_val_batches",
    "15",
    "--profiler",
    "pytorch",
    "--gpus",
    f"{int(torch.cuda.is_available())}",
)


class ModelToProfile(LightningModule):

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.criterion = torch.nn.CrossEntropyLoss()

    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self.model(inputs)
        loss = self.criterion(outputs, labels)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self.model(inputs)
        loss = self.criterion(outputs, labels)
        self.log("val_loss", loss)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.001, momentum=0.9)


class CIFAR10DataModule(LightningDataModule):

    transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])

    def train_dataloader(self, *args, **kwargs):
        trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=self.transform)
        return torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=0)

    def val_dataloader(self, *args, **kwargs):
        valset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=self.transform)
        return torch.utils.data.DataLoader(valset, batch_size=32, shuffle=True, num_workers=0)


def cli_main():

    parser = ArgumentParser()
    parser = Trainer.add_argparse_args(parser)
    cmd_line = None if len(sys.argv) != 1 else DEFAULT_CMD_LINE
    args = parser.parse_args(args=cmd_line)

    model = ModelToProfile(models.resnet50(pretrained=True))
    datamodule = CIFAR10DataModule()
    trainer = Trainer(**vars(args))
    trainer.fit(model, datamodule=datamodule)


if __name__ == '__main__':
    cli_lightning_logo()
    cli_main()
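
A note on usage: the example above selects the profiler via the ``--profiler pytorch`` flag. Configuring it in code should look roughly like the sketch below; the ``dirpath``/``filename`` arguments are the ones this PR series introduces ("Introduce dirpath + filename"), so treat the exact signature as an assumption rather than a guarantee.

from pytorch_lightning import Trainer
from pytorch_lightning.profiler import PyTorchProfiler

# Assumed arguments: dirpath/filename come from the "Introduce dirpath + filename"
# change in this PR series. Each rank writes its report to its own file.
profiler = PyTorchProfiler(dirpath="./profiler_logs", filename="perf_logs")
trainer = Trainer(max_epochs=1, limit_train_batches=15, limit_val_batches=15, profiler=profiler)
# trainer.fit(model, datamodule=datamodule)  # as in cli_main() above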

pytorch_lightning/accelerators/accelerator.py

Lines changed: 8 additions & 4 deletions
@@ -448,8 +448,10 @@ def connect_training_type_plugin(self, plugin: TrainingTypePlugin, model: Lightn
         .. deprecated::v1.3
             Will be removed in v1.5.0.
         """
-        rank_zero_warn('Accelerator method `connect_training_type_plugin` was deprecated in v1.3.'
-                       ' It will be removed in v1.5.')
+        rank_zero_warn(
+            'Accelerator method `connect_training_type_plugin` was deprecated in v1.3.'
+            ' It will be removed in v1.5.'
+        )
         self.setup_training_type_plugin(plugin, model)

     # todo: remove in v1.5
@@ -459,6 +461,8 @@ def connect_precision_plugin(self, plugin: PrecisionPlugin) -> None:
         .. deprecated::v1.3
             Will be removed in v1.5.0.
         """
-        rank_zero_warn('Accelerator method `connect_precision_plugin` was deprecated in v1.3.'
-                       ' It will be removed in v1.5.')
+        rank_zero_warn(
+            'Accelerator method `connect_precision_plugin` was deprecated in v1.3.'
+            ' It will be removed in v1.5.'
+        )
         self.setup_precision_plugin(plugin)

pytorch_lightning/profiler/__init__.py

Lines changed: 13 additions & 12 deletions
@@ -121,7 +121,8 @@ def custom_processing_step(self, data):
 Autograd includes a profiler that lets you inspect the cost of different operators
 inside your model - both on the CPU and GPU.

-Find the Pytorch Profiler doc at [PyTorch Profiler](https://pytorch-lightning.readthedocs.io/en/stable/profiler.html)
+To read more about the PyTorch Profiler and all its options,
+have a look at its `docs <https://pytorch.org/docs/master/profiler.html>`__

 .. code-block:: python

@@ -134,16 +135,16 @@ def custom_processing_step(self, data):


 This profiler works with PyTorch ``DistributedDataParallel``.
-If ``output_filename`` is provided, each rank will save their profiled operation to their own file.
+If ``filename`` is provided, each rank will save their profiled operations to their own file. The profiler
+report can be quite long, so setting a ``filename`` will save the report instead of logging it to the
+output in your terminal. If no filename is given, it will be logged only on rank 0.

+The profiler's results will be printed on the completion of ``{fit,validate,test,predict}``.

-The profiler's results will be printed on the completion of a training `fit()`. This profiler
-report can be quite long, so you can also specify an `output_filename` to save the report instead
-of logging it to the output in your terminal.
-
-This profiler will record only for `training_step_and_backward`, `evaluation_step` and `test_step` functions by default.
-The output below shows the profiling for the action `training_step_and_backward`.
-The user can provide ``PyTorchProfiler(profiled_functions=[...])`` to extend the scope of profiled functions.
+This profiler will record ``training_step_and_backward``, ``training_step``, ``backward``,
+``validation_step``, ``test_step``, and ``predict_step`` by default.
+The output below shows the profiling for the action ``training_step_and_backward``.
+The user can provide ``PyTorchProfiler(record_functions={...})`` to extend the scope of profiled functions.

 .. note:: When using the PyTorch Profiler, wall clock time will not be representative of the true wall clock time. This is due to forcing profiled operations to be measured synchronously, when many CUDA ops happen asynchronously. It is recommended to use this Profiler to find bottlenecks/breakdowns, however for end to end wall clock time use the `SimpleProfiler`. # noqa E501

@@ -184,13 +185,13 @@ def custom_processing_step(self, data):

 To visualize the profiled operation, you can either:

-* Use::
+Use::

     nvvp trace_name.prof

-* Use::
+Or::

-    python -c 'import torch; print(torch.autograd.profiler.load_nvprof("trace_name.prof"))'
+    python -c 'import torch; print(torch.autograd.profiler.load_nvprof("trace_name.prof"))'

 """

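To make the ``record_functions`` rename above concrete, here is a minimal sketch of extending the profiled scope. The set-based argument comes straight from the updated docstring; the action name ``my_custom_action`` is hypothetical.

from pytorch_lightning import Trainer
from pytorch_lightning.profiler import PyTorchProfiler

# Extend the default profiled functions (training_step_and_backward,
# training_step, backward, validation_step, test_step, predict_step)
# with a hypothetical custom action name.
profiler = PyTorchProfiler(record_functions={"my_custom_action"})
trainer = Trainer(profiler=profiler)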

pytorch_lightning/profiler/profilers.py

Lines changed: 2 additions & 2 deletions
@@ -120,14 +120,14 @@ def _rank_zero_info(self, *args, **kwargs) -> None:
         if self._local_rank in (None, 0):
             log.info(*args, **kwargs)

-    def _prepare_filename(self) -> str:
+    def _prepare_filename(self, extension: str = ".txt") -> str:
         filename = ""
         if self._stage is not None:
             filename += f"{self._stage}-"
         filename += str(self.filename)
         if self._local_rank is not None:
             filename += f"-{self._local_rank}"
-        filename += ".txt"
+        filename += extension
         return filename

     def _prepare_streams(self) -> None:
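
For illustration, the naming scheme that `_prepare_filename` produces after this change can be sketched as a standalone function; the stage, filename, and rank values below are hypothetical.

def prepare_filename(stage, filename, local_rank, extension=".txt"):
    # Mirrors the method above: "<stage>-<filename>-<rank><extension>",
    # where the stage and rank parts are skipped when they are None.
    out = ""
    if stage is not None:
        out += f"{stage}-"
    out += str(filename)
    if local_rank is not None:
        out += f"-{local_rank}"
    out += extension
    return out

# e.g. the fit-stage report for rank 1, and a rank-less report with a custom extension:
assert prepare_filename("fit", "perf_logs", 1) == "fit-perf_logs-1.txt"
assert prepare_filename(None, "perf_logs", None, ".json") == "perf_logs.json"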
