From 67780a3f6a5d50c759e089164a7c9d493e8d3802 Mon Sep 17 00:00:00 2001
From: Anay Dongre <dongreanay@gmail.com>
Date: Wed, 11 Jun 2025 15:31:43 -0700
Subject: [PATCH 1/5] DOC: Clarify DeviceStatsMonitor logged metrics (#20807)

---
 .../pytorch/callbacks/device_stats_monitor.py  | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/lightning/pytorch/callbacks/device_stats_monitor.py b/src/lightning/pytorch/callbacks/device_stats_monitor.py
index 6279dd13be4af..a8566bb4a7e76 100644
--- a/src/lightning/pytorch/callbacks/device_stats_monitor.py
+++ b/src/lightning/pytorch/callbacks/device_stats_monitor.py
@@ -34,6 +34,23 @@ class DeviceStatsMonitor(Callback):
     r"""Automatically monitors and logs device stats during training, validation and testing stage.
     ``DeviceStatsMonitor`` is a special callback as it requires a ``logger`` to passed as argument to the ``Trainer``.
 
+    Logged Metrics:
+        Device statistics are logged with keys prefixed as
+        ``DeviceStatsMonitor.{hook_name}/{base_metric_name}`` (e.g.,
+        ``DeviceStatsMonitor.on_train_batch_start/cpu_percent``).
+	    The source of these metrics depends on the ``cpu_stats`` flag
+	    and the active accelerator.
+
+        CPU (via ``psutil``): Logs ``cpu_percent``, ``cpu_vm_percent``, ``cpu_swap_percent``.
+        All are percentages (%).
+        CUDA GPU (via :func:`torch.cuda.memory_stats`): Logs detailed memory statistics from
+        PyTorch's allocator (e.g., ``allocated_bytes.all.current``, ``num_ooms``; all in Bytes).
+        GPU compute utilization is not logged by default.
+        Other Accelerators (e.g., TPU, MPS): Logs device-specific stats.
+        - TPU example: ``avg. free memory (MB)``.
+        - MPS example: ``mps.current_allocated_bytes``.
+        Observe logs or check accelerator documentation for details.
+
     Args:
         cpu_stats: if ``None``, it will log CPU stats only if the accelerator is CPU.
             If ``True``, it will log CPU stats regardless of the accelerator.
@@ -45,6 +62,7 @@ class DeviceStatsMonitor(Callback):
         ModuleNotFoundError:
             If ``psutil`` is not installed and CPU stats are monitored.
 
+
     Example::
 
         from lightning import Trainer

From 4cb878428fed683d5f3677e8db43e0b5ca7ced28 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 17 Jun 2025 03:43:14 +0000
Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/lightning/pytorch/callbacks/device_stats_monitor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lightning/pytorch/callbacks/device_stats_monitor.py b/src/lightning/pytorch/callbacks/device_stats_monitor.py
index a8566bb4a7e76..0fb3876913d6e 100644
--- a/src/lightning/pytorch/callbacks/device_stats_monitor.py
+++ b/src/lightning/pytorch/callbacks/device_stats_monitor.py
@@ -38,8 +38,8 @@ class DeviceStatsMonitor(Callback):
         Device statistics are logged with keys prefixed as
         ``DeviceStatsMonitor.{hook_name}/{base_metric_name}`` (e.g.,
         ``DeviceStatsMonitor.on_train_batch_start/cpu_percent``).
-	    The source of these metrics depends on the ``cpu_stats`` flag
-	    and the active accelerator.
+            The source of these metrics depends on the ``cpu_stats`` flag
+            and the active accelerator.
 
         CPU (via ``psutil``): Logs ``cpu_percent``, ``cpu_vm_percent``, ``cpu_swap_percent``.
         All are percentages (%).

From 8dee98a9d298b8d8b542193122b20860bc5c4e39 Mon Sep 17 00:00:00 2001
From: Jirka B <j.borovec+github@gmail.com>
Date: Wed, 18 Jun 2025 13:28:28 +0200
Subject: [PATCH 3/5] DOC: fix reST list markup in DeviceStatsMonitor docstring

---
 .../pytorch/callbacks/device_stats_monitor.py       | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/lightning/pytorch/callbacks/device_stats_monitor.py b/src/lightning/pytorch/callbacks/device_stats_monitor.py
index 0fb3876913d6e..41206c6755cf2 100644
--- a/src/lightning/pytorch/callbacks/device_stats_monitor.py
+++ b/src/lightning/pytorch/callbacks/device_stats_monitor.py
@@ -34,21 +34,20 @@ class DeviceStatsMonitor(Callback):
     r"""Automatically monitors and logs device stats during training, validation and testing stage.
     ``DeviceStatsMonitor`` is a special callback as it requires a ``logger`` to passed as argument to the ``Trainer``.
 
-    Logged Metrics:
-        Device statistics are logged with keys prefixed as
-        ``DeviceStatsMonitor.{hook_name}/{base_metric_name}`` (e.g.,
-        ``DeviceStatsMonitor.on_train_batch_start/cpu_percent``).
-            The source of these metrics depends on the ``cpu_stats`` flag
-            and the active accelerator.
+    Device statistics are logged with keys prefixed as ``DeviceStatsMonitor.{hook_name}/{base_metric_name}`` (e.g.,
+    ``DeviceStatsMonitor.on_train_batch_start/cpu_percent``).
+    The source of these metrics depends on the ``cpu_stats`` flag and the active accelerator.
 
         CPU (via ``psutil``): Logs ``cpu_percent``, ``cpu_vm_percent``, ``cpu_swap_percent``.
         All are percentages (%).
         CUDA GPU (via :func:`torch.cuda.memory_stats`): Logs detailed memory statistics from
         PyTorch's allocator (e.g., ``allocated_bytes.all.current``, ``num_ooms``; all in Bytes).
         GPU compute utilization is not logged by default.
-        Other Accelerators (e.g., TPU, MPS): Logs device-specific stats.
+        Other Accelerators (e.g., TPU, MPS): Logs device-specific stats:
+
         - TPU example: ``avg. free memory (MB)``.
         - MPS example: ``mps.current_allocated_bytes``.
+
         Observe logs or check accelerator documentation for details.
 
     Args:

From fd323edc91090cb633d1548a8043e15b5eb7bc8a Mon Sep 17 00:00:00 2001
From: Jirka B <j.borovec+github@gmail.com>
Date: Wed, 18 Jun 2025 13:28:28 +0200
Subject: [PATCH 4/5] DOC: list per-accelerator metrics in DeviceStatsMonitor docstring

---
 .../pytorch/callbacks/device_stats_monitor.py | 71 +++++++++++++++----
 1 file changed, 59 insertions(+), 12 deletions(-)

diff --git a/src/lightning/pytorch/callbacks/device_stats_monitor.py b/src/lightning/pytorch/callbacks/device_stats_monitor.py
index 41206c6755cf2..7768be03eb68e 100644
--- a/src/lightning/pytorch/callbacks/device_stats_monitor.py
+++ b/src/lightning/pytorch/callbacks/device_stats_monitor.py
@@ -34,21 +34,68 @@ class DeviceStatsMonitor(Callback):
     r"""Automatically monitors and logs device stats during training, validation and testing stage.
     ``DeviceStatsMonitor`` is a special callback as it requires a ``logger`` to passed as argument to the ``Trainer``.
 
-    Device statistics are logged with keys prefixed as ``DeviceStatsMonitor.{hook_name}/{base_metric_name}`` (e.g.,
-    ``DeviceStatsMonitor.on_train_batch_start/cpu_percent``).
-    The source of these metrics depends on the ``cpu_stats`` flag and the active accelerator.
 
-        CPU (via ``psutil``): Logs ``cpu_percent``, ``cpu_vm_percent``, ``cpu_swap_percent``.
-        All are percentages (%).
-        CUDA GPU (via :func:`torch.cuda.memory_stats`): Logs detailed memory statistics from
-        PyTorch's allocator (e.g., ``allocated_bytes.all.current``, ``num_ooms``; all in Bytes).
-        GPU compute utilization is not logged by default.
-        Other Accelerators (e.g., TPU, MPS): Logs device-specific stats:
+    **Logged Metrics**
 
-        - TPU example: ``avg. free memory (MB)``.
-        - MPS example: ``mps.current_allocated_bytes``.
+    Logs device statistics with keys prefixed as ``DeviceStatsMonitor.{hook_name}/{base_metric_name}``.
 
-        Observe logs or check accelerator documentation for details.
+    The actual metrics depend on the active accelerator and the ``cpu_stats`` flag.
+
+    **CPU (via `psutil`)**
+
+    - ``cpu_percent``: System-wide CPU utilization (%)
+    - ``cpu_vm_percent``: System-wide virtual memory (RAM) utilization (%)
+    - ``cpu_swap_percent``: System-wide swap memory utilization (%)
+
+    **CUDA GPU (via `torch.cuda.memory_stats`)**
+
+    Logs memory statistics from PyTorch caching allocator (all in Bytes). 
+    GPU compute utilization is not logged by default.
+
+    *General Memory Usage:*
+
+    - ``allocated_bytes.all.current``: Current allocated GPU memory
+    - ``allocated_bytes.all.peak``: Peak allocated GPU memory
+    - ``reserved_bytes.all.current``: Current reserved GPU memory (allocated + cached)
+    - ``reserved_bytes.all.peak``: Peak reserved GPU memory
+    - ``active_bytes.all.current``: Current GPU memory in active use
+    - ``active_bytes.all.peak``: Peak GPU memory in active use
+    - ``inactive_split_bytes.all.current``: Current memory in inactive, non-releasable (split) blocks
+
+    *Allocator Pool Statistics* (for ``small_pool`` and ``large_pool``):
+
+    - ``allocated_bytes.{pool_type}.current`` / ``.peak``
+    - ``reserved_bytes.{pool_type}.current`` / ``.peak``
+    - ``active_bytes.{pool_type}.current`` / ``.peak``
+
+    *Allocator Events:*
+
+    - ``num_ooms``: Cumulative out-of-memory errors
+    - ``num_alloc_retries``: Number of failed allocations that triggered a cache flush and retry
+    - ``num_device_alloc``: Number of device allocation calls (e.g. ``cudaMalloc``)
+    - ``num_device_free``: Number of device free calls (e.g. ``cudaFree``)
+
+    For a full list of CUDA memory stats, see:
+    https://pytorch.org/docs/stable/generated/torch.cuda.memory_stats.html
+
+    **TPU (via `torch_xla`)**
+
+    *Memory Metrics* (per device, e.g. ``xla:0``):
+
+    - ``memory.free.xla:0``: Free HBM memory (MB)
+    - ``memory.used.xla:0``: Used HBM memory (MB)
+    - ``memory.percent.xla:0``: Percentage of HBM memory used (%)
+
+    *XLA Operation Counters:*
+
+    - ``CachedCompile.xla``
+    - ``CreateXlaTensor.xla``
+    - ``DeviceDataCacheMiss.xla``
+    - ``UncachedCompile.xla``
+    - ``xla::add.xla``, ``xla::addmm.xla``, etc.
+
+    These counters can be retrieved using:
+    ``torch_xla.debug.metrics.counter_names()``
 
     Args:
         cpu_stats: if ``None``, it will log CPU stats only if the accelerator is CPU.

From df20e4bcc9dcf0df2fee99e8ad0e35fe70abbcf5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 26 Jun 2025 02:24:14 +0000
Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/lightning/pytorch/callbacks/device_stats_monitor.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/lightning/pytorch/callbacks/device_stats_monitor.py b/src/lightning/pytorch/callbacks/device_stats_monitor.py
index 7768be03eb68e..9b03431be29e4 100644
--- a/src/lightning/pytorch/callbacks/device_stats_monitor.py
+++ b/src/lightning/pytorch/callbacks/device_stats_monitor.py
@@ -34,7 +34,6 @@ class DeviceStatsMonitor(Callback):
     r"""Automatically monitors and logs device stats during training, validation and testing stage.
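-    ``DeviceStatsMonitor`` is a special callback as it requires a ``logger`` to passed as argument to the ``Trainer``.
+    ``DeviceStatsMonitor`` is a special callback as it requires a ``logger`` to be passed to the ``Trainer``.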
 
-
     **Logged Metrics**
 
     Logs device statistics with keys prefixed as ``DeviceStatsMonitor.{hook_name}/{base_metric_name}``.
@@ -49,7 +48,7 @@ class DeviceStatsMonitor(Callback):
 
     **CUDA GPU (via `torch.cuda.memory_stats`)**
 
-    Logs memory statistics from PyTorch caching allocator (all in Bytes). 
+    Logs statistics from the PyTorch caching allocator (sizes in bytes; the ``num_*`` entries are event counts).
     GPU compute utilization is not logged by default.
 
     *General Memory Usage:*