.. _multi_device_safe_mode:

Multi-Device Safe Mode
====================================
5+
Multi-device safe mode is a setting in Torch-TensorRT which allows the user to determine whether
the runtime checks for device consistency prior to every inference call.

There is a non-negligible, fixed cost per-inference call when multi-device safe mode is enabled, which is why
it is now disabled by default. It can be controlled via the following convenience function, which
doubles as a context manager:
.. code-block:: python

    # Enables Multi Device Safe Mode
    torch_tensorrt.runtime.set_multi_device_safe_mode(True)

    # Disables Multi Device Safe Mode [Default Behavior]
    torch_tensorrt.runtime.set_multi_device_safe_mode(False)

    # Enables Multi Device Safe Mode, then resets the safe mode to its prior setting
    with torch_tensorrt.runtime.set_multi_device_safe_mode(True):
        ...

TensorRT requires that each engine be associated with the CUDA context in the active thread from which it is invoked.
Therefore, if the device were to change in the active thread, which may be the case when invoking
engines on multiple GPUs from the same Python process, safe mode will cause Torch-TensorRT to display
an alert and switch GPUs accordingly. If safe mode is not enabled, there could be a mismatch between the engine
device and CUDA context device, which could lead the program to crash.
One technique for managing multiple TRT engines on different GPUs while not sacrificing performance for
multi-device safe mode is to use Python threads. Each thread is responsible for all of the TRT engines
on a single GPU, and the default CUDA device on each thread corresponds to the GPU for which it is
responsible (this can be set via ``torch.cuda.set_device(...)``). In this way, multiple threads can be used in the same
Python script without needing to switch CUDA contexts and incur performance overhead.
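The one-thread-per-GPU pattern above can be sketched as follows. This is an illustrative sketch, not Torch-TensorRT API: the worker name, the stand-in engine callables, and the result bookkeeping are all hypothetical, and the ``torch.cuda.set_device`` call is shown as a comment so the sketch runs without GPUs.

.. code-block:: python

    import threading

    def gpu_worker(device_id, engines, results):
        # In a real script, pin this thread's default CUDA device once:
        # torch.cuda.set_device(device_id)
        # All subsequent CUDA work in this thread then targets device_id,
        # so no per-inference device switching (or safe mode check) is needed.
        for name, engine in engines.items():
            # `engine` stands in for a call to a compiled Torch-TensorRT module.
            results[(device_id, name)] = engine()

    # One entry per GPU; the lambdas are placeholders for compiled engines.
    engines_per_gpu = {
        0: {"encoder": lambda: "gpu0-encoder-out"},
        1: {"decoder": lambda: "gpu1-decoder-out"},
    }

    results = {}
    threads = [
        threading.Thread(target=gpu_worker, args=(dev, engines, results))
        for dev, engines in engines_per_gpu.items()
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

Because each thread only ever touches one device, the engines it owns always see a matching CUDA context, which is what makes it safe to leave multi-device safe mode disabled in this arrangement.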