Skip to content

Commit 6e05052

Browse files
committed
refactor!: Update default workspace size based on platform.
BREAKING CHANGE: This commit sets the default workspace size to 1 GB for GPU platforms and to 256 MB for Jetson Nano/TX1 platforms (devices whose CUDA compute capability is less than 6). Signed-off-by: Dheeraj Peri <[email protected]>
1 parent a1180ce commit 6e05052

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

core/compiler.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,22 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
347347
if (cfg.partition_info.enabled) {
348348
return CompileGraphWithFallback(mod, cfg);
349349
}
350+
auto device_spec = cfg.convert_info.engine_settings.device;
351+
352+
// GPU default WS size : 1 GB
353+
// Jetson Nano/TX1 compute capability is 5.x → default WS = 256 MB (matches the 256 * (1 << 20) set below)
354+
auto workspace_size = cfg.convert_info.engine_settings.workspace_size;
355+
cudaDeviceProp device_prop;
356+
cudaGetDeviceProperties(&device_prop, device_spec.gpu_id);
357+
if (workspace_size == 0) {
358+
if (device_prop.major < 6){
359+
cfg.convert_info.engine_settings.workspace_size = 256 * (1 << 20);
360+
} else {
361+
cfg.convert_info.engine_settings.workspace_size = 1 << 30;
362+
}
363+
}
364+
365+
// Configure workspace size based on
350366
// TODO: Should be doing a functional transform but need PR #31978
351367
// [jit] More robust mangling
352368
// torch::jit::script::Module new_mod = mod.clone();
@@ -357,7 +373,6 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
357373
if (method.name().compare("forward") == 0) {
358374
auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
359375
auto new_g = std::make_shared<torch::jit::Graph>();
360-
auto device_spec = cfg.convert_info.engine_settings.device;
361376
auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
362377
AddEngineToGraph(new_mod, new_g, engine, cuda_device);
363378
auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);

core/conversion/conversionctx/ConversionCtx.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#include "core/conversion/conversionctx/ConversionCtx.h"
2+
#include <cuda_runtime.h>
3+
#include <typeinfo>
24
#include <iostream>
35
#include <sstream>
46
#include <utility>
@@ -58,7 +60,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
5860
net = make_trt(
5961
builder->createNetworkV2(1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));
6062

61-
LOG_DEBUG(build_settings);
63+
LOG_INFO(settings);
6264
cfg = make_trt(builder->createBuilderConfig());
6365

6466
for (auto p = settings.enabled_precisions.begin(); p != settings.enabled_precisions.end(); ++p) {
@@ -120,6 +122,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
120122
cfg->setMinTimingIterations(settings.num_min_timing_iters);
121123
cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
122124
cfg->setMaxWorkspaceSize(settings.workspace_size);
125+
123126
cfg->setDefaultDeviceType(settings.device.device_type);
124127
cfg->setEngineCapability(settings.capability);
125128

0 commit comments

Comments
 (0)