Skip to content

Commit 6e05052

Browse files
committed
refactor!: Update default workspace size based on platform.
BREAKING CHANGE: This commit sets the default workspace size to 1 GB for GPU platforms and to 256 MB for Jetson Nano/TX1 platforms (devices whose CUDA compute capability is less than 6). Signed-off-by: Dheeraj Peri <[email protected]>
1 parent a1180ce commit 6e05052

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

core/compiler.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,22 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
347347
if (cfg.partition_info.enabled) {
348348
return CompileGraphWithFallback(mod, cfg);
349349
}
350+
auto device_spec = cfg.convert_info.engine_settings.device;
351+
352+
// GPU default WS size : 1 GB
353+
// Jetson Nano/TX1 compute capability is 5.x → default WS = 256 MB (matches the 256 * (1 << 20) set below)
354+
auto workspace_size = cfg.convert_info.engine_settings.workspace_size;
355+
cudaDeviceProp device_prop;
356+
cudaGetDeviceProperties(&device_prop, device_spec.gpu_id);
357+
if (workspace_size == 0) {
358+
if (device_prop.major < 6){
359+
cfg.convert_info.engine_settings.workspace_size = 256 * (1 << 20);
360+
} else {
361+
cfg.convert_info.engine_settings.workspace_size = 1 << 30;
362+
}
363+
}
364+
365+
// Configure workspace size based on
350366
// TODO: Should be doing a functional transform but need PR #31978
351367
// [jit] More robust mangling
352368
// torch::jit::script::Module new_mod = mod.clone();
@@ -357,7 +373,6 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
357373
if (method.name().compare("forward") == 0) {
358374
auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
359375
auto new_g = std::make_shared<torch::jit::Graph>();
360-
auto device_spec = cfg.convert_info.engine_settings.device;
361376
auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
362377
AddEngineToGraph(new_mod, new_g, engine, cuda_device);
363378
auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);

core/conversion/conversionctx/ConversionCtx.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#include "core/conversion/conversionctx/ConversionCtx.h"
2+
#include <cuda_runtime.h>
3+
#include <typeinfo>
24
#include <iostream>
35
#include <sstream>
46
#include <utility>
@@ -58,7 +60,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
5860
net = make_trt(
5961
builder->createNetworkV2(1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));
6062

61-
LOG_DEBUG(build_settings);
63+
LOG_INFO(settings);
6264
cfg = make_trt(builder->createBuilderConfig());
6365

6466
for (auto p = settings.enabled_precisions.begin(); p != settings.enabled_precisions.end(); ++p) {
@@ -120,6 +122,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
120122
cfg->setMinTimingIterations(settings.num_min_timing_iters);
121123
cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
122124
cfg->setMaxWorkspaceSize(settings.workspace_size);
125+
123126
cfg->setDefaultDeviceType(settings.device.device_type);
124127
cfg->setEngineCapability(settings.capability);
125128

0 commit comments

Comments
 (0)