From c4fb732d6a0f18f8f2f6fa2e53f354072123e0a3 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 12 Feb 2022 16:42:33 +0000 Subject: [PATCH 1/5] Fix build error from PR #782 Signed-off-by: root --- cpp/bin/torchtrtc/main.cpp | 6 ------ py/torch_tensorrt/ts/_compiler.py | 6 ------ 2 files changed, 12 deletions(-) diff --git a/cpp/bin/torchtrtc/main.cpp b/cpp/bin/torchtrtc/main.cpp index a437a5e133..f0aba1b677 100644 --- a/cpp/bin/torchtrtc/main.cpp +++ b/cpp/bin/torchtrtc/main.cpp @@ -229,8 +229,6 @@ int main(int argc, char** argv) { args::Flag build_debuggable_engine( parser, "build-debuggable-engine", "Creates a debuggable engine", {"build-debuggable-engine"}); - args::Flag use_strict_types( - parser, "use-strict-types", "Restrict operating type to only use set operation precision", {"use-strict-types"}); args::Flag allow_gpu_fallback( parser, "allow-gpu-fallback", @@ -306,8 +304,6 @@ int main(int argc, char** argv) { parser, "num_iters", "Number of averaging timing iterations used to select kernels", {"num-avg-timing-iters"}); args::ValueFlag workspace_size( parser, "workspace_size", "Maximum size of workspace given to TensorRT", {"workspace-size"}); - args::ValueFlag max_batch_size( - parser, "max_batch_size", "Maximum batch size (must be >= 1 to be set, 0 means not set)", {"max-batch-size"}); args::ValueFlag threshold( parser, "threshold", @@ -460,7 +456,6 @@ int main(int argc, char** argv) { compile_settings.debug = true; } - if (allow_gpu_fallback) { compile_settings.device.allow_gpu_fallback = true; } @@ -583,7 +578,6 @@ int main(int argc, char** argv) { compile_settings.workspace_size = args::get(workspace_size); } - if (truncate_long_and_double) { compile_settings.truncate_long_and_double = true; } diff --git a/py/torch_tensorrt/ts/_compiler.py b/py/torch_tensorrt/ts/_compiler.py index f5c14a4f46..b895bf54b8 100644 --- a/py/torch_tensorrt/ts/_compiler.py +++ b/py/torch_tensorrt/ts/_compiler.py @@ -17,7 +17,6 @@ def compile(module: torch.jit.ScriptModule, enabled_precisions=set(), refit=False, debug=False, - strict_types=False, capability=_enums.EngineCapability.default, num_min_timing_iters=2, num_avg_timing_iters=1, @@ -65,7 +64,6 @@ def compile(module: torch.jit.ScriptModule, enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels refit (bool): Enable refitting debug (bool): Enable debuggable engine - strict_types (bool): Kernels should strictly run in a particular operating precision. Enabled precision should only have one type in the set capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_min_timing_iters (int): Number of minimization timing iterations used to select kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels @@ -98,7 +96,6 @@ def compile(module: torch.jit.ScriptModule, "enabled_precisions": enabled_precisions, # Enabling FP16 kernels "refit": refit, # enable refit "debug": debug, # enable debuggable engine - "strict_types": strict_types, # kernels should strictly run in operating precision "capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": num_min_timing_iters, # Number of minimization timing iterations used to select kernels "num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels @@ -127,7 +124,6 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, enabled_precisions=set(), refit=False, debug=False, - strict_types=False, capability=_enums.EngineCapability.default, num_min_timing_iters=2, num_avg_timing_iters=1, @@ -169,7 +165,6 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels refit (bool): Enable refitting debug (bool): Enable debuggable engine - strict_types (bool): Kernels should strictly run in a particular operating precision. Enabled precision should only have one type in the set capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_min_timing_iters (int): Number of minimization timing iterations used to select kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels @@ -193,7 +188,6 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, "enabled_precisions": enabled_precisions, # Enabling FP16 kernels "refit": refit, # enable refit "debug": debug, # enable debuggable engine - "strict_types": strict_types, # kernels should strictly run in operating precision "capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": num_min_timing_iters, # Number of minimization timing iterations used to select kernels "num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels From d499b1f03333b1ee5ec682b9b7d83fcf0e552451 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 12 Feb 2022 16:43:32 +0000 Subject: [PATCH 2/5] Change default tensor Signed-off-by: root --- .../conversion/converters/impl/batch_norm.cpp | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/core/conversion/converters/impl/batch_norm.cpp b/core/conversion/converters/impl/batch_norm.cpp index a17e8548e2..d469c9694e 100644 --- a/core/conversion/converters/impl/batch_norm.cpp +++ b/core/conversion/converters/impl/batch_norm.cpp @@ -50,27 +50,34 @@ auto batch_norm_registrations TORCHTRT_UNUSED = auto orig_shape = input->getDimensions(); auto shape = util::toVec(orig_shape); auto tensor_type = util::TRTDataTypeToScalarType(input->getType()); - auto options = torch::TensorOptions().dtype(tensor_type); - + auto options = torch::TensorOptions().dtype(tensor_type).device(torch::kCUDA, ctx->settings.device.gpu_id); + torch::Tensor gamma, beta, mean, var; + LOG_DEBUG("Input :" << orig_shape << "/" << input->getType()); + // affine=True + LOG_DEBUG("Args[1] gamma : " << args[1].isIValue() << " / " << args[1].IValue()->isNone()); + LOG_DEBUG("Args[2] beta : " << args[2].isIValue() << " / " << args[2].IValue()->isNone()); + // track_running_stats=True + LOG_DEBUG("Args[3] mean : " << args[3].isIValue() << " / " << args[3].IValue()->isNone()); + LOG_DEBUG("Args[4] var : " << args[4].isIValue() << " / " << args[4].IValue()->isNone()); + LOG_DEBUG("use_input_stats, momemtum, cudnn_enabled disregarded"); + LOG_DEBUG("ctx->input_is_dynamic : " << ctx->input_is_dynamic); + auto channel_dim = shape[1]; if (ctx->input_is_dynamic) { - gamma = args[1].unwrapToTensor(); - beta = args[2].unwrapToTensor(); + gamma = args[1].unwrapToTensor(at::full(channel_dim, 1, options)); + beta = args[2].unwrapToTensor(at::full(channel_dim, 0, options)); mean = args[3].unwrapToTensor(); var = args[4].unwrapToTensor(); } else { - gamma = args[1].unwrapToTensor(at::full({shape}, 1, {options})); - beta = args[2].unwrapToTensor(at::full({shape}, 1, {options})); - mean = args[3].unwrapToTensor(at::full({shape}, 0, {options})); - var = args[4].unwrapToTensor(at::full({shape}, 0, {options})); + gamma = args[1].unwrapToTensor(at::full(channel_dim, 1, options)); + beta = args[2].unwrapToTensor(at::full(channel_dim, 0, options)); + mean = args[3].unwrapToTensor(at::full(channel_dim, 0, options)); + var = args[4].unwrapToTensor(at::full(channel_dim, 0, options)); } auto eps = static_cast(args[7].unwrapToDouble(1e-5f)); - LOG_DEBUG("momentum disregarded"); - LOG_DEBUG("training disregarded"); - LOG_DEBUG("cudnn disregarded"); TORCHTRT_CHECK(orig_shape.nbDims >= 2, "Unable to create batch normalization layer from node: " << *n); // Expand spatial dims from 1D to 2D if needed From 8659318553bdfab0ae7947eb0974ab48d2063a38 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 13 Feb 2022 06:40:10 +0000 Subject: [PATCH 3/5] Fix lint Signed-off-by: root --- core/conversion/converters/impl/batch_norm.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/conversion/converters/impl/batch_norm.cpp b/core/conversion/converters/impl/batch_norm.cpp index d469c9694e..b1e3cd67a7 100644 --- a/core/conversion/converters/impl/batch_norm.cpp +++ b/core/conversion/converters/impl/batch_norm.cpp @@ -50,8 +50,9 @@ auto batch_norm_registrations TORCHTRT_UNUSED = auto orig_shape = input->getDimensions(); auto shape = util::toVec(orig_shape); auto tensor_type = util::TRTDataTypeToScalarType(input->getType()); - auto options = torch::TensorOptions().dtype(tensor_type).device(torch::kCUDA, ctx->settings.device.gpu_id); - + auto options = + torch::TensorOptions().dtype(tensor_type).device(torch::kCUDA, ctx->settings.device.gpu_id); + torch::Tensor gamma, beta, mean, var; LOG_DEBUG("Input :" << orig_shape << "/" << input->getType()); // affine=True From 56a2043c3935b763b86b926f1997d8c9931539fc Mon Sep 17 00:00:00 2001 From: root Date: Tue, 15 Feb 2022 08:26:23 +0000 Subject: [PATCH 4/5] Add test Signed-off-by: root --- .../conversion/converters/test_batch_norm.cpp | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/core/conversion/converters/test_batch_norm.cpp b/tests/core/conversion/converters/test_batch_norm.cpp index aa7782552f..ddb834b933 100644 --- a/tests/core/conversion/converters/test_batch_norm.cpp +++ b/tests/core/conversion/converters/test_batch_norm.cpp @@ -36,6 +36,39 @@ TEST(Converters, ATenBatchNormConvertsCorrectly) { torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); } +TEST(Converters, ATenBatchNormAffineFalseConvertsCorrectly) { + // BatchNorm(ch, affine=False) + const auto graph = R"IR( + graph(%0 : Tensor, + %1: NoneType = prim::Constant(), + %2: NoneType = prim::Constant(), + %3: Float(5, strides=[1]), + %4: Float(5, strides=[1])): + %5 : bool = prim::Constant[value=0]() + %6 : float = prim::Constant[value=1.0000000000000001e-05]() + %7 : float = prim::Constant[value=0.10000000000000001]() + %8 : Tensor = aten::batch_norm(%0, %1, %2, %3, %4, %5, %6, %7, %5) + return (%8))IR"; + + auto g = std::make_shared(); + torch::jit::parseIR(graph, g.get()); + + auto in = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA}); + + torch::jit::IValue gamma, beta; // NoneType + auto mean = at::randint(1, 10, {5}, {at::kCUDA}); + auto var = at::randint(1, 10, {5}, {at::kCUDA}); + + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {gamma, beta, mean, var}); + auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); + + params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {gamma, beta, mean, var}); + auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); + + ASSERT_TRUE( + torch_tensorrt::tests::util::almostEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0]), 2e-6)); +} + TEST(Converters, ATenBatchNorm1DConvertsCorrectly) { const auto graph = R"IR( graph(%0 : Tensor, From 12942ac8f841f75c2c78230e89b49d61029b7565 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Mon, 4 Apr 2022 19:42:45 -0700 Subject: [PATCH 5/5] refactor(//tests): Fixing batchnorm false test Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- tests/core/conversion/converters/test_batch_norm.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/core/conversion/converters/test_batch_norm.cpp b/tests/core/conversion/converters/test_batch_norm.cpp index ddb834b933..5af54d7de4 100644 --- a/tests/core/conversion/converters/test_batch_norm.cpp +++ b/tests/core/conversion/converters/test_batch_norm.cpp @@ -40,14 +40,13 @@ TEST(Converters, ATenBatchNormAffineFalseConvertsCorrectly) { // BatchNorm(ch, affine=False) const auto graph = R"IR( graph(%0 : Tensor, - %1: NoneType = prim::Constant(), - %2: NoneType = prim::Constant(), %3: Float(5, strides=[1]), %4: Float(5, strides=[1])): + %1 : None = prim::Constant() %5 : bool = prim::Constant[value=0]() %6 : float = prim::Constant[value=1.0000000000000001e-05]() %7 : float = prim::Constant[value=0.10000000000000001]() - %8 : Tensor = aten::batch_norm(%0, %1, %2, %3, %4, %5, %6, %7, %5) + %8 : Tensor = aten::batch_norm(%0, %1, %1, %3, %4, %5, %6, %7, %5) return (%8))IR"; auto g = std::make_shared(); @@ -55,14 +54,13 @@ TEST(Converters, ATenBatchNormAffineFalseConvertsCorrectly) { auto in = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA}); - torch::jit::IValue gamma, beta; // NoneType auto mean = at::randint(1, 10, {5}, {at::kCUDA}); auto var = at::randint(1, 10, {5}, {at::kCUDA}); - auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {gamma, beta, mean, var}); + auto params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {mean, var}); auto jit_results = torch_tensorrt::tests::util::RunGraph(g, params, {in}); - params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {gamma, beta, mean, var}); + params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {mean, var}); auto trt_results = torch_tensorrt::tests::util::RunGraphEngine(g, params, {in}); ASSERT_TRUE(