
Commit 16c78e9

Merge branch 'master' of https://github.com/NVIDIA/TRTorch into add_instance_norm
2 parents 8555512 + 744b417 commit 16c78e9

88 files changed (+2035 / -272 lines)


.gitignore

Lines changed: 4 additions & 0 deletions
@@ -44,3 +44,7 @@ tests/py/data
 examples/**/deps/**/*
 !examples/**/deps/.gitkeep
 examples/trtorchrt_example/trtorchrt_example
+examples/int8/ptq/ptq
+examples/int8/qat/qat
+examples/int8/training/vgg16/data/*
+examples/int8/datasets/data/*

core/compiler.cpp

Lines changed: 6 additions & 6 deletions
@@ -35,8 +35,8 @@ void AddEngineToGraph(
     runtime::CudaDevice& device_info,
     std::string engine_id = "",
     bool fallback = false) {
-  auto engine_ptr =
-      c10::make_intrusive<runtime::TRTEngine>(mod._ivalue()->name() + engine_id, serialized_engine, device_info);
+  auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(
+      mod._ivalue()->name() + "_engine_" + engine_id, serialized_engine, device_info);
   // Get required metadata about the engine out
   auto num_io = engine_ptr->num_io;
   auto name = engine_ptr->name;
@@ -119,8 +119,8 @@ void AddEngineToGraph(
 }

 bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::string method_name) {
-  // Go through Lowering to simplify graph and extract weight parameters
-  auto graph_and_parameters = lowering::Lower(mod, method_name);
+  // Go through Lowering to simplify graph
+  auto graph_and_parameters = lowering::Lower(mod, method_name, lowering::LowerInfo());

   auto g = graph_and_parameters.first;
   LOG_DEBUG(*g << "(CheckMethodOperatorSupport)\n");
@@ -130,7 +130,7 @@ bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::stri

 std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::string method_name, CompileSpec cfg) {
   // Go through Lowering to simplify graph and extract weight parameters
-  auto graph_and_parameters = lowering::Lower(mod, method_name);
+  auto graph_and_parameters = lowering::Lower(mod, method_name, cfg.lower_info);

   auto convert_cfg = std::move(cfg.convert_info);
   auto g = graph_and_parameters.first;
@@ -309,7 +309,7 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
     // Compile only forward methods. forward method contains the entire graph.
     if (method.name().compare("forward") == 0) {
       auto new_g = std::make_shared<torch::jit::Graph>();
-      auto graph_and_parameters = lowering::Lower(mod, method.name());
+      auto graph_and_parameters = lowering::Lower(mod, method.name(), cfg.lower_info);

       auto g = graph_and_parameters.first;
       auto params = graph_and_parameters.second;

core/compiler.h

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,7 @@
 #include <vector>
 #include "core/conversion/conversion.h"
 #include "core/ir/ir.h"
+#include "core/lowering/lowering.h"
 #include "core/partitioning/partitioning.h"
 #include "core/runtime/runtime.h"
 #include "torch/csrc/jit/api/module.h"
@@ -14,6 +15,7 @@ namespace core {
 struct CompileSpec {
   CompileSpec(std::vector<ir::Input> inputs) : convert_info(std::move(inputs)) {}
   conversion::ConversionInfo convert_info;
+  lowering::LowerInfo lower_info;
   partitioning::PartitionInfo partition_info;
 };


core/conversion/conversion_ignorelist.cpp

Lines changed: 0 additions & 1 deletion
@@ -16,7 +16,6 @@ const std::unordered_set<std::string>& get_non_convertable_nodes() {
       "aten::backward",
       "aten::save",
       "aten::contiguous",
-      "aten::to",
      "prim::RaiseException",
       "prim::Print",
       "prim::device",

core/conversion/conversionctx/ConversionCtx.cpp

Lines changed: 4 additions & 2 deletions
@@ -69,9 +69,11 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
     case nvinfer1::DataType::kINT8:
       TRTORCH_CHECK(builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
       cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
-      if (settings.calibrator == nullptr) {
+      if (!settings.calibrator) {
         LOG_INFO(
-            "INT8 kernels are enabled but not calibrator was provided, assuming source model was trained quantization aware");
+            "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
+      } else {
+        cfg->setInt8Calibrator(settings.calibrator);
       }
       break;
     case nvinfer1::DataType::kFLOAT:
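
With this change the builder config only attaches an INT8 calibrator when one is supplied; otherwise INT8 is enabled and the network is expected to carry explicit Q/DQ nodes from quantization-aware training. A minimal, TensorRT-only sketch of that decision (function name and error handling are illustrative, not part of this commit):

#include <stdexcept>
#include "NvInfer.h"

// Sketch: enable INT8 on a builder config, attaching a calibrator only when
// one is provided (post-training quantization). With no calibrator, dynamic
// ranges are assumed to come from Q/DQ nodes inserted during QAT.
void enable_int8(nvinfer1::IBuilder* builder,
                 nvinfer1::IBuilderConfig* cfg,
                 nvinfer1::IInt8Calibrator* calibrator /* may be nullptr */) {
  if (!builder->platformHasFastInt8()) {
    throw std::runtime_error("Platform does not support fast INT8");
  }
  cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
  if (calibrator) {
    cfg->setInt8Calibrator(calibrator); // PTQ path: ranges from calibration data
  }
  // else: QAT path, ranges come from the Q/DQ nodes in the graph itself
}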

core/conversion/converters/BUILD

Lines changed: 2 additions & 0 deletions
@@ -54,6 +54,7 @@ cc_library(
         "NodeConverterRegistry.cpp",
         "impl/activation.cpp",
         "impl/batch_norm.cpp",
+        "impl/cast.cpp",
         "impl/concat.cpp",
         "impl/constant.cpp",
         "impl/constant_pad.cpp",
@@ -68,6 +69,7 @@ cc_library(
         "impl/matrix_multiply.cpp",
         "impl/normalize.cpp",
         "impl/pooling.cpp",
+        "impl/quantization.cpp",
         "impl/reduce.cpp",
         "impl/replication_pad.cpp",
         "impl/select.cpp",

core/conversion/converters/converter_util.cpp

Lines changed: 9 additions & 3 deletions
@@ -142,7 +142,7 @@ nvinfer1::ITensor* castITensor(ConversionCtx* ctx, nvinfer1::ITensor* tensor, nv
   }
 }

-nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t) {
+nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t, const std::string& name) {
   bool post_freeze_cast = false;
   nvinfer1::DataType post_freeze_cast_type = nvinfer1::DataType::kFLOAT;
   // Other "unsupported weights types" can be added to this check here
@@ -175,9 +175,15 @@ nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t) {

   std::ostringstream tensor_id;
   tensor_id << reinterpret_cast<int*>(out);
+  std::string tensor_name;

-  LOG_DEBUG(ctx->logger, "Freezing tensor " << tensor_id.str() << " as an IConstantLayer");
-  const_layer->setName(("[Freeze Tensor " + tensor_id.str() + " ]").c_str());
+  if (!name.empty()) {
+    tensor_name = name;
+  } else {
+    tensor_name = tensor_id.str();
+  }
+  LOG_DEBUG(ctx->logger, "Freezing tensor " << tensor_name << " as an IConstantLayer");
+  const_layer->setName(("[Freeze Tensor " + tensor_name + " ]").c_str());

   if (post_freeze_cast) {
     out = castITensor(ctx, out, post_freeze_cast_type);

core/conversion/converters/converter_util.h

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ nvinfer1::ILayer* add_elementwise(
 nvinfer1::ITensor* castITensor(ConversionCtx* ctx, nvinfer1::ITensor* tensor, nvinfer1::DataType dtype);

 // Freeze an at::Tensor in a IConstant layer
-nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t);
+nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t, const std::string& name = std::string());

 } // namespace converters
 } // namespace conversion
core/conversion/converters/impl/cast.cpp

Lines changed: 63 additions & 0 deletions

@@ -0,0 +1,63 @@
+#include <torch/torch.h>
+#include "core/conversion/converters/converter_util.h"
+#include "core/conversion/converters/converters.h"
+#include "core/util/prelude.h"
+#include "core/util/trt_util.h"
+
+namespace trtorch {
+namespace core {
+namespace conversion {
+namespace converters {
+namespace impl {
+namespace {
+
+auto cast_registrations TRTORCH_UNUSED =
+    RegisterNodeConversionPatterns()
+        .pattern(
+            {"aten::to.dtype(Tensor self, int dtype, bool non_blocking=False, bool copy=False, int? memory_format=None) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto self = args[0].ITensorOrFreeze(ctx);
+               auto output_dtype = args[1].unwrapToScalar().to<int64_t>();
+               auto trt_dtype = util::ScalarTypeToTRTDataType(static_cast<at::ScalarType>(output_dtype));
+               auto casted_itensor = castITensor(ctx, self, trt_dtype);
+               auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
+               LOG_DEBUG("[aten::to.dtype] Output tensor shape: " << output->getDimensions());
+
+               return true;
+             }})
+        .pattern(
+            {"aten::to.other(Tensor self, Tensor other, bool non_blocking=False, bool copy=False, int? memory_format=None) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto self = args[0].ITensorOrFreeze(ctx);
+               nvinfer1::DataType other_dtype = args[1].ITensorOrFreeze(ctx)->getType();
+               auto casted_itensor = castITensor(ctx, self, other_dtype);
+               auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
+               LOG_DEBUG("[aten::to.other] Output tensor shape: " << output->getDimensions());
+
+               return true;
+             }})
+        .pattern(
+            {"aten::to.prim_Device(Tensor(a) self, Device? device, int? dtype=None, bool non_blocking=False, bool copy=False) -> (Tensor(b|a))",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto self = args[0].ITensorOrFreeze(ctx);
+               if (args[2].isIValue() && !args[2].IValue()->isScalar()) {
+                 auto output = ctx->AssociateValueAndTensor(n->outputs()[0], self);
+                 LOG_DEBUG("[aten::to.prim_Device] Output tensor shape: " << output->getDimensions());
+                 return true;
+               }
+
+               auto output_dtype = args[2].unwrapToScalar().to<int64_t>();
+               auto trt_dtype = util::ScalarTypeToTRTDataType(static_cast<at::ScalarType>(output_dtype));
+               auto casted_itensor = castITensor(ctx, self, trt_dtype);
+               auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
+               LOG_DEBUG("[aten::to.prim_Device] Output tensor shape: " << output->getDimensions());
+
+               return true;
+             }});
+// clang-format on
+} // namespace
+} // namespace impl
+} // namespace converters
+} // namespace conversion
+} // namespace core
+} // namespace trtorch
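
The new cast converters lean on util::ScalarTypeToTRTDataType and castITensor (declared in converter_util.h). For orientation only, here is a plausible sketch of the scalar-type-to-TensorRT mapping such a helper performs; this is an illustration, not the TRTorch implementation:

#include <stdexcept>
#include <ATen/ATen.h>
#include "NvInfer.h"

// Hypothetical mapping from at::ScalarType to nvinfer1::DataType, of the kind
// util::ScalarTypeToTRTDataType is used for above.
inline nvinfer1::DataType scalar_type_to_trt(at::ScalarType t) {
  switch (t) {
    case at::kFloat: return nvinfer1::DataType::kFLOAT;
    case at::kHalf:  return nvinfer1::DataType::kHALF;
    case at::kInt:   return nvinfer1::DataType::kINT32;
    case at::kChar:  return nvinfer1::DataType::kINT8;
    case at::kBool:  return nvinfer1::DataType::kBOOL;
    default: throw std::runtime_error("Unsupported scalar type for TensorRT");
  }
}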

core/conversion/converters/impl/constant.cpp

Lines changed: 10 additions & 6 deletions
@@ -16,12 +16,16 @@ auto constant_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
               // used for Fundimentally this is because of the differing
               // philosophies between TensorRT and PyTorch, i.e. Variables contain
               // Tensors vs. just Tensors
-
-              auto t = args[0].unwrapToTensor();
-              auto const_out = ctx->AssociateValueAndTensor(n->outputs()[0], tensor_to_const(ctx, t));
-
-              LOG_DEBUG("Output tensor shape: " << const_out->getDimensions());
-
+              nvinfer1::ITensor* output;
+              if (args[0].isITensor()){
+                output = ctx->AssociateValueAndTensor(n->outputs()[0], args[0].ITensor());
+              } else{
+                auto t = args[0].unwrapToTensor();
+                auto const_out = tensor_to_const(ctx, t, util::node_info(n).c_str());
+                output = ctx->AssociateValueAndTensor(n->outputs()[0], const_out);
+              }
+              LOG_DEBUG("Output tensor shape: " << output->getDimensions());
+
               return true;
             }});
             // clang-format on

core/conversion/converters/impl/conv_deconv.cpp

Lines changed: 84 additions & 32 deletions
@@ -11,15 +11,97 @@ namespace impl {
 namespace {

 bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args) {
-  auto in = args[0].ITensor(); // assumes non-static input Tensor
-  auto w = Weights(ctx, args[1].unwrapToTensor());
+  // Input to conv/deconv
+  auto in = args[0].ITensor();
+
+  // Conv /deconv parameters
   auto stride = util::toDims(args[3].unwrapToIntList());
   auto padding = util::toDims(args[4].unwrapToIntList());
   auto dilation = util::toDims(args[5].unwrapToIntList());
   bool transposed = args[6].unwrapToBool();
   auto out_padding = util::toDims(args[7].unwrapToIntList());
   int64_t groups = args[8].unwrapToInt();

+  // Reshape the parameters to 2D if needed
+  if (stride.nbDims == 1) {
+    stride = util::unsqueezeDims(stride, 1, 1);
+    LOG_DEBUG("Reshaped stride: " << stride);
+  }
+  if (dilation.nbDims == 1) {
+    dilation = util::unsqueezeDims(dilation, 1, 1);
+    LOG_DEBUG("Reshaped dilation: " << dilation);
+  }
+  if (padding.nbDims == 1) {
+    padding = util::unsqueezeDims(padding, 1, 0);
+    LOG_DEBUG("Reshaped padding: " << padding);
+  }
+  if (out_padding.nbDims == 1) {
+    out_padding = util::unsqueezeDims(out_padding, 1, 0);
+    LOG_DEBUG("Reshaped out_padding: " << out_padding);
+  }
+
+  // Get bias tensor or initialize it to zeros.
+  Weights bias;
+  if (args[2].IValue()->isTensor()) {
+    bias = Weights(ctx, args[2].unwrapToTensor());
+  } else {
+    bias = Weights();
+  }
+
+  // Handle case when weights of conv/deconv is an ITensor. This case happens for QAT networks where
+  // conv_weights -> Quantize -> Dequantize -> new_conv_weights -> conv <- input
+  // new_conv_weights will be an ITensor because it is an output of Dequantize layer defined in impl/quantization.cpp
+  if (args[1].isITensor()) {
+    // Get the kernel tensor
+    auto kernel = args[1].ITensor();
+    auto kernel_dims = kernel->getDimensions();
+
+    // Make a new Dims with only the spatial dimensions.
+    nvinfer1::Dims filter_dim;
+    int64_t nbSpatialDims = in->getDimensions().nbDims - 2;
+    TRTORCH_CHECK(
+        nbSpatialDims = kernel_dims.nbDims - 2,
+        "Number of input spatial dimensions should match the kernel spatial dimensions");
+    filter_dim.nbDims = nbSpatialDims;
+    filter_dim.d[0] = kernel_dims.d[2];
+    filter_dim.d[1] = kernel_dims.d[3];
+
+    // Initialize a dummy constant kernel to pass it to INetwork->addConvolutionNd/addDeconvolutionNd API.
+    auto kernel_weights = nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0};
+
+    nvinfer1::ILayer* layer = nullptr;
+    if (transposed) {
+      nvinfer1::IDeconvolutionLayer* deconvLayer =
+          ctx->net->addDeconvolutionNd(*in, kernel_dims.d[0], filter_dim, kernel_weights, bias.data);
+      deconvLayer->setStrideNd(stride);
+      deconvLayer->setDilationNd(dilation);
+      deconvLayer->setNbGroups(groups);
+      deconvLayer->setPaddingNd(padding);
+      // Set deconv kernel weights
+      deconvLayer->setInput(1, *kernel);
+      TRTORCH_CHECK(deconvLayer, "Unable to create deconv layer with non-const weights from node: " << *n);
+      layer = deconvLayer;
+    } else {
+      nvinfer1::IConvolutionLayer* convLayer =
+          ctx->net->addConvolutionNd(*in, kernel_dims.d[0], filter_dim, kernel_weights, bias.data);
+      convLayer->setStrideNd(stride);
+      convLayer->setPaddingMode(nvinfer1::PaddingMode::kCAFFE_ROUND_DOWN);
+      convLayer->setPaddingNd(padding);
+      convLayer->setPostPadding(out_padding);
+      convLayer->setDilationNd(dilation);
+      convLayer->setNbGroups(groups);
+
+      // Set conv kernel weights
+      convLayer->setInput(1, *kernel);
+      layer = convLayer;
+    }
+
+    ctx->AssociateValueAndTensor(n->outputs()[0], layer->getOutput(0));
+    LOG_DEBUG("Output tensor shape: " << layer->getOutput(0)->getDimensions());
+    return true;
+  }
+
+  auto w = Weights(ctx, args[1].unwrapToTensor());
   auto dims = in->getDimensions();
   auto orig_dims = dims;
   LOG_DEBUG("Input dims: " << orig_dims);
@@ -47,32 +129,9 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
     w.kernel_shape.d[1] = 1;
     LOG_DEBUG("Reshaped Weights: " << w);
   }
-  if (stride.nbDims == 1) {
-    stride = util::unsqueezeDims(stride, 1, 1);
-    LOG_DEBUG("Reshaped stride: " << stride);
-  }
-  if (dilation.nbDims == 1) {
-    dilation = util::unsqueezeDims(dilation, 1, 1);
-    LOG_DEBUG("Reshaped dilation: " << dilation);
-  }
-  if (padding.nbDims == 1) {
-    padding = util::unsqueezeDims(padding, 1, 0);
-    LOG_DEBUG("Reshaped padding: " << padding);
-  }
-  if (out_padding.nbDims == 1) {
-    out_padding = util::unsqueezeDims(out_padding, 1, 0);
-    LOG_DEBUG("Reshaped out_padding: " << out_padding);
-  }

   nvinfer1::ILayer* new_layer;
   if (transposed) {
-    Weights bias;
-    if (args[2].IValue()->isTensor()) {
-      bias = Weights(ctx, args[2].unwrapToTensor());
-    } else {
-      bias = Weights(ctx, torch::zeros(w.shape.d[1] * groups));
-    }
-
     // shape of deconvolution's weight: [in, out/groups, ...]
     auto deconv = ctx->net->addDeconvolutionNd(*in, w.shape.d[1] * groups, w.kernel_shape, w.data, bias.data);
     TRTORCH_CHECK(deconv, "Unable to create deconvolution layer from node: " << *n);
@@ -90,13 +149,6 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
 #endif
     new_layer = deconv;
   } else {
-    Weights bias;
-    if (args[2].IValue()->isTensor()) {
-      bias = Weights(ctx, args[2].unwrapToTensor());
-    } else {
-      bias = Weights(ctx, torch::zeros(w.shape.d[0]));
-    }
-
     // shape of convolution's weight: [out, in/groups, ...]
     auto conv = ctx->net->addConvolutionNd(*in, w.shape.d[0], w.kernel_shape, w.data, bias.data);
     TRTORCH_CHECK(conv, "Unable to create convolution layer from node: " << *n);
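
The new ITensor-weight branch above uses a standard TensorRT idiom: build the convolution with empty placeholder kernel Weights, then bind the dequantized weight tensor as the layer's second input. A stripped-down sketch of that idiom (function and parameter names are illustrative, not from this commit):

#include "NvInfer.h"

// Sketch: convolution whose kernel comes from the network (e.g. the output of
// a dequantize layer) rather than from host memory. Placeholder Weights are
// passed to addConvolutionNd, then the kernel ITensor is bound via setInput(1, ...).
nvinfer1::IConvolutionLayer* conv_with_tensor_weights(
    nvinfer1::INetworkDefinition* net,
    nvinfer1::ITensor& input,
    nvinfer1::ITensor& kernel,   // e.g. output of a dequantize layer
    int32_t out_channels,
    nvinfer1::Dims filter_dims) {
  nvinfer1::Weights empty{nvinfer1::DataType::kFLOAT, nullptr, 0};
  auto* conv = net->addConvolutionNd(input, out_channels, filter_dims, empty, empty);
  if (conv) {
    conv->setInput(1, kernel); // kernel weights now flow in as a graph tensor
  }
  return conv;
}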

core/conversion/converters/impl/matrix_multiply.cpp

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ auto mm_registrations TRTORCH_UNUSED =

               auto mm_layer = ctx->net->addMatrixMultiply(
                   *self, nvinfer1::MatrixOperation::kNONE, *other, nvinfer1::MatrixOperation::kNONE);
+
               TRTORCH_CHECK(mm_layer, "Unable to create matrix multiplication node: " << *n);
               mm_layer->setName(util::node_info(n).c_str());
               auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], mm_layer->getOutput(0));
