
Commit 16c78e9

Merge branch 'master' of https://github.com/NVIDIA/TRTorch into add_instance_norm
2 parents 8555512 + 744b417 commit 16c78e9

88 files changed (+2035 / -272 lines)


.gitignore

Lines changed: 4 additions & 0 deletions
@@ -44,3 +44,7 @@ tests/py/data
 examples/**/deps/**/*
 !examples/**/deps/.gitkeep
 examples/trtorchrt_example/trtorchrt_example
+examples/int8/ptq/ptq
+examples/int8/qat/qat
+examples/int8/training/vgg16/data/*
+examples/int8/datasets/data/*

core/compiler.cpp

Lines changed: 6 additions & 6 deletions
@@ -35,8 +35,8 @@ void AddEngineToGraph(
     runtime::CudaDevice& device_info,
     std::string engine_id = "",
     bool fallback = false) {
-  auto engine_ptr =
-      c10::make_intrusive<runtime::TRTEngine>(mod._ivalue()->name() + engine_id, serialized_engine, device_info);
+  auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(
+      mod._ivalue()->name() + "_engine_" + engine_id, serialized_engine, device_info);
   // Get required metadata about the engine out
   auto num_io = engine_ptr->num_io;
   auto name = engine_ptr->name;
@@ -119,8 +119,8 @@ void AddEngineToGraph(
 }

 bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::string method_name) {
-  // Go through Lowering to simplify graph and extract weight parameters
-  auto graph_and_parameters = lowering::Lower(mod, method_name);
+  // Go through Lowering to simplify graph
+  auto graph_and_parameters = lowering::Lower(mod, method_name, lowering::LowerInfo());

   auto g = graph_and_parameters.first;
   LOG_DEBUG(*g << "(CheckMethodOperatorSupport)\n");
@@ -130,7 +130,7 @@ bool CheckMethodOperatorSupport(const torch::jit::script::Module& mod, std::stri

 std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::string method_name, CompileSpec cfg) {
   // Go through Lowering to simplify graph and extract weight parameters
-  auto graph_and_parameters = lowering::Lower(mod, method_name);
+  auto graph_and_parameters = lowering::Lower(mod, method_name, cfg.lower_info);

   auto convert_cfg = std::move(cfg.convert_info);
   auto g = graph_and_parameters.first;
@@ -309,7 +309,7 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
     // Compile only forward methods. forward method contains the entire graph.
     if (method.name().compare("forward") == 0) {
       auto new_g = std::make_shared<torch::jit::Graph>();
-      auto graph_and_parameters = lowering::Lower(mod, method.name());
+      auto graph_and_parameters = lowering::Lower(mod, method.name(), cfg.lower_info);

       auto g = graph_and_parameters.first;
       auto params = graph_and_parameters.second;

core/compiler.h

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,7 @@
 #include <vector>
 #include "core/conversion/conversion.h"
 #include "core/ir/ir.h"
+#include "core/lowering/lowering.h"
 #include "core/partitioning/partitioning.h"
 #include "core/runtime/runtime.h"
 #include "torch/csrc/jit/api/module.h"
@@ -14,6 +15,7 @@ namespace core {
 struct CompileSpec {
   CompileSpec(std::vector<ir::Input> inputs) : convert_info(std::move(inputs)) {}
   conversion::ConversionInfo convert_info;
+  lowering::LowerInfo lower_info;
   partitioning::PartitionInfo partition_info;
 };


core/conversion/conversion_ignorelist.cpp

Lines changed: 0 additions & 1 deletion
@@ -16,7 +16,6 @@ const std::unordered_set<std::string>& get_non_convertable_nodes() {
       "aten::backward",
       "aten::save",
       "aten::contiguous",
-      "aten::to",
      "prim::RaiseException",
       "prim::Print",
       "prim::device",

core/conversion/conversionctx/ConversionCtx.cpp

Lines changed: 4 additions & 2 deletions
@@ -69,9 +69,11 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
     case nvinfer1::DataType::kINT8:
       TRTORCH_CHECK(builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
       cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
-      if (settings.calibrator == nullptr) {
+      if (!settings.calibrator) {
         LOG_INFO(
-            "INT8 kernels are enabled but not calibrator was provided, assuming source model was trained quantization aware");
+            "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
+      } else {
+        cfg->setInt8Calibrator(settings.calibrator);
       }
       break;
     case nvinfer1::DataType::kFLOAT:
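
With this change the builder config only attaches an INT8 calibrator when one is supplied; otherwise INT8 is enabled and the network is expected to carry explicit Q/DQ nodes from quantization-aware training. A minimal, TensorRT-only sketch of that decision (function name and error handling are illustrative, not part of this commit):

#include <stdexcept>
#include "NvInfer.h"

// Sketch: enable INT8 on a builder config, attaching a calibrator only when
// one is provided (post-training quantization). With no calibrator, dynamic
// ranges are assumed to come from Q/DQ nodes inserted during QAT.
void enable_int8(nvinfer1::IBuilder* builder,
                 nvinfer1::IBuilderConfig* cfg,
                 nvinfer1::IInt8Calibrator* calibrator /* may be nullptr */) {
  if (!builder->platformHasFastInt8()) {
    throw std::runtime_error("Platform does not support fast INT8");
  }
  cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
  if (calibrator) {
    cfg->setInt8Calibrator(calibrator); // PTQ path: ranges from calibration data
  }
  // else: QAT path, ranges come from the Q/DQ nodes in the graph itself
}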

core/conversion/converters/BUILD

Lines changed: 2 additions & 0 deletions
@@ -54,6 +54,7 @@ cc_library(
         "NodeConverterRegistry.cpp",
         "impl/activation.cpp",
         "impl/batch_norm.cpp",
+        "impl/cast.cpp",
         "impl/concat.cpp",
         "impl/constant.cpp",
         "impl/constant_pad.cpp",
@@ -68,6 +69,7 @@ cc_library(
         "impl/matrix_multiply.cpp",
         "impl/normalize.cpp",
         "impl/pooling.cpp",
+        "impl/quantization.cpp",
         "impl/reduce.cpp",
         "impl/replication_pad.cpp",
         "impl/select.cpp",

core/conversion/converters/converter_util.cpp

Lines changed: 9 additions & 3 deletions
@@ -142,7 +142,7 @@ nvinfer1::ITensor* castITensor(ConversionCtx* ctx, nvinfer1::ITensor* tensor, nv
   }
 }

-nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t) {
+nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t, const std::string& name) {
   bool post_freeze_cast = false;
   nvinfer1::DataType post_freeze_cast_type = nvinfer1::DataType::kFLOAT;
   // Other "unsupported weights types" can be added to this check here
@@ -175,9 +175,15 @@ nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t) {

   std::ostringstream tensor_id;
   tensor_id << reinterpret_cast<int*>(out);
+  std::string tensor_name;

-  LOG_DEBUG(ctx->logger, "Freezing tensor " << tensor_id.str() << " as an IConstantLayer");
-  const_layer->setName(("[Freeze Tensor " + tensor_id.str() + " ]").c_str());
+  if (!name.empty()) {
+    tensor_name = name;
+  } else {
+    tensor_name = tensor_id.str();
+  }
+  LOG_DEBUG(ctx->logger, "Freezing tensor " << tensor_name << " as an IConstantLayer");
+  const_layer->setName(("[Freeze Tensor " + tensor_name + " ]").c_str());

   if (post_freeze_cast) {
     out = castITensor(ctx, out, post_freeze_cast_type);

core/conversion/converters/converter_util.h

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ nvinfer1::ILayer* add_elementwise(
 nvinfer1::ITensor* castITensor(ConversionCtx* ctx, nvinfer1::ITensor* tensor, nvinfer1::DataType dtype);

 // Freeze an at::Tensor in a IConstant layer
-nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t);
+nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t, const std::string& name = std::string());

 } // namespace converters
 } // namespace conversion
core/conversion/converters/impl/cast.cpp

Lines changed: 63 additions & 0 deletions

@@ -0,0 +1,63 @@
+#include <torch/torch.h>
+#include "core/conversion/converters/converter_util.h"
+#include "core/conversion/converters/converters.h"
+#include "core/util/prelude.h"
+#include "core/util/trt_util.h"
+
+namespace trtorch {
+namespace core {
+namespace conversion {
+namespace converters {
+namespace impl {
+namespace {
+
+auto cast_registrations TRTORCH_UNUSED =
+    RegisterNodeConversionPatterns()
+        .pattern(
+            {"aten::to.dtype(Tensor self, int dtype, bool non_blocking=False, bool copy=False, int? memory_format=None) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto self = args[0].ITensorOrFreeze(ctx);
+               auto output_dtype = args[1].unwrapToScalar().to<int64_t>();
+               auto trt_dtype = util::ScalarTypeToTRTDataType(static_cast<at::ScalarType>(output_dtype));
+               auto casted_itensor = castITensor(ctx, self, trt_dtype);
+               auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
+               LOG_DEBUG("[aten::to.dtype] Output tensor shape: " << output->getDimensions());
+
+               return true;
+             }})
+        .pattern(
+            {"aten::to.other(Tensor self, Tensor other, bool non_blocking=False, bool copy=False, int? memory_format=None) -> (Tensor)",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto self = args[0].ITensorOrFreeze(ctx);
+               nvinfer1::DataType other_dtype = args[1].ITensorOrFreeze(ctx)->getType();
+               auto casted_itensor = castITensor(ctx, self, other_dtype);
+               auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
+               LOG_DEBUG("[aten::to.other] Output tensor shape: " << output->getDimensions());
+
+               return true;
+             }})
+        .pattern(
+            {"aten::to.prim_Device(Tensor(a) self, Device? device, int? dtype=None, bool non_blocking=False, bool copy=False) -> (Tensor(b|a))",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto self = args[0].ITensorOrFreeze(ctx);
+               if (args[2].isIValue() && !args[2].IValue()->isScalar()) {
+                 auto output = ctx->AssociateValueAndTensor(n->outputs()[0], self);
+                 LOG_DEBUG("[aten::to.prim_Device] Output tensor shape: " << output->getDimensions());
+                 return true;
+               }
+
+               auto output_dtype = args[2].unwrapToScalar().to<int64_t>();
+               auto trt_dtype = util::ScalarTypeToTRTDataType(static_cast<at::ScalarType>(output_dtype));
+               auto casted_itensor = castITensor(ctx, self, trt_dtype);
+               auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
+               LOG_DEBUG("[aten::to.prim_Device] Output tensor shape: " << output->getDimensions());
+
+               return true;
+             }});
+// clang-format on
+} // namespace
+} // namespace impl
+} // namespace converters
+} // namespace conversion
+} // namespace core
+} // namespace trtorch
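
The new cast converters lean on util::ScalarTypeToTRTDataType and castITensor (declared in converter_util.h). For orientation only, here is a plausible sketch of the scalar-type-to-TensorRT mapping such a helper performs; this is an illustration, not the TRTorch implementation:

#include <stdexcept>
#include <ATen/ATen.h>
#include "NvInfer.h"

// Hypothetical mapping from at::ScalarType to nvinfer1::DataType, of the kind
// util::ScalarTypeToTRTDataType is used for above.
inline nvinfer1::DataType scalar_type_to_trt(at::ScalarType t) {
  switch (t) {
    case at::kFloat: return nvinfer1::DataType::kFLOAT;
    case at::kHalf:  return nvinfer1::DataType::kHALF;
    case at::kInt:   return nvinfer1::DataType::kINT32;
    case at::kChar:  return nvinfer1::DataType::kINT8;
    case at::kBool:  return nvinfer1::DataType::kBOOL;
    default: throw std::runtime_error("Unsupported scalar type for TensorRT");
  }
}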

core/conversion/converters/impl/constant.cpp

Lines changed: 10 additions & 6 deletions
@@ -16,12 +16,16 @@ auto constant_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
               // used for Fundimentally this is because of the differing
               // philosophies between TensorRT and PyTorch, i.e. Variables contain
               // Tensors vs. just Tensors
-
-              auto t = args[0].unwrapToTensor();
-              auto const_out = ctx->AssociateValueAndTensor(n->outputs()[0], tensor_to_const(ctx, t));
-
-              LOG_DEBUG("Output tensor shape: " << const_out->getDimensions());
-
+              nvinfer1::ITensor* output;
+              if (args[0].isITensor()){
+                output = ctx->AssociateValueAndTensor(n->outputs()[0], args[0].ITensor());
+              } else{
+                auto t = args[0].unwrapToTensor();
+                auto const_out = tensor_to_const(ctx, t, util::node_info(n).c_str());
+                output = ctx->AssociateValueAndTensor(n->outputs()[0], const_out);
+              }
+              LOG_DEBUG("Output tensor shape: " << output->getDimensions());
+
               return true;
             }});
             // clang-format on

core/conversion/converters/impl/conv_deconv.cpp

Lines changed: 84 additions & 32 deletions
@@ -11,15 +11,97 @@ namespace impl {
 namespace {

 bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args) {
-  auto in = args[0].ITensor(); // assumes non-static input Tensor
-  auto w = Weights(ctx, args[1].unwrapToTensor());
+  // Input to conv/deconv
+  auto in = args[0].ITensor();
+
+  // Conv /deconv parameters
   auto stride = util::toDims(args[3].unwrapToIntList());
   auto padding = util::toDims(args[4].unwrapToIntList());
   auto dilation = util::toDims(args[5].unwrapToIntList());
   bool transposed = args[6].unwrapToBool();
   auto out_padding = util::toDims(args[7].unwrapToIntList());
   int64_t groups = args[8].unwrapToInt();

+  // Reshape the parameters to 2D if needed
+  if (stride.nbDims == 1) {
+    stride = util::unsqueezeDims(stride, 1, 1);
+    LOG_DEBUG("Reshaped stride: " << stride);
+  }
+  if (dilation.nbDims == 1) {
+    dilation = util::unsqueezeDims(dilation, 1, 1);
+    LOG_DEBUG("Reshaped dilation: " << dilation);
+  }
+  if (padding.nbDims == 1) {
+    padding = util::unsqueezeDims(padding, 1, 0);
+    LOG_DEBUG("Reshaped padding: " << padding);
+  }
+  if (out_padding.nbDims == 1) {
+    out_padding = util::unsqueezeDims(out_padding, 1, 0);
+    LOG_DEBUG("Reshaped out_padding: " << out_padding);
+  }
+
+  // Get bias tensor or initialize it to zeros.
+  Weights bias;
+  if (args[2].IValue()->isTensor()) {
+    bias = Weights(ctx, args[2].unwrapToTensor());
+  } else {
+    bias = Weights();
+  }
+
+  // Handle case when weights of conv/deconv is an ITensor. This case happens for QAT networks where
+  // conv_weights -> Quantize -> Dequantize -> new_conv_weights -> conv <- input
+  // new_conv_weights will be an ITensor because it is an output of Dequantize layer defined in impl/quantization.cpp
+  if (args[1].isITensor()) {
+    // Get the kernel tensor
+    auto kernel = args[1].ITensor();
+    auto kernel_dims = kernel->getDimensions();
+
+    // Make a new Dims with only the spatial dimensions.
+    nvinfer1::Dims filter_dim;
+    int64_t nbSpatialDims = in->getDimensions().nbDims - 2;
+    TRTORCH_CHECK(
+        nbSpatialDims = kernel_dims.nbDims - 2,
+        "Number of input spatial dimensions should match the kernel spatial dimensions");
+    filter_dim.nbDims = nbSpatialDims;
+    filter_dim.d[0] = kernel_dims.d[2];
+    filter_dim.d[1] = kernel_dims.d[3];
+
+    // Initialize a dummy constant kernel to pass it to INetwork->addConvolutionNd/addDeconvolutionNd API.
+    auto kernel_weights = nvinfer1::Weights{nvinfer1::DataType::kFLOAT, nullptr, 0};
+
+    nvinfer1::ILayer* layer = nullptr;
+    if (transposed) {
+      nvinfer1::IDeconvolutionLayer* deconvLayer =
+          ctx->net->addDeconvolutionNd(*in, kernel_dims.d[0], filter_dim, kernel_weights, bias.data);
+      deconvLayer->setStrideNd(stride);
+      deconvLayer->setDilationNd(dilation);
+      deconvLayer->setNbGroups(groups);
+      deconvLayer->setPaddingNd(padding);
+      // Set deconv kernel weights
+      deconvLayer->setInput(1, *kernel);
+      TRTORCH_CHECK(deconvLayer, "Unable to create deconv layer with non-const weights from node: " << *n);
+      layer = deconvLayer;
+    } else {
+      nvinfer1::IConvolutionLayer* convLayer =
+          ctx->net->addConvolutionNd(*in, kernel_dims.d[0], filter_dim, kernel_weights, bias.data);
+      convLayer->setStrideNd(stride);
+      convLayer->setPaddingMode(nvinfer1::PaddingMode::kCAFFE_ROUND_DOWN);
+      convLayer->setPaddingNd(padding);
+      convLayer->setPostPadding(out_padding);
+      convLayer->setDilationNd(dilation);
+      convLayer->setNbGroups(groups);
+
+      // Set conv kernel weights
+      convLayer->setInput(1, *kernel);
+      layer = convLayer;
+    }
+
+    ctx->AssociateValueAndTensor(n->outputs()[0], layer->getOutput(0));
+    LOG_DEBUG("Output tensor shape: " << layer->getOutput(0)->getDimensions());
+    return true;
+  }
+
+  auto w = Weights(ctx, args[1].unwrapToTensor());
   auto dims = in->getDimensions();
   auto orig_dims = dims;
   LOG_DEBUG("Input dims: " << orig_dims);
@@ -47,32 +129,9 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
     w.kernel_shape.d[1] = 1;
     LOG_DEBUG("Reshaped Weights: " << w);
   }
-  if (stride.nbDims == 1) {
-    stride = util::unsqueezeDims(stride, 1, 1);
-    LOG_DEBUG("Reshaped stride: " << stride);
-  }
-  if (dilation.nbDims == 1) {
-    dilation = util::unsqueezeDims(dilation, 1, 1);
-    LOG_DEBUG("Reshaped dilation: " << dilation);
-  }
-  if (padding.nbDims == 1) {
-    padding = util::unsqueezeDims(padding, 1, 0);
-    LOG_DEBUG("Reshaped padding: " << padding);
-  }
-  if (out_padding.nbDims == 1) {
-    out_padding = util::unsqueezeDims(out_padding, 1, 0);
-    LOG_DEBUG("Reshaped out_padding: " << out_padding);
-  }

   nvinfer1::ILayer* new_layer;
   if (transposed) {
-    Weights bias;
-    if (args[2].IValue()->isTensor()) {
-      bias = Weights(ctx, args[2].unwrapToTensor());
-    } else {
-      bias = Weights(ctx, torch::zeros(w.shape.d[1] * groups));
-    }
-
     // shape of deconvolution's weight: [in, out/groups, ...]
     auto deconv = ctx->net->addDeconvolutionNd(*in, w.shape.d[1] * groups, w.kernel_shape, w.data, bias.data);
     TRTORCH_CHECK(deconv, "Unable to create deconvolution layer from node: " << *n);
@@ -90,13 +149,6 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
 #endif
     new_layer = deconv;
   } else {
-    Weights bias;
-    if (args[2].IValue()->isTensor()) {
-      bias = Weights(ctx, args[2].unwrapToTensor());
-    } else {
-      bias = Weights(ctx, torch::zeros(w.shape.d[0]));
-    }
-
     // shape of convolution's weight: [out, in/groups, ...]
     auto conv = ctx->net->addConvolutionNd(*in, w.shape.d[0], w.kernel_shape, w.data, bias.data);
     TRTORCH_CHECK(conv, "Unable to create convolution layer from node: " << *n);
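
The new ITensor-weight branch above uses a standard TensorRT idiom: build the convolution with empty placeholder kernel Weights, then bind the dequantized weight tensor as the layer's second input. A stripped-down sketch of that idiom (function and parameter names are illustrative, not from this commit):

#include "NvInfer.h"

// Sketch: convolution whose kernel comes from the network (e.g. the output of
// a dequantize layer) rather than from host memory. Placeholder Weights are
// passed to addConvolutionNd, then the kernel ITensor is bound via setInput(1, ...).
nvinfer1::IConvolutionLayer* conv_with_tensor_weights(
    nvinfer1::INetworkDefinition* net,
    nvinfer1::ITensor& input,
    nvinfer1::ITensor& kernel,   // e.g. output of a dequantize layer
    int32_t out_channels,
    nvinfer1::Dims filter_dims) {
  nvinfer1::Weights empty{nvinfer1::DataType::kFLOAT, nullptr, 0};
  auto* conv = net->addConvolutionNd(input, out_channels, filter_dims, empty, empty);
  if (conv) {
    conv->setInput(1, kernel); // kernel weights now flow in as a graph tensor
  }
  return conv;
}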

core/conversion/converters/impl/matrix_multiply.cpp

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ auto mm_registrations TRTORCH_UNUSED =

               auto mm_layer = ctx->net->addMatrixMultiply(
                   *self, nvinfer1::MatrixOperation::kNONE, *other, nvinfer1::MatrixOperation::kNONE);
+
               TRTORCH_CHECK(mm_layer, "Unable to create matrix multiplication node: " << *n);
               mm_layer->setName(util::node_info(n).c_str());
               auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], mm_layer->getOutput(0));
