Skip to content

Commit 339919d

Browse files
committed
chore: Fix lowering, comment CSE pass
Signed-off-by: Dheeraj Peri <[email protected]>
1 parent 5df502a commit 339919d

File tree

5 files changed

+38
-15
lines changed

(duplicate of the file-change summary above removed)

core/conversion/conversionctx/ConversionCtx.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,10 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
7070
cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
7171
}
7272
input_type = nvinfer1::DataType::kFLOAT;
73-
// TRTORCH_CHECK(
74-
// settings.calibrator != nullptr,
75-
// "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the CompileSpec
76-
// struct with your calibrator");
77-
// cfg->setInt8Calibrator(settings.calibrator);
73+
// Networks trained with Quantization aware training approach don't need a calibrator as they have Q/DQ nodes.
74+
if (!settings.calibrator){
75+
LOG_WARNING("Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
76+
}
7877
break;
7978
case nvinfer1::DataType::kFLOAT:
8079
default:

core/conversion/converters/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ cc_library(
4747
"impl/matrix_multiply.cpp",
4848
"impl/normalize.cpp",
4949
"impl/pooling.cpp",
50+
"impl/quantization.cpp",
5051
"impl/reduce.cpp",
5152
"impl/replication_pad.cpp",
5253
"impl/select.cpp",

core/conversion/converters/impl/linear.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,27 @@ auto linear_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns().patt
6464
LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
6565
return true;
6666
}
67+
68+
auto w_tensor = args[1].IValue()->toTensor();
69+
Weights w = Weights(ctx, w_tensor);
70+
71+
nvinfer1::ILayer* new_layer;
72+
if (!args[2].IValue()->isNone()) {
73+
Weights b(ctx, args[2].IValue()->toTensor());
74+
new_layer = ctx->net->addFullyConnected(*in, w.num_output_maps, w.data, b.data);
75+
} else {
76+
LOG_DEBUG("There is no bias for the linear layer");
77+
new_layer = ctx->net->addFullyConnected(*in, w.num_output_maps, w.data, Weights().data);
78+
}
79+
80+
TRTORCH_CHECK(new_layer, "Unable to create linear layer from node: " << *n);
81+
82+
new_layer->setName(util::node_info(n).c_str());
83+
auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
84+
85+
LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
86+
87+
return true;
6788
}});
6889
} // namespace
6990
} // namespace impl

core/lowering/lowering.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
#include "core/lowering/lowering.h"
2-
#include <torch/csrc/jit/passes/inliner.h>
3-
#include "core/lowering/passes/passes.h"
4-
#include "core/util/prelude.h"
51
#include "torch/csrc/jit/passes/common_subexpression_elimination.h"
62
#include "torch/csrc/jit/passes/create_functional_graphs.h"
73
#include "torch/csrc/jit/passes/dead_code_elimination.h"
@@ -14,6 +10,10 @@
1410
#include "torch/csrc/jit/passes/peephole.h"
1511
#include "torch/csrc/jit/passes/remove_mutation.h"
1612

13+
#include "core/lowering/lowering.h"
14+
#include "core/lowering/passes/passes.h"
15+
#include "core/util/prelude.h"
16+
1717
namespace trtorch {
1818
namespace core {
1919
namespace lowering {
@@ -42,9 +42,10 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g) {
4242
passes::Conv3DToConvolution(g);
4343
passes::FuseAddMMBranches(g);
4444
passes::RemoveBNDimCheck(g);
45-
torch::jit::EliminateCommonSubexpression(g);
45+
LOG_INFO("====PRE CSE =====" << *g);
46+
// torch::jit::EliminateCommonSubexpression(g);
47+
LOG_INFO("====POST CSE =====" << *g);
4648
// torch::jit::UnrollLoops(g);
47-
torch::jit::EliminateCommonSubexpression(g);
4849
passes::UnpackAddMM(g);
4950
// passes::UnpackBatchNorm(g);
5051
passes::UnpackLogSoftmax(g);
@@ -65,18 +66,17 @@ std::pair<std::shared_ptr<torch::jit::Graph>, std::vector<torch::jit::IValue>> L
6566
std::string method_name) {
6667
auto lowered_mod = mod; // LowerModule(mod);
6768
auto g = lowered_mod.get_method(method_name).graph();
68-
Inline(*g);
69-
LOG_INFO("========INLINING : " << *g);
69+
LOG_GRAPH(*g);
7070

7171
// Go through TRTorch Lowering to reformat graph to be conversion friendly
7272
// and also segment for accelerators and executors (TRT-DLA, TRT-GPU, PYT)
7373
LOG_GRAPH("TRTorch Graph Lowering");
74-
lowering::LowerGraph(g);
74+
// lowering::LowerGraph(g);
7575

7676
LOG_GRAPH("LibTorch Lowering");
7777
auto graph_and_ivalues = torch::jit::LowerGraph(*g, lowered_mod._ivalue());
78+
lowering::LowerGraph(graph_and_ivalues.first);
7879
// Is this necessary?
79-
8080
lowering::LowerBlock(g->block());
8181

8282
return graph_and_ivalues;

core/util/jit_util.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ inline std::string node_info(const torch::jit::Node* n) {
1313
std::stringstream ss;
1414
ss << *n;
1515
std::string node_info = ss.str();
16+
// Nodes in torchscript graph have file name and line numbers commented for every node. Remove that when returning a node name for easier readability.
17+
node_info = node_info.substr(0, node_info.find("#", 0));
1618
node_info.erase(std::remove(node_info.begin(), node_info.end(), '\n'), node_info.end());
1719
return node_info;
1820
}

0 commit comments

Comments
 (0)