Commit dbbf5cc

feat: [collection] support user defined input data type

Signed-off-by: inocsin <[email protected]>
1 parent fdf81c2 commit dbbf5cc

File tree

7 files changed: +51 −55 lines changed
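Taken together, these changes let callers pin the dtype of each element of a collection input instead of leaving it as kUnknown and relying on graph inference. A minimal sketch of the tuple-input flow, assembled from the updated tests/cpp/test_collection.cpp below; the model path, the Half-precision settings, and the IValue-taking CompileSpec constructor are assumptions carried over from this branch and may differ elsewhere:

// Sketch: compile a TorchScript module whose forward takes a tuple of two
// tensors, pinning each element's input dtype to Half instead of kUnknown.
// Model path and precision settings mirror tests/cpp/test_collection.cpp.
torch::jit::Module mod = torch::jit::load("/root/Torch-TensorRT/tuple_input.ts");
mod.eval();
mod.to(torch::kCUDA);

torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);

// Per-element input spec with a user defined dtype
auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf);
auto input_shape_ivalue = torch::jit::IValue(c10::make_intrusive<torch_tensorrt::Input>(input_shape));

// Nest the specs the same way the model nests its inputs: ((in0, in0),)
std::tuple<torch::jit::IValue, torch::jit::IValue> input_shape_tuple(input_shape_ivalue, input_shape_ivalue);
torch::jit::IValue complex_input_shape(input_shape_tuple);
std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
torch::jit::IValue complex_input_shape2(input_tuple2);

// Assumed: CompileSpec constructor taking the nested input-signature IValue
auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
compile_settings.min_block_size = 1;
compile_settings.enabled_precisions = {torch::kHalf};
auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);

When the graph itself cannot establish an element's dtype, core/compiler.cpp now warns and seeds the first-use type map with this user setting instead of failing verification, as the first diff below shows.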

core/compiler.cpp

Lines changed: 14 additions & 2 deletions

@@ -331,10 +331,22 @@ void MapInputsAndDetermineDTypes(
       spec[i].dtype = nvinfer1::DataType::kFLOAT;
     } else if (spec[i].dtype_is_user_defined && cfg.partition_info.enabled) {
       if (!est_type_opt[i]) {
-        LOG_INFO("Cannot infer input tensor dtype in graph, unable to verify user input dtype settings");
+        LOG_INFO("Cannot infer input tensor dtype in graph, compiler is going to use the user setting");
+        // TODO set input data type
+
+        std::stringstream ss;
+        ss << "For input " << in->debugName() << ", found user specified input dtype as ";
+        ss << cfg.convert_info.collection_inputs.find(in)->second[i].dtype;
+        // ss << cfg.convert_info.inputs.find(in)->second.dtype;
+        ss << ". The compiler is going to use the user setting " << cfg.convert_info.collection_inputs.find(in)->second[i].dtype;
+        auto warn_str = ss.str();
+        LOG_WARNING(warn_str);
+        // Overwrite type map with user settings
+        first_use_type_map[in][i] = {util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype)};
+
       } else {
         // if (util::TRTDataTypeToScalarType(cfg.convert_info.inputs.find(in)->second.dtype) != est_type_opt.value()) {
-        if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype) != est_type_opt[i].value()) {
+        if (util::TRTDataTypeToScalarType(cfg.convert_info.collection_inputs.find(in)->second[i].dtype) != est_type_opt[i].value()) {
           std::stringstream ss;
           ss << "For input " << in->debugName() << ", found user specified input dtype as ";
           ss << cfg.convert_info.collection_inputs.find(in)->second[i].dtype;

core/ir/ir.cpp

Lines changed: 10 additions & 4 deletions

@@ -246,19 +246,25 @@ CollectionTypeMap get_block_first_calc_dtypes_opt_collection(torch::jit::Block*
       LOG_DEBUG("get_block_first_calc_dtypes_opt_collection TupleType");
       // TODO: to evaluate the data type of tuple element
       // make sure very time get the same ptr
+      c10::optional<at::ScalarType> tp = get_value_first_calc_dtype_opt(b, i);
       at::ArrayRef<torch::jit::Value*> unpack_tuple = torch::jit::createTupleUnpack(i);
       LOG_DEBUG("get_block_first_calc_dtypes_opt_collection: tuple size " << unpack_tuple.size());
-      std::vector<c10::optional<at::ScalarType>> empty_dytpes(unpack_tuple.size());
-      types.insert({i, empty_dytpes}); // insert an empty
+      // Assume all tuple has the same datatype
+      // std::vector<c10::optional<at::ScalarType>> dytpes(unpack_tuple.size(), tp);
+      std::vector<c10::optional<at::ScalarType>> dytpes(unpack_tuple.size());
+      types.insert({i, dytpes}); // insert an empty
       // for (auto item: unpack_tuple) {
       //   torch::jit::Value* in = item;
       //   types.insert({in, get_value_first_calc_dtype_opt(b, i)});
       // }

     } else if(i->type()->kind() == torch::jit::TypeKind::ListType) {
       // TODO: to decide the size of list and type of list element
-      LOG_DEBUG("get_block_first_calc_dtypes_opt ListType");
-      types.insert({i, {}}); // insert an empty
+      LOG_DEBUG("get_block_first_calc_dtypes_opt ListType: use size " << i->uses().size());
+      c10::optional<at::ScalarType> tp = get_value_first_calc_dtype_opt(b, i);
+      // std::vector<c10::optional<at::ScalarType>> dytpes(i->uses().size());
+      std::vector<c10::optional<at::ScalarType>> dytpes(i->uses().size(), tp);
+      types.insert({i, dytpes}); // insert an empty

     }
   }

core/ir/ir.h

Lines changed: 1 addition & 1 deletion

@@ -52,7 +52,7 @@ struct GraphInputs {
   //   // TODO construct the IValue
   // }
   torch::jit::IValue input_signature; // nested Input, full input spec
-  std::vector<Input> flattened_inputs; // flattend Input, can be removed
+  std::vector<Input> flattened_inputs; // flattend Input
   std::vector<std::vector<Input>> collection_inputs; // only support two layer nesting, e.g. ((a, b), [c, d], e)
 };


core/partitioning/shape_analysis.cpp

Lines changed: 3 additions & 2 deletions

@@ -67,8 +67,9 @@ std::unordered_map<const torch::jit::Value*, torch::jit::IValue> generateRandomI
         // types for list is {}
         // auto in = generateSingleInput(input.second[i], types[input.first][i]);
         // TODO: need to decide the input type of list elements in ir.cpp
-        c10::optional<at::ScalarType> type_opt = {};
-        auto in = generateSingleInput(input.second[i], type_opt);
+        // c10::optional<at::ScalarType> type_opt = {};
+        // auto in = generateSingleInput(input.second[i], type_opt);
+        auto in = generateSingleInput(input.second[i], types[input.first][i]);
         // list.push_back(in.clone());
         generic_list.push_back(in.clone());
         LOG_DEBUG("generateRandomInputs, 1");

cpp/src/compile_spec.cpp

Lines changed: 1 addition & 19 deletions

@@ -90,25 +90,6 @@ void flatten_dfs(std::vector<torchtrt::core::ir::Input>& flattened_inputs, std::
 torch_tensorrt::core::ir::GraphInputs to_internal_graph_inputs(GraphInputs external_graph_input) {
   torch_tensorrt::core::ir::GraphInputs internal_graph_input;

-  // // flattened version
-  // if (external_graph_input.flattened_inputs.size() > 0) {
-  //   // std::vector<torch::jit::IValue> input_shape_list;
-  //   auto empty_ivalue = torch::jit::IValue(c10::make_intrusive<torchtrt::core::ir::Input>(torchtrt::core::ir::Input()));
-  //   c10::TypePtr type = empty_ivalue.type();
-  //   auto input_shape_list = c10::impl::GenericList(type);
-  //   std::vector<torchtrt::core::ir::Input> internal_input = to_vec_internal_inputs(external_graph_input.flattened_inputs);
-  //   for (auto input_shape: internal_input) {
-  //     auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torchtrt::core::ir::Input>(input_shape)));
-  //     input_shape_list.push_back(input_shape_ivalue);
-  //   }
-
-  //   torch::jit::IValue input_signature(input_shape_list);
-  //   internal_graph_input.flattened_inputs = internal_input;
-  //   internal_graph_input.input_signature = input_signature;
-
-  // }
-  // // nested version
-  // else {
   std::vector<torchtrt::core::ir::Input> flattened_inputs;
   std::vector<std::vector<torchtrt::core::ir::Input>> collection_inputs;

@@ -134,6 +115,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) {
     internal.graph_inputs.collection_inputs.resize(internal.inputs.size());
     for (int i = 0; i < internal.inputs.size(); i++) {
       internal.graph_inputs.collection_inputs[i].push_back(internal.inputs[i]);
+      internal.graph_inputs.flattened_inputs = internal.inputs;
     }
   }


cpp/src/torch_tensorrt.cpp

Lines changed: 0 additions & 1 deletion

@@ -30,7 +30,6 @@ torch::jit::script::Module compile(const torch::jit::script::Module& module, Com
   LOG_DEBUG(get_build_info());
   // Want to export a much simpler (non TRT header dependent) API so doing the
   // type conversion here
-  printf("in torch_tensorrt::ts::compile\n");
   return torch_tensorrt::core::CompileGraph(module, to_internal_compile_spec(info));
 }


tests/cpp/test_collection.cpp

Lines changed: 22 additions & 26 deletions

@@ -10,9 +10,10 @@ TEST(CppAPITests, TestCollectionTupleInput) {

   std::string path =
       "/root/Torch-TensorRT/tuple_input.ts";
-  torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat);
-  std::vector<at::Tensor> inputs;
-  inputs.push_back(in0);
+  // torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat);
+  torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
+  // std::vector<at::Tensor> inputs;
+  // inputs.push_back(in0);

   torch::jit::Module mod;
   try {
@@ -23,13 +24,13 @@ TEST(CppAPITests, TestCollectionTupleInput) {
   }
   mod.eval();
   mod.to(torch::kCUDA);
-

-  std::vector<torch::jit::IValue> inputs_;

-  for (auto in : inputs) {
-    inputs_.push_back(torch::jit::IValue(in.clone()));
-  }
+  // std::vector<torch::jit::IValue> inputs_;
+
+  // for (auto in : inputs) {
+  //   inputs_.push_back(torch::jit::IValue(in.clone()));
+  // }


   std::vector<torch::jit::IValue> complex_inputs, complex_inputs_list;
@@ -42,16 +43,12 @@ TEST(CppAPITests, TestCollectionTupleInput) {
   // torch::jit::IValue input_list_ivalue = torch::jit::IValue(input_list);

   complex_inputs.push_back(input_tuple);
-  // complex_inputs_list.push_back(in0);
-  // complex_inputs_list.push_back(in0);
-
-

   auto out = mod.forward(complex_inputs);
   LOG_DEBUG("Finish torchscirpt forward");

-
-  auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown);
+  // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown);
+  auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf);

   auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));

@@ -63,7 +60,6 @@ TEST(CppAPITests, TestCollectionTupleInput) {

   std::tuple<torch::jit::IValue, torch::jit::IValue> input_shape_tuple(input_shape_ivalue, input_shape_ivalue);

-
   torch::jit::IValue complex_input_shape(input_shape_tuple);
   std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
   torch::jit::IValue complex_input_shape2(input_tuple2);
@@ -74,13 +70,12 @@ TEST(CppAPITests, TestCollectionTupleInput) {
   compile_settings.min_block_size = 1;

   // // FP16 execution
-  // compile_settings.enabled_precisions = {torch::kHalf};
+  compile_settings.enabled_precisions = {torch::kHalf};
   // // Compile module
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   LOG_DEBUG("Finish compile");
   auto trt_out = trt_mod.forward(complex_inputs);
-  // auto trt_out = trt_mod.forward(complex_inputs_list);
-
+  // std::cout << out.toTensor() << std::endl;

   ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5));
 }
@@ -90,7 +85,7 @@ TEST(CppAPITests, TestCollectionNormalInput) {

   std::string path =
       "/root/Torch-TensorRT/normal_model.ts";
-  torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat);
+  torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
   std::vector<at::Tensor> inputs;
   inputs.push_back(in0);
   inputs.push_back(in0);
@@ -116,14 +111,14 @@ TEST(CppAPITests, TestCollectionNormalInput) {
   LOG_DEBUG("Finish torchscirpt forward");

   std::vector<torch_tensorrt::Input> input_range;
-  input_range.push_back({in0.sizes(), torch::kF32});
-  input_range.push_back({in0.sizes(), torch::kF32});
+  input_range.push_back({in0.sizes(), torch::kF16});
+  input_range.push_back({in0.sizes(), torch::kF16});
   torch_tensorrt::ts::CompileSpec compile_settings(input_range);
   compile_settings.require_full_compilation = true;
   compile_settings.min_block_size = 1;

   // // FP16 execution
-  // compile_settings.enabled_precisions = {torch::kHalf};
+  compile_settings.enabled_precisions = {torch::kHalf};
   // // Compile module
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   LOG_DEBUG("Finish compile");
@@ -138,7 +133,7 @@ TEST(CppAPITests, TestCollectionListInput) {

   std::string path =
       "/root/Torch-TensorRT/list_input.ts";
-  torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kFloat);
+  torch::Tensor in0 = torch::randn({1, 3, 512, 512}, torch::kCUDA).to(torch::kHalf);
   std::vector<at::Tensor> inputs;
   inputs.push_back(in0);

@@ -173,7 +168,8 @@ TEST(CppAPITests, TestCollectionListInput) {
   LOG_DEBUG("Finish torchscirpt forward");


-  auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown);
+  // auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kUnknown);
+  auto input_shape = torch_tensorrt::Input(in0.sizes(), torch_tensorrt::DataType::kHalf);

   auto input_shape_ivalue = torch::jit::IValue(std::move(c10::make_intrusive<torch_tensorrt::Input>(input_shape)));

@@ -194,13 +190,13 @@ TEST(CppAPITests, TestCollectionListInput) {
   compile_settings.torch_executed_ops.push_back("aten::__getitem__");

   // // FP16 execution
-  // compile_settings.enabled_precisions = {torch::kHalf};
+  compile_settings.enabled_precisions = {torch::kHalf};
   // // Compile module
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   LOG_DEBUG("Finish compile");
   auto trt_out = trt_mod.forward(complex_inputs);
   // auto trt_out = trt_mod.forward(complex_inputs_list);

-
+  // std::cout << out.toTensor() << std::endl;
   ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(out.toTensor(), trt_out.toTensor(), 1e-5));
 }
