fix: fix bugs in aten::to

bowang007 · bowang007 · commit 2ecd18798c3a · 2022-04-01T16:48:14.000-07:00
Signed-off-by: Bo Wang <bowa@nvidia.com>
diff --git a/core/conversion/converters/impl/cast.cpp b/core/conversion/converters/impl/cast.cpp
@@ -26,6 +26,23 @@ auto cast_registrations TORCHTRT_UNUSED =
                return true;
              }})
         .pattern(
+            {"aten::to.device(Tensor(a) self, Device device, int dtype, bool non_blocking=False, bool copy=False, int? memory_format=None) -> (Tensor(a))",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               // This converter does essentially the same thing as the previous one. However, we cannot lower this
+               // signature to the previous one, because doing so causes device issues when the TorchScript module is
+               // run in the later shape analysis phase of fallback.
+               auto self = args[0].ITensorOrFreeze(ctx);
+               auto output_dtype = args[2].unwrapToScalar().to<int64_t>();
+
+               auto trt_dtype = util::ScalarTypeToTRTDataType(static_cast<at::ScalarType>(output_dtype));
+
+               auto casted_itensor = castITensor(ctx, self, trt_dtype);
+               auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
+               LOG_DEBUG("[aten::to.device] Output tensor shape: " << output->getDimensions());
+
+               return true;
+             }})
+        .pattern(
             {"aten::to.other(Tensor self, Tensor other, bool non_blocking=False, bool copy=False, int? memory_format=None) -> (Tensor)",
              [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
                auto self = args[0].ITensorOrFreeze(ctx);
diff --git a/core/lowering/passes/reduce_to.cpp b/core/lowering/passes/reduce_to.cpp
@@ -8,22 +8,14 @@ namespace lowering {
 namespace passes {
 
 void ReduceToOperation(std::shared_ptr<torch::jit::Graph>& graph) {
-  std::string to_device_pattern = R"IR(
-        graph(%x, %device, %dtype, %nb, %copy, %format):
-            %out : Tensor = aten::to(%x, %device, %dtype, %nb, %copy, %format)
-            return (%out))IR";
-  std::string to_dtype_pattern = R"IR(
-        graph(%x, %device, %dtype, %nb, %copy, %format):
-            %out : Tensor = aten::to(%x, %dtype, %nb, %copy, %format)
-            return (%out))IR";
   std::string to_dtype_layout_pattern = R"IR(
-        graph(%x, %device, %dtype, %layout, %pm, %nb, %copy, %format):
-            %out : Tensor = aten::to(%x, %device, %dtype, %layout, %pm, %nb, %copy, %format)
+        graph(%x, %dtype, %layout, %device, %pm, %nb, %copy, %format):
+            %out : Tensor = aten::to(%x, %dtype, %layout, %device, %pm, %nb, %copy, %format)
             return (%out))IR";
 
   std::string to_dtype_multi_input_pattern = R"IR(
-        graph(%x, %device, %dtype, %layout, %pm, %nb, %copy, %format):
-            %out : Tensor = aten::to(%x, %dtype, %nb, %copy, %format)
+        graph(%x, %dtype, %layout, %device, %pm, %nb, %copy, %format):
+            %out : Tensor = aten::to(%x, %device, %dtype, %nb, %copy, %format)
             return (%out))IR";
 
   std::string to_type_as_pattern = R"IR(
@@ -38,11 +30,6 @@ void ReduceToOperation(std::shared_ptr<torch::jit::Graph>& graph) {
             %out : Tensor = aten::to(%input, %other, %5, %5, %6)
             return (%out))IR";
 
-  // replace aten::to.device with aten::to.dtype
-  torch::jit::SubgraphRewriter map_aten_device_to_dtype;
-  map_aten_device_to_dtype.RegisterRewritePattern(to_device_pattern, to_dtype_pattern);
-  map_aten_device_to_dtype.runOnGraph(graph);
-
   // replace aten::to.dtype_layout with aten::to.dtype
   torch::jit::SubgraphRewriter map_aten_dtype_layout;
   map_aten_dtype_layout.RegisterRewritePattern(to_dtype_layout_pattern, to_dtype_multi_input_pattern);