pytorch
diff --git a/‎.ci/docker/ci_commit_pins/nightly.txt‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/nightly.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.gitmodules‎
Lines changed: 3 additions & 0 deletions b/‎.gitmodules‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 15 additions & 7 deletions b/‎CMakeLists.txt‎
Lines changed: 15 additions & 7 deletions
diff --git a/‎backends/arm/arm_backend.py‎
Lines changed: 122 additions & 24 deletions b/‎backends/arm/arm_backend.py‎
Lines changed: 122 additions & 24 deletions
diff --git a/‎backends/arm/test/test_models.py‎
Lines changed: 4 additions & 5 deletions b/‎backends/arm/test/test_models.py‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎backends/arm/tosa_quant_utils.py‎
Lines changed: 10 additions & 11 deletions b/‎backends/arm/tosa_quant_utils.py‎
Lines changed: 10 additions & 11 deletions
diff --git a/‎backends/xnnpack/runtime/XNNExecutor.cpp‎
Lines changed: 11 additions & 0 deletions b/‎backends/xnnpack/runtime/XNNExecutor.cpp‎
Lines changed: 11 additions & 0 deletions
@@ -1 +1 @@
-dev20230929
+dev20231002
@@ -31,3 +31,6 @@
 [submodule "backends/arm/third-party/serialization_lib"]
 	path = backends/arm/third-party/serialization_lib
 	url = https://git.mlplatform.org/tosa/serialization_lib.git
+[submodule "third-party/flatcc"]
+	path = third-party/flatcc
+	url = https://github.com/dvidelabs/flatcc.git
@@ -54,15 +54,15 @@ if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE Debug)
 endif()
 
-# https://cmake.org/cmake/help/latest/command/add_definitions.html
-# Adds definitions to the compiler command line for
-# - targets in the current directory, before and after this command is invoked
-# - targets in sub-directories added after this command is invoked
+# https://cmake.org/cmake/help/latest/command/add_definitions.html Adds
+# definitions to the compiler command line for (a) targets in the current
+# directory, before and after this command is invoked, and (b) targets in
+# sub-directories added after this command is invoked.
 if(CMAKE_BUILD_TYPE STREQUAL "Release")
   # Avoid pulling in the logging strings, which can be large.
   add_definitions(-DET_LOG_ENABLED=0)
-  # Avoid pulling in the flatbuffer data verification
-  # logic, which can add about 20kB.
+  # Avoid pulling in the flatbuffer data verification logic, which can add about
+  # 20kB.
   add_definitions(-DET_ENABLE_PROGRAM_VERIFICATION=0)
 endif()
 
@@ -109,6 +109,9 @@ endif()
 option(EXECUTORCH_BUILD_XNNPACK
        "Build xnn_executor_runner which depends on XNNPACK" OFF)
 
+option(EXECUTORCH_BUILD_SDK
+       "Build the ExecuTorch SDK library and the SDK example runner." OFF)
+
 if(NOT BUCK2)
   set(BUCK2 buck2)
 endif()
@@ -282,7 +285,7 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   endif()
 
   add_executable(executor_runner ${_executor_runner__srcs})
-  if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
+  if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT APPLE)
     target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")
   endif()
   target_link_libraries(executor_runner ${_executor_runner_libs})
@@ -309,5 +312,10 @@ endif()
 if(EXECUTORCH_BUILD_SIZE_TEST)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
 endif()
+
+if(EXECUTORCH_BUILD_SDK)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
+endif()
+
 # Print all summary
 executorch_print_configuration_summary()
@@ -217,6 +217,12 @@ def getNodeArgs(node):
     return [tosa_mapping.TosaArg(arg) for arg in node.args]
 
 
+def getQuantNodeArgs(node):
+    """Return the (scale, zp) numbers taken from args 1 and 2 of `node`."""
+    node_args = getNodeArgs(node)
+    return node_args[1].number, node_args[2].number
+
+
 @final
 class ArmBackend(BackendDetails):
     @staticmethod
@@ -253,6 +259,7 @@ def preprocess(  # noqa: C901
                 outp = tosa_mapping.TosaArg(node)
 
                 is_quant_node = tosa_quant_utils.isQuantNode(node)
+
                 if is_quant_node:
                     tosa_fb.currRegion.currBasicBlock.addTensor(
                         outp.name, outp.shape, ts.DType.INT8
@@ -345,13 +352,17 @@ def preprocess(  # noqa: C901
                 elif exir_ops.edge.aten.addmm.default == node.target:
                     bias, input, weight = inputs
 
+                    output_dtype = ts.DType.INT8 if is_quant_node else outp.dtype
+
                     # Reshape input, weight, bias tensors
                     input_reshape_res = promote_shape(
-                        tosa_fb, input, (1,) + input.shape, outp.dtype
+                        tosa_fb, input, (1,) + input.shape, output_dtype
                     )
                     weight_reshape_res = promote_shape(
-                        tosa_fb, weight, (1,) + weight.shape, outp.dtype
+                        tosa_fb, weight, (1,) + weight.shape, output_dtype
                     )
+
+                    bias_dtype = ts.DType.INT32 if is_quant_node else outp.dtype
                     bias_reshape_res = promote_shape(
                         tosa_fb,
                         bias,
@@ -360,36 +371,87 @@ def preprocess(  # noqa: C901
                             1,
                         )
                         + bias.shape,
-                        outp.dtype,
+                        bias_dtype,
                     )
 
                     # Add dummy batch 1 to mm_shape
                     mm_shape = (1, input.shape[0], weight.shape[1])
                     # Define Intermediate tensor for MatMul res
-                    mm_res = tosa_fb.addIntermediate(mm_shape, outp.dtype)
+                    mm_res = tosa_fb.addIntermediate(
+                        mm_shape, ts.DType.INT32 if is_quant_node else output_dtype
+                    )
 
                     # Add MatMulOp
+                    attr_matmul = ts.TosaSerializerAttribute()
+                    a_zp, b_zp = (-128, 0) if is_quant_node else (0, 0)
+                    attr_matmul.MatMulAttribute(a_zp, b_zp)
                     tosa_fb.addOperator(
                         TosaOp.Op().MATMUL,
                         [input_reshape_res.name, weight_reshape_res.name],
                         [mm_res.name],
-                        attr_torch_to_tosa(TosaOp.Op().MATMUL, node),
+                        attr_matmul,
                     )
 
                     # Add AddOp
-                    add_res = tosa_fb.addIntermediate(mm_shape, outp.dtype)
+                    add_res = tosa_fb.addIntermediate(
+                        mm_shape, ts.DType.INT32 if is_quant_node else output_dtype
+                    )
+
                     tosa_fb.addOperator(
                         TosaOp.Op().ADD,
                         [bias_reshape_res.name, mm_res.name],
                         [add_res.name],
                         None,
                     )
 
+                    if is_quant_node:
+                        # Read inputs' parent nodes
+                        #
+                        _, input_node, weight_node = node.all_input_nodes
+                        input_scale, _ = getQuantNodeArgs(input_node)
+                        weight_node_q_node = weight_node.all_input_nodes[0]
+                        weight_scale, _ = getQuantNodeArgs(weight_node_q_node)
+
+                        consumer_node = list(node.users)[0]
+                        consumer_node_scale, consumer_node_node_zp = getQuantNodeArgs(
+                            consumer_node
+                        )
+
+                        output_rescale_scale = (
+                            input_scale * weight_scale
+                        ) / consumer_node_scale
+                        (
+                            multiplier_output,
+                            shift_output,
+                        ) = tosa_quant_utils.computeMultiplierAndShift(
+                            output_rescale_scale
+                        )
+
+                        attr_rescale_output = ts.TosaSerializerAttribute()
+                        attr_rescale_output.RescaleAttribute(
+                            input_zp=0,
+                            output_zp=consumer_node_node_zp,
+                            multiplier=[multiplier_output],
+                            shift=[shift_output],
+                            scale32=True,
+                            double_round=True,
+                            per_channel=False,
+                        )
+                        add_res_int8 = tosa_fb.addIntermediate(mm_shape, ts.DType.INT8)
+                        tosa_fb.addOperator(
+                            TosaOp.Op().RESCALE,
+                            [add_res.name],
+                            [add_res_int8.name],
+                            attr_rescale_output,
+                        )
                     # Reshape final result to original shape
                     attr_out = ts.TosaSerializerAttribute()
                     attr_out.ReshapeAttribute(outp.shape)
                     tosa_fb.addOperator(
-                        TosaOp.Op().RESHAPE, [add_res.name], [outp.name], attr_out
+                        TosaOp.Op().RESHAPE,
+                        [add_res_int8.name if is_quant_node else add_res.name],
+                        [outp.name],
+                        attr_out,
                     )
                 elif exir_ops.edge.aten.permute_copy.default == node.target:
                     attr = ts.TosaSerializerAttribute()
@@ -700,20 +762,11 @@ def preprocess(  # noqa: C901
                         [outp.name],
                         attr_mul,
                     )
-                elif operator.getitem == node.target:
-                    item_name = inputs[0].name
-                    ## Simply add an identityOp
-                    tosa_fb.addOperator(TosaOp.Op().IDENTITY, [item_name], [outp.name])
-                elif (
-                    exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
-                    == node.target
-                ):
-                    item_name = inputs[0].name
-                    tosa_fb.addOperator(TosaOp.Op().IDENTITY, [item_name], [outp.name])
-                elif (
-                    exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
-                    == node.target
-                ):
+                elif node.target in [
+                    operator.getitem,
+                    tosa_quant_utils.q_op,
+                    tosa_quant_utils.dq_op,
+                ]:
                     item_name = inputs[0].name
                     ## Simply add an identityOp
                     tosa_fb.addOperator(TosaOp.Op().IDENTITY, [item_name], [outp.name])
@@ -740,9 +793,54 @@ def preprocess(  # noqa: C901
 
                     assert isinstance(p_data, torch.Tensor), "Expect Attr to be tensor"
                     weight_values = p_data.detach().numpy()
-                    tosa_fb.addConst(
-                        inputs[0].shape, inputs[0].dtype, weight_values, name=out
-                    )
+
+                    # Check if they're for quantized nodes
+                    consumer_node = list(node.users)[0]
+                    if consumer_node.target in tosa_quant_utils.dq_q_ops:
+                        _, weight_node_scale, weight_node_zp, _, _, _ = getNodeArgs(
+                            consumer_node
+                        )
+
+                        weight_values_quantized = (
+                            (weight_values / weight_node_scale.number)
+                            + weight_node_zp.number
+                        ).astype(np.int8)
+                        tosa_fb.addConst(
+                            inputs[0].shape,
+                            ts.DType.INT8,
+                            weight_values_quantized,
+                            name=out,
+                        )
+                    elif (
+                        consumer_node.target == exir_ops.edge.aten.addmm.default
+                        and list(consumer_node.users)[0].target == tosa_quant_utils.q_op
+                    ):
+                        (
+                            _,
+                            input_node,
+                            weight_node_permuted,
+                        ) = consumer_node.all_input_nodes
+                        weight_node = weight_node_permuted.all_input_nodes[0]
+
+                        input_node_scale, _ = getQuantNodeArgs(input_node)
+                        weight_node_scale, weight_node_zp = getQuantNodeArgs(
+                            weight_node
+                        )
+
+                        weight_values_quantized = (
+                            weight_values / (input_node_scale * weight_node_scale)
+                        ).astype(np.int32)
+
+                        tosa_fb.addConst(
+                            inputs[0].shape,
+                            ts.DType.INT32,
+                            weight_values_quantized,
+                            name=out,
+                        )
+                    else:
+                        tosa_fb.addConst(
+                            inputs[0].shape, inputs[0].dtype, weight_values, name=out
+                        )
                 elif out in edge_program.graph_signature.inputs_to_buffers:
                     parameter_name = edge_program.graph_signature.inputs_to_buffers[
                         node.name
 
@@ -69,19 +69,18 @@ def forward(self, x, y):
     @register_test
     class simple_linear(torch.nn.Module):
         inputs = {
-            TosaProfile.BI: (torch.ones(128, 20),),
-            TosaProfile.MI: (torch.ones(128, 20),),
+            TosaProfile.BI: (torch.ones(100, 20),),
+            TosaProfile.MI: (torch.ones(100, 20),),
         }
 
         def __init__(self):
             super().__init__()
+            torch.manual_seed(42)
             self.fc = torch.nn.Linear(20, 30)
-            self.relu6 = torch.nn.ReLU6()
 
         def forward(self, x):
             x = self.fc(x)
-            x = self.relu6(x)
-            return x + x
+            return x
 
     @register_test
     class simple_conv2d(torch.nn.Module):
 
@@ -12,25 +12,24 @@
 from serializer.tosa_serializer import TosaOp, TosaSerializerTensor
 
 
+q_op = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
+dq_op = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
+dq_q_ops = [q_op, dq_op]
+
+
 def isQuantNode(node):
     consumer_node = list(node.users)[0]
+    inputs = node.all_input_nodes  # empty when the node has no producer nodes
     return (
-        consumer_node.target
-        == exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
-        or node.target
-        in [
-            exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
-            exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
-        ]
+        consumer_node.target == q_op
+        or node.target in dq_q_ops
+        or (len(inputs) > 0 and inputs[0].target in dq_q_ops)
     )
 
 
 def isQuantArg(arg):
     consumer_node = list(arg.users)[0]
-    return (
-        consumer_node.target
-        == exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
-    )
+    return consumer_node.target == q_op
 
 
 # TOSA uses the RESCALE operation to scale between values with differing precision.
 
@@ -68,6 +68,17 @@ Error XNNExecutor::set_external_input(uint32_t id, Tensor* input) {
     return Error::NotSupported;
 #endif
   } else {
+    // TODO(T165403530): Verify accuracy of the int64 --> float32 conversion
+    if (input->scalar_type() == ScalarType::Long) {
+      // Input data type is int64. However, XNNPACK doesn't support
+      // int64. This means that the data needs to be cast to float
+      // in order for XNNPACK to properly use it.
+      const int64_t* data_64 = input->const_data_ptr<int64_t>();
+      float* data_f32 = input->mutable_data_ptr<float>();
+      for (int j = 0; j < input->numel(); j++) {
+        data_f32[j] = data_64[j];
+      }
+    }
     externals_.emplace_back(xnn_external_value{id, input->mutable_data_ptr()});
   }
   return Error::Ok;