diff --git a/backends/vulkan/runtime/graph/ops/impl/Split.cpp b/backends/vulkan/runtime/graph/ops/impl/Split.cpp
new file mode 100644
index 00000000000..3b40871a791
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/impl/Split.cpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+namespace vkcompute {
+
+void add_split_with_sizes_default_node(
+    ComputeGraph& graph,
+    ValueRef in,
+    const std::vector<int64_t>& split_sizes,
+    int64_t dim,
+    ValueRef out_list_ref) {
+  vTensorPtr t_in = graph.get_tensor(in);
+
+  VK_CHECK_COND(check_memory_layout_is(*t_in, api::kChannelsPacked));
+
+  ValueListPtr out_list = graph.get_value_list(out_list_ref);
+
+  NchwDim nchw_dim = normalize_to_nchw_dim(*t_in, dim);
+
+  VK_CHECK_COND(out_list->size() == split_sizes.size());
+
+  for (int split_idx = 0; split_idx < split_sizes.size(); split_idx++) {
+    int64_t split_size = split_sizes[split_idx];
+    ValueRef out_ref = (*out_list)[split_idx];
+
+    vTensorPtr t_out = graph.get_tensor(out_ref);
+    VK_CHECK_COND(check_memory_layout_is(*t_out, api::kChannelsPacked));
+    VK_CHECK_COND(dim_at(*t_out, nchw_dim) == split_size);
+  }
+
+  if (nchw_dim == DimWidth) {
+    api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
+    api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
+
+    for (ValueRef out_ref : *out_list) {
+      // No need to use split_size here; we have already verified that the
+      // output tensor's size matches split_size.
+      vTensorPtr t_out = graph.get_tensor(out_ref);
+      api::utils::ivec3 range = t_out->texture_limits();
+      add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref);
+
+      src_offset.data[0] += range.data[0];
+    }
+  } else if (nchw_dim == DimHeight) {
+    api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
+    api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
+
+    for (ValueRef out_ref : *out_list) {
+      vTensorPtr t_out = graph.get_tensor(out_ref);
+      api::utils::ivec3 range = t_out->texture_limits();
+      add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref);
+
+      src_offset.data[1] += range.data[1];
+    }
+  } else if (nchw_dim == DimBatch) {
+    api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
+    api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
+
+    for (ValueRef out_ref : *out_list) {
+      vTensorPtr t_out = graph.get_tensor(out_ref);
+      api::utils::ivec3 range = t_out->texture_limits();
+      add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref);
+
+      src_offset.data[2] += range.data[2];
+    }
+  } else if (nchw_dim == DimChannel) {
+    int32_t src_offset = 0;
+    int32_t dst_offset = 0;
+
+    for (ValueRef out_ref : *out_list) {
+      vTensorPtr t_out = graph.get_tensor(out_ref);
+      int32_t range = dim_at<Dim4D::Channel>(t_out->sizes());
+      add_copy_channel_offset_node(
+          graph, in, range, src_offset, dst_offset, out_ref);
+      src_offset += range;
+    }
+
+  } else {
+    VK_THROW("not implemented");
+  }
+}
+
+void add_split_with_sizes_default_node(
+    ComputeGraph& graph,
+    ValueRef in,
+    ValueRef split_sizes_ref,
+    ValueRef dim_ref,
+    ValueRef out) {
+  int64_t dim = graph.extract_scalar<int64_t>(dim_ref);
+  std::vector<int64_t> split_sizes = *(graph.get_int_list(split_sizes_ref));
+
+  add_split_with_sizes_default_node(graph, in, split_sizes, dim, out);
+}
+
+void split_with_sizes_default(
+    ComputeGraph& graph,
+    const std::vector<ValueRef>& args) {
+  add_split_with_sizes_default_node(graph, args[0], args[1], args[2], args[3]);
+}
+
+void add_split_tensor_node(
+    ComputeGraph& graph,
+    ValueRef in,
+    ValueRef split_size_ref,
+    ValueRef dim_ref,
+    ValueRef out) {
+  int64_t split_size = graph.extract_scalar<int64_t>(split_size_ref);
+  int64_t dim = graph.extract_scalar<int64_t>(dim_ref);
+
+  vTensorPtr t_in = graph.get_tensor(in);
+  NchwDim nchw_dim = normalize_to_nchw_dim(*t_in, dim);
+  int64_t size = dim_at(*t_in, nchw_dim);
+  std::vector<int64_t> split_sizes(size / split_size, split_size);
+
+  add_split_with_sizes_default_node(graph, in, split_sizes, dim, out);
+}
+
+void split_tensor(ComputeGraph& graph, const std::vector<ValueRef>& args) {
+  add_split_tensor_node(graph, args[0], args[1], args[2], args[3]);
+}
+
+REGISTER_OPERATORS {
+  VK_REGISTER_OP(aten.split_with_sizes.default, split_with_sizes_default);
+  VK_REGISTER_OP(aten.split.Tensor, split_tensor);
+}
+
+} // namespace vkcompute
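Note on the copy-based approach in Split.cpp above: for width, height, and batch splits, the node walks the outputs and accumulates a source offset along a single texture axis, while each output's extent comes from its own texture_limits(). Below is a minimal standalone sketch of that accumulation, in plain C++ with names invented for illustration only; it is not code from this patch.

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Given each output's extent along the split axis, produce the
// {source offset, extent} pair each copy would use. The offset simply
// accumulates, mirroring `src_offset.data[i] += range.data[i]` in Split.cpp.
std::vector<std::pair<int32_t, int32_t>> split_copy_ranges(
    const std::vector<int32_t>& extents) {
  std::vector<std::pair<int32_t, int32_t>> ranges;
  int32_t src_offset = 0;
  for (const int32_t extent : extents) {
    ranges.emplace_back(src_offset, extent);
    src_offset += extent; // advance along the split axis for the next output
  }
  return ranges;
}
```

For example, extents {1, 2, 3, 4} yield ranges {0,1}, {1,2}, {3,3}, {6,4}; the channel branch follows the same pattern, but accumulates channel counts and defers to add_copy_channel_offset_node instead of a plain texture-range copy.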
diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h b/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h
index e7b9a614e28..c5a47b7776a 100644
--- a/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h
+++ b/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h
@@ -12,6 +12,27 @@
 
 namespace vkcompute {
 
+// A canonical way to represent dimensions as an enum. The motivation behind a
+// canonical enum is that the user tensor uses a "big-endian"-ish mechanism to
+// reference a dimension in an nchw-tensor, so tensors of different
+// dimensionality have different mappings from dim to the underlying texture
+// dimension. For instance, for a 2d (height x width) tensor, dim 0 refers to
+// height and dim 1 refers to width; for a 4d (batch x channel x height x
+// width) tensor, dim 0 refers to batch and dim 1 refers to channel. Using
+// this canonical enum brings clarity to the code.
+
+enum NchwDim : uint32_t {
+  DimWidth = 1u,
+  DimHeight = 2u,
+  DimChannel = 3u,
+  DimBatch = 4u,
+};
+
+// Convert a user-provided dim into the canonical enum.
+inline NchwDim normalize_to_nchw_dim(const vTensor& v_in, int32_t dim) {
+  return static_cast<NchwDim>(v_in.dim() - dim);
+}
+
 /*
  * Maps a semantic dimension name to an integer that
  * corresponds to its innermost ordering in a 4D tensor in
@@ -20,10 +41,10 @@ namespace vkcompute {
  * corresponds to 2, and so on.
  */
 struct Dim4D {
-  static constexpr uint32_t Width = 1u;
-  static constexpr uint32_t Height = 2u;
-  static constexpr uint32_t Channel = 3u;
-  static constexpr uint32_t Batch = 4u;
+  static constexpr uint32_t Width = DimWidth;
+  static constexpr uint32_t Height = DimHeight;
+  static constexpr uint32_t Channel = DimChannel;
+  static constexpr uint32_t Batch = DimBatch;
 };
 
 /*
@@ -65,34 +86,20 @@ uint32_t dim_at(const std::vector<int64_t>& sizes) {
   const uint32_t dims = sizes.size();
   return dims < N ? 1 : api::utils::safe_downcast<uint32_t>(sizes[dims - N]);
 }
 
+inline uint32_t dim_at(const std::vector<int64_t>& sizes, NchwDim nchw_dim) {
+  const uint32_t dims = sizes.size();
+  return dims < nchw_dim
+      ? 1
+      : api::utils::safe_downcast<uint32_t>(sizes[dims - nchw_dim]);
+}
+
 template <uint32_t N>
 uint32_t dim_at(const vTensor& v_in) {
   return dim_at<N>(v_in.sizes());
 }
 
-// A canonical way to represent dimensions as enum. Intended to use the same
-// value as Dim4D for potential future refactoring.
-
-enum NchwDim {
-  DimWidth = 1,
-  DimHeight = 2,
-  DimChannel = 3,
-  DimBatch = 4,
-};
-
-/* This function return a NchwDim
- * given a Tensor and a user provided dim. The reason for this normalization is
- * that in the user tensor coordinate, it is using a "big-endian" mechanism when
- * referring to a nchw dimension, in that dim=0 refers to the batch dimension in
- * a 4d tensor but dim=0 reference to height in a 2d tensor. Despite in a common
- * texture representation of channel packing, a 2d tensor has exactly the same
- * layout as a 4d with the batch and channel size equals to 1. This function
- * returns a canonical dimension to simplify dimension reasoning in the code.
- *
- */
-
-inline NchwDim normalize_to_nchw_dim(const vTensor& v_in, int32_t dim) {
-  return static_cast<NchwDim>(v_in.dim() - dim);
+inline uint32_t dim_at(const vTensor& v_in, NchwDim nchw_dim) {
+  return dim_at(v_in.sizes(), nchw_dim);
 }
 
 inline std::ostream& operator<<(std::ostream& os, NchwDim nchw_dim) {
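For intuition, normalize_to_nchw_dim is simply the tensor's rank minus the user-provided dim, so the same NchwDim value names the same texture axis regardless of rank. A small self-contained sketch (the enum values mirror the ones added above; everything else is local to this example):

```cpp
#include <cassert>
#include <cstdint>

// Mirrors the NchwDim values added in DimUtils.h.
enum NchwDim : uint32_t {
  DimWidth = 1u,
  DimHeight = 2u,
  DimChannel = 3u,
  DimBatch = 4u,
};

// rank - dim, as in normalize_to_nchw_dim(v_in, dim).
inline NchwDim normalize(int64_t rank, int64_t dim) {
  return static_cast<NchwDim>(rank - dim);
}

int main() {
  assert(normalize(4, 0) == DimBatch);   // NCHW tensor: dim 0 is batch
  assert(normalize(4, 1) == DimChannel); // NCHW tensor: dim 1 is channel
  assert(normalize(2, 0) == DimHeight);  // HW tensor: dim 0 is height
  assert(normalize(2, 1) == DimWidth);   // HW tensor: dim 1 is width
  return 0;
}
```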
diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp
index 842cfa2f4fc..f5e0d2b1713 100644
--- a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp
@@ -8,8 +8,6 @@
 
 #include
 
-#include
-
 namespace vkcompute {
 
 //
diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py
index f0659ad8232..a326402cc39 100644
--- a/backends/vulkan/test/op_tests/cases.py
+++ b/backends/vulkan/test/op_tests/cases.py
@@ -438,9 +438,7 @@ def get_cat_inputs():
         ([(3, 5), (4, 5)], 0),
         ([(3, 5), (4, 5), (1, 5)], 0),
         (
-            [
-                (3, 5),
-            ],
+            [(3, 5)],
             0,
         ),
         # Cat on Width
@@ -449,9 +447,7 @@
         ([(5, 3), (5, 4)], 1),
         ([(5, 3), (5, 4), (5, 1)], 1),
         (
-            [
-                (5, 4),
-            ],
+            [(5, 4)],
             1,
         ),
         ([(5,), (6,)], 0),
@@ -474,6 +470,91 @@ def get_cat_inputs():
     return test_suite
 
 
+def get_split_with_sizes_inputs():
+    Test = namedtuple("VkSliceTest", ["self", "sizes", "dim"])
+    test_cases = [
+        # Split on Width
+        Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=3),
+        Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=2),
+        Test(self=(7, 10, 10), sizes=[1, 9], dim=2),
+        Test(self=(10, 10), sizes=[1, 9], dim=1),
+        Test(self=(10,), sizes=[1, 9], dim=0),
+        # Split on Height
+        Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=2),
+        Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=1),
+        Test(self=(7, 10, 10), sizes=[10], dim=1),
+        Test(self=(7, 6, 10), sizes=[1, 1, 1, 1, 1, 1], dim=1),
+        Test(self=(10, 10), sizes=[1, 2, 3, 4], dim=0),
+        # Split on Batch
+        Test(self=(10, 7, 10, 10), sizes=[3, 6, 1], dim=0),
+        Test(self=(10, 7, 10, 10), sizes=[10], dim=0),
+        # Split on Channel
+        Test(self=(7, 13, 4, 8), sizes=[3, 6, 1, 3], dim=1),
+        Test(self=(7, 13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=1),
+        Test(self=(13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=0),
+        Test(self=(13, 4, 8), sizes=[2, 9, 2], dim=0),
+        Test(self=(13, 4, 8), sizes=[13], dim=0),
+    ]
+    test_suite = VkTestSuite([tuple(tc) for tc in test_cases])
+
+    test_suite.layouts = [
+        "api::kChannelsPacked",
+    ]
+    test_suite.data_gen = "make_seq_tensor"
+    test_suite.dtypes = ["at::kFloat"]
+    return test_suite
+
+
+def get_split_tensor_inputs():
+    test_suite = VkTestSuite(
+        [
+            # Split on Width
+            ((S1, 7, 10, 12), 12, 3),
+            ((S1, 7, 10, 12), 3, 3),
+            ((S1, 7, 10, 12), 1, 3),
+            ((7, 10, 12), 12, 2),
+            ((7, 10, 12), 3, 2),
+            ((7, 10, 12), 1, 2),
+            ((10, 12), 12, 1),
+            ((10, 12), 3, 1),
+            ((10, 12), 1, 1),
+            ((12,), 12, 0),
+            ((12,), 3, 0),
+            ((12,), 1, 0),
+            # Split on Height
+            ((S1, 7, 12, 8), 12, 2),
+            ((S1, 7, 12, 8), 3, 2),
+            ((S1, 7, 12, 8), 1, 2),
+            ((7, 12, 8), 12, 1),
+            ((7, 12, 8), 3, 1),
+            ((7, 12, 8), 1, 1),
+            ((12, 8), 12, 0),
+            ((12, 8), 3, 0),
+            ((12, 8), 1, 0),
+            # Split on Batch
+            ((12, 7, 10, 10), 12, 0),
+            ((12, 7, 10, 10), 3, 0),
+            ((12, 7, 10, 10), 1, 0),
+            # Split on Channel
+            ((7, 15, 10, 10), 15, 1),
+            ((7, 15, 10, 10), 5, 1),
+            ((7, 15, 10, 10), 3, 1),
+            ((7, 15, 10, 10), 1, 1),
+            ((15, 10, 10), 15, 0),
+            ((15, 10, 10), 5, 0),
+            ((15, 10, 10), 3, 0),
+            ((15, 10, 10), 1, 0),
+        ]
+    )
+
+    test_suite.layouts = [
+        "api::kChannelsPacked",
+    ]
+    test_suite.data_gen = "make_seq_tensor"
+    test_suite.dtypes = ["at::kFloat"]
+    return test_suite
+
+
 test_suites = {
     "aten.add.Tensor": get_binary_elementwise_inputs(),
     "aten.sub.Tensor": get_binary_elementwise_inputs(),
@@ -494,4 +575,6 @@ def get_cat_inputs():
     "aten.clone.default": get_clone_inputs(),
     "aten.repeat.default": get_repeat_inputs(),
     "aten.cat.default": get_cat_inputs(),
+    "aten.split_with_sizes.default": get_split_with_sizes_inputs(),
+    "aten.split.Tensor": get_split_tensor_inputs(),
 }
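The aten.split.Tensor cases above all use a split_size that evenly divides the target dimension, which matches how add_split_tensor_node expands a single split_size into a split_sizes vector before reusing the split_with_sizes path. A sketch of that expansion under the even-divisibility assumption, with an illustrative helper name that is not part of the patch:

```cpp
#include <cstdint>
#include <vector>

// E.g. a dimension of size 15 with split_size 5 expands to {5, 5, 5},
// which is then handled exactly like aten.split_with_sizes.
std::vector<int64_t> expand_split_size(int64_t dim_size, int64_t split_size) {
  return std::vector<int64_t>(dim_size / split_size, split_size);
}
```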
diff --git a/backends/vulkan/test/op_tests/utils/codegen.py b/backends/vulkan/test/op_tests/utils/codegen.py
index ac5e25fa596..a43998b47c9 100644
--- a/backends/vulkan/test/op_tests/utils/codegen.py
+++ b/backends/vulkan/test/op_tests/utils/codegen.py
@@ -24,6 +24,7 @@
     OPT_LAYOUT,
     OPT_MEMORY_FORMAT,
     OPT_SCALAR_TYPE,
+    TENSOR_VECTOR,
     TestSuite,
     TestSuiteGen,
     THREE_TENSOR_TUPLE,
@@ -73,6 +74,26 @@ class ValueRef:
     is_out: bool = False
     requires_prepack: bool = False
     supports_prepack: bool = False
+    # When is_dynamic_size is true, the size of the underlying object is not
+    # known during code-gen. An example is the out value for aten.split, where
+    # the out value is a vector. In these cases, we need an additional
+    # vector or at::TensorList to track these values.
+    is_dynamic_size: bool = False
+
+    @property
+    def io_value_list_name(self):
+        assert self.is_dynamic_size
+        return f"{self.name}_io_value_list"
+
+    @property
+    def value_list_name(self):
+        assert self.is_dynamic_size
+        return f"{self.name}_value_list"
+
+    @property
+    def vk_out(self):
+        assert self.is_out
+        return f"vk_{self.name}"
 
 
 ValueRefList = Union[ValueRef, List[ValueRef]]
@@ -177,6 +198,18 @@ def __init__(self, op_reg_name: str, f: NativeFunction, suite_def: TestSuite):
                     is_out=False,
                 ),
             ]
+        elif ret_type == TENSOR_VECTOR:
+            self.refs["out"] = ValueRef(
+                name="out_ref",
+                src_cpp_name="out",
+                src_cpp_type=ret_type,
+                is_out=True,
+                is_dynamic_size=True,
+            )
+        else:
+            raise NotImplementedError(
+                f"ret_type: {ret_type} not supported for out value"
+            )
 
 
 ## ATen code generation
@@ -267,6 +300,21 @@ def create_value_for(self, ref: ValueRefList) -> str:  # noqa: C901
             ret_str += "}\n"
             ret_str += f"ValueRef {ref.name} = {self.graph}{self.dot}add_value_list(std::move({ref.name}_value_refs));\n"
             return ret_str
+        elif ref.src_cpp_type == TENSOR_VECTOR:
+            ret_str = f"""
+std::vector {ref.io_value_list_name};
+std::vector {ref.value_list_name};
+for (int i=0; i
 str:
             for r in ref[:-1]:
                 ret_str += self.set_output(r)
             return ret_str
+        elif ref.src_cpp_type == TENSOR_VECTOR:
+            assert ref.is_out
+            ret_str = f"""
+for (int i=0; i
 str:
             for r in ref[:-1]:
                 ret_str += self.declare_vk_out_for(r)
             return ret_str
+        elif ref.src_cpp_type == TENSOR_VECTOR:
+            assert ref.is_out
+            ret_str = f"""
+std::vector {ref.vk_out};
+for (int i=0; i
 str:
             for r in ref[:-1]:
                 ret_str += self.copy_from_staging(r)
             return ret_str
+        elif ref.src_cpp_type == TENSOR_VECTOR:
+            assert ref.is_out
+            ret_str = f"""
+for (int i=0; i
 str:
             for r in ref[:-1]:
                 ret_str += self.check_graph_out(r)
             return ret_str
+        elif ref.src_cpp_type == TENSOR_VECTOR:
+            assert ref.is_out
+            ret_str = f"""
+for (int i=0; i
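Background for the TENSOR_VECTOR handling in codegen.py: the reference output of aten.split / aten.split_with_sizes is a std::vector of tensors whose length is only known from the test case, which is why the out value is tracked with an additional list (is_dynamic_size) rather than a fixed set of named tensors. A minimal ATen-only illustration of that return shape, independent of the generated harness code and with example shapes chosen arbitrarily:

```cpp
#include <ATen/ATen.h>

#include <vector>

int main() {
  const at::Tensor self = at::rand({2, 7, 4, 4});
  // Three outputs with channel sizes 3, 3, and 1; the number of outputs
  // depends on the test case, not on the operator schema.
  const std::vector<at::Tensor> outs =
      self.split_with_sizes({3, 3, 1}, /*dim=*/1);
  for (const at::Tensor& t : outs) {
    (void)t.sizes(); // one per-output check would be emitted for each element
  }
  return 0;
}
```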