141 changes: 141 additions & 0 deletions backends/vulkan/runtime/graph/ops/impl/Split.cpp
@@ -0,0 +1,141 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/Copy.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>

namespace vkcompute {

void add_split_with_sizes_default_node(
ComputeGraph& graph,
ValueRef in,
const std::vector<int64_t>& split_sizes,
int64_t dim,
ValueRef out_list_ref) {
vTensorPtr t_in = graph.get_tensor(in);

VK_CHECK_COND(check_memory_layout_is(*t_in, api::kChannelsPacked));

ValueListPtr out_list = graph.get_value_list(out_list_ref);

NchwDim nchw_dim = normalize_to_nchw_dim(*t_in, dim);

VK_CHECK_COND(out_list->size() == split_sizes.size());

for (size_t split_idx = 0; split_idx < split_sizes.size(); split_idx++) {
int64_t split_size = split_sizes[split_idx];
ValueRef out_ref = (*out_list)[split_idx];

vTensorPtr t_out = graph.get_tensor(out_ref);
VK_CHECK_COND(check_memory_layout_is(*t_out, api::kChannelsPacked));
VK_CHECK_COND(dim_at(*t_out, nchw_dim) == split_size);
}

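// With a channels-packed layout, texture axis x indexes width, y indexes
// height, and z indexes batch * ceil(channel / 4), so width, height, and
// batch splits reduce to axis-aligned offset copies. Channel splits take a
// separate path below because a single texel packs up to 4 channels, which
// may straddle two outputs.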
if (nchw_dim == DimWidth) {
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);

for (ValueRef out_ref : *out_list) {
// No need to use split_size here, since we have already verified that
// each output tensor's size matches the corresponding split_size.
vTensorPtr t_out = graph.get_tensor(out_ref);
api::utils::ivec3 range = t_out->texture_limits();
add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref);

src_offset.data[0] += range.data[0];
}
} else if (nchw_dim == DimHeight) {
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);

for (ValueRef out_ref : *out_list) {
vTensorPtr t_out = graph.get_tensor(out_ref);
api::utils::ivec3 range = t_out->texture_limits();
add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref);

src_offset.data[1] += range.data[1];
}
} else if (nchw_dim == DimBatch) {
api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);

for (ValueRef out_ref : *out_list) {
vTensorPtr t_out = graph.get_tensor(out_ref);
api::utils::ivec3 range = t_out->texture_limits();
add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref);

src_offset.data[2] += range.data[2];
}
} else if (nchw_dim == DimChannel) {
int32_t src_offset = 0;
int32_t dst_offset = 0;

for (ValueRef out_ref : *out_list) {
vTensorPtr t_out = graph.get_tensor(out_ref);
int32_t range = dim_at<Dim4D::Channel>(t_out->sizes());
add_copy_channel_offset_node(
graph, in, range, src_offset, dst_offset, out_ref);
src_offset += range;
}

} else {
VK_THROW("not ipmlemented");
}
}

void add_split_with_sizes_default_node(
ComputeGraph& graph,
ValueRef in,
ValueRef split_sizes_ref,
ValueRef dim_ref,
ValueRef out) {
int64_t dim = graph.extract_scalar<int64_t>(dim_ref);
std::vector<int64_t> split_sizes = *(graph.get_int_list(split_sizes_ref));

add_split_with_sizes_default_node(graph, in, split_sizes, dim, out);
}

void split_with_sizes_default(
ComputeGraph& graph,
const std::vector<ValueRef>& args) {
add_split_with_sizes_default_node(graph, args[0], args[1], args[2], args[3]);
}

void add_split_tensor_node(
ComputeGraph& graph,
ValueRef in,
ValueRef split_size_ref,
ValueRef dim_ref,
ValueRef out) {
int64_t split_size = graph.extract_scalar<int64_t>(split_size_ref);
int64_t dim = graph.extract_scalar<int64_t>(dim_ref);

vTensorPtr t_in = graph.get_tensor(in);
NchwDim nchw_dim = normalize_to_nchw_dim(*t_in, dim);
int64_t size = dim_at(*t_in, nchw_dim);
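// Note: this assumes the dim size is evenly divisible by split_size.
// ATen's split.Tensor also allows a smaller trailing chunk, which this
// construction does not produce.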
std::vector<int64_t> split_sizes(size / split_size, split_size);

add_split_with_sizes_default_node(graph, in, split_sizes, dim, out);
}

void split_tensor(ComputeGraph& graph, const std::vector<ValueRef>& args) {
add_split_tensor_node(graph, args[0], args[1], args[2], args[3]);
}

REGISTER_OPERATORS {
VK_REGISTER_OP(aten.split_with_sizes.default, split_with_sizes_default);
VK_REGISTER_OP(aten.split.Tensor, split_tensor);
}

} // namespace vkcompute
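
For reference, a minimal Python sketch of the ATen split semantics these nodes implement, assuming standard torch.split behavior; aten.split.Tensor lowers to a uniform split_with_sizes, mirroring add_split_tensor_node above:

import torch

x = torch.arange(24).reshape(2, 3, 4)

# aten.split_with_sizes.default: explicit chunk sizes along a dim.
a, b = torch.split(x, [1, 3], dim=2)
assert a.shape == (2, 3, 1) and b.shape == (2, 3, 3)

# aten.split.Tensor: a uniform chunk size. With an evenly divisible dim size
# this equals split_with_sizes([split_size] * (size // split_size)), which is
# how add_split_tensor_node lowers it.
c, d = torch.split(x, 2, dim=2)
assert torch.equal(torch.cat([c, d], dim=2), x)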
61 changes: 34 additions & 27 deletions backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h
@@ -12,6 +12,27 @@

namespace vkcompute {

// A canonical way to represent dimensions as an enum. The motivation for a
// canonical enum is that user tensors use a "big-endian"-ish scheme to
// reference a dimension in an NCHW tensor, so tensors of different
// dimensionality map the same dim index to different underlying texture
// dimensions. For instance, in a 2d (height x width) tensor, dim 0 refers to
// height and dim 1 refers to width; in a 4d (batch x channel x height x
// width) tensor, dim 0 refers to batch and dim 1 refers to channel. Using
// this canonical enum brings clarity to the code.

enum NchwDim : uint32_t {
DimWidth = 1u,
DimHeight = 2u,
DimChannel = 3u,
DimBatch = 4u,
};

// Convert a user-provided dim into the canonical enum.
inline NchwDim normalize_to_nchw_dim(const vTensor& v_in, int32_t dim) {
return static_cast<NchwDim>(v_in.dim() - dim);
}
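// Worked example, assuming standard NCHW dim ordering: for a 4d tensor
// (dim() == 4), user dim 1 (channel) maps to 4 - 1 = 3 == DimChannel; for a
// 2d tensor (dim() == 2), user dim 0 (height) maps to 2 - 0 = 2 == DimHeight.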

/*
* Maps a semantic dimension name to an integer that
* corresponds to its innermost ordering in a 4D tensor in
@@ -20,10 +41,10 @@ namespace vkcompute {
* corresponds to 2, and so on.
*/
struct Dim4D {
static constexpr uint32_t Width = 1u;
static constexpr uint32_t Height = 2u;
static constexpr uint32_t Channel = 3u;
static constexpr uint32_t Batch = 4u;
static constexpr uint32_t Width = DimWidth;
static constexpr uint32_t Height = DimHeight;
static constexpr uint32_t Channel = DimChannel;
static constexpr uint32_t Batch = DimBatch;
};

/*
@@ -65,34 +86,20 @@ uint32_t dim_at(const std::vector<int64_t>& sizes) {
return dims < N ? 1 : api::utils::safe_downcast<uint32_t>(sizes[dims - N]);
}

inline uint32_t dim_at(const std::vector<int64_t>& sizes, NchwDim nchw_dim) {
const uint32_t dims = sizes.size();
return dims < nchw_dim
? 1
: api::utils::safe_downcast<uint32_t>(sizes[dims - nchw_dim]);
}
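// e.g., sizes = {7, 10, 12} with DimWidth (1) yields sizes[3 - 1] = 12, while
// DimBatch (4) exceeds the tensor's 3 dims, so 1 is returned.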

template <uint32_t N>
uint32_t dim_at(const vTensor& v_in) {
return dim_at<N>(v_in.sizes());
}

// A canonical way to represent dimensions as enum. Intended to use the same
// value as Dim4D for potential future refactoring.

enum NchwDim {
DimWidth = 1,
DimHeight = 2,
DimChannel = 3,
DimBatch = 4,
};

/* This function return a NchwDim
* given a Tensor and a user provided dim. The reason for this normalization is
* that in the user tensor coordinate, it is using a "big-endian" mechanism when
* referring to a nchw dimension, in that dim=0 refers to the batch dimension in
* a 4d tensor but dim=0 reference to height in a 2d tensor. Despite in a common
* texture representation of channel packing, a 2d tensor has exactly the same
* layout as a 4d with the batch and channel size equals to 1. This function
* returns a canonical dimension to simplify dimension reasoning in the code.
*
*/

inline NchwDim normalize_to_nchw_dim(const vTensor& v_in, int32_t dim) {
return static_cast<NchwDim>(v_in.dim() - dim);
inline uint32_t dim_at(const vTensor& v_in, NchwDim nchw_dim) {
return dim_at(v_in.sizes(), nchw_dim);
}

inline std::ostream& operator<<(std::ostream& os, NchwDim nchw_dim) {
2 changes: 0 additions & 2 deletions backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp
@@ -8,8 +8,6 @@

#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>

#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h>

namespace vkcompute {

//
95 changes: 89 additions & 6 deletions backends/vulkan/test/op_tests/cases.py
@@ -438,9 +438,7 @@ def get_cat_inputs():
([(3, 5), (4, 5)], 0),
([(3, 5), (4, 5), (1, 5)], 0),
(
[
(3, 5),
],
[(3, 5)],
0,
),
# Cat on Width
@@ -449,9 +447,7 @@
([(5, 3), (5, 4)], 1),
([(5, 3), (5, 4), (5, 1)], 1),
(
[
(5, 4),
],
[(5, 4)],
1,
),
([(5,), (6,)], 0),
@@ -474,6 +470,91 @@ def get_cat_inputs():
return test_suite


def get_split_with_sizes_inputs():
Test = namedtuple("VkSliceTest", ["self", "sizes", "dim"])
test_cases = [
# Split on Width
Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=3),
Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=2),
Test(self=(7, 10, 10), sizes=[1, 9], dim=2),
Test(self=(10, 10), sizes=[1, 9], dim=1),
Test(self=(10,), sizes=[1, 9], dim=0),
# Split on Height
Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=2),
Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=1),
Test(self=(7, 10, 10), sizes=[10], dim=1),
Test(self=(7, 6, 10), sizes=[1, 1, 1, 1, 1, 1], dim=1),
Test(self=(10, 10), sizes=[1, 2, 3, 4], dim=0),
# Split on Batch
Test(self=(10, 7, 10, 10), sizes=[3, 6, 1], dim=0),
Test(self=(10, 7, 10, 10), sizes=[10], dim=0),
# Split on Channel
Test(self=(7, 13, 4, 8), sizes=[3, 6, 1, 3], dim=1),
Test(self=(7, 13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=1),
Test(self=(13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=0),
Test(self=(13, 4, 8), sizes=[2, 9, 2], dim=0),
Test(self=(13, 4, 8), sizes=[13], dim=0),
]
test_suite = VkTestSuite([tuple(tc) for tc in test_cases])

test_suite.layouts = [
"api::kChannelsPacked",
]
test_suite.data_gen = "make_seq_tensor"
test_suite.dtypes = ["at::kFloat"]
return test_suite


def get_split_tensor_inputs():
test_suite = VkTestSuite(
[
# Split on Width
((S1, 7, 10, 12), 12, 3),
((S1, 7, 10, 12), 3, 3),
((S1, 7, 10, 12), 1, 3),
((7, 10, 12), 12, 2),
((7, 10, 12), 3, 2),
((7, 10, 12), 1, 2),
((10, 12), 12, 1),
((10, 12), 3, 1),
((10, 12), 1, 1),
((12,), 12, 0),
((12,), 3, 0),
((12,), 1, 0),
# Split on Height
((S1, 7, 12, 8), 12, 2),
((S1, 7, 12, 8), 3, 2),
((S1, 7, 12, 8), 1, 2),
((7, 12, 8), 12, 1),
((7, 12, 8), 3, 1),
((7, 12, 8), 1, 1),
((12, 8), 12, 0),
((12, 8), 3, 0),
((12, 8), 1, 0),
# Split on Batch
((12, 7, 10, 10), 12, 0),
((12, 7, 10, 10), 3, 0),
((12, 7, 10, 10), 1, 0),
# Split on Channel
((7, 15, 10, 10), 15, 1),
((7, 15, 10, 10), 5, 1),
((7, 15, 10, 10), 3, 1),
((7, 15, 10, 10), 1, 1),
((15, 10, 10), 15, 0),
((15, 10, 10), 5, 0),
((15, 10, 10), 3, 0),
((15, 10, 10), 1, 0),
]
)

test_suite.layouts = [
"api::kChannelsPacked",
]
test_suite.data_gen = "make_seq_tensor"
test_suite.dtypes = ["at::kFloat"]
return test_suite


test_suites = {
"aten.add.Tensor": get_binary_elementwise_inputs(),
"aten.sub.Tensor": get_binary_elementwise_inputs(),
@@ -494,4 +575,6 @@ def get_cat_inputs():
"aten.clone.default": get_clone_inputs(),
"aten.repeat.default": get_repeat_inputs(),
"aten.cat.default": get_cat_inputs(),
"aten.split_with_sizes.default": get_split_with_sizes_inputs(),
"aten.split.Tensor": get_split_tensor_inputs(),
}