diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp
index 6d0672377e8..37ca1fd6e14 100644
--- a/backends/vulkan/runtime/VulkanBackend.cpp
+++ b/backends/vulkan/runtime/VulkanBackend.cpp
@@ -334,18 +334,18 @@ bool maybe_resize_input(
     const size_t input_i,
     exec_aten::Tensor& et_tensor) {
   ValueRef in_tensor_ref = graph->inputs()[input_i].value;
-  vTensor& in_tensor = graph->get_val(in_tensor_ref).toTensor();
+  vTensorPtr in_tensor = graph->get_tensor(in_tensor_ref);
 
   ET_CHECK_MSG(
-      et_tensor.dim() == in_tensor.sizes().size(),
+      et_tensor.dim() == in_tensor->sizes().size(),
       "Cannot resize input tensor: old ndim %zu does not match new ndim %zu",
-      static_cast<size_t>(in_tensor.sizes().size()),
+      static_cast<size_t>(in_tensor->sizes().size()),
       static_cast<size_t>(et_tensor.dim()));
 
   bool should_resize = false;
   std::vector<int64_t> new_sizes(et_tensor.dim());
   for (size_t i = 0; i < et_tensor.dim(); i++) {
-    if (in_tensor.sizes()[i] != et_tensor.sizes()[i]) {
+    if (in_tensor->sizes()[i] != et_tensor.sizes()[i]) {
       should_resize = true;
     }
     new_sizes.at(i) = et_tensor.sizes()[i];
@@ -356,9 +356,9 @@
   }
 
   ET_CHECK_MSG(
-      in_tensor.numel() == et_tensor.numel(),
+      in_tensor->numel() == et_tensor.numel(),
       "Vulkan tensor numel %zu does not match ET tensor numel %zu",
-      static_cast<size_t>(in_tensor.numel()),
+      static_cast<size_t>(in_tensor->numel()),
       static_cast<size_t>(et_tensor.numel()));
 
   return should_resize;
@@ -369,12 +369,12 @@ void maybe_resize_output(
     const size_t output_i,
     exec_aten::Tensor& et_tensor) {
   ValueRef out_tensor_ref = graph->outputs()[output_i].value;
-  vTensor& out_tensor = graph->get_val(out_tensor_ref).toTensor();
+  vTensorPtr out_tensor = graph->get_tensor(out_tensor_ref);
 
   exec_aten::SizesType new_output_size[kTensorDimensionLimit];
-  size_t ndim = out_tensor.sizes().size();
+  size_t ndim = out_tensor->sizes().size();
   for (int i = 0; i < ndim; ++i) {
-    new_output_size[i] = out_tensor.sizes()[i];
+    new_output_size[i] = out_tensor->sizes()[i];
   }
 
   exec_aten::ArrayRef<exec_aten::SizesType> output_size{new_output_size, ndim};
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index 1834b602131..4f17b08f5ec 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -17,6 +17,39 @@
 
 namespace vkcompute {
 
+//
+// VTensorPtr
+//
+
+#define VALUE_PTR_CLASS_IMPL(classname, ctype, type_name)                 \
+  classname::classname(ComputeGraph* const graph, const ValueRef idx)     \
+      : graph_(graph), ptr_(&(graph_->values_.at(idx).to##type_name())) { \
+    graph_->values_in_use_++;                                             \
+  }                                                                       \
+  ctype* classname::operator->() const {                                  \
+    return ptr_;                                                          \
+  }                                                                       \
+  ctype& classname::operator*() const {                                   \
+    return *ptr_;                                                         \
+  }                                                                       \
+  classname::~classname() {                                               \
+    graph_->values_in_use_--;                                             \
+  }
+
+VALUE_PTR_CLASS_IMPL(vTensorPtr, vTensor, Tensor)
+VALUE_PTR_CLASS_IMPL(TensorRefPtr, TensorRef, TensorRef)
+VALUE_PTR_CLASS_IMPL(StagingPtr, api::StorageBuffer, Staging)
+VALUE_PTR_CLASS_IMPL(IntListPtr, std::vector<int64_t>, IntList)
+VALUE_PTR_CLASS_IMPL(DoubleListPtr, std::vector<double>, DoubleList)
+VALUE_PTR_CLASS_IMPL(BoolListPtr, std::vector<bool>, BoolList)
+VALUE_PTR_CLASS_IMPL(ValueListPtr, std::vector<ValueRef>, ValueList)
+
+#undef VALUE_PTR_CLASS_IMPL
+
+//
+// ComputeGraph
+//
+
 ComputeGraph::ComputeGraph(GraphConfig config)
     : config_{config},
       prepack_descriptor_counts_{},
@@ -105,6 +138,35 @@ api::GPUMemoryLayout ComputeGraph::suggested_memory_layout(
   return api::kChannelsPacked;
 }
 
+void ComputeGraph::check_no_active_value_ptrs() {
+  VK_CHECK_COND(
+      values_in_use_ == 0,
+      "Make sure that there are no pointers stored from the return values of "
+      "`ComputeGraph::get_*()` functions in scope before adding Values to the "
+      "graph. Modifying the graph's values may cause existing pointers to be "
+      "invalidated.");
+}
+
+std::vector<int64_t> ComputeGraph::get_sizes_of(ValueRef idx) {
+  Value& val = values_.at(idx);
+  if (val.isTensor()) {
+    return val.toTensor().sizes();
+  } else if (val.isTensorRef()) {
+    return val.toTensorRef().sizes;
+  }
+  VK_THROW("Could not get sizes of value with type ", val.type());
+}
+
+api::ScalarType ComputeGraph::get_dtype_of(ValueRef idx) {
+  Value& val = values_.at(idx);
+  if (val.isTensor()) {
+    return val.toTensor().dtype();
+  } else if (val.isTensorRef()) {
+    return val.toTensorRef().dtype;
+  }
+  VK_THROW("Could not get dtype of value with type ", val.type());
+}
+
 ValueRef ComputeGraph::add_tensor(
     const std::vector<int64_t>& sizes,
     const api::ScalarType dtype,
@@ -114,6 +176,7 @@ ValueRef ComputeGraph::add_tensor(
   bool allocate_memory = shared_object_idx < 0;
 
   ValueRef idx(static_cast<int>(values_.size()));
+  check_no_active_value_ptrs();
   values_.emplace_back(vTensor(
       context(), sizes, dtype, storage_type, memory_layout, allocate_memory));
 
@@ -133,18 +196,17 @@ ValueRef ComputeGraph::add_tensor(
 }
 
 ValueRef ComputeGraph::add_tensor_like(
-    const ValueRef vref,
+    const ValueRef idx,
     const api::StorageType storage_type,
     const api::GPUMemoryLayout memory_layout) {
-  TensorRef& tref = get_val(vref).toTensorRef();
-  return add_tensor(tref.sizes, tref.dtype, storage_type, memory_layout);
+  return add_tensor(
+      get_sizes_of(idx), get_dtype_of(idx), storage_type, memory_layout);
 }
 
 ValueRef ComputeGraph::add_tensor_like(
-    const ValueRef vref,
+    const ValueRef idx,
     const api::GPUMemoryLayout memory_layout) {
-  TensorRef& tref = get_val(vref).toTensorRef();
-  return add_tensor(tref.sizes, tref.dtype, memory_layout);
+  return add_tensor(get_sizes_of(idx), get_dtype_of(idx), memory_layout);
 }
 
 ValueRef ComputeGraph::add_tensor(
@@ -160,6 +222,7 @@ ValueRef ComputeGraph::add_tensorref(
     const api::ScalarType dtype,
     const void* const data) {
   ValueRef idx(static_cast<int>(values_.size()));
+  check_no_active_value_ptrs();
   values_.emplace_back(TensorRef(sizes, dtype, data));
   return idx;
 }
@@ -168,24 +231,28 @@ ValueRef ComputeGraph::add_staging(
     const api::ScalarType dtype,
     const size_t numel) {
   ValueRef idx(static_cast<int>(values_.size()));
+  check_no_active_value_ptrs();
   values_.emplace_back(api::StorageBuffer(context(), dtype, numel));
   return idx;
 }
 
 ValueRef ComputeGraph::add_none() {
   ValueRef idx(static_cast<int>(values_.size()));
+  check_no_active_value_ptrs();
   values_.emplace_back();
   return idx;
 }
 
 ValueRef ComputeGraph::add_value_list(std::vector<ValueRef>&& value) {
   ValueRef idx(static_cast<int>(values_.size()));
+  check_no_active_value_ptrs();
   values_.emplace_back(std::move(value));
   return idx;
 }
 
 ValueRef ComputeGraph::add_string(std::string&& str) {
   ValueRef idx(static_cast<int>(values_.size()));
+  check_no_active_value_ptrs();
   values_.emplace_back(std::move(str));
   return idx;
 }
@@ -194,8 +261,9 @@ ValueRef ComputeGraph::set_input_tensor(
     const ValueRef idx,
     const bool use_staging) {
   if (use_staging) {
-    vTensor& tensor = get_val(idx).toTensor();
-    ValueRef staging_idx = add_staging(tensor.dtype(), tensor.gpu_numel());
+    api::ScalarType dtype = get_tensor(idx)->dtype();
+    size_t gpu_numel = get_tensor(idx)->gpu_numel();
+    ValueRef staging_idx = add_staging(dtype, gpu_numel);
     add_staging_to_tensor_node(*this, staging_idx, idx);
     inputs_.push_back({idx, staging_idx});
     return staging_idx;
@@ -208,8 +276,9 @@ ValueRef ComputeGraph::set_output_tensor(
     const ValueRef idx,
     const bool use_staging) {
   if (use_staging) {
-    vTensor& tensor = get_val(idx).toTensor();
-    ValueRef staging_idx = add_staging(tensor.dtype(), tensor.gpu_numel());
+    api::ScalarType dtype = get_tensor(idx)->dtype();
+    size_t gpu_numel = get_tensor(idx)->gpu_numel();
+    ValueRef staging_idx = add_staging(dtype, gpu_numel);
     add_tensor_to_staging_node(*this, idx, staging_idx);
     outputs_.push_back({idx, staging_idx});
     return staging_idx;
@@ -229,20 +298,18 @@ void ComputeGraph::copy_into_staging(
     const ValueRef idx,
     const void* data,
     const size_t numel) {
-  Value& in_val = get_val(idx);
-  api::StorageBuffer& staging = in_val.toStaging();
-  size_t nbytes = numel * api::element_size(staging.dtype());
-  copy_ptr_to_staging(data, staging, nbytes);
+  StagingPtr staging = get_staging(idx);
+  size_t nbytes = numel * api::element_size(staging->dtype());
+  copy_ptr_to_staging(data, *staging, nbytes);
 }
 
 void ComputeGraph::copy_from_staging(
     const ValueRef idx,
     void* data,
     const size_t numel) {
-  Value& out_val = get_val(idx);
-  api::StorageBuffer& staging = out_val.toStaging();
-  size_t nbytes = numel * api::element_size(staging.dtype());
-  copy_staging_to_ptr(staging, data, nbytes);
+  StagingPtr staging = get_staging(idx);
+  size_t nbytes = numel * api::element_size(staging->dtype());
+  copy_staging_to_ptr(*staging, data, nbytes);
 }
 
 void ComputeGraph::prepare() {
@@ -308,7 +375,7 @@ void ComputeGraph::resize_input(
     const int64_t idx,
     const std::vector<int64_t>& new_sizes) {
   IOValueRef io_val = inputs_.at(idx);
-  get_val(io_val.value).toTensor().virtual_resize(new_sizes);
+  get_tensor(io_val.value)->virtual_resize(new_sizes);
 }
 
 void ComputeGraph::propagate_resize() {
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index 28bb3ecf123..2e39ed1bdfc 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -36,6 +36,38 @@ struct is_valid_scalar_type<double> : std::true_type {};
 template <>
 struct is_valid_scalar_type<bool> : std::true_type {};
 
+//
+// Guarded Pointer Classes
+//
+
+class ComputeGraph;
+
+#define DECL_VALUE_PTR_CLASS(classname, ctype)                         \
+  class classname final {                                              \
+    ComputeGraph* const graph_;                                        \
+    ctype* ptr_;                                                       \
+                                                                       \
+   public:                                                             \
+    explicit classname(ComputeGraph* const graph, const ValueRef idx); \
+    ctype* operator->() const;                                         \
+    ctype& operator*() const;                                          \
+    ~classname();                                                      \
+  };
+
+DECL_VALUE_PTR_CLASS(vTensorPtr, vTensor)
+DECL_VALUE_PTR_CLASS(TensorRefPtr, TensorRef)
+DECL_VALUE_PTR_CLASS(StagingPtr, api::StorageBuffer)
+DECL_VALUE_PTR_CLASS(IntListPtr, std::vector<int64_t>)
+DECL_VALUE_PTR_CLASS(DoubleListPtr, std::vector<double>)
+DECL_VALUE_PTR_CLASS(BoolListPtr, std::vector<bool>)
+DECL_VALUE_PTR_CLASS(ValueListPtr, std::vector<ValueRef>)
+
+#undef DECL_VALUE_PTR_CLASS
+
+//
+// ComputeGraph
+//
+
 /*
  * This is the core data structure used to execute Vulkan models in graph mode.
  * As opposed to ATen/eager mode where a command buffer is encoded every
@@ -68,6 +100,9 @@ class ComputeGraph final {
   std::vector<IOValueRef> inputs_;
   std::vector<IOValueRef> outputs_;
 
+ protected:
+  size_t values_in_use_ = 0;
+
  public:
   //
   // Accessors
   //
@@ -89,34 +124,64 @@ class ComputeGraph final {
       const api::ShaderInfo& shader_info,
       bool execute);
 
-  /*
-   * Returns the value at a particular index in the graph. If storing this
-   * function's return value in a lvalue reference, it is imperative that no
-   * values are added to the graph while the reference is in scope, otherwise
-   * the underlying value may have been moved as part of a vector resize.
-   */
-  inline Value& get_val(ValueRef idx) {
-    return values_.at(idx);
+#define GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(ptr_type, short_name, type_name) \
+  inline ptr_type get_##short_name(const ValueRef idx) {                   \
+    return ptr_type(this, idx);                                            \
+  }                                                                        \
+  inline bool val_is_##short_name(const ValueRef idx) {                    \
+    return values_.at(idx).is##type_name();                                \
   }
 
-  inline const std::vector<int64_t>& get_val_sizes(ValueRef idx) {
-    Value& val = get_val(idx);
-    if (val.isTensor()) {
-      return val.toTensor().sizes();
-    } else if (val.isTensorRef()) {
-      return val.toTensorRef().sizes;
-    }
-    VK_THROW("Could not get sizes of value with type ", val.type());
+  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(vTensorPtr, tensor, Tensor)
+  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(TensorRefPtr, tref, TensorRef)
+  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(StagingPtr, staging, Staging)
+  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(IntListPtr, int_list, IntList)
+  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(DoubleListPtr, double_list, DoubleList)
+  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(BoolListPtr, bool_list, BoolList)
+  GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS(ValueListPtr, value_list, ValueList)
+
+#undef GET_AND_CHECK_VAL_AS_PTR_TYPE_FNS
+
+#define GET_AND_CHECK_VAL_AS_TYPE_FNS(ctype, short_name, type_name) \
+  inline ctype get_##short_name(const ValueRef idx) {               \
+    return values_.at(idx).to##type_name();                         \
+  }                                                                 \
+  inline bool val_is_##short_name(const ValueRef idx) {             \
+    return values_.at(idx).is##type_name();                         \
+  }
+
+  GET_AND_CHECK_VAL_AS_TYPE_FNS(int64_t, int, Int)
+  GET_AND_CHECK_VAL_AS_TYPE_FNS(double, double, Double)
+  GET_AND_CHECK_VAL_AS_TYPE_FNS(bool, bool, Bool)
+  GET_AND_CHECK_VAL_AS_TYPE_FNS(std::string, string, String)
+
+#undef GET_AND_CHECK_VAL_AS_TYPE_FNS
+
+  inline bool val_is_none(const ValueRef idx) {
+    return values_.at(idx).isNone();
   }
 
-  inline api::ScalarType get_val_dtype(ValueRef idx) {
-    Value& val = get_val(idx);
-    if (val.isTensor()) {
-      return val.toTensor().dtype();
-    } else if (val.isTensorRef()) {
-      return val.toTensorRef().dtype;
+  inline TypeTag get_val_type(const ValueRef idx) {
+    return values_.at(idx).type();
+  }
+
+  std::vector<int64_t> get_sizes_of(ValueRef idx);
+
+  api::ScalarType get_dtype_of(ValueRef idx);
+
+  template <typename T>
+  T extract_scalar(const ValueRef idx) {
+    Value& value = values_.at(idx);
+    if (value.isInt()) {
+      return static_cast<T>(value.toInt());
+    }
+    if (value.isDouble()) {
+      return static_cast<T>(value.toDouble());
     }
-    VK_THROW("Could not get dtype of value with type ", val.type());
+    if (value.isBool()) {
+      return static_cast<T>(value.toBool());
+    }
+    VK_THROW("Cannot extract scalar from Value with type ", value.type());
   }
 
   inline std::vector<std::unique_ptr<PrepackNode>>& prepack_nodes() {
@@ -156,13 +221,17 @@ class ComputeGraph final {
    * Returns the memory layout of a Tensor value at the specified index.
    */
   inline api::GPUMemoryLayout memory_layout_of(ValueRef idx) {
-    return get_val(idx).toTensor().gpu_memory_layout();
+    return get_tensor(idx)->gpu_memory_layout();
  }
 
   //
   // Graph Building
   //
 
+ private:
+  void check_no_active_value_ptrs();
+
+ public:
   /*
    * Add a `vTensor` value to the graph with the specified properties. There are
    * various convenience overloads of this function that may be used instead.
@@ -318,12 +387,25 @@
   //
 
   void print_readable();
+
+  //
+  // Friend classes
+  //
+
+  friend class vTensorPtr;
+  friend class TensorRefPtr;
+  friend class StagingPtr;
+  friend class IntListPtr;
+  friend class DoubleListPtr;
+  friend class BoolListPtr;
+  friend class ValueListPtr;
 };
 
 template <typename T>
 inline typename std::enable_if<is_valid_scalar_type<T>::value, ValueRef>::type
 ComputeGraph::add_scalar(T value) {
   ValueRef idx(static_cast<int>(values_.size()));
+  check_no_active_value_ptrs();
   values_.emplace_back(value);
   return idx;
 }
@@ -332,6 +414,7 @@ template <typename T>
 inline typename std::enable_if<is_valid_scalar_type<T>::value, ValueRef>::type
 ComputeGraph::add_scalar_list(std::vector<T>&& value) {
   ValueRef idx(static_cast<int>(values_.size()));
+  check_no_active_value_ptrs();
   values_.emplace_back(std::move(value));
   return idx;
 }
diff --git a/backends/vulkan/runtime/graph/Logging.cpp b/backends/vulkan/runtime/graph/Logging.cpp
index b5994d4a21c..00d7837503a 100644
--- a/backends/vulkan/runtime/graph/Logging.cpp
+++ b/backends/vulkan/runtime/graph/Logging.cpp
@@ -86,7 +86,7 @@ void ComputeGraph::print_readable() {
       ss << v_tensor.sizes();
       std::cout << ss.str();
     } else if (val.isTensorRef()) {
-      const TensorRef& tensor_ref = val.toTensorRef();
+      const TensorRef tensor_ref = val.toTensorRef();
       std::stringstream ss;
       ss << tensor_ref.sizes;
       std::cout << ss.str();
diff --git a/backends/vulkan/runtime/graph/containers/Constant.h b/backends/vulkan/runtime/graph/containers/Constant.h
index 3d85d60ce0e..a2ce20bad85 100644
--- a/backends/vulkan/runtime/graph/containers/Constant.h
+++ b/backends/vulkan/runtime/graph/containers/Constant.h
@@ -28,12 +28,6 @@ struct TensorRef final {
       const std::vector<int64_t>& t_sizes,
       api::ScalarType t_dtype,
       const void* const t_data);
-
-  TensorRef(const TensorRef&) = default;
-  TensorRef& operator=(const TensorRef&) = default;
-
-  TensorRef(TensorRef&&) = default;
-  TensorRef& operator=(TensorRef&&) = default;
 };
 
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/graph/containers/SharedObject.cpp b/backends/vulkan/runtime/graph/containers/SharedObject.cpp
index ad6ea54f9d9..cbc526700c3 100644
--- a/backends/vulkan/runtime/graph/containers/SharedObject.cpp
+++ b/backends/vulkan/runtime/graph/containers/SharedObject.cpp
@@ -13,13 +13,13 @@
 namespace vkcompute {
 
 void SharedObject::add_user(ComputeGraph* const graph, const ValueRef idx) {
-  vTensor& t = graph->get_val(idx).toTensor();
+  vTensorPtr t = graph->get_tensor(idx);
 
   //
   // Aggregate Memory Requirements
   //
 
-  const VkMemoryRequirements mem_reqs = t.get_memory_requirements();
+  const VkMemoryRequirements mem_reqs = t->get_memory_requirements();
   aggregate_memory_requirements.size =
       std::max(mem_reqs.size, aggregate_memory_requirements.size);
   aggregate_memory_requirements.alignment =
@@ -30,7 +30,7 @@ void SharedObject::add_user(ComputeGraph* const graph, const ValueRef idx) {
   // Aggregate Allocation Create Info
   //
 
-  const VmaAllocationCreateInfo create_info = t.get_allocation_create_info();
+  const VmaAllocationCreateInfo create_info = t->get_allocation_create_info();
   // Clear out CREATE_STRATEGY bit flags in case of conflict
   VmaAllocationCreateFlags clear_mask = ~VMA_ALLOCATION_CREATE_STRATEGY_MASK;
   VmaAllocationCreateFlags create_flags = create_info.flags & clear_mask;
@@ -62,7 +62,7 @@ void SharedObject::bind_users(ComputeGraph* const graph) {
     return;
   }
   for (const ValueRef idx : users) {
-    graph->get_val(idx).toTensor().bind_allocation(allocation);
+    graph->get_tensor(idx)->bind_allocation(allocation);
  }
 }
 
diff --git
a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp index db8d9f38e57..74c593e2caa 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp @@ -18,8 +18,9 @@ namespace vkcompute { api::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) { std::string noop_shader_name("no_op"); - add_ndim_suffix(noop_shader_name, graph.get_val(packed).toTensor()); - add_dtype_suffix(noop_shader_name, graph.get_val(packed).toTensor()); + vTensorPtr t_packed = graph.get_tensor(packed); + add_ndim_suffix(noop_shader_name, *t_packed); + add_dtype_suffix(noop_shader_name, *t_packed); return VK_KERNEL_FROM_STR(noop_shader_name); } @@ -43,30 +44,30 @@ PrepackNode::PrepackNode( } api::StorageBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) { - vTensor& packed = graph->get_val(packed_).toTensor(); + vTensorPtr packed = graph->get_tensor(packed_); // If no TensorRef is provided, create a staging buffer of zeros according to // the vTensor metadata. - if (graph->get_val(tref_).isNone()) { - size_t numel = api::utils::multiply_integers(packed.sizes()); - api::StorageBuffer staging(graph->context(), packed.dtype(), numel); - size_t nbytes = numel * api::element_size(packed.dtype()); + if (graph->val_is_none(tref_)) { + size_t numel = api::utils::multiply_integers(packed->sizes()); + api::StorageBuffer staging(graph->context(), packed->dtype(), numel); + size_t nbytes = numel * api::element_size(packed->dtype()); set_staging_zeros(staging, nbytes); return staging; } - TensorRef& tref = graph->get_val(tref_).toTensorRef(); - size_t numel = api::utils::multiply_integers(tref.sizes); - api::StorageBuffer staging(graph->context(), tref.dtype, numel); - size_t nbytes = numel * api::element_size(tref.dtype); - copy_ptr_to_staging(tref.data, staging, nbytes); + TensorRefPtr tref = graph->get_tref(tref_); + size_t numel = api::utils::multiply_integers(tref->sizes); + api::StorageBuffer staging(graph->context(), tref->dtype, numel); + size_t nbytes = numel * api::element_size(tref->dtype); + copy_ptr_to_staging(tref->data, staging, nbytes); return staging; } void PrepackNode::encode(ComputeGraph* graph) { api::Context* const context = graph->context(); - vTensor& packed = graph->get_val(packed_).toTensor(); + vTensorPtr packed = graph->get_tensor(packed_); api::StorageBuffer staging = create_staging_buffer(graph); std::unique_lock cmd_lock = context->dispatch_lock(); @@ -78,7 +79,7 @@ void PrepackNode::encode(ComputeGraph* graph) { uint32_t idx = 0; bind_tensor_to_descriptor_set( - packed, + *packed, pipeline_barrier, api::MemoryAccessType::WRITE, descriptor_set, @@ -100,7 +101,7 @@ void PrepackNode::encode(ComputeGraph* graph) { context->get_descriptor_set(noop_shader_, {1, 1, 1}); bind_tensor_to_descriptor_set( - packed, + *packed, pipeline_barrier, api::MemoryAccessType::READ, descriptor_set, diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.h b/backends/vulkan/runtime/graph/ops/PrepackNode.h index d0ff8afa660..c309200efe7 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.h +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.h @@ -46,7 +46,6 @@ class PrepackNode final { const api::utils::uvec3 local_workgroup_size_; const ValueRef tref_; const ValueRef packed_; - // TODO(T180906457): allow re-computing param buffers. 
std::vector> params_; private: diff --git a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp index 8f23286ff58..2f13a26890d 100644 --- a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp @@ -32,16 +32,16 @@ void resize_binary_op_node( const std::vector& args, const std::vector& extra_args) { (void)extra_args; - vTensor& out = graph->get_val(args[0].refs[0]).toTensor(); + vTensorPtr out = graph->get_tensor(args[0].refs[0]); // TODO(T183442143): Verify tensors are broadcastable. - vTensor& self = graph->get_val(args[1].refs[0]).toTensor(); - vTensor& other = graph->get_val(args[1].refs[1]).toTensor(); + vTensorPtr self = graph->get_tensor(args[1].refs[0]); + vTensorPtr other = graph->get_tensor(args[1].refs[1]); std::vector new_out_sizes = - calculate_broadcasted_output_size(self, other); + calculate_broadcasted_output_size(*self, *other); - out.virtual_resize(new_out_sizes); + out->virtual_resize(new_out_sizes); } void add_binary_op_node( @@ -55,31 +55,30 @@ void add_binary_op_node( ValueRef arg2 = prepack_if_tensor_ref(graph, in2, graph.memory_layout_of(arg1)); - vTensor& t_in1 = graph.get_val(arg1).toTensor(); - vTensor& t_in2 = graph.get_val(arg2).toTensor(); - - vTensor& t_out = graph.get_val(out).toTensor(); + vTensorPtr t_in1 = graph.get_tensor(arg1); + vTensorPtr t_in2 = graph.get_tensor(arg2); + vTensorPtr t_out = graph.get_tensor(out); - check_binary_op_args(t_in1, t_in2, t_out); + check_binary_op_args(*t_in1, *t_in2, *t_out); - api::utils::uvec3 global_size = t_out.virtual_extents(); + api::utils::uvec3 global_size = t_out->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); float alpha_val = 1.0f; // String is checked since floor_div passes in an unused string argument in // place of alpha - if (is_valid(alpha) && !graph.get_val(alpha).isString()) { - alpha_val = extract_scalar(graph.get_val(alpha)); + if (is_valid(alpha) && !graph.val_is_string(alpha)) { + alpha_val = graph.extract_scalar(alpha); } const api::utils::ivec2 broadcast_params = - create_broadcast_params(t_in1, t_in2); + create_broadcast_params(*t_in1, *t_in2); std::string kernel_name("binary_"); kernel_name.reserve(kShaderNameReserve); kernel_name += op_name; - add_memory_layout_suffix(kernel_name, t_out); - add_dtype_suffix(kernel_name, t_out); + add_memory_layout_suffix(kernel_name, *t_out); + add_dtype_suffix(kernel_name, *t_out); graph.execute_nodes().emplace_back(new ExecuteNode( graph, @@ -90,9 +89,9 @@ void add_binary_op_node( {{out, api::MemoryAccessType::WRITE}, {{arg1, arg2}, api::MemoryAccessType::READ}}, // Shader params buffers - {t_out.gpu_sizes_ubo(), - t_in1.gpu_sizes_ubo(), - t_in2.gpu_sizes_ubo(), + {t_out->gpu_sizes_ubo(), + t_in1->gpu_sizes_ubo(), + t_in2->gpu_sizes_ubo(), graph.create_params_buffer(broadcast_params), graph.create_params_buffer(alpha_val)}, // Resizing diff --git a/backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp b/backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp index 409d6b9f8cc..af979a72cb0 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Conv2d.cpp @@ -17,31 +17,35 @@ #include +#include + namespace vkcompute { void resize_conv2d_node( ComputeGraph* graph, const std::vector& args, const std::vector& extra_args) { - vTensor& out = graph->get_val(args[0].refs[0]).toTensor(); - vTensor& self = graph->get_val(args[1].refs[0]).toTensor(); + vTensorPtr out = 
graph->get_tensor(args[0].refs[0]); + vTensorPtr self = graph->get_tensor(args[1].refs[0]); - size_t ndim = self.sizes().size(); + size_t ndim = self->sizes().size(); std::vector new_out_sizes(ndim); - const bool transposed = graph->get_val(extra_args[4]).toBool(); + const bool transposed = graph->get_bool(extra_args[4]); // Batch, Channel if (ndim == 4) { - new_out_sizes.at(ndim - 4) = self.sizes().at(ndim - 4); + new_out_sizes.at(ndim - 4) = self->sizes().at(ndim - 4); } - const auto& weight_sizes = graph->get_val(extra_args[0]).toTensorRef().sizes; + + TensorRefPtr weight_ref = graph->get_tref(extra_args[0]); + const auto& weight_sizes = weight_ref->sizes; new_out_sizes.at(ndim - 3) = transposed ? weight_sizes.at(ndim - 3) : weight_sizes.at(ndim - 4); // Height, Width const auto& new_out_sizes_hw = calc_out_sizes_hw( *graph, - self.sizes(), + self->sizes(), extra_args[0], /*kernel_size_only = */ false, {extra_args[1], extra_args[2], extra_args[3], extra_args[5]}, @@ -49,7 +53,7 @@ void resize_conv2d_node( new_out_sizes.at(ndim - 2) = new_out_sizes_hw.at(0); new_out_sizes.at(ndim - 1) = new_out_sizes_hw.at(1); - out.virtual_resize(new_out_sizes); + out->virtual_resize(new_out_sizes); } ValueRef prepack_biases( @@ -57,16 +61,19 @@ ValueRef prepack_biases( const ValueRef vref, const ValueRef weight, const bool transposed) { - TensorRef& tref = graph.get_val(weight).toTensorRef(); - const int64_t out_channels = transposed ? tref.sizes.at(1) : tref.sizes.at(0); + auto sizes = graph.get_sizes_of(weight); + const int64_t out_channels = transposed ? sizes.at(1) : sizes.at(0); ValueRef v = graph.add_tensor( - {out_channels}, tref.dtype, api::kTexture2D, api::kWidthPacked); - vTensor& t = graph.get_val(v).toTensor(); + {out_channels}, + graph.get_dtype_of(weight), + api::kTexture2D, + api::kWidthPacked); + vTensorPtr t = graph.get_tensor(v); - api::ShaderInfo shader = get_nchw_to_image_shader(t); + api::ShaderInfo shader = get_nchw_to_image_shader(*t); - api::utils::uvec3 global_size = t.extents(); + api::utils::uvec3 global_size = t->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); graph.prepack_nodes().emplace_back(new PrepackNode( @@ -76,7 +83,7 @@ ValueRef prepack_biases( local_size, vref, v, - {t.gpu_sizes_ubo(), t.cpu_sizes_ubo()})); + {t->gpu_sizes_ubo(), t->cpu_sizes_ubo()})); return v; } @@ -100,7 +107,7 @@ api::ShaderInfo get_conv2d_shader( case Conv2dMethod::Depthwise: kernel_name = "conv2d_dw"; if (!prepack_weights) { - const auto& weight_sizes = graph.get_val(weight).toTensorRef().sizes; + const auto& weight_sizes = graph.get_tref(weight)->sizes; if (weight_sizes.at(2) == 3 && weight_sizes.at(3) == 3) { kernel_name += "_output_tile_3x3"; } @@ -178,21 +185,21 @@ ValueRef prepack_weights( ComputeGraph& graph, const ValueRef vref, const Conv2dMethod method) { - const auto original_sizes = graph.get_val(vref).toTensorRef().sizes; - const auto& final_sizes = get_final_sizes(original_sizes, method); + const auto original_sizes = graph.get_sizes_of(vref); + const auto final_sizes = get_final_sizes(original_sizes, method); ValueRef v = graph.add_tensor( final_sizes, - graph.get_val(vref).toTensorRef().dtype, + graph.get_dtype_of(vref), api::kTexture2D, api::kChannelsPacked); - vTensor& t = graph.get_val(v).toTensor(); + vTensorPtr t = graph.get_tensor(v); - api::utils::uvec3 global_size = t.extents(); + api::utils::uvec3 global_size = t->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); api::ShaderInfo shader = - 
get_conv2d_shader(graph, t, /*prepack_weights = */ true, method, vref); + get_conv2d_shader(graph, *t, /*prepack_weights = */ true, method, vref); const auto& padded_sizes = get_padded_sizes(original_sizes, method); @@ -203,7 +210,7 @@ ValueRef prepack_weights( local_size, vref, v, - {t.gpu_sizes_ubo(), + {t->gpu_sizes_ubo(), graph.create_params_buffer( api::utils::make_ivec4(original_sizes, /*reverse = */ true)), graph.create_params_buffer( @@ -237,7 +244,7 @@ Conv2dParams create_conv2d_params( p.kernel_size.data[1] + (p.kernel_size.data[1] - 1) * (p.dilation.data[1] - 1), }); - const auto& weight_sizes = graph.get_val(weight).toTensorRef().sizes; + const auto weight_sizes = graph.get_sizes_of(weight); const int32_t in_group_size = api::utils::safe_downcast(api::utils::align_up( transposed ? weight_sizes.at(0) : weight_sizes.at(1), INT64_C(4))); @@ -265,7 +272,7 @@ Conv2dMethod get_conv2d_method( const ValueRef weight, const int64_t groups, const bool transposed) { - const auto& weight_sizes = graph.get_val(weight).toTensorRef().sizes; + const auto weight_sizes = graph.get_sizes_of(weight); if (!transposed && weight_sizes.at(0) == groups && weight_sizes.at(1) == 1) { return Conv2dMethod::Depthwise; } @@ -293,8 +300,8 @@ void add_conv2d_node( const ValueRef output_padding, const ValueRef groups, const ValueRef out) { - const bool transposed_val = graph.get_val(transposed).toBool(); - const int64_t groups_val = graph.get_val(groups).toInt(); + const bool transposed_val = graph.get_bool(transposed); + const int64_t groups_val = graph.get_int(groups); const Conv2dMethod method = get_conv2d_method(graph, weight, groups_val, transposed_val); @@ -303,11 +310,11 @@ void add_conv2d_node( ValueRef arg_weight = prepack_weights(graph, weight, method); ValueRef arg_bias = prepack_biases(graph, bias, weight, transposed_val); - vTensor& t_in = graph.get_val(arg_in).toTensor(); - vTensor& t_out = graph.get_val(out).toTensor(); - check_conv2d_args(t_in, t_out); + vTensorPtr t_in = graph.get_tensor(arg_in); + vTensorPtr t_out = graph.get_tensor(out); + check_conv2d_args(*t_in, *t_out); - api::utils::uvec3 global_size = t_out.virtual_extents(); + api::utils::uvec3 global_size = t_out->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); KernelParams kernel_params = create_kernel_params( @@ -323,7 +330,7 @@ void add_conv2d_node( check_conv2d_params(kernel_params, transposed_val); api::ShaderInfo shader = get_conv2d_shader( - graph, t_out, /*prepack_weights = */ false, method, weight); + graph, *t_out, /*prepack_weights = */ false, method, weight); graph.execute_nodes().emplace_back(new ExecuteNode( graph, @@ -335,8 +342,8 @@ void add_conv2d_node( {{arg_in, arg_weight, arg_bias}, api::MemoryAccessType::READ}}, // Shader params buffers { - t_out.extents_ubo(), - t_in.extents_ubo(), + t_out->extents_ubo(), + t_in->extents_ubo(), graph.create_params_buffer(kernel_params), graph.create_params_buffer(extra_params), }, diff --git a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp index 32b5d613b1b..1c83bf9169a 100644 --- a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp @@ -37,22 +37,22 @@ void resize_matmul_node( const std::vector& args, const std::vector& extra_args) { (void)extra_args; - vTensor& out = graph->get_val(args[0].refs[0]).toTensor(); - vTensor& mat1 = graph->get_val(args[1].refs[0]).toTensor(); - vTensor& mat2 = graph->get_val(args[1].refs[1]).toTensor(); + 
vTensorPtr out = graph->get_tensor(args[0].refs[0]); + vTensorPtr mat1 = graph->get_tensor(args[1].refs[0]); + vTensorPtr mat2 = graph->get_tensor(args[1].refs[1]); std::vector new_out_sizes(3); - if (mat1.sizes().size() == 2) { + if (mat1->sizes().size() == 2) { new_out_sizes.resize(2); - new_out_sizes.at(0) = mat1.sizes().at(0); - new_out_sizes.at(1) = mat2.sizes().at(1); + new_out_sizes.at(0) = mat1->sizes().at(0); + new_out_sizes.at(1) = mat2->sizes().at(1); } else { - new_out_sizes.at(0) = mat1.sizes().at(0); - new_out_sizes.at(1) = mat1.sizes().at(1); - new_out_sizes.at(2) = mat2.sizes().at(2); + new_out_sizes.at(0) = mat1->sizes().at(0); + new_out_sizes.at(1) = mat1->sizes().at(1); + new_out_sizes.at(2) = mat2->sizes().at(2); } - out.virtual_resize(new_out_sizes); + out->virtual_resize(new_out_sizes); } void add_matmul_node( @@ -69,20 +69,20 @@ void add_matmul_node( ValueRef arg2 = prepack_if_tensor_ref(graph, mat2, mat2_layout); - vTensor& t_mat1 = graph.get_val(arg1).toTensor(); - vTensor& t_mat2 = graph.get_val(arg2).toTensor(); - vTensor& t_out = graph.get_val(out).toTensor(); + vTensorPtr t_mat1 = graph.get_tensor(arg1); + vTensorPtr t_mat2 = graph.get_tensor(arg2); + vTensorPtr t_out = graph.get_tensor(out); - check_matmul_args(t_mat1, t_mat2, t_out); + check_matmul_args(*t_mat1, *t_mat2, *t_out); - api::utils::uvec3 global_size = t_out.virtual_extents(); + api::utils::uvec3 global_size = t_out->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); std::string kernel_name("matmul"); kernel_name.reserve(kShaderNameReserve); - add_memory_layout_suffix(kernel_name, t_mat1); - add_memory_layout_suffix(kernel_name, t_mat2); - add_dtype_suffix(kernel_name, t_out); + add_memory_layout_suffix(kernel_name, *t_mat1); + add_memory_layout_suffix(kernel_name, *t_mat2); + add_dtype_suffix(kernel_name, *t_out); graph.execute_nodes().emplace_back(new ExecuteNode( graph, @@ -93,7 +93,7 @@ void add_matmul_node( {{out, api::MemoryAccessType::WRITE}, {{arg1, arg2}, api::MemoryAccessType::READ}}, // Shader params buffers - {t_out.extents_ubo(), t_mat1.cpu_sizes_ubo()}, + {t_out->extents_ubo(), t_mat1->cpu_sizes_ubo()}, // Resizing resize_matmul_node)); } diff --git a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp index 632d540334d..8464173d507 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp @@ -21,31 +21,31 @@ void resize_max_pool2d_node( ComputeGraph* graph, const std::vector& args, const std::vector& extra_args) { - vTensor& out = graph->get_val(args[0].refs[0]).toTensor(); - vTensor& indices = graph->get_val(args[0].refs[1]).toTensor(); - vTensor& self = graph->get_val(args[1].refs[0]).toTensor(); + vTensorPtr out = graph->get_tensor(args[0].refs[0]); + vTensorPtr indices = graph->get_tensor(args[0].refs[1]); + vTensorPtr self = graph->get_tensor(args[1].refs[0]); - size_t ndim = self.sizes().size(); + size_t ndim = self->sizes().size(); std::vector new_out_sizes(ndim); // Batch, Channel if (ndim == 4) { - new_out_sizes.at(ndim - 4) = self.sizes().at(ndim - 4); + new_out_sizes.at(ndim - 4) = self->sizes().at(ndim - 4); } - new_out_sizes.at(ndim - 3) = self.sizes().at(ndim - 3); + new_out_sizes.at(ndim - 3) = self->sizes().at(ndim - 3); // Height, Width const auto& new_out_sizes_hw = calc_out_sizes_hw( *graph, - self.sizes(), + self->sizes(), extra_args[0], /*kernel_size_only = */ true, {extra_args[1], extra_args[2], extra_args[3], extra_args[4]}); 
new_out_sizes.at(ndim - 2) = new_out_sizes_hw.at(0); new_out_sizes.at(ndim - 1) = new_out_sizes_hw.at(1); - out.virtual_resize(new_out_sizes); - indices.virtual_resize(new_out_sizes); + out->virtual_resize(new_out_sizes); + indices->virtual_resize(new_out_sizes); } void check_max_pool2d_args(const vTensor& in, const vTensor& out) { @@ -63,18 +63,18 @@ void add_max_pool2d_node( const ValueRef ceil_mode, const ValueRef out) { ValueRef arg = prepack_if_tensor_ref(graph, in); - vTensor& t_in = graph.get_val(arg).toTensor(); + vTensorPtr t_in = graph.get_tensor(arg); - const auto& out_val = graph.get_val(out).toValueList(); - vTensor& t_out = graph.get_val(out_val[0]).toTensor(); + const auto out_val = graph.get_value_list(out); + vTensorPtr t_out = graph.get_tensor(out_val->at(0)); - check_max_pool2d_args(t_in, t_out); + check_max_pool2d_args(*t_in, *t_out); - api::utils::uvec3 global_size = t_out.virtual_extents(); + api::utils::uvec3 global_size = t_out->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); std::string kernel_name("max_pool2d"); - add_dtype_suffix(kernel_name, t_out); + add_dtype_suffix(kernel_name, *t_out); KernelParams kernel_params = create_kernel_params( graph, @@ -90,12 +90,12 @@ void add_max_pool2d_node( global_size, local_size, // Inputs and Outputs - {{{out_val[0], out_val[1]}, api::MemoryAccessType::WRITE}, + {{{out_val->at(0), out_val->at(1)}, api::MemoryAccessType::WRITE}, {arg, api::MemoryAccessType::READ}}, // Shader params buffers { - t_out.extents_ubo(), - t_in.extents_ubo(), + t_out->extents_ubo(), + t_in->extents_ubo(), graph.create_params_buffer(kernel_params), }, // Resizing diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp index 7d646a27111..71e41cbf3a6 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp @@ -19,12 +19,12 @@ void add_staging_to_tensor_node( ComputeGraph& graph, const ValueRef in_staging, const ValueRef out_tensor) { - vTensor& t_out = graph.get_val(out_tensor).toTensor(); - VK_CHECK_COND(graph.get_val(in_staging).isStaging()); + vTensorPtr t_out = graph.get_tensor(out_tensor); + VK_CHECK_COND(graph.val_is_staging(in_staging)); - api::ShaderInfo shader = get_nchw_to_image_shader(t_out); + api::ShaderInfo shader = get_nchw_to_image_shader(*t_out); - api::utils::uvec3 global_size = t_out.extents(); + api::utils::uvec3 global_size = t_out->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); graph.execute_nodes().emplace_back(new ExecuteNode( @@ -34,19 +34,19 @@ void add_staging_to_tensor_node( local_size, {{out_tensor, api::MemoryAccessType::WRITE}, {in_staging, api::MemoryAccessType::READ}}, - {t_out.gpu_sizes_ubo(), t_out.cpu_sizes_ubo()})); + {t_out->gpu_sizes_ubo(), t_out->cpu_sizes_ubo()})); } void add_tensor_to_staging_node( ComputeGraph& graph, const ValueRef in_tensor, const ValueRef out_staging) { - vTensor& t_in = graph.get_val(in_tensor).toTensor(); - VK_CHECK_COND(graph.get_val(out_staging).isStaging()); + vTensorPtr t_in = graph.get_tensor(in_tensor); + VK_CHECK_COND(graph.val_is_staging(out_staging)); - api::ShaderInfo shader = get_image_to_nchw_shader(t_in); + api::ShaderInfo shader = get_image_to_nchw_shader(*t_in); - api::utils::uvec3 global_size = t_in.extents(); + api::utils::uvec3 global_size = t_in->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); graph.execute_nodes().emplace_back(new ExecuteNode( @@ 
-56,7 +56,7 @@ void add_tensor_to_staging_node( local_size, {{in_tensor, api::MemoryAccessType::READ}, {out_staging, api::MemoryAccessType::WRITE}}, - {t_in.gpu_sizes_ubo(), t_in.cpu_sizes_ubo()})); + {t_in->gpu_sizes_ubo(), t_in->cpu_sizes_ubo()})); } ValueRef prepack( @@ -64,11 +64,11 @@ ValueRef prepack( const ValueRef vref, const api::GPUMemoryLayout layout) { ValueRef v = graph.add_tensor_like(vref, layout); - vTensor& t = graph.get_val(v).toTensor(); + vTensorPtr t = graph.get_tensor(v); - api::ShaderInfo shader = get_nchw_to_image_shader(t); + api::ShaderInfo shader = get_nchw_to_image_shader(*t); - api::utils::uvec3 global_size = t.extents(); + api::utils::uvec3 global_size = t->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); graph.prepack_nodes().emplace_back(new PrepackNode( @@ -78,7 +78,7 @@ ValueRef prepack( local_size, vref, v, - {t.gpu_sizes_ubo(), t.cpu_sizes_ubo()})); + {t->gpu_sizes_ubo(), t->cpu_sizes_ubo()})); return v; } @@ -87,7 +87,7 @@ ValueRef prepack_if_tensor_ref( ComputeGraph& graph, const ValueRef v, const api::GPUMemoryLayout layout) { - if (graph.get_val(v).isTensorRef()) { + if (graph.val_is_tref(v)) { return prepack(graph, v, layout); } else { return v; @@ -95,9 +95,9 @@ ValueRef prepack_if_tensor_ref( } ValueRef prepack_if_tensor_ref(ComputeGraph& graph, const ValueRef v) { - if (graph.get_val(v).isTensorRef()) { + if (graph.val_is_tref(v)) { api::GPUMemoryLayout layout = - graph.suggested_memory_layout(graph.get_val(v).toTensorRef().sizes); + graph.suggested_memory_layout(graph.get_tref(v)->sizes); return prepack(graph, v, layout); } else { return v; diff --git a/backends/vulkan/runtime/graph/ops/impl/Sum.cpp b/backends/vulkan/runtime/graph/ops/impl/Sum.cpp index 8407eacadfb..0d0c74e1145 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Sum.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Sum.cpp @@ -34,15 +34,15 @@ void resize_sum_node( const std::vector& args, const std::vector& extra_args) { (void)args; - vTensor& out = graph->get_val(extra_args[0]).toTensor(); - vTensor& in = graph->get_val(extra_args[1]).toTensor(); + vTensorPtr out = graph->get_tensor(extra_args[0]); + vTensorPtr in = graph->get_tensor(extra_args[1]); const auto dim = extra_args[2]; const auto keepdim = extra_args[3]; - std::vector output_size = calc_out_sizes(in, dim, keepdim); + std::vector output_size = calc_out_sizes(*in, dim, keepdim); - out.virtual_resize(output_size); + out->virtual_resize(output_size); } void check_sum_args(const vTensor& in, const vTensor& out) { @@ -58,17 +58,17 @@ void add_sum_dim_node( const ValueRef out) { ValueRef arg = prepack_if_tensor_ref(graph, in); - vTensor& t_out = graph.get_val(out).toTensor(); - vTensor& t_input = graph.get_val(in).toTensor(); + vTensorPtr t_out = graph.get_tensor(out); + vTensorPtr t_input = graph.get_tensor(in); - check_sum_args(t_input, t_out); + check_sum_args(*t_input, *t_out); - int64_t in_dim = t_input.sizes().size(); + int64_t in_dim = t_input->sizes().size(); int32_t channel = - in_dim > 2 ? static_cast(t_input.sizes()[in_dim - 3]) : 1; - uint32_t dim_size = t_input.sizes()[dim]; + in_dim > 2 ? 
static_cast(t_input->sizes()[in_dim - 3]) : 1; + uint32_t dim_size = t_input->sizes()[dim]; - api::utils::uvec3 global_size = t_out.virtual_extents(); + api::utils::uvec3 global_size = t_out->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); std::string kernel_name("sum_dim"); @@ -77,7 +77,7 @@ void add_sum_dim_node( kernel_name += "_keepdim"; } - add_dtype_suffix(kernel_name, t_out); + add_dtype_suffix(kernel_name, *t_out); graph.execute_nodes().emplace_back(new ExecuteNode( graph, @@ -87,7 +87,7 @@ void add_sum_dim_node( // Inputs and Outputs {{out, api::MemoryAccessType::WRITE}, {arg, api::MemoryAccessType::READ}}, // Shader params buffers - {t_out.extents_ubo(), + {t_out->extents_ubo(), graph.create_params_buffer(dim + 4 - in_dim), graph.create_params_buffer(dim_size), graph.create_params_buffer(int(ceil(channel / 4.0)))}, @@ -102,8 +102,8 @@ ValueRef add_node( const int dim, const bool keepdim, const api::ScalarType dtype = api::kFloat) { - vTensor& v_input = graph.get_val(input).toTensor(); - std::vector output_size = calc_out_sizes(v_input, dim, keepdim); + std::vector output_size = + calc_out_sizes(*(graph.get_tensor(input)), dim, keepdim); return graph.add_tensor(output_size, dtype, api::kChannelsPacked); } @@ -113,12 +113,11 @@ void add_sum_dim_IntList( const ValueRef opt_dim, const ValueRef keepdim, const ValueRef out) { - bool keepdim_val = graph.get_val(keepdim).toBool(); - vTensor& in_tensor = graph.get_val(in).toTensor(); + bool keepdim_val = graph.get_bool(keepdim); std::set dims_set; - const auto& dims_to_sum = graph.get_val(opt_dim).toIntList(); - int64_t in_dim = in_tensor.sizes().size(); + const auto dims_to_sum = *graph.get_int_list(opt_dim); + int64_t in_dim = graph.get_tensor(in)->sizes().size(); if (dims_to_sum.empty()) { // If dim is not specified, reduce over all dims diff --git a/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp index 2ea14a41237..5a46b82ba38 100644 --- a/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp @@ -25,10 +25,10 @@ void resize_unary_op_node( const std::vector& args, const std::vector& extra_args) { (void)extra_args; - vTensor& out = graph->get_val(args[0].refs[0]).toTensor(); - vTensor& self = graph->get_val(args[1].refs[0]).toTensor(); + vTensorPtr out = graph->get_tensor(args[0].refs[0]); + vTensorPtr self = graph->get_tensor(args[1].refs[0]); - out.virtual_resize(self.sizes()); + out->virtual_resize(self->sizes()); } void add_unary_op_node( @@ -40,12 +40,12 @@ void add_unary_op_node( const std::string& op_name) { ValueRef arg = prepack_if_tensor_ref(graph, in); - vTensor& t_out = graph.get_val(out).toTensor(); - api::utils::uvec3 global_size = t_out.virtual_extents(); + vTensorPtr t_out = graph.get_tensor(out); + api::utils::uvec3 global_size = t_out->extents(); api::utils::uvec3 local_size = adaptive_work_group_size(global_size); std::string kernel_name(op_name); - add_dtype_suffix(kernel_name, t_out); + add_dtype_suffix(kernel_name, *t_out); graph.execute_nodes().emplace_back(new ExecuteNode( graph, @@ -55,7 +55,7 @@ void add_unary_op_node( // Inputs and Outputs {{out, api::MemoryAccessType::WRITE}, {arg, api::MemoryAccessType::READ}}, // Shader params buffers - {t_out.extents_ubo(), + {t_out->extents_ubo(), graph.create_params_buffer(min), graph.create_params_buffer(max)}, // Resizing @@ -63,8 +63,8 @@ void add_unary_op_node( } float get_val_or_inf(ComputeGraph& graph, const ValueRef& val, 
bool max) { - if (!graph.get_val(val).isNone()) { - return extract_scalar(graph.get_val(val)); + if (!graph.val_is_none(val)) { + return graph.extract_scalar(val); } return max ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.cpp b/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.cpp index f1f3bfc6828..d342c4521f6 100644 --- a/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.cpp @@ -11,8 +11,7 @@ namespace vkcompute { api::utils::ivec2 make_ivec2_from_list(ComputeGraph& graph, ValueRef vref) { - return api::utils::make_ivec2( - graph.get_val(vref).toIntList(), /*reverse = */ true); + return api::utils::make_ivec2(*graph.get_int_list(vref), /*reverse = */ true); } api::utils::ivec2 make_ivec2_kernel_size( @@ -22,7 +21,7 @@ api::utils::ivec2 make_ivec2_kernel_size( if (kernel_size_only) { return make_ivec2_from_list(graph, weight); } else { - const auto weight_sizes = graph.get_val(weight).toTensorRef().sizes; + const auto weight_sizes = graph.get_tref(weight)->sizes; return api::utils::make_ivec2({weight_sizes.at(3), weight_sizes.at(2)}); } } @@ -151,8 +150,9 @@ std::vector calc_out_sizes_hw( return calc_transpose_out_sizes_hw( in_sizes, kernel_size, stride, padding, dilation, output_padding); } else { - Value& vref = graph.get_val(args[3]); - const bool ceil_mode = vref.isBool() ? vref.toBool() : false; + const bool ceil_mode = + graph.val_is_bool(args[3]) ? graph.get_bool(args[3]) : false; + return calc_out_sizes_hw( in_sizes, kernel_size, stride, padding, dilation, ceil_mode); } diff --git a/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp index fc80b987604..9fec07faa13 100644 --- a/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp @@ -36,18 +36,18 @@ uint32_t bind_values_to_descriptor_set( uint32_t idx = base_idx; for (auto& arg : args) { for (auto& ref : arg.refs) { - Value& val = graph->get_val(ref); - if (val.isTensor()) { + if (graph->val_is_tensor(ref)) { bind_tensor_to_descriptor_set( - val.toTensor(), + *(graph->get_tensor(ref)), pipeline_barrier, arg.access, descriptor_set, idx++); - } else if (val.isStaging()) { - bind_staging_to_descriptor_set(val.toStaging(), descriptor_set, idx++); + } else if (graph->val_is_staging(ref)) { + bind_staging_to_descriptor_set( + *(graph->get_staging(ref)), descriptor_set, idx++); } else { - VK_THROW("Unsupported type: ", val.type()); + VK_THROW("Unsupported type: ", graph->get_val_type(ref)); } } } diff --git a/backends/vulkan/test/op_tests/utils/codegen.py b/backends/vulkan/test/op_tests/utils/codegen.py index 121d527cb46..301f1056c1e 100644 --- a/backends/vulkan/test/op_tests/utils/codegen.py +++ b/backends/vulkan/test/op_tests/utils/codegen.py @@ -248,8 +248,8 @@ def virtual_resize(self, ref: ValueRefList) -> str: assert ref.src_cpp_type == AT_TENSOR and ref.is_in if self.prepack_ref(ref): return "" - ret_str = f"{self.graph}{self.dot}get_val({ref.name}.value).toTensor()" - ret_str += f".virtual_resize({ref.src_cpp_name}.sizes().vec());\n" + ret_str = f"{self.graph}{self.dot}get_tensor({ref.name}.value)" + ret_str += f"->virtual_resize({ref.src_cpp_name}.sizes().vec());\n" return ret_str def copy_into_staging(self, ref: ValueRefList) -> str: diff --git a/backends/vulkan/test/utils/test_utils.cpp 
b/backends/vulkan/test/utils/test_utils.cpp index bab054b3d8d..7bd732a5a0c 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -162,7 +162,7 @@ void fill_vtensor( const IOValueRef idx, float val, bool iota) { - std::vector data(graph.get_val(idx.value).toTensor().gpu_numel()); + std::vector data(graph.get_tensor(idx.value)->gpu_numel()); if (iota) { std::iota(data.begin(), data.end(), val); } else { @@ -231,13 +231,13 @@ void execute_graph_and_check_output( for (size_t i = 0; i < graph.outputs().size(); ++i) { IOValueRef out_ioval = graph.outputs().at(i); - vTensor& t_out = graph.get_val(out_ioval.value).toTensor(); + vTensorPtr t_out = graph.get_tensor(out_ioval.value); - std::vector output_data(t_out.gpu_numel()); + std::vector output_data(t_out->gpu_numel()); graph.copy_from_staging( out_ioval.staging, output_data.data(), output_data.size()); - for (size_t j = 0; j < t_out.numel(); ++j) { + for (size_t j = 0; j < t_out->numel(); ++j) { CHECK_VALUE(output_data, j, expected_outputs.at(i)); } } diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index bfbf9d68c29..bf9580d1bfc 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -377,9 +377,8 @@ TEST_F(VulkanComputeAPITest, texture_virtual_resize) { // Compute Graph Tests // -#define EXTRACT_TENSOR(name) \ - std::vector data_##name( \ - graph.get_val(name.value).toTensor().gpu_numel()); \ +#define EXTRACT_TENSOR(name) \ + std::vector data_##name(graph.get_tensor(name.value)->gpu_numel()); \ graph.copy_from_staging(name.staging, data_##name.data(), data_##name.size()); TEST(VulkanComputeGraphTest, test_values_scalars) { @@ -389,10 +388,10 @@ TEST(VulkanComputeGraphTest, test_values_scalars) { ValueRef idx; idx = graph.add_scalar(4); - EXPECT_TRUE(graph.get_val(idx).toInt() == 4); + EXPECT_TRUE(graph.get_int(idx) == 4); idx = graph.add_scalar(5.5f); - EXPECT_TRUE(graph.get_val(idx).toDouble() == 5.5f); + EXPECT_TRUE(graph.get_double(idx) == 5.5f); } TEST(VulkanComputeGraphTest, test_values_scalar_list_inplace_constructed) { @@ -400,10 +399,10 @@ TEST(VulkanComputeGraphTest, test_values_scalar_list_inplace_constructed) { ComputeGraph graph(config); ValueRef idx = graph.add_scalar_list({1, 2, 3, 4}); - const auto& arr = graph.get_val(idx).toIntList(); - EXPECT_TRUE(arr.size() == 4); + const auto arr = graph.get_int_list(idx); + EXPECT_TRUE(arr->size() == 4); for (int i = 0; i < 4; i++) { - EXPECT_TRUE(arr[i] == i + 1); + EXPECT_TRUE(arr->at(i) == i + 1); } } @@ -416,10 +415,10 @@ TEST(VulkanComputeGraphTest, test_values_scalar_list_outside_constructed) { std::vector data = {5.0, 4.0, 3.0, 2.0, 1.0}; idx = graph.add_scalar_list(std::move(data)); } - const auto& arr = graph.get_val(idx).toDoubleList(); - EXPECT_TRUE(arr.size() == 5); + const auto& arr = graph.get_double_list(idx); + EXPECT_TRUE(arr->size() == 5); for (int i = 0; i < 5; i++) { - EXPECT_TRUE(arr[i] == (5 - i)); + EXPECT_TRUE(arr->at(i) == (5 - i)); } } @@ -432,7 +431,7 @@ TEST(VulkanComputeGraphTest, test_values_string) { std::string data = "hello, world"; idx = graph.add_string(std::move(data)); } - std::string& stored = graph.get_val(idx).toString(); + std::string stored = graph.get_string(idx); EXPECT_TRUE(stored == "hello, world"); } @@ -475,7 +474,7 @@ TEST(VulkanComputeGraphTest, test_simple_graph) { EXTRACT_TENSOR(out); // Sanity check that the values are correct - for (size_t i = 0; i < 
graph.get_val(out.value).toTensor().numel(); ++i) { + for (size_t i = 0; i < graph.get_tensor(out.value)->numel(); ++i) { CHECK_VALUE(data_out, i, val_c); } } @@ -533,7 +532,7 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) { EXTRACT_TENSOR(out); // Sanity check that the values are correct - for (size_t i = 0; i < graph.get_val(out.value).toTensor().numel(); ++i) { + for (size_t i = 0; i < graph.get_tensor(out.value)->numel(); ++i) { CHECK_VALUE(data_out, i, val_out); } } @@ -608,11 +607,11 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { {8, 44, 34}, {4, 13, 56}, {8, 12, 64}, {12, 55, 33}, {4, 54, 10}}; for (auto& new_sizes : new_sizes_list) { - graph.get_val(a.value).toTensor().virtual_resize(new_sizes); - graph.get_val(b.value).toTensor().virtual_resize(new_sizes); - graph.get_val(c).toTensor().virtual_resize(new_sizes); - graph.get_val(d.value).toTensor().virtual_resize(new_sizes); - graph.get_val(e).toTensor().virtual_resize(new_sizes); + graph.get_tensor(a.value)->virtual_resize(new_sizes); + graph.get_tensor(b.value)->virtual_resize(new_sizes); + graph.get_tensor(c)->virtual_resize(new_sizes); + graph.get_tensor(d.value)->virtual_resize(new_sizes); + graph.get_tensor(e)->virtual_resize(new_sizes); float val_a = new_sizes[1] + 4.0f; float val_b = new_sizes[2] + 1.5f; @@ -629,7 +628,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { EXTRACT_TENSOR(out); // Sanity check that the values are correct - for (size_t i = 0; i < graph.get_val(out.value).toTensor().numel(); i++) { + for (size_t i = 0; i < graph.get_tensor(out.value)->numel(); i++) { CHECK_VALUE(data_out, i, val_out); } } @@ -644,7 +643,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { graph.propagate_resize(); // Check output shape - EXPECT_TRUE(graph.get_val(out.value).toTensor().sizes() == new_sizes); + EXPECT_TRUE(graph.get_tensor(out.value)->sizes() == new_sizes); float val_a = new_sizes[1] + 6.0f; float val_b = new_sizes[2] + 2.5f; @@ -661,7 +660,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { EXTRACT_TENSOR(out); // Sanity check that the values are correct - for (size_t i = 0; i < graph.get_val(out.value).toTensor().numel(); i++) { + for (size_t i = 0; i < graph.get_tensor(out.value)->numel(); i++) { CHECK_VALUE(data_out, i, val_out); } } @@ -716,7 +715,7 @@ TEST(VulkanComputeGraphTest, test_large_graph) { EXTRACT_TENSOR(out); - for (int i = 0; i < graph.get_val(out.value).toTensor().numel(); i++) { + for (int i = 0; i < graph.get_tensor(out.value)->numel(); i++) { CHECK_VALUE(data_out, i, val_e); } } @@ -1130,19 +1129,19 @@ void test_max_pool2d( fill_vtensor(graph, graph.inputs().at(0), base_val, /*iota = */ true); - vTensor& t_in = graph.get_val(in_ioval.value).toTensor(); - std::vector input_data(t_in.gpu_numel()); + vTensorPtr t_in = graph.get_tensor(in_ioval.value); + std::vector input_data(t_in->gpu_numel()); graph.copy_from_staging( in_ioval.staging, input_data.data(), input_data.size()); graph.execute(); - vTensor& t_out = graph.get_val(out_ioval.value).toTensor(); - std::vector output_data(t_out.gpu_numel()); + vTensorPtr t_out = graph.get_tensor(out_ioval.value); + std::vector output_data(t_out->gpu_numel()); graph.copy_from_staging( out_ioval.staging, output_data.data(), output_data.size()); - vTensor& t_idx = graph.get_val(idx_ioval.value).toTensor(); - std::vector index_data(t_idx.gpu_numel()); + vTensorPtr t_idx = graph.get_tensor(idx_ioval.value); + std::vector 
index_data(t_idx->gpu_numel()); graph.copy_from_staging( idx_ioval.staging, index_data.data(), index_data.size()); @@ -1150,9 +1149,9 @@ void test_max_pool2d( int h_offset = kernel_copy[0] - 1; int w_offset = kernel_copy[1] - 1; - int h_out = api::utils::val_at(-2, t_out.sizes()); - int w_out = api::utils::val_at(-1, t_out.sizes()); - int w_in = api::utils::val_at(-1, t_in.sizes()); + int h_out = api::utils::val_at(-2, t_out->sizes()); + int w_out = api::utils::val_at(-1, t_out->sizes()); + int w_in = api::utils::val_at(-1, t_in->sizes()); for (size_t i = 0; i < h_out; ++i) { for (size_t j = 0; j < w_out; ++j) { size_t idx_out = i * w_out + j;