From 61a2f321090084c8c0bae36f4ff839633d934fca Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 28 May 2025 15:09:31 -0700 Subject: [PATCH 1/2] split out some commits [ghstack-poisoned] --- .lintrunner.toml | 2 + kernels/portable/cpu/util/targets.bzl | 10 ++ kernels/portable/cpu/util/test/CMakeLists.txt | 16 +- kernels/portable/cpu/util/test/targets.bzl | 11 ++ .../cpu/util/test/vectorized_math_test.cpp | 95 +++++++++++ kernels/portable/cpu/util/vectorized_math.h | 148 ++++++++++++++++++ .../core/portable_type/c10/c10/targets.bzl | 6 +- test/utils/OSSTestConfig.json | 12 -- 8 files changed, 277 insertions(+), 23 deletions(-) create mode 100644 kernels/portable/cpu/util/test/vectorized_math_test.cpp create mode 100644 kernels/portable/cpu/util/vectorized_math.h diff --git a/.lintrunner.toml b/.lintrunner.toml index 5e7b4ff0951..4a7f8515791 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -271,6 +271,8 @@ exclude_patterns = [ 'examples/**', 'exir/verification/bindings.cpp', 'extension/**', + # Uses properly-gated (ET_USE_PYTORCH_HEADERS) ATen include. + 'kernels/portable/cpu/util/vectorized_math.h', 'kernels/optimized/**', 'runtime/core/exec_aten/**', # Want to be able to keep c10 in sync with PyTorch core. 
diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl index 560e0472881..0e1e1f2e3a9 100644 --- a/kernels/portable/cpu/util/targets.bzl +++ b/kernels/portable/cpu/util/targets.bzl @@ -307,6 +307,16 @@ def define_common_targets(): ], ) + runtime.cxx_library( + name = "vectorized_math", + exported_headers = ["vectorized_math.h"], + visibility = ["//executorch/..."], + exported_deps = [ + "//executorch/runtime/core/portable_type:portable_type", + "//executorch/runtime/core/exec_aten/util:scalar_type_util", + ], + ) + # Utility functions that can be used by operators that perform reduction for aten_mode in get_aten_mode_options(): suffix = "_aten" if aten_mode else "" diff --git a/kernels/portable/cpu/util/test/CMakeLists.txt b/kernels/portable/cpu/util/test/CMakeLists.txt index d95b3a81b5c..41bfea54020 100644 --- a/kernels/portable/cpu/util/test/CMakeLists.txt +++ b/kernels/portable/cpu/util/test/CMakeLists.txt @@ -4,26 +4,22 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -# @generated by test/utils/generate_gtest_cmakelists.py -# -# This file should be formatted with -# ~~~ -# cmake-format -i CMakeLists.txt -# ~~~ -# It should also be cmake-lint clean. -# - cmake_minimum_required(VERSION 3.19) set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..) 
include(${EXECUTORCH_ROOT}/tools/cmake/Test.cmake) +include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) set(_test_srcs broadcast_indexes_range_test.cpp broadcast_test.cpp - reduce_test.cpp + reduce_test.cpp vectorized_math_test.cpp ) et_cxx_test( kernels_portable_cpu_util_test SOURCES ${_test_srcs} EXTRA_LIBS portable_kernels portable_ops_lib ) + +find_package_torch_headers() +target_include_directories(kernels_portable_cpu_util_test PRIVATE ${TORCH_INCLUDE_DIRS}) +target_compile_definitions(kernels_portable_cpu_util_test PRIVATE ET_USE_PYTORCH_HEADERS) diff --git a/kernels/portable/cpu/util/test/targets.bzl b/kernels/portable/cpu/util/test/targets.bzl index 178eb25a79b..4b167c6e946 100644 --- a/kernels/portable/cpu/util/test/targets.bzl +++ b/kernels/portable/cpu/util/test/targets.bzl @@ -32,3 +32,14 @@ def define_common_targets(): "//executorch/kernels/portable/cpu/util:reduce_util", ], ) + + # this test requires ET_USE_PYTORCH_HEADERS, which doesn't work in OSS Buck. + if not runtime.is_oss: + runtime.cxx_test( + name = "vectorized_math_test", + srcs = ["vectorized_math_test.cpp"], + deps = [ + "//executorch/kernels/portable/cpu/util:vectorized_math", + "//executorch/runtime/core/portable_type/c10/c10:c10", + ], + ) diff --git a/kernels/portable/cpu/util/test/vectorized_math_test.cpp b/kernels/portable/cpu/util/test/vectorized_math_test.cpp new file mode 100644 index 00000000000..95ce327c53c --- /dev/null +++ b/kernels/portable/cpu/util/test/vectorized_math_test.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include + +#include + +#include + +#ifndef ET_USE_PYTORCH_HEADERS +#error "This test requires ET_USE_PYTORCH_HEADERS!" 
+#endif // ET_USE_PYTORCH_HEADERS + +TEST(VectorizedMathTest, BasicUnary) { + __at_align__ float result_floats[at::vec::Vectorized::size()] = {0}; + const auto x_vec = at::vec::Vectorized::arange(0, 1); + const auto result_vec = executorch::math::exp(x_vec); + result_vec.store(result_floats); + for (const auto ii : c10::irange(at::vec::Vectorized::size())) { + EXPECT_FLOAT_EQ(result_floats[ii], std::exp(ii)); + } +} + +namespace { +template +void test_unary_t_to_float() { + __at_align__ float result_floats[at::vec::Vectorized::size()] = {0}; + const auto x_vec = at::vec::Vectorized::arange(0, 1); + const auto result_vec = executorch::math::exp(x_vec); + static_assert(decltype(result_vec)::size() >= at::vec::Vectorized::size()); + result_vec.store(result_floats, at::vec::Vectorized::size()); + for (const auto ii : c10::irange(at::vec::Vectorized::size())) { + EXPECT_EQ(result_floats[ii], std::expf(ii)) << ii; + } +} + +} // namespace + +TEST(VectorizedMathTest, UnaryInt16ToFloat) { + test_unary_t_to_float(); +} + +TEST(VectorizedMathTest, UnaryInt32ToFloat) { + test_unary_t_to_float(); +} + +TEST(VectorizedMathTest, UnaryInt64ToFloat) { + test_unary_t_to_float(); +} + +TEST(VectorizedMathTest, BasicBinary) { + __at_align__ float result_floats[at::vec::Vectorized::size()] = {0}; + const auto x_vec = at::vec::Vectorized::arange(0, 1); + const auto y_vec = at::vec::Vectorized(2); + const auto result_vec = executorch::math::pow(x_vec, y_vec); + result_vec.store(result_floats); + for (const auto ii : c10::irange(at::vec::Vectorized::size())) { + EXPECT_FLOAT_EQ(result_floats[ii], std::powf(ii, 2)); + } +} + +namespace { +template +void test_binary_t_to_float() { + __at_align__ float result_floats[at::vec::Vectorized::size()] = {0}; + const auto x_vec = at::vec::Vectorized::arange(0, 1); + const auto y_vec = at::vec::Vectorized(2); + const auto result_vec = executorch::math::pow(x_vec, y_vec); + static_assert(decltype(result_vec)::size() >= at::vec::Vectorized::size()); + 
result_vec.store(result_floats, at::vec::Vectorized::size()); + for (const auto ii : c10::irange(at::vec::Vectorized::size())) { + EXPECT_EQ(result_floats[ii], std::powf(ii, 2)) << ii; + } +} + +TEST(VectorizedMathTest, BinaryInt16ToFloat) { + test_binary_t_to_float(); +} + +TEST(VectorizedMathTest, BinaryInt32ToFloat) { + test_binary_t_to_float(); +} + +TEST(VectorizedMathTest, BinaryInt64ToFloat) { + test_binary_t_to_float(); +} + +} // namespace diff --git a/kernels/portable/cpu/util/vectorized_math.h b/kernels/portable/cpu/util/vectorized_math.h new file mode 100644 index 00000000000..9e706ace56d --- /dev/null +++ b/kernels/portable/cpu/util/vectorized_math.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +#ifdef ET_USE_PYTORCH_HEADERS +#include +#endif // ET_USE_PYTORCH_HEADERS + +#include +#include + +#ifdef ET_USE_PYTORCH_HEADERS +namespace executorch { +inline namespace math { +namespace internal { +template +auto convert_to_vectorized_n_of_float(at::vec::Vectorized vec) { + static constexpr auto float_vec_size = at::vec::Vectorized::size(); + static constexpr auto t_vec_size = at::vec::Vectorized::size(); + static constexpr auto result_size = + t_vec_size < float_vec_size ? 1 : t_vec_size / float_vec_size; + static_assert(result_size >= 1); + return at::vec::convert( + at::vec::VectorizedN(vec)); +} +} // namespace internal +} // namespace math +} // namespace executorch +#endif // ET_USE_PYTORCH_HEADERS + +#define _ET_INTERNAL_STD_MATH_FUNC(name) \ + namespace executorch { \ + inline namespace math { \ + using std::name; \ + } \ + } // namespace executorch + +#ifdef ET_USE_PYTORCH_HEADERS +/** + * Internal-usage macro for making a vectorized variant of a unary + * function available in the executorch::math namespace.
+ */ +#define ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(func_name) \ + namespace executorch { \ + inline namespace math { \ + template \ + auto func_name(at::vec::Vectorized vec) { \ + if constexpr (!::executorch::runtime::is_floating_point::value) { \ + return internal::convert_to_vectorized_n_of_float(vec).func_name(); \ + } else { \ + return vec.func_name(); \ + } \ + } \ + } \ + } + +#define ET_INTERNAL_VECTORIZED_FLOAT_BINARY_FUNC(func_name) \ + namespace executorch { \ + inline namespace math { \ + template \ + auto func_name(at::vec::Vectorized vec0, at::vec::Vectorized vec1) { \ + if constexpr (!::executorch::runtime::is_floating_point::value) { \ + const auto vec_float0 = \ + internal::convert_to_vectorized_n_of_float(vec0); \ + const auto vec_float1 = \ + internal::convert_to_vectorized_n_of_float(vec1); \ + return vec_float0.func_name(vec_float1); \ + } else { \ + return vec0.func_name(vec1); \ + } \ + } \ + } \ + } + +/** + * Internal-usage macro for making a C++ standard library + * floating-point function and a vectorized variant of it available in + * the executorch::math namespace. Should be used with functions where the + * corresponding operator is a "float op" in TensorIterator parlance + * (i.e., uses something like build_borrowing_binary_float_op()), + * because it converts non-floating-point arguments to floating point.
+ */ +#define ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(func_name) \ + _ET_INTERNAL_STD_MATH_FUNC(func_name) \ + ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(func_name) + +#define ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(func_name) \ + _ET_INTERNAL_STD_MATH_FUNC(func_name) \ + ET_INTERNAL_VECTORIZED_FLOAT_BINARY_FUNC(func_name) + +#else // ET_USE_PYTORCH_HEADERS +#define ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(name) \ + _ET_INTERNAL_STD_MATH_FUNC(name) +#define ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(name) \ + _ET_INTERNAL_STD_MATH_FUNC(name) +#endif // ET_USE_PYTORCH_HEADERS + +// To simplify client code, we provide coverage for a bunch of float ops (the +// same ones listed in ATen vml.h) here. +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(abs) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(acos) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(asin) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(atan) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(ceil) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(cos) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(cosh) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(erf) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(erfc) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(exp) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(expm1) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(floor) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log10) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log1p) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(log2) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(sin) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(sinh) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(sqrt) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(round) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(tan) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(tanh) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(trunc) +ET_INTERNAL_VECTORIZED_STD_FLOAT_UNARY_FUNC(lgamma) + +#ifdef ET_USE_PYTORCH_HEADERS
+ET_INTERNAL_VECTORIZED_FLOAT_UNARY_FUNC(rsqrt) +#endif // ET_USE_PYTORCH_HEADERS + +namespace executorch { +inline namespace math { +template >> +T rsqrt(T x) { + return T(1) / std::sqrt(x); +} +} // namespace math +} // namespace executorch + +ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(atan2) +ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(fmod) +ET_INTERNAL_VECTORIZED_STD_FLOAT_BINARY_FUNC(pow) diff --git a/runtime/core/portable_type/c10/c10/targets.bzl b/runtime/core/portable_type/c10/c10/targets.bzl index 4088110246d..70883ef6faf 100644 --- a/runtime/core/portable_type/c10/c10/targets.bzl +++ b/runtime/core/portable_type/c10/c10/targets.bzl @@ -53,7 +53,11 @@ def define_common_targets(): runtime.cxx_library( name = "aten_headers_for_executorch", srcs = [], - visibility = ["//executorch/kernels/optimized/...", "@EXECUTORCH_CLIENTS"], + visibility = [ + "//executorch/kernels/optimized/...", + "//executorch/kernels/portable/cpu/util/...", + "@EXECUTORCH_CLIENTS", + ], exported_deps = select({ "DEFAULT": [], "ovr_config//cpu:arm64": [ diff --git a/test/utils/OSSTestConfig.json b/test/utils/OSSTestConfig.json index 2cfc4b8a995..182d0bfd58a 100644 --- a/test/utils/OSSTestConfig.json +++ b/test/utils/OSSTestConfig.json @@ -68,18 +68,6 @@ "extension_threadpool" ] }, - { - "directory": "kernels/portable/cpu/util/test", - "sources": [ - "broadcast_indexes_range_test.cpp", - "broadcast_test.cpp", - "reduce_test.cpp" - ], - "additional_libs": [ - "portable_kernels", - "portable_ops_lib" - ] - }, { "directory": "runtime/core/portable_type/test", "sources": [ From 78e1abbe536d0661378de40809e07465acaed562 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 28 May 2025 15:48:01 -0700 Subject: [PATCH 2/2] fix visibility [ghstack-poisoned] --- runtime/core/portable_type/targets.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/core/portable_type/targets.bzl b/runtime/core/portable_type/targets.bzl index 41bc6050524..5b6e67fa213 100644 ---
a/runtime/core/portable_type/targets.bzl +++ b/runtime/core/portable_type/targets.bzl @@ -26,6 +26,7 @@ def define_common_targets(): visibility = [ "//executorch/backends/...", "//executorch/extension/fb/dynamic_shim/...", + "//executorch/kernels/portable/cpu/...", "//executorch/runtime/core/exec_aten/...", "//executorch/runtime/core/portable_type/test/...", ],