From 81667522d93f44c751f715740efaa5bce37e380e Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 12 Mar 2025 10:06:30 -0700 Subject: [PATCH 1/3] Update [ghstack-poisoned] --- .ci/scripts/unittest-buck2.sh | 4 +--- kernels/optimized/lib_defs.bzl | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.ci/scripts/unittest-buck2.sh b/.ci/scripts/unittest-buck2.sh index a2eb1e94e45..d5168cce0b2 100755 --- a/.ci/scripts/unittest-buck2.sh +++ b/.ci/scripts/unittest-buck2.sh @@ -17,10 +17,8 @@ buck2 query "//backends/apple/... + //backends/example/... + \ //kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \ //kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..." -UNBUILDABLE_OPTIMIZED_OPS_REGEX="gelu|fft_r2c|log_softmax" -BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX) # TODO: expand the covered scope of Buck targets. # //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH. # //runtime/test/... requires Python torch, which we don't have in our OSS buck setup. -buck2 test $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... //runtime/backend/... //runtime/core/... \ +buck2 test //kernels/portable/... //runtime/backend/... //runtime/core/... \ //runtime/executor: //runtime/kernel/... //runtime/platform/... diff --git a/kernels/optimized/lib_defs.bzl b/kernels/optimized/lib_defs.bzl index 6e884457e35..dd246f38984 100644 --- a/kernels/optimized/lib_defs.bzl +++ b/kernels/optimized/lib_defs.bzl @@ -234,7 +234,6 @@ def define_libs(is_fbcode=False): exported_deps = [ "//executorch/kernels/optimized:libutils", "//executorch/runtime/core/exec_aten:lib", - "//executorch/runtime/kernel:thread_parallel_interface", ], **get_apple_framework_deps_kwargs(is_fbcode), ) From 46e40a260f095fc181099b616d3c18f1ecab4690 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 12 Mar 2025 10:06:35 -0700 Subject: [PATCH 2/3] Update [ghstack-poisoned] --- runtime/kernel/targets.bzl | 1 - runtime/kernel/thread_parallel_interface.h | 11 ----------- 2 files changed, 12 deletions(-) diff --git a/runtime/kernel/targets.bzl b/runtime/kernel/targets.bzl index 5c95f10276d..e67f76728b8 100644 --- a/runtime/kernel/targets.bzl +++ b/runtime/kernel/targets.bzl @@ -56,7 +56,6 @@ def define_common_targets(): exported_headers = ["thread_parallel_interface.h"], exported_deps = [ "//executorch/runtime/core:core", - "//executorch/runtime/core/portable_type/c10/c10:c10", "//executorch/runtime/platform:platform", ], visibility = [ diff --git a/runtime/kernel/thread_parallel_interface.h b/runtime/kernel/thread_parallel_interface.h index 52100475c7b..1e79acc75a4 100644 --- a/runtime/kernel/thread_parallel_interface.h +++ b/runtime/kernel/thread_parallel_interface.h @@ -11,7 +11,6 @@ #include #include -#include #include #include @@ -30,17 +29,7 @@ inline bool parallel_for_no_threadpool( begin, end); ET_CHECK_OR_RETURN_FALSE(grain_size > 0, "grain_size = %" PRId64, grain_size); -#ifndef NDEBUG - // Go backwards through the range elementwise to catch code that - // assumes parallel_for is in order like a regular for loop. - for (const auto i : c10::irange(begin, end)) { - const auto offset = i - begin; - const auto idx = end - offset - 1; - f(idx, idx + 1); - } -#else // NDEBUG f(begin, end); -#endif return true; } From b7c5ad0e7d720c18c91d2fb2c07295ac4e0e7841 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 12 Mar 2025 10:06:40 -0700 Subject: [PATCH 3/3] Update [ghstack-poisoned] --- .lintrunner.toml | 2 - CMakeLists.txt | 1 + Test.cmake | 1 + build/cmake_deps.toml | 22 ++++- build/executorch-config.cmake | 8 +- extension/llm/custom_ops/op_sdpa.cpp | 2 +- extension/llm/custom_ops/targets.bzl | 1 + extension/parallel/CMakeLists.txt | 25 +++++ extension/parallel/TARGETS | 8 ++ extension/parallel/targets.bzl | 26 ++++++ .../test/CMakeLists.txt | 20 +++- extension/parallel/test/TARGETS | 8 ++ extension/parallel/test/targets.bzl | 19 ++++ .../test/thread_parallel_test.cpp | 46 +++------- .../thread_parallel.cpp | 2 +- extension/parallel/thread_parallel.h | 47 +++++++++- extension/threadpool/CMakeLists.txt | 7 +- extension/threadpool/targets.bzl | 3 - extension/threadpool/test/targets.bzl | 12 --- kernels/optimized/CMakeLists.txt | 2 +- kernels/optimized/blas/BlasKernel.h | 2 +- kernels/optimized/lib_defs.bzl | 6 +- runtime/kernel/targets.bzl | 13 --- runtime/kernel/thread_parallel_interface.h | 92 ------------------- test/utils/OSSTestConfig.json | 10 -- 25 files changed, 194 insertions(+), 191 deletions(-) create mode 100644 extension/parallel/CMakeLists.txt create mode 100644 extension/parallel/TARGETS create mode 100644 extension/parallel/targets.bzl rename extension/{threadpool => parallel}/test/CMakeLists.txt (53%) create mode 100644 extension/parallel/test/TARGETS create mode 100644 extension/parallel/test/targets.bzl rename extension/{threadpool => parallel}/test/thread_parallel_test.cpp (77%) rename extension/{threadpool => parallel}/thread_parallel.cpp (97%) delete mode 100644 runtime/kernel/thread_parallel_interface.h diff --git a/.lintrunner.toml b/.lintrunner.toml index 1a27228d266..7667ac430d1 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -218,8 +218,6 @@ exclude_patterns = [ 'examples/**', 'extension/**', 'kernels/optimized/**', - # Justified include. - 'runtime/kernel/thread_parallel_interface.h', 'scripts/**', 'third-party/**', 'util/**', diff --git a/CMakeLists.txt b/CMakeLists.txt index fabf667cbe1..73b89b6171e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -751,6 +751,7 @@ if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO ) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/parallel) endif() if(EXECUTORCH_BUILD_PYBIND) diff --git a/Test.cmake b/Test.cmake index 6bd7a86e70b..d4b5f6aa1db 100644 --- a/Test.cmake +++ b/Test.cmake @@ -13,6 +13,7 @@ if(BUILD_TESTING) add_subdirectory(extension/evalue_util/test) add_subdirectory(extension/kernel_util/test) add_subdirectory(extension/memory_allocator/test) + add_subdirectory(extension/parallel/test) add_subdirectory(extension/pytree/test) add_subdirectory(kernels/portable/cpu/util/test) add_subdirectory(kernels/prim_ops/test) diff --git a/build/cmake_deps.toml b/build/cmake_deps.toml index bbc9eec3a0e..b1ed81b6a7e 100644 --- a/build/cmake_deps.toml +++ b/build/cmake_deps.toml @@ -88,6 +88,7 @@ excludes = [ deps = [ "executorch", "executorch_core", + "extension_parallel", "extension_threadpool", "portable_kernels", ] @@ -130,7 +131,7 @@ excludes = [ deps = [ "executorch_core", "executorch", - "extension_threadpool", + "extension_parallel", ] [targets.optimized_native_cpu_ops] @@ -145,6 +146,7 @@ excludes = [ deps = [ "executorch_core", "executorch", + "extension_parallel", "extension_threadpool", "portable_kernels", ] @@ -225,6 +227,19 @@ deps = [ "extension_runner_util", ] +[targets.extension_parallel] +buck_targets = [ + "//extension/parallel:thread_parallel", +] +filters = [ + ".cpp$", +] +deps = [ + "executorch", + "executorch_core", + "extension_threadpool", +] + [targets.extension_tensor] buck_targets = [ "//extension/tensor:tensor", @@ -364,7 +379,6 @@ excludes = [ deps = [ "executorch", "executorch_core", - "extension_threadpool", "xnnpack_backend", "portable_kernels", ] @@ -379,7 +393,6 @@ filters = [ deps = [ "executorch", "executorch_core", - "extension_threadpool", ] [targets.xnnpack_schema] @@ -414,6 +427,7 @@ deps = [ "executorch", "executorch_core", "optimized_kernels", + "extension_parallel", "extension_threadpool", "reduce_util", "xnnpack_backend", @@ -451,7 +465,7 @@ deps = [ "executorch_core", "extension_data_loader", "extension_module", - "extension_threadpool", + "extension_parallel", "portable_kernels", "quantized_kernels", "xnnpack_backend", diff --git a/build/executorch-config.cmake b/build/executorch-config.cmake index 931d31de8ef..9d429490d58 100644 --- a/build/executorch-config.cmake +++ b/build/executorch-config.cmake @@ -75,6 +75,7 @@ set(lib_list custom_ops extension_module extension_module_static + extension_parallel extension_runner_util extension_tensor extension_threadpool @@ -130,9 +131,14 @@ endforeach() # TODO: investigate use of install(EXPORT) to cleanly handle # target_compile_options/target_compile_definitions for everything. +if(TARGET extension_parallel) + set_target_properties( + extension_parallel PROPERTIES INTERFACE_LINK_LIBRARIES extension_threadpool + ) +endif() if(TARGET cpublas) set_target_properties( - cpublas PROPERTIES INTERFACE_LINK_LIBRARIES extension_threadpool + cpublas PROPERTIES INTERFACE_LINK_LIBRARIES extension_parallel ) endif() if(TARGET extension_threadpool) diff --git a/extension/llm/custom_ops/op_sdpa.cpp b/extension/llm/custom_ops/op_sdpa.cpp index 371fcf38a24..f0a7775e803 100644 --- a/extension/llm/custom_ops/op_sdpa.cpp +++ b/extension/llm/custom_ops/op_sdpa.cpp @@ -19,8 +19,8 @@ #include #ifdef ET_USE_THREADPOOL +#include #include -#include #endif #include diff --git a/extension/llm/custom_ops/targets.bzl b/extension/llm/custom_ops/targets.bzl index 1c4686fe3d0..e3e8b30520f 100644 --- a/extension/llm/custom_ops/targets.bzl +++ b/extension/llm/custom_ops/targets.bzl @@ -37,6 +37,7 @@ def define_common_targets(): "//executorch/kernels/optimized:libblas{}".format(mkl_dep), "//executorch/kernels/optimized:libvec", "//executorch/extension/kernel_util:kernel_util", + "//executorch/extension/parallel:thread_parallel", "//executorch/extension/threadpool:threadpool", ], deps = [ diff --git a/extension/parallel/CMakeLists.txt b/extension/parallel/CMakeLists.txt new file mode 100644 index 00000000000..7f727aafe46 --- /dev/null +++ b/extension/parallel/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Please keep this file formatted by running: +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ + +if(NOT (EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)) + message(FATAL_ERROR "extension/parallel requires extension/threadpool") +endif() + +add_library(extension_parallel thread_parallel.cpp) + +target_link_libraries(extension_parallel PUBLIC executorch_core extension_threadpool) +target_compile_options(extension_parallel PUBLIC ${_common_compile_options}) + +install( + TARGETS extension_parallel + DESTINATION lib + INCLUDES + DESTINATION ${_common_include_directories}) diff --git a/extension/parallel/TARGETS b/extension/parallel/TARGETS new file mode 100644 index 00000000000..2341af9282f --- /dev/null +++ b/extension/parallel/TARGETS @@ -0,0 +1,8 @@ +# Any targets that should be shared between fbcode and xplat must be defined in +# targets.bzl. This file can contain fbcode-only targets. + +load(":targets.bzl", "define_common_targets") + +oncall("executorch") + +define_common_targets() diff --git a/extension/parallel/targets.bzl b/extension/parallel/targets.bzl new file mode 100644 index 00000000000..82a8502c034 --- /dev/null +++ b/extension/parallel/targets.bzl @@ -0,0 +1,26 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") + +def define_common_targets(): + """Defines targets that should be shared between fbcode and xplat. + + The directory containing this targets.bzl file should also contain both + TARGETS and BUCK files that call this function. + """ + + runtime.cxx_library( + name = "thread_parallel", + srcs = [ + "thread_parallel.cpp", + ], + exported_headers = [ + "thread_parallel.h", + ], + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + deps = [ + "//executorch/extension/threadpool:threadpool", + "//executorch/runtime/core:core", + ], + ) diff --git a/extension/threadpool/test/CMakeLists.txt b/extension/parallel/test/CMakeLists.txt similarity index 53% rename from extension/threadpool/test/CMakeLists.txt rename to extension/parallel/test/CMakeLists.txt index 3f9b13f2ab4..ab37f66c17d 100644 --- a/extension/threadpool/test/CMakeLists.txt +++ b/extension/parallel/test/CMakeLists.txt @@ -4,8 +4,6 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -# @generated by test/utils/generate_gtest_cmakelists.py -# # This file should be formatted with # ~~~ # cmake-format -i CMakeLists.txt @@ -14,14 +12,28 @@ # cmake_minimum_required(VERSION 3.19) +project(extension_parallel_test) + +# Use C++17 for test. +set(CMAKE_CXX_STANDARD 17) set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..) include(${EXECUTORCH_ROOT}/build/Test.cmake) -set(_test_srcs thread_parallel_test.cpp threadpool_test.cpp) +set(_test_srcs thread_parallel_test.cpp ../thread_parallel.cpp) et_cxx_test( - extension_threadpool_test SOURCES ${_test_srcs} EXTRA_LIBS + extension_parallel_test + SOURCES + ${_test_srcs} + EXTRA_LIBS + pthreadpool + cpuinfo extension_threadpool ) +target_include_directories( + extension_parallel_test + PRIVATE ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include + ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include +) diff --git a/extension/parallel/test/TARGETS b/extension/parallel/test/TARGETS new file mode 100644 index 00000000000..2341af9282f --- /dev/null +++ b/extension/parallel/test/TARGETS @@ -0,0 +1,8 @@ +# Any targets that should be shared between fbcode and xplat must be defined in +# targets.bzl. This file can contain fbcode-only targets. + +load(":targets.bzl", "define_common_targets") + +oncall("executorch") + +define_common_targets() diff --git a/extension/parallel/test/targets.bzl b/extension/parallel/test/targets.bzl new file mode 100644 index 00000000000..791c0727471 --- /dev/null +++ b/extension/parallel/test/targets.bzl @@ -0,0 +1,19 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") + +def define_common_targets(): + """Defines targets that should be shared between fbcode and xplat. + + The directory containing this targets.bzl file should also contain both + TARGETS and BUCK files that call this function. + """ + + runtime.cxx_test( + name = "thread_parallel_test", + srcs = [ + "thread_parallel_test.cpp", + ], + deps = [ + "//executorch/extension/parallel:thread_parallel", + "//executorch/runtime/platform:platform", + ], + ) diff --git a/extension/threadpool/test/thread_parallel_test.cpp b/extension/parallel/test/thread_parallel_test.cpp similarity index 77% rename from extension/threadpool/test/thread_parallel_test.cpp rename to extension/parallel/test/thread_parallel_test.cpp index fd72211a789..d386429100d 100644 --- a/extension/threadpool/test/thread_parallel_test.cpp +++ b/extension/parallel/test/thread_parallel_test.cpp @@ -11,13 +11,13 @@ #include #include -#include +#include #include using namespace ::testing; using ::executorch::extension::parallel_for; -class ParallelTest : public ::testing::TestWithParam { +class ParallelTest : public ::testing::Test { protected: void SetUp() override { data_.fill(0); @@ -42,25 +42,12 @@ class ParallelTest : public ::testing::TestWithParam { } } - template - bool parallel_for( - const int64_t begin, - const int64_t end, - const int64_t grain_size, - const Func& func) { - if (GetParam()) { - return executorch::extension::parallel_for(begin, end, grain_size, func); - } - return executorch::extension::internal::parallel_for_no_threadpool( - begin, end, grain_size, func); - } - std::array data_; std::mutex mutex_; int sum_of_all_elements_; }; -TEST_P(ParallelTest, TestAllInvoked) { +TEST_F(ParallelTest, TestAllInvoked) { EXPECT_TRUE(parallel_for(0, 10, 1, [this](int64_t begin, int64_t end) { this->RunTask(begin, end); })); @@ -70,7 +57,7 @@ TEST_P(ParallelTest, TestAllInvoked) { } } -TEST_P(ParallelTest, TestAllInvokedWithMutex) { +TEST_F(ParallelTest, TestAllInvokedWithMutex) { EXPECT_TRUE(parallel_for(0, 10, 1, [this](int64_t begin, int64_t end) { this->RunExclusiveTask(begin, end); })); @@ -83,7 +70,7 @@ TEST_P(ParallelTest, TestAllInvokedWithMutex) { EXPECT_EQ(sum_of_all_elements_, expected_sum); } -TEST_P(ParallelTest, TestInvalidRange) { +TEST_F(ParallelTest, TestInvalidRange) { et_pal_init(); EXPECT_FALSE(parallel_for(10, 0, 1, [this](int64_t begin, int64_t end) { this->RunExclusiveTask(begin, end); @@ -95,7 +82,7 @@ TEST_P(ParallelTest, TestInvalidRange) { EXPECT_EQ(sum_of_all_elements_, 0); } -TEST_P(ParallelTest, TestInvalidRange2) { +TEST_F(ParallelTest, TestInvalidRange2) { et_pal_init(); EXPECT_FALSE(parallel_for(6, 5, 1, [this](int64_t begin, int64_t end) { this->RunExclusiveTask(begin, end); @@ -107,7 +94,7 @@ TEST_P(ParallelTest, TestInvalidRange2) { EXPECT_EQ(sum_of_all_elements_, 0); } -TEST_P(ParallelTest, TestInvokePartialFromBeginning) { +TEST_F(ParallelTest, TestInvokePartialFromBeginning) { EXPECT_TRUE(parallel_for(0, 5, 1, [this](int64_t begin, int64_t end) { this->RunTask(begin, end); })); @@ -120,7 +107,7 @@ TEST_P(ParallelTest, TestInvokePartialFromBeginning) { } } -TEST_P(ParallelTest, TestInvokePartialToEnd) { +TEST_F(ParallelTest, TestInvokePartialToEnd) { EXPECT_TRUE(parallel_for(5, 10, 1, [this](int64_t begin, int64_t end) { this->RunTask(begin, end); })); @@ -133,7 +120,7 @@ TEST_P(ParallelTest, TestInvokePartialToEnd) { } } -TEST_P(ParallelTest, TestInvokePartialMiddle) { +TEST_F(ParallelTest, TestInvokePartialMiddle) { EXPECT_TRUE(parallel_for(2, 8, 1, [this](int64_t begin, int64_t end) { this->RunTask(begin, end); })); @@ -149,7 +136,7 @@ TEST_P(ParallelTest, TestInvokePartialMiddle) { } } -TEST_P(ParallelTest, TestChunkSize2) { +TEST_F(ParallelTest, TestChunkSize2) { EXPECT_TRUE(parallel_for(0, 10, 2, [this](int64_t begin, int64_t end) { this->RunTask(begin, end); })); @@ -159,7 +146,7 @@ TEST_P(ParallelTest, TestChunkSize2) { } } -TEST_P(ParallelTest, TestChunkSize2Middle) { +TEST_F(ParallelTest, TestChunkSize2Middle) { EXPECT_TRUE(parallel_for(3, 8, 2, [this](int64_t begin, int64_t end) { this->RunTask(begin, end); })); @@ -175,7 +162,7 @@ TEST_P(ParallelTest, TestChunkSize2Middle) { } } -TEST_P(ParallelTest, TestChunkSize3) { +TEST_F(ParallelTest, TestChunkSize3) { EXPECT_TRUE(parallel_for(0, 10, 3, [this](int64_t begin, int64_t end) { this->RunTask(begin, end); })); @@ -185,7 +172,7 @@ TEST_P(ParallelTest, TestChunkSize3) { } } -TEST_P(ParallelTest, TestChunkSize6) { +TEST_F(ParallelTest, TestChunkSize6) { EXPECT_TRUE(parallel_for(0, 10, 6, [this](int64_t begin, int64_t end) { this->RunTask(begin, end); })); @@ -195,7 +182,7 @@ TEST_P(ParallelTest, TestChunkSize6) { } } -TEST_P(ParallelTest, TestChunkSizeTooLarge) { +TEST_F(ParallelTest, TestChunkSizeTooLarge) { EXPECT_TRUE(parallel_for(0, 10, 11, [this](int64_t begin, int64_t end) { this->RunTask(begin, end); })); @@ -204,8 +191,3 @@ TEST_P(ParallelTest, TestChunkSizeTooLarge) { EXPECT_EQ(data_[i], i); } } - -INSTANTIATE_TEST_SUITE_P( - ParallelTestWithOrWithoutThreadpool, - ParallelTest, - ::testing::Values(true, false)); diff --git a/extension/threadpool/thread_parallel.cpp b/extension/parallel/thread_parallel.cpp similarity index 97% rename from extension/threadpool/thread_parallel.cpp rename to extension/parallel/thread_parallel.cpp index 3c79a6775e6..fa09b240ad1 100644 --- a/extension/threadpool/thread_parallel.cpp +++ b/extension/parallel/thread_parallel.cpp @@ -10,9 +10,9 @@ #include #include +#include #include #include -#include #include namespace executorch { diff --git a/extension/parallel/thread_parallel.h b/extension/parallel/thread_parallel.h index 8bd1a572cd7..8b174075ae9 100644 --- a/extension/parallel/thread_parallel.h +++ b/extension/parallel/thread_parallel.h @@ -8,7 +8,46 @@ #pragma once -// This header is a stub left behind after the move to -// executorch/runtime/kernel. As such, it is deprecated; include and -// use the below header directly instead. -#include +#include +#include + +namespace executorch { +namespace extension { + +/** + * A helper to run function in parallel. + * + * begin, end: describe the extent of the workitems via first and last workitem + * to be processed + * grain_size: number of workitems processed by user callback which is + * described below + * f: user function applied in parallel to the chunks, signature: + * void f(int64_t begin, int64_t end) + * Returns true if all work items are processed successfully, false otherwise + * + * Warning: parallel_for does NOT copy thread local states from the current + * thread to the worker threads. Users need to protect the access to captured + * data if they mutate them in f. + */ +bool parallel_for( + const int64_t begin, + const int64_t end, + const int64_t grain_size, + const std::function& f); + +int64_t get_thread_num(); + +void set_thread_num(int64_t thread_num); + +} // namespace extension +} // namespace executorch + +namespace torch { +namespace executor { +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. +using ::executorch::extension::get_thread_num; +using ::executorch::extension::parallel_for; +using ::executorch::extension::set_thread_num; +} // namespace executor +} // namespace torch diff --git a/extension/threadpool/CMakeLists.txt b/extension/threadpool/CMakeLists.txt index 6e107cb6634..c1d86acf75d 100644 --- a/extension/threadpool/CMakeLists.txt +++ b/extension/threadpool/CMakeLists.txt @@ -21,8 +21,7 @@ if(NOT CMAKE_CXX_STANDARD) endif() add_library( - extension_threadpool threadpool.cpp threadpool_guard.cpp thread_parallel.cpp - cpuinfo_utils.cpp + extension_threadpool threadpool.cpp threadpool_guard.cpp cpuinfo_utils.cpp ) target_link_libraries( extension_threadpool PUBLIC executorch_core cpuinfo pthreadpool @@ -43,7 +42,3 @@ install( INCLUDES DESTINATION ${_common_include_directories} ) - -if(BUILD_TESTING) - add_subdirectory(test) -endif() diff --git a/extension/threadpool/targets.bzl b/extension/threadpool/targets.bzl index 1c34dbbc7d4..8bb0398b385 100644 --- a/extension/threadpool/targets.bzl +++ b/extension/threadpool/targets.bzl @@ -9,7 +9,6 @@ def define_common_targets(): """ _THREADPOOL_SRCS = [ - "thread_parallel.cpp", "threadpool.cpp", "threadpool_guard.cpp", ] + (["fb/threadpool_use_n_threads.cpp"] if not runtime.is_oss else []) @@ -30,8 +29,6 @@ def define_common_targets(): exported_deps = [ third_party_dep("pthreadpool"), third_party_dep("cpuinfo"), - # Allow users to use the header without an extra deps entry. - "//executorch/runtime/kernel:thread_parallel_interface", ], exported_preprocessor_flags = [ "-DET_USE_THREADPOOL", diff --git a/extension/threadpool/test/targets.bzl b/extension/threadpool/test/targets.bzl index 8bdf776c825..b8a39d8969a 100644 --- a/extension/threadpool/test/targets.bzl +++ b/extension/threadpool/test/targets.bzl @@ -18,15 +18,3 @@ def define_common_targets(): "//executorch/extension/threadpool:threadpool", ], ) - - runtime.cxx_test( - name = "thread_parallel_test", - srcs = [ - "thread_parallel_test.cpp", - ], - deps = [ - "//executorch/extension/threadpool:threadpool", - "//executorch/runtime/kernel:thread_parallel_interface", - "//executorch/runtime/platform:platform", - ], - ) diff --git a/kernels/optimized/CMakeLists.txt b/kernels/optimized/CMakeLists.txt index 23e26bfa72b..c6d31c20263 100644 --- a/kernels/optimized/CMakeLists.txt +++ b/kernels/optimized/CMakeLists.txt @@ -43,7 +43,7 @@ endif() list(TRANSFORM _optimized_cpublas__srcs PREPEND "${EXECUTORCH_ROOT}/") add_library(cpublas STATIC ${_optimized_cpublas__srcs}) target_link_libraries( - cpublas PUBLIC executorch_core eigen_blas extension_threadpool + cpublas PUBLIC executorch_core eigen_blas extension_parallel extension_threadpool ) target_compile_options(cpublas PUBLIC ${_common_compile_options}) diff --git a/kernels/optimized/blas/BlasKernel.h b/kernels/optimized/blas/BlasKernel.h index fc47b4482d6..c2b03cfebdd 100644 --- a/kernels/optimized/blas/BlasKernel.h +++ b/kernels/optimized/blas/BlasKernel.h @@ -11,8 +11,8 @@ #include #include +#include #include -#include #include diff --git a/kernels/optimized/lib_defs.bzl b/kernels/optimized/lib_defs.bzl index dd246f38984..659c7afe090 100644 --- a/kernels/optimized/lib_defs.bzl +++ b/kernels/optimized/lib_defs.bzl @@ -186,10 +186,7 @@ def define_libs(is_fbcode=False): ], ) - LIBBLAS_DEPS = [ - third_party_dep("cpuinfo"), - "//executorch/extension/threadpool:threadpool", - ] + LIBBLAS_DEPS = [third_party_dep("cpuinfo")] for libblas_name, mkl_dep in [("libblas", "fbsource//third-party/mkl:mkl_lp64_omp"), ("libblas_mkl_noomp", "fbsource//third-party/mkl:mkl")]: runtime.cxx_library( @@ -232,6 +229,7 @@ def define_libs(is_fbcode=False): "DEFAULT": [], }) + LIBBLAS_DEPS, exported_deps = [ + "//executorch/extension/parallel:thread_parallel", "//executorch/kernels/optimized:libutils", "//executorch/runtime/core/exec_aten:lib", ], diff --git a/runtime/kernel/targets.bzl b/runtime/kernel/targets.bzl index e67f76728b8..d49435f2825 100644 --- a/runtime/kernel/targets.bzl +++ b/runtime/kernel/targets.bzl @@ -51,19 +51,6 @@ def define_common_targets(): preprocessor_flags = ["-DMAX_KERNEL_NUM=1"], ) - runtime.cxx_library( - name = "thread_parallel_interface", - exported_headers = ["thread_parallel_interface.h"], - exported_deps = [ - "//executorch/runtime/core:core", - "//executorch/runtime/platform:platform", - ], - visibility = [ - "//executorch/...", - "@EXECUTORCH_CLIENTS", - ], - ) - for aten_mode in get_aten_mode_options(): aten_suffix = "_aten" if aten_mode else "" diff --git a/runtime/kernel/thread_parallel_interface.h b/runtime/kernel/thread_parallel_interface.h deleted file mode 100644 index 1e79acc75a4..00000000000 --- a/runtime/kernel/thread_parallel_interface.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include - -#include -#include - -namespace executorch { -namespace extension { -namespace internal { -template -inline bool parallel_for_no_threadpool( - const int64_t begin, - const int64_t end, - const int64_t grain_size, - const Func& f) { - ET_CHECK_OR_RETURN_FALSE( - begin >= 0 && end >= 0 && end >= begin, - "begin = %" PRId64 ", end = %" PRId64, - begin, - end); - ET_CHECK_OR_RETURN_FALSE(grain_size > 0, "grain_size = %" PRId64, grain_size); - f(begin, end); - return true; -} - -} // namespace internal - -#ifdef ET_USE_THREADPOOL -/** - * A helper to run a function in parallel. - * - * begin, end: describe the extent of the workitems via first and last workitem - * to be processed - * grain_size: number of workitems processed by user callback which is - * described below - * f: user function applied in parallel to the chunks, signature: - * void f(int64_t begin, int64_t end) - * Returns true if all work items are processed successfully, false otherwise - * - * Warning: parallel_for does NOT copy thread local states from the current - * thread to the worker threads. Users need to protect the access to captured - * data if they mutate them in f. - */ -bool parallel_for( - const int64_t begin, - const int64_t end, - const int64_t grain_size, - const std::function& f); - -int64_t get_thread_num(); - -void set_thread_num(int64_t thread_num); -#else // ET_USE_THREADPOOL -template -bool parallel_for( - const int64_t begin, - const int64_t end, - const int64_t grain_size, - const Func& func) { - return internal::parallel_for_no_threadpool(begin, end, grain_size, func); -} - -inline int64_t get_thread_num() { - return 0; -} - -inline void set_thread_num(int64_t thread_num) { - ET_DCHECK_MSG(false, "cannot set_thread_num without threading support!"); -} -#endif // ET_USE_THREADPOOL -} // namespace extension -} // namespace executorch - -namespace torch { -namespace executor { -// TODO(T197294990): Remove these deprecated aliases once all users have moved -// to the new `::executorch` namespaces. -using ::executorch::extension::get_thread_num; -using ::executorch::extension::parallel_for; -using ::executorch::extension::set_thread_num; -} // namespace executor -} // namespace torch diff --git a/test/utils/OSSTestConfig.json b/test/utils/OSSTestConfig.json index be594f9d5f4..cc5e625f1e8 100644 --- a/test/utils/OSSTestConfig.json +++ b/test/utils/OSSTestConfig.json @@ -59,16 +59,6 @@ "extension_tensor" ] }, - { - "directory": "extension/threadpool/test", - "sources": [ - "thread_parallel_test.cpp", - "threadpool_test.cpp" - ], - "additional_libs": [ - "extension_threadpool" - ] - }, { "directory": "kernels/portable/cpu/util/test", "sources": [