Skip to content

Revert "Revert "Split & remove extension_parallel (#8983)"" #9190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .ci/scripts/unittest-buck2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ buck2 query "//backends/apple/... + //backends/example/... + \
//kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
//kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."

UNBUILDABLE_OPTIMIZED_OPS_REGEX="gelu|fft_r2c|log_softmax"
BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
# TODO: expand the covered scope of Buck targets.
# //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH.
# //runtime/test/... requires Python torch, which we don't have in our OSS buck setup.
buck2 test //kernels/portable/... //runtime/backend/... //runtime/core/... \
buck2 test $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... //runtime/backend/... //runtime/core/... \
//runtime/executor: //runtime/kernel/... //runtime/platform/...
2 changes: 2 additions & 0 deletions .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ exclude_patterns = [
'examples/**',
'extension/**',
'kernels/optimized/**',
# Justified <functional> include.
'runtime/kernel/thread_parallel_interface.h',
'scripts/**',
'third-party/**',
'util/**',
Expand Down
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL
AND EXECUTORCH_BUILD_CPUINFO
)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/parallel)
endif()

if(EXECUTORCH_BUILD_PYBIND)
Expand Down
1 change: 0 additions & 1 deletion Test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ if(BUILD_TESTING)
add_subdirectory(extension/evalue_util/test)
add_subdirectory(extension/kernel_util/test)
add_subdirectory(extension/memory_allocator/test)
add_subdirectory(extension/parallel/test)
add_subdirectory(extension/pytree/test)
add_subdirectory(kernels/portable/cpu/util/test)
add_subdirectory(kernels/prim_ops/test)
Expand Down
22 changes: 4 additions & 18 deletions build/cmake_deps.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ excludes = [
deps = [
"executorch",
"executorch_core",
"extension_parallel",
"extension_threadpool",
"portable_kernels",
]
Expand Down Expand Up @@ -131,7 +130,7 @@ excludes = [
deps = [
"executorch_core",
"executorch",
"extension_parallel",
"extension_threadpool",
]

[targets.optimized_native_cpu_ops]
Expand All @@ -146,7 +145,6 @@ excludes = [
deps = [
"executorch_core",
"executorch",
"extension_parallel",
"extension_threadpool",
"portable_kernels",
]
Expand Down Expand Up @@ -227,19 +225,6 @@ deps = [
"extension_runner_util",
]

[targets.extension_parallel]
buck_targets = [
"//extension/parallel:thread_parallel",
]
filters = [
".cpp$",
]
deps = [
"executorch",
"executorch_core",
"extension_threadpool",
]

[targets.extension_tensor]
buck_targets = [
"//extension/tensor:tensor",
Expand Down Expand Up @@ -379,6 +364,7 @@ excludes = [
deps = [
"executorch",
"executorch_core",
"extension_threadpool",
"xnnpack_backend",
"portable_kernels",
]
Expand All @@ -393,6 +379,7 @@ filters = [
deps = [
"executorch",
"executorch_core",
"extension_threadpool",
]

[targets.xnnpack_schema]
Expand Down Expand Up @@ -427,7 +414,6 @@ deps = [
"executorch",
"executorch_core",
"optimized_kernels",
"extension_parallel",
"extension_threadpool",
"reduce_util",
"xnnpack_backend",
Expand Down Expand Up @@ -465,7 +451,7 @@ deps = [
"executorch_core",
"extension_data_loader",
"extension_module",
"extension_parallel",
"extension_threadpool",
"portable_kernels",
"quantized_kernels",
"xnnpack_backend",
Expand Down
8 changes: 1 addition & 7 deletions build/executorch-config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ set(lib_list
custom_ops
extension_module
extension_module_static
extension_parallel
extension_runner_util
extension_tensor
extension_threadpool
Expand Down Expand Up @@ -131,14 +130,9 @@ endforeach()

# TODO: investigate use of install(EXPORT) to cleanly handle
# target_compile_options/target_compile_definitions for everything.
if(TARGET extension_parallel)
set_target_properties(
extension_parallel PROPERTIES INTERFACE_LINK_LIBRARIES extension_threadpool
)
endif()
if(TARGET cpublas)
set_target_properties(
cpublas PROPERTIES INTERFACE_LINK_LIBRARIES extension_parallel
cpublas PROPERTIES INTERFACE_LINK_LIBRARIES extension_threadpool
)
endif()
if(TARGET extension_threadpool)
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/custom_ops/op_sdpa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
#include <vector>

#ifdef ET_USE_THREADPOOL
#include <executorch/extension/parallel/thread_parallel.h>
#include <executorch/extension/threadpool/threadpool.h>
#include <executorch/runtime/kernel/thread_parallel_interface.h>
#endif
#include <executorch/extension/kernel_util/make_boxed_from_unboxed_functor.h>

Expand Down
1 change: 0 additions & 1 deletion extension/llm/custom_ops/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def define_common_targets():
"//executorch/kernels/optimized:libblas{}".format(mkl_dep),
"//executorch/kernels/optimized:libvec",
"//executorch/extension/kernel_util:kernel_util",
"//executorch/extension/parallel:thread_parallel",
"//executorch/extension/threadpool:threadpool",
],
deps = [
Expand Down
25 changes: 0 additions & 25 deletions extension/parallel/CMakeLists.txt

This file was deleted.

8 changes: 0 additions & 8 deletions extension/parallel/TARGETS

This file was deleted.

26 changes: 0 additions & 26 deletions extension/parallel/targets.bzl

This file was deleted.

8 changes: 0 additions & 8 deletions extension/parallel/test/TARGETS

This file was deleted.

19 changes: 0 additions & 19 deletions extension/parallel/test/targets.bzl

This file was deleted.

47 changes: 4 additions & 43 deletions extension/parallel/thread_parallel.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,46 +8,7 @@

#pragma once

#include <cstdint>
#include <functional>

namespace executorch {
namespace extension {

/**
* A helper to run function in parallel.
*
* begin, end: describe the extent of the workitems via first and last workitem
* to be processed
* grain_size: number of workitems processed by user callback which is
* described below
* f: user function applied in parallel to the chunks, signature:
* void f(int64_t begin, int64_t end)
* Returns true if all work items are processed successfully, false otherwise
*
* Warning: parallel_for does NOT copy thread local states from the current
* thread to the worker threads. Users need to protect the access to captured
* data if they mutate them in f.
*/
bool parallel_for(
const int64_t begin,
const int64_t end,
const int64_t grain_size,
const std::function<void(int64_t, int64_t)>& f);

int64_t get_thread_num();

void set_thread_num(int64_t thread_num);

} // namespace extension
} // namespace executorch

namespace torch {
namespace executor {
// TODO(T197294990): Remove these deprecated aliases once all users have moved
// to the new `::executorch` namespaces.
using ::executorch::extension::get_thread_num;
using ::executorch::extension::parallel_for;
using ::executorch::extension::set_thread_num;
} // namespace executor
} // namespace torch
// This header is a stub left behind after the move to
// executorch/runtime/kernel. As such, it is deprecated; include and
// use the below header directly instead.
#include <executorch/runtime/kernel/thread_parallel_interface.h>
7 changes: 6 additions & 1 deletion extension/threadpool/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ if(NOT CMAKE_CXX_STANDARD)
endif()

add_library(
extension_threadpool threadpool.cpp threadpool_guard.cpp cpuinfo_utils.cpp
extension_threadpool threadpool.cpp threadpool_guard.cpp thread_parallel.cpp
cpuinfo_utils.cpp
)
target_link_libraries(
extension_threadpool PUBLIC executorch_core cpuinfo pthreadpool
Expand All @@ -42,3 +43,7 @@ install(
INCLUDES
DESTINATION ${_common_include_directories}
)

if(BUILD_TESTING)
add_subdirectory(test)
endif()
3 changes: 3 additions & 0 deletions extension/threadpool/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def define_common_targets():
"""

_THREADPOOL_SRCS = [
"thread_parallel.cpp",
"threadpool.cpp",
"threadpool_guard.cpp",
] + (["fb/threadpool_use_n_threads.cpp"] if not runtime.is_oss else [])
Expand All @@ -29,6 +30,8 @@ def define_common_targets():
exported_deps = [
third_party_dep("pthreadpool"),
third_party_dep("cpuinfo"),
# Allow users to use the header without an extra deps entry.
"//executorch/runtime/kernel:thread_parallel_interface",
],
exported_preprocessor_flags = [
"-DET_USE_THREADPOOL",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# @generated by test/utils/generate_gtest_cmakelists.py
#
# This file should be formatted with
# ~~~
# cmake-format -i CMakeLists.txt
Expand All @@ -12,28 +14,14 @@
#

cmake_minimum_required(VERSION 3.19)
project(extension_parallel_test)

# Use C++17 for test.
set(CMAKE_CXX_STANDARD 17)

set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)

include(${EXECUTORCH_ROOT}/build/Test.cmake)

set(_test_srcs thread_parallel_test.cpp ../thread_parallel.cpp)
set(_test_srcs thread_parallel_test.cpp threadpool_test.cpp)

et_cxx_test(
extension_parallel_test
SOURCES
${_test_srcs}
EXTRA_LIBS
pthreadpool
cpuinfo
extension_threadpool_test SOURCES ${_test_srcs} EXTRA_LIBS
extension_threadpool
)
target_include_directories(
extension_parallel_test
PRIVATE ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
)
12 changes: 12 additions & 0 deletions extension/threadpool/test/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,15 @@ def define_common_targets():
"//executorch/extension/threadpool:threadpool",
],
)

runtime.cxx_test(
name = "thread_parallel_test",
srcs = [
"thread_parallel_test.cpp",
],
deps = [
"//executorch/extension/threadpool:threadpool",
"//executorch/runtime/kernel:thread_parallel_interface",
"//executorch/runtime/platform:platform",
],
)
Loading
Loading