diff --git a/lib/gc/CMakeLists.txt b/lib/gc/CMakeLists.txt index ea92ba80e..03f7023b8 100644 --- a/lib/gc/CMakeLists.txt +++ b/lib/gc/CMakeLists.txt @@ -6,4 +6,5 @@ include(functions) add_subdirectory(CAPI) add_subdirectory(Dialect) -add_subdirectory(Transforms) \ No newline at end of file +add_subdirectory(Transforms) +add_subdirectory(ExecutionEngine) \ No newline at end of file diff --git a/lib/gc/ExecutionEngine/CMakeLists.txt b/lib/gc/ExecutionEngine/CMakeLists.txt new file mode 100644 index 000000000..8aa223412 --- /dev/null +++ b/lib/gc/ExecutionEngine/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(CPURuntime) diff --git a/lib/gc/ExecutionEngine/CPURuntime/CMakeLists.txt b/lib/gc/ExecutionEngine/CPURuntime/CMakeLists.txt new file mode 100644 index 000000000..6be58e28f --- /dev/null +++ b/lib/gc/ExecutionEngine/CPURuntime/CMakeLists.txt @@ -0,0 +1,15 @@ +find_package(OpenMP REQUIRED) + +if ("iomp" IN_LIST OpenMP_C_LIB_NAMES OR "omp" IN_LIST OpenMP_C_LIB_NAMES OR "omp5" IN_LIST OpenMP_C_LIB_NAMES) +else() + add_definitions("-DGC_NEEDS_OMP_WRAPPER=1") +endif() + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") +add_mlir_library(GCCpuRuntime + SHARED + Parallel.cpp + + EXCLUDE_FROM_LIBMLIR + ) \ No newline at end of file diff --git a/lib/gc/ExecutionEngine/CPURuntime/Parallel.cpp b/lib/gc/ExecutionEngine/CPURuntime/Parallel.cpp new file mode 100644 index 000000000..3a5b4c2c1 --- /dev/null +++ b/lib/gc/ExecutionEngine/CPURuntime/Parallel.cpp @@ -0,0 +1,188 @@ +//===-- Parallel.cpp - parallel ---------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +#define WEAK_SYMBOL __attribute__((weak)) + +namespace { +struct barrier_t { + alignas(64) std::atomic pending_; + std::atomic rounds_; + uint64_t total_; + // pad barrier to size of cacheline to avoid false sharing + char padding_[64 - 4 * sizeof(int32_t)]; +}; + +using barrier_idle_func = uint64_t (*)(std::atomic *remaining, + int32_t expected_remain, int32_t tid, + void *args); +} // namespace + +extern "C" { +int gc_runtime_keep_alive = 0; +void gc_arrive_at_barrier(barrier_t *b, barrier_idle_func idle_func, + void *idle_args) { + auto cur_round = b->rounds_.load(std::memory_order_acquire); + auto cnt = --b->pending_; + assert(cnt >= 0); + if (cnt == 0) { + b->pending_.store(b->total_); + b->rounds_.store(cur_round + 1); + } else { + if (idle_func) { + if (cur_round != b->rounds_.load()) { + return; + } + idle_func(&b->rounds_, cur_round + 1, -1, idle_args); + } + while (cur_round == b->rounds_.load()) { + _mm_pause(); + } + } +} + +static_assert(sizeof(barrier_t) == 64, "size of barrier_t should be 64-byte"); + +void gc_init_barrier(barrier_t *b, int num_barriers, uint64_t thread_count) { + for (int i = 0; i < num_barriers; i++) { + b[i].total_ = thread_count; + b[i].pending_.store(thread_count); + b[i].rounds_.store(0); + } +} + +#if GC_NEEDS_OMP_WRAPPER +void WEAK_SYMBOL __kmpc_barrier(void *loc, int32_t global_tid) { +#pragma omp barrier +} + +int WEAK_SYMBOL __kmpc_global_thread_num(void *loc) { + return omp_get_thread_num(); +} + +// The implementation was extracted and simplified from LLVM libomp +// at openmp/runtime/src/kmp_sched.cpp +void WEAK_SYMBOL __kmpc_for_static_init_8u(void *loc, int32_t gtid, + int32_t schedtype, + int32_t *plastiter, uint64_t *plower, + uint64_t *pupper, int64_t *pstride, + int64_t incr, int64_t chunk) { + if (unlikely(schedtype != 34)) { + std::abort(); + } + const int32_t FALSE = 0; + const int32_t TRUE = 1; + using UT = uint64_t; + // using ST = int64_t; + /* this all has to be changed back to TID and such.. */ + uint32_t tid = gtid; + uint32_t nth = omp_get_num_threads(); + UT trip_count; + + /* special handling for zero-trip loops */ + if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { + if (plastiter != nullptr) + *plastiter = FALSE; + /* leave pupper and plower set to entire iteration space */ + *pstride = incr; /* value should never be used */ + return; + } + + if (nth == 1) { + if (plastiter != nullptr) + *plastiter = TRUE; + *pstride = + (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); + return; + } + + /* compute trip count */ + if (incr == 1) { + trip_count = *pupper - *plower + 1; + } else if (incr == -1) { + trip_count = *plower - *pupper + 1; + } else if (incr > 0) { + // upper-lower can exceed the limit of signed type + trip_count = (UT)(*pupper - *plower) / incr + 1; + } else { + trip_count = (UT)(*plower - *pupper) / (-incr) + 1; + } + if (trip_count < nth) { + if (tid < trip_count) { + *pupper = *plower = *plower + tid * incr; + } else { + // set bounds so non-active threads execute no iterations + *plower = *pupper + (incr > 0 ? 1 : -1); + } + if (plastiter != nullptr) + *plastiter = (tid == trip_count - 1); + } else { + UT small_chunk = trip_count / nth; + UT extras = trip_count % nth; + *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras)); + *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr); + if (plastiter != nullptr) + *plastiter = (tid == nth - 1); + } + *pstride = trip_count; +} + +void WEAK_SYMBOL __kmpc_for_static_fini(void *ptr, int32_t v) {} + +static thread_local int next_num_threads = 0; + +/*! +@ingroup PARALLEL +The type for a microtask which gets passed to @ref __kmpc_fork_call(). +The arguments to the outlined function are +@param global_tid the global thread identity of the thread executing the +function. +@param bound_tid the local identity of the thread executing the function +@param ... pointers to shared variables accessed by the function. +*/ +using kmpc_micro = void (*)(int32_t *global_tid, int32_t *bound_tid, ...); +void WEAK_SYMBOL __kmpc_fork_call(void *loc, int32_t argc, void *pfunc, ...) { + if (unlikely(argc != 1 && argc != 0)) { + std::abort(); + } + va_list ap; + va_start(ap, pfunc); + void *c = va_arg(ap, void *); + int32_t global_tid = 0; + if (unlikely(next_num_threads)) { +#pragma omp parallel num_threads(next_num_threads) + { + kmpc_micro func = (kmpc_micro)(pfunc); + func(&global_tid, nullptr, c); + } + next_num_threads = 0; + } else { +#pragma omp parallel + { + kmpc_micro func = (kmpc_micro)(pfunc); + func(&global_tid, nullptr, c); + } + } + va_end(ap); +} + +void WEAK_SYMBOL __kmpc_push_num_threads(void *loc, int32_t global_tid, + int32_t num_threads) { + next_num_threads = num_threads; +} +#endif +} diff --git a/scripts/license.py b/scripts/license.py index 49f28eaa8..3ed2ce521 100644 --- a/scripts/license.py +++ b/scripts/license.py @@ -15,10 +15,10 @@ # SPDX-License-Identifier: Apache-2.0 import datetime, sys, re, argparse -from typing import Dict, Set +from typing import Dict, Set, List WIDTH: int = 80 -intel_license: list[str] = [ +intel_license: List[str] = [ 'Copyright \\(C\\) (\\d\\d\\d\\d-)?$YEAR Intel Corporation', '', 'Licensed under the Apache License, Version 2.0 (the "License");', @@ -35,7 +35,7 @@ 'SPDX-License-Identifier: Apache-2.0', ] -llvm_license: list[str] = [ +llvm_license: List[str] = [ "===-{1,2} $FILE - .* -*\\*- $LANG -\\*-===", '', 'This file is licensed under the Apache License v2.0 with LLVM Exceptions.', @@ -45,7 +45,7 @@ "===-*===", ] -def check_license(filepath: str, license: list[str], var: Dict[str, str], re_line: Set[int]): +def check_license(filepath: str, license: List[str], var: Dict[str, str], re_line: Set[int]): with open(filepath, 'r') as f: idx: int = 0 for line in f.readlines(): @@ -117,7 +117,7 @@ def use_llvm_license(path: str) -> bool: var: Dict[str, str] = {} re_line: Set[int] = set() - lic = list[str] + lic = List[str] if filepath.startswith("test/") or filepath.startswith("./test/"): continue diff --git a/src/gc-cpu-runner/CMakeLists.txt b/src/gc-cpu-runner/CMakeLists.txt index f3f768612..2599eef84 100644 --- a/src/gc-cpu-runner/CMakeLists.txt +++ b/src/gc-cpu-runner/CMakeLists.txt @@ -1,3 +1,20 @@ +################################################################################ +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# SPDX-License-Identifier: Apache-2.0 +################################################################################ + if(GC_DEV_LINK_LLVM_DYLIB) set(LLVM_LINK_COMPONENTS LLVM @@ -36,7 +53,8 @@ endif() #LLVM_LINK_COMPONENTS is processed by LLVM cmake in add_llvm_executable set(gc_cpu_runner_libs - ${MLIR_LINK_COMPONENTS}) + ${MLIR_LINK_COMPONENTS} + GCCpuRuntime) add_mlir_tool(gc-cpu-runner gc-cpu-runner.cpp ) diff --git a/src/gc-cpu-runner/gc-cpu-runner.cpp b/src/gc-cpu-runner/gc-cpu-runner.cpp index 3ece8f2ff..353abffe9 100644 --- a/src/gc-cpu-runner/gc-cpu-runner.cpp +++ b/src/gc-cpu-runner/gc-cpu-runner.cpp @@ -27,7 +27,11 @@ #include "llvm/Support/TargetSelect.h" #include +extern int gc_runtime_keep_alive; + int main(int argc, char **argv) { + // keeps GCCPURuntime linked + gc_runtime_keep_alive = 0; llvm::InitLLVM y(argc, argv); llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); diff --git a/test/gc/cpu-runner/tid.mlir b/test/gc/cpu-runner/tid.mlir new file mode 100644 index 000000000..aedcc0a20 --- /dev/null +++ b/test/gc/cpu-runner/tid.mlir @@ -0,0 +1,37 @@ +// RUN: gc-opt %s --convert-cpuruntime-to-llvm --convert-openmp-to-llvm --convert-func-to-llvm --convert-arith-to-llvm --convert-cf-to-llvm --reconcile-unrealized-casts | gc-cpu-runner -e main -entry-point-result=void | FileCheck %s +module { + func.func private @omp_get_thread_num() -> i32 + + func.func @check_parallel() { + %c64 = arith.constant 64 : index + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %c8 = arith.constant 8 : index + %0 = llvm.mlir.constant(1 : i64) : i64 + omp.parallel num_threads(%c8: index) { + omp.wsloop { + omp.loop_nest (%arg1, %arg2) : index = (%c0, %c0) to (%c1, %c64) step (%c1, %c1) { + cpuruntime.printf "ITR %zu\n" %arg2 : index + omp.yield + } + omp.terminator + } + %tid = func.call @omp_get_thread_num() : () -> i32 + cpuruntime.printf "EXIT %d\n" %tid : i32 + omp.terminator + } + return + } + + func.func @main() { + %0 = func.call @omp_get_thread_num() : () -> i32 + cpuruntime.printf "TID %d\n" %0 : i32 + call @check_parallel() : ()->() + return + } + // CHECK: TID 0 + // CHECK-COUNT-64: ITR {{[0-9]+}} + // CHECK-NOT: ITR + // CHECK-COUNT-8: EXIT {{[0-9]+}} + // CHECK-NOT: EXIT +} \ No newline at end of file