From 3bfe08c987e5860c3056f312035ae1bcd3e76565 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sun, 6 Oct 2024 12:27:09 +0200 Subject: [PATCH 1/3] [libc] GPU RPC interpace: add return value to `rpc_host_call` --- libc/newhdrgen/yaml/gpu/rpc.yaml | 2 +- libc/spec/gpu_ext.td | 2 +- libc/src/gpu/rpc_host_call.cpp | 9 +++++++-- libc/src/gpu/rpc_host_call.h | 2 +- libc/utils/gpu/server/rpc_server.cpp | 9 +++++++-- offload/test/libc/host_call.c | 20 +++++++++++++++----- 6 files changed, 32 insertions(+), 12 deletions(-) diff --git a/libc/newhdrgen/yaml/gpu/rpc.yaml b/libc/newhdrgen/yaml/gpu/rpc.yaml index 61856bc0c7d69..9c03038b291f2 100644 --- a/libc/newhdrgen/yaml/gpu/rpc.yaml +++ b/libc/newhdrgen/yaml/gpu/rpc.yaml @@ -16,7 +16,7 @@ functions: - name: rpc_host_call standards: - GPUExtensions - return_type: void + return_type: long long arguments: - type: void * - type: void * diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td index dce81ff778620..8848d9d52e1d5 100644 --- a/libc/spec/gpu_ext.td +++ b/libc/spec/gpu_ext.td @@ -7,7 +7,7 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> { [ FunctionSpec< "rpc_host_call", - RetValSpec, + RetValSpec, [ArgSpec, ArgSpec, ArgSpec] >, ] diff --git a/libc/src/gpu/rpc_host_call.cpp b/libc/src/gpu/rpc_host_call.cpp index ca2e331340a6c..b33cb19a0de47 100644 --- a/libc/src/gpu/rpc_host_call.cpp +++ b/libc/src/gpu/rpc_host_call.cpp @@ -17,14 +17,19 @@ namespace LIBC_NAMESPACE_DECL { // This calls the associated function pointer on the RPC server with the given // arguments. We expect that the pointer here is a valid pointer on the server. -LLVM_LIBC_FUNCTION(void, rpc_host_call, (void *fn, void *data, size_t size)) { +LLVM_LIBC_FUNCTION(long long, rpc_host_call, + (void *fn, void *data, size_t size)) { rpc::Client::Port port = rpc::client.open(); port.send_n(data, size); port.send([=](rpc::Buffer *buffer) { buffer->data[0] = reinterpret_cast(fn); }); - port.recv([](rpc::Buffer *) {}); + long long ret; + port.recv([&](rpc::Buffer *buffer) { + ret = static_cast(buffer->data[0]); + }); port.close(); + return ret; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/gpu/rpc_host_call.h b/libc/src/gpu/rpc_host_call.h index 7cfea757ccdfd..e7efc5618346a 100644 --- a/libc/src/gpu/rpc_host_call.h +++ b/libc/src/gpu/rpc_host_call.h @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE_DECL { -void rpc_host_call(void *fn, void *buffer, size_t size); +long long rpc_host_call(void *fn, void *buffer, size_t size); } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 6951c5ae147df..3707971441d4b 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -319,13 +319,18 @@ rpc_status_t handle_server_impl( } case RPC_HOST_CALL: { uint64_t sizes[lane_size] = {0}; + long long results[lane_size] = {0}; void *args[lane_size] = {nullptr}; port->recv_n(args, sizes, [&](uint64_t size) { return temp_storage.alloc(size); }); port->recv([&](rpc::Buffer *buffer, uint32_t id) { - reinterpret_cast(buffer->data[0])(args[id]); + using func_ptr_t = long long (*)(void *); + auto func = reinterpret_cast(buffer->data[0]); + results[id] = func(args[id]); + }); + port->send([&](rpc::Buffer *buffer, uint32_t id) { + buffer->data[0] = static_cast(results[id]); }); - port->send([&](rpc::Buffer *, uint32_t id) {}); break; } case RPC_FEOF: { diff --git a/offload/test/libc/host_call.c b/offload/test/libc/host_call.c index 11260cc285765..12e9cb0f17e5e 100644 --- a/offload/test/libc/host_call.c +++ b/offload/test/libc/host_call.c @@ -8,14 +8,14 @@ #pragma omp begin declare variant match(device = {kind(gpu)}) // Extension provided by the 'libc' project. -void rpc_host_call(void *fn, void *args, size_t size); +long long rpc_host_call(void *fn, void *args, size_t size); #pragma omp declare target to(rpc_host_call) device_type(nohost) #pragma omp end declare variant #pragma omp begin declare variant match(device = {kind(cpu)}) // Dummy host implementation to make this work for all targets. -void rpc_host_call(void *fn, void *args, size_t size) { - ((void (*)(void *))fn)(args); +long long rpc_host_call(void *fn, void *args, size_t size) { + return ((long long (*)(void *))fn)(args); } #pragma omp end declare variant @@ -25,17 +25,26 @@ typedef struct args_s { } args_t; // CHECK-DAG: Thread: 0, Block: 0 +// CHECK-DAG: Result: 42 // CHECK-DAG: Thread: 1, Block: 0 +// CHECK-DAG: Result: 42 // CHECK-DAG: Thread: 0, Block: 1 +// CHECK-DAG: Result: 42 // CHECK-DAG: Thread: 1, Block: 1 +// CHECK-DAG: Result: 42 // CHECK-DAG: Thread: 0, Block: 2 +// CHECK-DAG: Result: 42 // CHECK-DAG: Thread: 1, Block: 2 +// CHECK-DAG: Result: 42 // CHECK-DAG: Thread: 0, Block: 3 +// CHECK-DAG: Result: 42 // CHECK-DAG: Thread: 1, Block: 3 -void foo(void *data) { +// CHECK-DAG: Result: 42 +long long foo(void *data) { assert(omp_is_initial_device() && "Not executing on host?"); args_t *args = (args_t *)data; printf("Thread: %d, Block: %d\n", args->thread_id, args->block_id); + return 42; } void *fn_ptr = NULL; @@ -49,6 +58,7 @@ int main() { #pragma omp parallel num_threads(2) { args_t args = {omp_get_thread_num(), omp_get_team_num()}; - rpc_host_call(fn_ptr, &args, sizeof(args_t)); + long long res = rpc_host_call(fn_ptr, &args, sizeof(args_t)); + printf("Result: %d\n", (int)res); } } From 17ebd2974e154c7656f72e5f0467a1b5aad11bb3 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sun, 6 Oct 2024 19:02:08 +0200 Subject: [PATCH 2/3] use unsigned --- libc/newhdrgen/yaml/gpu/rpc.yaml | 2 +- libc/spec/gpu_ext.td | 2 +- libc/src/gpu/rpc_host_call.cpp | 6 +++--- libc/src/gpu/rpc_host_call.h | 2 +- offload/test/libc/host_call.c | 8 ++++---- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libc/newhdrgen/yaml/gpu/rpc.yaml b/libc/newhdrgen/yaml/gpu/rpc.yaml index 9c03038b291f2..da4f6afb7856d 100644 --- a/libc/newhdrgen/yaml/gpu/rpc.yaml +++ b/libc/newhdrgen/yaml/gpu/rpc.yaml @@ -16,7 +16,7 @@ functions: - name: rpc_host_call standards: - GPUExtensions - return_type: long long + return_type: unsigned long long arguments: - type: void * - type: void * diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td index 8848d9d52e1d5..d99531dc06bcd 100644 --- a/libc/spec/gpu_ext.td +++ b/libc/spec/gpu_ext.td @@ -7,7 +7,7 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> { [ FunctionSpec< "rpc_host_call", - RetValSpec, + RetValSpec, [ArgSpec, ArgSpec, ArgSpec] >, ] diff --git a/libc/src/gpu/rpc_host_call.cpp b/libc/src/gpu/rpc_host_call.cpp index b33cb19a0de47..f21fadc319c61 100644 --- a/libc/src/gpu/rpc_host_call.cpp +++ b/libc/src/gpu/rpc_host_call.cpp @@ -17,16 +17,16 @@ namespace LIBC_NAMESPACE_DECL { // This calls the associated function pointer on the RPC server with the given // arguments. We expect that the pointer here is a valid pointer on the server. -LLVM_LIBC_FUNCTION(long long, rpc_host_call, +LLVM_LIBC_FUNCTION(unsigned long long, rpc_host_call, (void *fn, void *data, size_t size)) { rpc::Client::Port port = rpc::client.open(); port.send_n(data, size); port.send([=](rpc::Buffer *buffer) { buffer->data[0] = reinterpret_cast(fn); }); - long long ret; + unsigned long long ret; port.recv([&](rpc::Buffer *buffer) { - ret = static_cast(buffer->data[0]); + ret = static_cast(buffer->data[0]); }); port.close(); return ret; diff --git a/libc/src/gpu/rpc_host_call.h b/libc/src/gpu/rpc_host_call.h index e7efc5618346a..861149dead561 100644 --- a/libc/src/gpu/rpc_host_call.h +++ b/libc/src/gpu/rpc_host_call.h @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE_DECL { -long long rpc_host_call(void *fn, void *buffer, size_t size); +unsigned long long rpc_host_call(void *fn, void *buffer, size_t size); } // namespace LIBC_NAMESPACE_DECL diff --git a/offload/test/libc/host_call.c b/offload/test/libc/host_call.c index 12e9cb0f17e5e..61c4e14d5b388 100644 --- a/offload/test/libc/host_call.c +++ b/offload/test/libc/host_call.c @@ -8,14 +8,14 @@ #pragma omp begin declare variant match(device = {kind(gpu)}) // Extension provided by the 'libc' project. -long long rpc_host_call(void *fn, void *args, size_t size); +unsigned long long rpc_host_call(void *fn, void *args, size_t size); #pragma omp declare target to(rpc_host_call) device_type(nohost) #pragma omp end declare variant #pragma omp begin declare variant match(device = {kind(cpu)}) // Dummy host implementation to make this work for all targets. -long long rpc_host_call(void *fn, void *args, size_t size) { - return ((long long (*)(void *))fn)(args); +unsigned long long rpc_host_call(void *fn, void *args, size_t size) { + return ((unsigned long long (*)(void *))fn)(args); } #pragma omp end declare variant @@ -58,7 +58,7 @@ int main() { #pragma omp parallel num_threads(2) { args_t args = {omp_get_thread_num(), omp_get_team_num()}; - long long res = rpc_host_call(fn_ptr, &args, sizeof(args_t)); + unsigned long long res = rpc_host_call(fn_ptr, &args, sizeof(args_t)); printf("Result: %d\n", (int)res); } } From 9e7fdb6c76fb22ccbfa9567586de00186fb50503 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Sun, 6 Oct 2024 19:03:59 +0200 Subject: [PATCH 3/3] more unsigned --- libc/utils/gpu/server/rpc_server.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 3707971441d4b..ca10e67509ae6 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -319,12 +319,12 @@ rpc_status_t handle_server_impl( } case RPC_HOST_CALL: { uint64_t sizes[lane_size] = {0}; - long long results[lane_size] = {0}; + unsigned long long results[lane_size] = {0}; void *args[lane_size] = {nullptr}; port->recv_n(args, sizes, [&](uint64_t size) { return temp_storage.alloc(size); }); port->recv([&](rpc::Buffer *buffer, uint32_t id) { - using func_ptr_t = long long (*)(void *); + using func_ptr_t = unsigned long long (*)(void *); auto func = reinterpret_cast(buffer->data[0]); results[id] = func(args[id]); });