-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[libc] Implement temporary printf
on the GPU
#85331
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-libc Author: Joseph Huber (jhuber6) ChangesSummary: Conceptually, this patch has the client copy over its format string and This will require a built-in extension to the varargs support to get Full diff: https://github.com/llvm/llvm-project/pull/85331.diff 12 Files Affected:
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 4fb87cb9f5a33e..b678350e9fcb15 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -211,6 +211,7 @@ set(TARGET_LIBC_ENTRYPOINTS
# gpu/rpc.h entrypoints
libc.src.gpu.rpc_host_call
+ libc.src.gpu.rpc_fprintf
)
set(TARGET_LIBM_ENTRYPOINTS
diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index 919ea039c18e32..faed7b5f5ff46a 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -31,6 +31,9 @@ typedef enum {
RPC_FTELL,
RPC_FFLUSH,
RPC_UNGETC,
+ RPC_PRINTF_TO_STDOUT,
+ RPC_PRINTF_TO_STDERR,
+ RPC_PRINTF_TO_STREAM,
RPC_LAST = 0xFFFF,
} rpc_opcode_t;
diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td
index dce81ff7786203..5400e0afa7564a 100644
--- a/libc/spec/gpu_ext.td
+++ b/libc/spec/gpu_ext.td
@@ -10,6 +10,14 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> {
RetValSpec<VoidType>,
[ArgSpec<VoidPtr>, ArgSpec<VoidPtr>, ArgSpec<SizeTType>]
>,
+ FunctionSpec<
+ "rpc_fprintf",
+ RetValSpec<IntType>,
+ [ArgSpec<FILERestrictedPtr>,
+ ArgSpec<ConstCharRestrictedPtr>,
+ ArgSpec<VoidPtr>,
+ ArgSpec<SizeTType>]
+ >,
]
>;
let Headers = [
diff --git a/libc/src/__support/arg_list.h b/libc/src/__support/arg_list.h
index 9de17651142f49..edc785ead63e4d 100644
--- a/libc/src/__support/arg_list.h
+++ b/libc/src/__support/arg_list.h
@@ -60,6 +60,35 @@ class MockArgList {
size_t read_count() const { return arg_counter; }
};
+// Used for the GPU implementation of `printf`. This models a variadic list as a
+// simple array of pointers that are built manually by the implementation.
+class StructArgList {
+ void *ptr;
+
+public:
+ LIBC_INLINE StructArgList(void *ptr) : ptr(ptr) {}
+ LIBC_INLINE StructArgList(const StructArgList &other) { ptr = other.ptr; }
+ LIBC_INLINE StructArgList() = default;
+ LIBC_INLINE ~StructArgList() = default;
+
+ LIBC_INLINE StructArgList &operator=(const StructArgList &rhs) {
+ ptr = rhs.ptr;
+ return *this;
+ }
+
+ LIBC_INLINE void *get_ptr() const { return ptr; }
+
+ template <class T> LIBC_INLINE T next_var() {
+ ptr = reinterpret_cast<void *>(
+ ((reinterpret_cast<uintptr_t>(ptr) + alignof(T) - 1) / alignof(T)) *
+ alignof(T));
+
+ T val = *reinterpret_cast<T *>(ptr);
+ ptr = reinterpret_cast<unsigned char *>(ptr) + sizeof(T);
+ return val;
+ }
+}; // namespace __llvm_libc
+
} // namespace internal
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt
index e20228516b5112..4508abea7a8886 100644
--- a/libc/src/gpu/CMakeLists.txt
+++ b/libc/src/gpu/CMakeLists.txt
@@ -8,3 +8,15 @@ add_entrypoint_object(
libc.src.__support.RPC.rpc_client
libc.src.__support.GPU.utils
)
+
+add_entrypoint_object(
+ rpc_fprintf
+ SRCS
+ rpc_fprintf.cpp
+ HDRS
+ rpc_fprintf.h
+ DEPENDS
+ libc.src.stdio.gpu.gpu_file
+ libc.src.__support.RPC.rpc_client
+ libc.src.__support.GPU.utils
+)
diff --git a/libc/src/gpu/rpc_fprintf.cpp b/libc/src/gpu/rpc_fprintf.cpp
new file mode 100644
index 00000000000000..fdff535aa49bc2
--- /dev/null
+++ b/libc/src/gpu/rpc_fprintf.cpp
@@ -0,0 +1,74 @@
+//===-- GPU implementation of fprintf -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "rpc_fprintf.h"
+
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/GPU/utils.h"
+#include "src/__support/RPC/rpc_client.h"
+#include "src/__support/common.h"
+#include "src/stdio/gpu/file.h"
+
+namespace LIBC_NAMESPACE {
+
+template <uint16_t opcode>
+int fprintf_impl(::FILE *__restrict file, const char *__restrict format,
+ size_t format_size, void *args, size_t args_size) {
+ uint64_t mask = gpu::get_lane_mask();
+ rpc::Client::Port port = rpc::client.open<opcode>();
+
+ if constexpr (opcode == RPC_PRINTF_TO_STREAM) {
+ port.send([&](rpc::Buffer *buffer) {
+ buffer->data[0] = reinterpret_cast<uintptr_t>(file);
+ });
+ }
+
+ port.send_n(format, format_size);
+ port.send_n(args, args_size);
+
+ uint32_t num_strs;
+ port.recv([&](rpc::Buffer *buffer) {
+ num_strs = static_cast<uint32_t>(buffer->data[0]);
+ });
+
+ for (uint32_t i = 0; gpu::ballot(mask, i < num_strs); ++i) {
+ const char *str = nullptr;
+ port.recv([&](rpc::Buffer *buffer) {
+ str = reinterpret_cast<const char *>(buffer->data[0]);
+ });
+ uint64_t size = str ? internal::string_length(str) + 1 : 0;
+ port.send_n(str, size);
+ }
+
+ uint32_t ret;
+ port.recv([&](rpc::Buffer *buffer) {
+ ret = static_cast<uint32_t>(buffer->data[0]);
+ });
+ port.close();
+ return ret;
+}
+
+// TODO: This is a stand-in function that uses a struct pointer and size in
+// place of varargs. Once varargs support is added we will use that to
+// implement the real version.
+LLVM_LIBC_FUNCTION(int, rpc_fprintf,
+ (::FILE *__restrict stream, const char *__restrict format,
+ void *args, size_t size)) {
+ cpp::string_view str(format);
+ if (stream == stdout)
+ return fprintf_impl<RPC_PRINTF_TO_STDOUT>(stream, format, str.size() + 1,
+ args, size);
+ else if (stream == stderr)
+ return fprintf_impl<RPC_PRINTF_TO_STDERR>(stream, format, str.size() + 1,
+ args, size);
+ else
+ return fprintf_impl<RPC_PRINTF_TO_STREAM>(stream, format, str.size() + 1,
+ args, size);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/gpu/rpc_fprintf.h b/libc/src/gpu/rpc_fprintf.h
new file mode 100644
index 00000000000000..053f7b4f818ae8
--- /dev/null
+++ b/libc/src/gpu/rpc_fprintf.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for RPC functions -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
+#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+namespace LIBC_NAMESPACE {
+
+int rpc_fprintf(::FILE *__restrict stream, const char *__restrict format,
+ void *argc, size_t size);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
diff --git a/libc/test/integration/src/stdio/CMakeLists.txt b/libc/test/integration/src/stdio/CMakeLists.txt
index 61caa2e5fa17b1..51c5ee25a6b263 100644
--- a/libc/test/integration/src/stdio/CMakeLists.txt
+++ b/libc/test/integration/src/stdio/CMakeLists.txt
@@ -1,3 +1,6 @@
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
+ add_subdirectory(${LIBC_TARGET_OS})
+endif()
add_custom_target(stdio-integration-tests)
add_dependencies(libc-integration-tests stdio-integration-tests)
diff --git a/libc/test/integration/src/stdio/gpu/CMakeLists.txt b/libc/test/integration/src/stdio/gpu/CMakeLists.txt
new file mode 100644
index 00000000000000..6327c45e1ea5a3
--- /dev/null
+++ b/libc/test/integration/src/stdio/gpu/CMakeLists.txt
@@ -0,0 +1,21 @@
+add_custom_target(stdio-gpu-integration-tests)
+add_dependencies(libc-integration-tests stdio-gpu-integration-tests)
+
+# Create an output directory for any temporary test files.
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testdata)
+
+# These tests are not for correctness testing, but are instead a convenient way
+# to generate hermetic binaries for comparitive binary size testing.
+add_integration_test(
+ printf_test
+ SUITE
+ stdio-gpu-integration-tests
+ SRCS
+ printf.cpp
+ DEPENDS
+ libc.src.gpu.rpc_fprintf
+ libc.src.stdio.fopen
+ LOADER_ARGS
+ --threads 32
+ --blocks 4
+)
diff --git a/libc/test/integration/src/stdio/gpu/printf.cpp b/libc/test/integration/src/stdio/gpu/printf.cpp
new file mode 100644
index 00000000000000..b4115801060ed4
--- /dev/null
+++ b/libc/test/integration/src/stdio/gpu/printf.cpp
@@ -0,0 +1,58 @@
+//===-- RPC test to check args to printf ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "test/IntegrationTest/test.h"
+
+#include "src/__support/GPU/utils.h"
+#include "src/gpu/rpc_fprintf.h"
+#include "src/stdio/fopen.h"
+
+using namespace LIBC_NAMESPACE;
+
+FILE *file = LIBC_NAMESPACE::fopen("testdata/test_data.txt", "w");
+
+TEST_MAIN(int argc, char **argv, char **envp) {
+ ASSERT_TRUE(file && "failed to open file");
+ // Check basic printing.
+ int written = 0;
+ written = LIBC_NAMESPACE::rpc_fprintf(file, "A simple string\n", nullptr, 0);
+ ASSERT_EQ(written, 16);
+
+ const char *str = "A simple string\n";
+ written = LIBC_NAMESPACE::rpc_fprintf(file, "%s", &str,
+ sizeof("A sipmle string\n"));
+ ASSERT_EQ(written, 16);
+
+ // Check printing a different value with each thread.
+ uint64_t thread_id = gpu::get_thread_id();
+ written = LIBC_NAMESPACE::rpc_fprintf(file, "%8ld\n", &thread_id,
+ sizeof(thread_id));
+ ASSERT_EQ(written, 9);
+
+ struct {
+ uint32_t x = 1;
+ char c = 'c';
+ double f = 1.0;
+ } args;
+ written =
+ LIBC_NAMESPACE::rpc_fprintf(file, "%d%c%.1f\n", &args, sizeof(args));
+ ASSERT_EQ(written, 6);
+
+ // Check that the server correctly handles divergent numbers of arguments.
+ const char *format = gpu::get_thread_id() % 2 ? "%s" : "%20ld\n";
+ written = LIBC_NAMESPACE::rpc_fprintf(file, format, &str,
+ sizeof("A sipmle string\n"));
+ ASSERT_EQ(written, gpu::get_thread_id() % 2 ? 16 : 21);
+
+ format = gpu::get_thread_id() % 2 ? "%s" : str;
+ written = LIBC_NAMESPACE::rpc_fprintf(file, format, &str,
+ sizeof("A sipmle string\n"));
+ ASSERT_EQ(written, 16);
+
+ return 0;
+}
diff --git a/libc/utils/gpu/server/CMakeLists.txt b/libc/utils/gpu/server/CMakeLists.txt
index 6fca72cfae95fb..766ced2c5400c3 100644
--- a/libc/utils/gpu/server/CMakeLists.txt
+++ b/libc/utils/gpu/server/CMakeLists.txt
@@ -1,4 +1,8 @@
-add_library(llvmlibc_rpc_server STATIC rpc_server.cpp)
+add_library(llvmlibc_rpc_server STATIC
+ ${LIBC_SOURCE_DIR}/src/stdio/printf_core/writer.cpp
+ ${LIBC_SOURCE_DIR}/src/stdio/printf_core/converter.cpp
+ rpc_server.cpp
+)
# Include the RPC implemenation from libc.
target_include_directories(llvmlibc_rpc_server PRIVATE ${LIBC_SOURCE_DIR})
@@ -9,6 +13,9 @@ target_include_directories(llvmlibc_rpc_server PUBLIC ${CMAKE_CURRENT_SOURCE_DIR
target_compile_options(llvmlibc_rpc_server PUBLIC
$<$<CXX_COMPILER_ID:GNU>:-Wno-attributes>)
target_compile_definitions(llvmlibc_rpc_server PUBLIC
+ LIBC_COPT_ARRAY_ARG_LIST
+ LIBC_COPT_PRINTF_DISABLE_WRITE_INT
+ LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
LIBC_NAMESPACE=${LIBC_NAMESPACE})
# Install the server and associated header.
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 90af1569c4c53b..b3004c8325a5b7 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -9,7 +9,13 @@
#include "llvmlibc_rpc_server.h"
#include "src/__support/RPC/rpc.h"
+#include "src/__support/arg_list.h"
+#include "src/stdio/printf_core/converter.h"
+#include "src/stdio/printf_core/parser.h"
+#include "src/stdio/printf_core/writer.h"
+
#include "src/stdio/gpu/file.h"
+#include <algorithm>
#include <atomic>
#include <cstdio>
#include <cstring>
@@ -20,6 +26,7 @@
#include <vector>
using namespace LIBC_NAMESPACE;
+using namespace LIBC_NAMESPACE::printf_core;
static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer),
"Buffer size mismatch");
@@ -27,6 +34,131 @@ static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer),
static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT,
"Incorrect maximum port count");
+template <uint32_t lane_size> void handle_printf(rpc::Server::Port &port) {
+ FILE *files[lane_size] = {nullptr};
+ // Get the appropriate output stream to use.
+ if (port.get_opcode() == RPC_PRINTF_TO_STREAM)
+ port.recv([&](rpc::Buffer *buffer, uint32_t id) {
+ files[id] = reinterpret_cast<FILE *>(buffer->data[0]);
+ });
+ else if (port.get_opcode() == RPC_PRINTF_TO_STDOUT)
+ std::fill(files, files + lane_size, stdout);
+ else
+ std::fill(files, files + lane_size, stderr);
+
+ uint64_t format_sizes[lane_size] = {0};
+ void *format[lane_size] = {nullptr};
+
+ uint64_t args_sizes[lane_size] = {0};
+ void *args[lane_size] = {nullptr};
+
+ // Recieve the format string and arguments from the client.
+ port.recv_n(format, format_sizes,
+ [&](uint64_t size) { return new char[size]; });
+ port.recv_n(args, args_sizes, [&](uint64_t size) { return new char[size]; });
+
+ // Identify any arguments that are actually pointers to strings on the client.
+ // Additionally we want to determine how much buffer space we need to print.
+ std::vector<void *> strs_to_copy[lane_size];
+ int buffer_size[lane_size] = {0};
+ for (uint32_t lane = 0; lane < lane_size; ++lane) {
+ if (!format[lane])
+ continue;
+
+ WriteBuffer wb(nullptr, 0);
+ Writer writer(&wb);
+
+ internal::StructArgList printf_args(args[lane]);
+ Parser<internal::StructArgList> parser(
+ reinterpret_cast<const char *>(format[lane]), printf_args);
+
+ for (FormatSection cur_section = parser.get_next_section();
+ !cur_section.raw_string.empty();
+ cur_section = parser.get_next_section()) {
+ if (cur_section.has_conv && cur_section.conv_name == 's') {
+ strs_to_copy[lane].emplace_back(cur_section.conv_val_ptr);
+ } else if (cur_section.has_conv) {
+ convert(&writer, cur_section);
+ } else {
+ writer.write(cur_section.raw_string);
+ }
+ }
+ buffer_size[lane] = writer.get_chars_written();
+ }
+
+ // Send the number of string arguments we wish to copy back.
+ port.send([&](rpc::Buffer *buffer, uint32_t lane) {
+ buffer->data[0] = strs_to_copy[lane].size();
+ });
+
+ // Recieve the strings from the client and push them into a new buffer.
+ std::vector<void *> copied_strs[lane_size];
+ while (std::any_of(std::begin(strs_to_copy), std::end(strs_to_copy),
+ [](const auto &v) { return !v.empty(); })) {
+ port.send([&](rpc::Buffer *buffer, uint32_t id) {
+ void *ptr = !strs_to_copy[id].empty() ? strs_to_copy[id].back() : nullptr;
+ buffer->data[0] = reinterpret_cast<uintptr_t>(ptr);
+ if (!strs_to_copy[id].empty())
+ strs_to_copy[id].pop_back();
+ });
+ uint64_t str_sizes[lane_size] = {0};
+ void *strs[lane_size] = {nullptr};
+ port.recv_n(strs, str_sizes, [](uint64_t size) { return new char[size]; });
+ for (uint32_t lane = 0; lane < lane_size; ++lane) {
+ if (!strs[lane])
+ continue;
+
+ copied_strs[lane].emplace_back(strs[lane]);
+ buffer_size[lane] += str_sizes[lane];
+ }
+ }
+
+ // Perform the final formatting and printing using the LLVM C library support.
+ int results[lane_size] = {0};
+ for (uint32_t lane = 0; lane < lane_size; ++lane) {
+ if (!format[lane])
+ continue;
+
+ std::unique_ptr<char[]> buffer(new char[buffer_size[lane]]);
+ WriteBuffer wb(buffer.get(), buffer_size[lane]);
+ Writer writer(&wb);
+
+ internal::StructArgList printf_args(args[lane]);
+ Parser<internal::StructArgList> parser(
+ reinterpret_cast<const char *>(format[lane]), printf_args);
+
+ // Parse and print the format string using the arguments we copied from
+ // the client.
+ for (FormatSection cur_section = parser.get_next_section();
+ !cur_section.raw_string.empty();
+ cur_section = parser.get_next_section()) {
+ // If this argument was a string we use the memory buffer we copied from
+ // the client by replacing the raw pointer with the copied one.
+ if (cur_section.has_conv && cur_section.conv_name == 's' &&
+ !copied_strs[lane].empty()) {
+ cur_section.conv_val_ptr = copied_strs[lane].back();
+ delete[] reinterpret_cast<char *>(copied_strs[lane].back());
+ copied_strs[lane].pop_back();
+ }
+ if (cur_section.has_conv) {
+ convert(&writer, cur_section);
+ } else {
+ writer.write(cur_section.raw_string);
+ }
+ }
+ results[lane] =
+ fwrite(buffer.get(), 1, writer.get_chars_written(), files[lane]);
+ if (results[lane] != writer.get_chars_written())
+ results[lane] = -1;
+ }
+
+ port.send([&](rpc::Buffer *buffer, uint32_t id) {
+ buffer->data[0] = static_cast<uint64_t>(results[id]);
+ delete[] reinterpret_cast<char *>(format[id]);
+ delete[] reinterpret_cast<char *>(args[id]);
+ });
+}
+
template <uint32_t lane_size>
rpc_status_t handle_server_impl(
rpc::Server &server,
@@ -190,6 +322,12 @@ rpc_status_t handle_server_impl(
});
break;
}
+ case RPC_PRINTF_TO_STREAM:
+ case RPC_PRINTF_TO_STDOUT:
+ case RPC_PRINTF_TO_STDERR: {
+ handle_printf<lane_size>(*port);
+ break;
+ }
case RPC_NOOP: {
port->recv([](rpc::Buffer *) {});
break;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This mostly LGTM with a minor comment.
if (cur_section.has_conv && cur_section.conv_name == 's') { | ||
strs_to_copy[lane].emplace_back(cur_section.conv_val_ptr); | ||
} else if (cur_section.has_conv) { | ||
convert(&writer, cur_section); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you should probably store the result of convert
here and below for error checking purposes. It returns a negative value if it fails.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, if it returns some value we should set the return value to -1?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it returns a negative number then you should return that value immediately, if possible. I don't think you'll actually run into any errors (there's a list of error values I've defined at the bottom of printf_core/core_structs.h
) but if you do then something has gone seriously wrong.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Alright, made it break the loop and return negative.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it doesn't seem like you're checking the return value of convert here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is just to get the size required for the buffer. If there's a conversion error it will show up below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a comment to say that this is intentional.
libc/utils/gpu/server/rpc_server.cpp
Outdated
for (FormatSection cur_section = parser.get_next_section(); | ||
!cur_section.raw_string.empty(); | ||
cur_section = parser.get_next_section()) { | ||
if (cur_section.has_conv && cur_section.conv_name == 's') { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Remove curly braces
|
||
using namespace LIBC_NAMESPACE; | ||
|
||
FILE *file = LIBC_NAMESPACE::fopen("testdata/test_data.txt", "w"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why bother doing this in a static constructor
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The static constructor is executed by a single thread in a separate kernel. That means that it will only be done once and it guaranteed to be done before all the threads and blocks of the main test.
FILE *file = LIBC_NAMESPACE::fopen("testdata/test_data.txt", "w"); | ||
|
||
TEST_MAIN(int argc, char **argv, char **envp) { | ||
ASSERT_TRUE(file && "failed to open file"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ASSERT_TRUE() << filename?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a sanity check to make sure that the file was actually created, the filename isn't important.
ca2efd5
to
f417b33
Compare
Summary: This patch adds an implementation of `printf` that's provided by the GPU C library runtime. This `pritnf` currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This `printf` differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the `%n` option. Depends on llvm#85331
I haven't decoded the implementation. A stop-gap implementation that hangs off the openmp special casing of printf seems broadly reasonable. I'd suggest going much heavier on testing though. Especially since said tests will remain valid and useful if this implementation is replaced. Perhaps libc already has some large number of printf style tests that will run on the GPU (successfully) with this shim in place? |
Broadly speaking we copy over the string and arguments from the GPU. The CPU then parses the string to get the needed buffer size and to identify any
I can add some more tests, however since all this amounts to is passing data to the CPU version of libc's printf I was primarily just concerned with whether or not it handles situations relating to the GPU correctly. Evenutally when this is "real" |
Summary: This patch adds an implementation of `printf` that's provided by the GPU C library runtime. This `pritnf` currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This `printf` differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the `%n` option. Depends on llvm#85331
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall LGTM with a comment
if (cur_section.has_conv && cur_section.conv_name == 's') { | ||
strs_to_copy[lane].emplace_back(cur_section.conv_val_ptr); | ||
} else if (cur_section.has_conv) { | ||
convert(&writer, cur_section); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it doesn't seem like you're checking the return value of convert here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Summary: This patch adds a temporary implementation that uses a struct-based interface in lieu of varargs support. Once varargs support exists we will move this implementation to the "real" printf implementation. Conceptually, this patch has the client copy over its format string and arguments to the server. The server will then scan the format string searching for any specifiers that are actually a string. If it is a string then we will send the pointer back to the server to tell it to copy it back. This copied value will then replace the pointer when the final formatting is done. This will require a built-in extension to the varargs support to get access to the underlying struct. The varargs used on the GPU will simply be a struct wrapped in a varargs ABI.
Summary: This patch adds an implementation of `printf` that's provided by the GPU C library runtime. This `pritnf` currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This `printf` differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the `%n` option. Depends on llvm#85331
Summary: This patch adds an implementation of `printf` that's provided by the GPU C library runtime. This `pritnf` currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This `printf` differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the `%n` option. Depends on #85331
Summary: Relanding after reverting, only applies to AMDGPU for now. This patch adds an implementation of printf that's provided by the GPU C library runtime. This pritnf currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This printf differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the %n option. Depends on #85331
Summary:
This patch adds a temporary implementation that uses a struct-based
interface in lieu of varargs support. Once varargs support exists we
will move this implementation to the "real" printf implementation.
Conceptually, this patch has the client copy over its format string and
arguments to the server. The server will then scan the format string
searching for any specifiers that are actually a string. If it is a
string then we will send the pointer back to the server to tell it to
copy it back. This copied value will then replace the pointer when the
final formatting is done.
This will require a built-in extension to the varargs support to get
access to the underlying struct. The varargs used on the GPU will simply
be a struct wrapped in a varargs ABI.