[libc] Implement temporary `printf` on the GPU #85331

jhuber6 · 2024-03-14T23:17:30Z

Summary:
This patch adds a temporary implementation that uses a struct-based
interface in lieu of varargs support. Once varargs support exists we
will move this implementation to the "real" printf implementation.

Conceptually, this patch has the client copy over its format string and
arguments to the server. The server will then scan the format string
searching for any specifiers that are actually a string. If it is a
string then we will send the pointer back to the server to tell it to
copy it back. This copied value will then replace the pointer when the
final formatting is done.

This will require a built-in extension to the varargs support to get
access to the underlying struct. The varargs used on the GPU will simply
be a struct wrapped in a varargs ABI.

llvmbot · 2024-03-14T23:18:00Z

@llvm/pr-subscribers-libc

Author: Joseph Huber (jhuber6)

Changes

Summary:
This patch adds a temporary implementation that uses a struct-based
interface in lieu of varargs support. Once varargs support exists we
will move this implementation to the "real" printf implementation.

Conceptually, this patch has the client copy over its format string and
arguments to the server. The server will then scan the format string
searching for any specifiers that are actually a string. If it is a
string then we will send the pointer back to the server to tell it to
copy it back. This copied value will then replace the pointer when the
final formatting is done.

This will require a built-in extension to the varargs support to get
access to the underlying struct. The varargs used on the GPU will simply
be a struct wrapped in a varargs ABI.

Full diff: https://github.com/llvm/llvm-project/pull/85331.diff

12 Files Affected:

(modified) libc/config/gpu/entrypoints.txt (+1)
(modified) libc/include/llvm-libc-types/rpc_opcodes_t.h (+3)
(modified) libc/spec/gpu_ext.td (+8)
(modified) libc/src/__support/arg_list.h (+29)
(modified) libc/src/gpu/CMakeLists.txt (+12)
(added) libc/src/gpu/rpc_fprintf.cpp (+74)
(added) libc/src/gpu/rpc_fprintf.h (+22)
(modified) libc/test/integration/src/stdio/CMakeLists.txt (+3)
(added) libc/test/integration/src/stdio/gpu/CMakeLists.txt (+21)
(added) libc/test/integration/src/stdio/gpu/printf.cpp (+58)
(modified) libc/utils/gpu/server/CMakeLists.txt (+8-1)
(modified) libc/utils/gpu/server/rpc_server.cpp (+138)

diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 4fb87cb9f5a33e..b678350e9fcb15 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -211,6 +211,7 @@ set(TARGET_LIBC_ENTRYPOINTS
 
     # gpu/rpc.h entrypoints
     libc.src.gpu.rpc_host_call
+    libc.src.gpu.rpc_fprintf
 )
 
 set(TARGET_LIBM_ENTRYPOINTS
diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index 919ea039c18e32..faed7b5f5ff46a 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -31,6 +31,9 @@ typedef enum {
   RPC_FTELL,
   RPC_FFLUSH,
   RPC_UNGETC,
+  RPC_PRINTF_TO_STDOUT,
+  RPC_PRINTF_TO_STDERR,
+  RPC_PRINTF_TO_STREAM,
   RPC_LAST = 0xFFFF,
 } rpc_opcode_t;
 
diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td
index dce81ff7786203..5400e0afa7564a 100644
--- a/libc/spec/gpu_ext.td
+++ b/libc/spec/gpu_ext.td
@@ -10,6 +10,14 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> {
             RetValSpec<VoidType>,
             [ArgSpec<VoidPtr>, ArgSpec<VoidPtr>, ArgSpec<SizeTType>]
         >,
+        FunctionSpec<
+            "rpc_fprintf",
+            RetValSpec<IntType>,
+            [ArgSpec<FILERestrictedPtr>,
+             ArgSpec<ConstCharRestrictedPtr>,
+             ArgSpec<VoidPtr>,
+             ArgSpec<SizeTType>]
+        >,
     ]
   >;
   let Headers = [
diff --git a/libc/src/__support/arg_list.h b/libc/src/__support/arg_list.h
index 9de17651142f49..edc785ead63e4d 100644
--- a/libc/src/__support/arg_list.h
+++ b/libc/src/__support/arg_list.h
@@ -60,6 +60,35 @@ class MockArgList {
   size_t read_count() const { return arg_counter; }
 };
 
+// Used for the GPU implementation of `printf`. This models a variadic list as a
+// simple array of pointers that are built manually by the implementation.
+class StructArgList {
+  void *ptr;
+
+public:
+  LIBC_INLINE StructArgList(void *ptr) : ptr(ptr) {}
+  LIBC_INLINE StructArgList(const StructArgList &other) { ptr = other.ptr; }
+  LIBC_INLINE StructArgList() = default;
+  LIBC_INLINE ~StructArgList() = default;
+
+  LIBC_INLINE StructArgList &operator=(const StructArgList &rhs) {
+    ptr = rhs.ptr;
+    return *this;
+  }
+
+  LIBC_INLINE void *get_ptr() const { return ptr; }
+
+  template <class T> LIBC_INLINE T next_var() {
+    ptr = reinterpret_cast<void *>(
+        ((reinterpret_cast<uintptr_t>(ptr) + alignof(T) - 1) / alignof(T)) *
+        alignof(T));
+
+    T val = *reinterpret_cast<T *>(ptr);
+    ptr = reinterpret_cast<unsigned char *>(ptr) + sizeof(T);
+    return val;
+  }
+}; // namespace __llvm_libc
+
 } // namespace internal
 } // namespace LIBC_NAMESPACE
 
diff --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt
index e20228516b5112..4508abea7a8886 100644
--- a/libc/src/gpu/CMakeLists.txt
+++ b/libc/src/gpu/CMakeLists.txt
@@ -8,3 +8,15 @@ add_entrypoint_object(
     libc.src.__support.RPC.rpc_client
     libc.src.__support.GPU.utils
 )
+
+add_entrypoint_object(
+  rpc_fprintf
+  SRCS
+    rpc_fprintf.cpp
+  HDRS
+    rpc_fprintf.h
+  DEPENDS
+    libc.src.stdio.gpu.gpu_file
+    libc.src.__support.RPC.rpc_client
+    libc.src.__support.GPU.utils
+)
diff --git a/libc/src/gpu/rpc_fprintf.cpp b/libc/src/gpu/rpc_fprintf.cpp
new file mode 100644
index 00000000000000..fdff535aa49bc2
--- /dev/null
+++ b/libc/src/gpu/rpc_fprintf.cpp
@@ -0,0 +1,74 @@
+//===-- GPU implementation of fprintf -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "rpc_fprintf.h"
+
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/GPU/utils.h"
+#include "src/__support/RPC/rpc_client.h"
+#include "src/__support/common.h"
+#include "src/stdio/gpu/file.h"
+
+namespace LIBC_NAMESPACE {
+
+template <uint16_t opcode>
+int fprintf_impl(::FILE *__restrict file, const char *__restrict format,
+                 size_t format_size, void *args, size_t args_size) {
+  uint64_t mask = gpu::get_lane_mask();
+  rpc::Client::Port port = rpc::client.open<opcode>();
+
+  if constexpr (opcode == RPC_PRINTF_TO_STREAM) {
+    port.send([&](rpc::Buffer *buffer) {
+      buffer->data[0] = reinterpret_cast<uintptr_t>(file);
+    });
+  }
+
+  port.send_n(format, format_size);
+  port.send_n(args, args_size);
+
+  uint32_t num_strs;
+  port.recv([&](rpc::Buffer *buffer) {
+    num_strs = static_cast<uint32_t>(buffer->data[0]);
+  });
+
+  for (uint32_t i = 0; gpu::ballot(mask, i < num_strs); ++i) {
+    const char *str = nullptr;
+    port.recv([&](rpc::Buffer *buffer) {
+      str = reinterpret_cast<const char *>(buffer->data[0]);
+    });
+    uint64_t size = str ? internal::string_length(str) + 1 : 0;
+    port.send_n(str, size);
+  }
+
+  uint32_t ret;
+  port.recv([&](rpc::Buffer *buffer) {
+    ret = static_cast<uint32_t>(buffer->data[0]);
+  });
+  port.close();
+  return ret;
+}
+
+// TODO: This is a stand-in function that uses a struct pointer and size in
+// place of varargs. Once varargs support is added we will use that to
+// implement the real version.
+LLVM_LIBC_FUNCTION(int, rpc_fprintf,
+                   (::FILE *__restrict stream, const char *__restrict format,
+                    void *args, size_t size)) {
+  cpp::string_view str(format);
+  if (stream == stdout)
+    return fprintf_impl<RPC_PRINTF_TO_STDOUT>(stream, format, str.size() + 1,
+                                              args, size);
+  else if (stream == stderr)
+    return fprintf_impl<RPC_PRINTF_TO_STDERR>(stream, format, str.size() + 1,
+                                              args, size);
+  else
+    return fprintf_impl<RPC_PRINTF_TO_STREAM>(stream, format, str.size() + 1,
+                                              args, size);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/gpu/rpc_fprintf.h b/libc/src/gpu/rpc_fprintf.h
new file mode 100644
index 00000000000000..053f7b4f818ae8
--- /dev/null
+++ b/libc/src/gpu/rpc_fprintf.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for RPC functions -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
+#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+namespace LIBC_NAMESPACE {
+
+int rpc_fprintf(::FILE *__restrict stream, const char *__restrict format,
+                void *argc, size_t size);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
diff --git a/libc/test/integration/src/stdio/CMakeLists.txt b/libc/test/integration/src/stdio/CMakeLists.txt
index 61caa2e5fa17b1..51c5ee25a6b263 100644
--- a/libc/test/integration/src/stdio/CMakeLists.txt
+++ b/libc/test/integration/src/stdio/CMakeLists.txt
@@ -1,3 +1,6 @@
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
+  add_subdirectory(${LIBC_TARGET_OS})
+endif()
 add_custom_target(stdio-integration-tests)
 add_dependencies(libc-integration-tests stdio-integration-tests)
 
diff --git a/libc/test/integration/src/stdio/gpu/CMakeLists.txt b/libc/test/integration/src/stdio/gpu/CMakeLists.txt
new file mode 100644
index 00000000000000..6327c45e1ea5a3
--- /dev/null
+++ b/libc/test/integration/src/stdio/gpu/CMakeLists.txt
@@ -0,0 +1,21 @@
+add_custom_target(stdio-gpu-integration-tests)
+add_dependencies(libc-integration-tests stdio-gpu-integration-tests)
+
+# Create an output directory for any temporary test files.
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testdata)
+
+# These tests are not for correctness testing, but are instead a convenient way
+# to generate hermetic binaries for comparitive binary size testing.
+add_integration_test(
+  printf_test
+  SUITE
+    stdio-gpu-integration-tests
+  SRCS
+    printf.cpp
+  DEPENDS
+    libc.src.gpu.rpc_fprintf
+    libc.src.stdio.fopen
+  LOADER_ARGS
+    --threads 32
+    --blocks 4
+)
diff --git a/libc/test/integration/src/stdio/gpu/printf.cpp b/libc/test/integration/src/stdio/gpu/printf.cpp
new file mode 100644
index 00000000000000..b4115801060ed4
--- /dev/null
+++ b/libc/test/integration/src/stdio/gpu/printf.cpp
@@ -0,0 +1,58 @@
+//===-- RPC test to check args to printf ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "test/IntegrationTest/test.h"
+
+#include "src/__support/GPU/utils.h"
+#include "src/gpu/rpc_fprintf.h"
+#include "src/stdio/fopen.h"
+
+using namespace LIBC_NAMESPACE;
+
+FILE *file = LIBC_NAMESPACE::fopen("testdata/test_data.txt", "w");
+
+TEST_MAIN(int argc, char **argv, char **envp) {
+  ASSERT_TRUE(file && "failed to open file");
+  // Check basic printing.
+  int written = 0;
+  written = LIBC_NAMESPACE::rpc_fprintf(file, "A simple string\n", nullptr, 0);
+  ASSERT_EQ(written, 16);
+
+  const char *str = "A simple string\n";
+  written = LIBC_NAMESPACE::rpc_fprintf(file, "%s", &str,
+                                        sizeof("A sipmle string\n"));
+  ASSERT_EQ(written, 16);
+
+  // Check printing a different value with each thread.
+  uint64_t thread_id = gpu::get_thread_id();
+  written = LIBC_NAMESPACE::rpc_fprintf(file, "%8ld\n", &thread_id,
+                                        sizeof(thread_id));
+  ASSERT_EQ(written, 9);
+
+  struct {
+    uint32_t x = 1;
+    char c = 'c';
+    double f = 1.0;
+  } args;
+  written =
+      LIBC_NAMESPACE::rpc_fprintf(file, "%d%c%.1f\n", &args, sizeof(args));
+  ASSERT_EQ(written, 6);
+
+  // Check that the server correctly handles divergent numbers of arguments.
+  const char *format = gpu::get_thread_id() % 2 ? "%s" : "%20ld\n";
+  written = LIBC_NAMESPACE::rpc_fprintf(file, format, &str,
+                                        sizeof("A sipmle string\n"));
+  ASSERT_EQ(written, gpu::get_thread_id() % 2 ? 16 : 21);
+
+  format = gpu::get_thread_id() % 2 ? "%s" : str;
+  written = LIBC_NAMESPACE::rpc_fprintf(file, format, &str,
+                                        sizeof("A sipmle string\n"));
+  ASSERT_EQ(written, 16);
+
+  return 0;
+}
diff --git a/libc/utils/gpu/server/CMakeLists.txt b/libc/utils/gpu/server/CMakeLists.txt
index 6fca72cfae95fb..766ced2c5400c3 100644
--- a/libc/utils/gpu/server/CMakeLists.txt
+++ b/libc/utils/gpu/server/CMakeLists.txt
@@ -1,4 +1,8 @@
-add_library(llvmlibc_rpc_server STATIC rpc_server.cpp)
+add_library(llvmlibc_rpc_server STATIC
+  ${LIBC_SOURCE_DIR}/src/stdio/printf_core/writer.cpp
+  ${LIBC_SOURCE_DIR}/src/stdio/printf_core/converter.cpp
+  rpc_server.cpp
+)
 
 # Include the RPC implemenation from libc.
 target_include_directories(llvmlibc_rpc_server PRIVATE ${LIBC_SOURCE_DIR})
@@ -9,6 +13,9 @@ target_include_directories(llvmlibc_rpc_server PUBLIC ${CMAKE_CURRENT_SOURCE_DIR
 target_compile_options(llvmlibc_rpc_server PUBLIC
                        $<$<CXX_COMPILER_ID:GNU>:-Wno-attributes>)
 target_compile_definitions(llvmlibc_rpc_server PUBLIC
+                           LIBC_COPT_ARRAY_ARG_LIST
+                           LIBC_COPT_PRINTF_DISABLE_WRITE_INT
+                           LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
                            LIBC_NAMESPACE=${LIBC_NAMESPACE})
 
 # Install the server and associated header.
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 90af1569c4c53b..b3004c8325a5b7 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -9,7 +9,13 @@
 #include "llvmlibc_rpc_server.h"
 
 #include "src/__support/RPC/rpc.h"
+#include "src/__support/arg_list.h"
+#include "src/stdio/printf_core/converter.h"
+#include "src/stdio/printf_core/parser.h"
+#include "src/stdio/printf_core/writer.h"
+
 #include "src/stdio/gpu/file.h"
+#include <algorithm>
 #include <atomic>
 #include <cstdio>
 #include <cstring>
@@ -20,6 +26,7 @@
 #include <vector>
 
 using namespace LIBC_NAMESPACE;
+using namespace LIBC_NAMESPACE::printf_core;
 
 static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer),
               "Buffer size mismatch");
@@ -27,6 +34,131 @@ static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer),
 static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT,
               "Incorrect maximum port count");
 
+template <uint32_t lane_size> void handle_printf(rpc::Server::Port &port) {
+  FILE *files[lane_size] = {nullptr};
+  // Get the appropriate output stream to use.
+  if (port.get_opcode() == RPC_PRINTF_TO_STREAM)
+    port.recv([&](rpc::Buffer *buffer, uint32_t id) {
+      files[id] = reinterpret_cast<FILE *>(buffer->data[0]);
+    });
+  else if (port.get_opcode() == RPC_PRINTF_TO_STDOUT)
+    std::fill(files, files + lane_size, stdout);
+  else
+    std::fill(files, files + lane_size, stderr);
+
+  uint64_t format_sizes[lane_size] = {0};
+  void *format[lane_size] = {nullptr};
+
+  uint64_t args_sizes[lane_size] = {0};
+  void *args[lane_size] = {nullptr};
+
+  // Recieve the format string and arguments from the client.
+  port.recv_n(format, format_sizes,
+              [&](uint64_t size) { return new char[size]; });
+  port.recv_n(args, args_sizes, [&](uint64_t size) { return new char[size]; });
+
+  // Identify any arguments that are actually pointers to strings on the client.
+  // Additionally we want to determine how much buffer space we need to print.
+  std::vector<void *> strs_to_copy[lane_size];
+  int buffer_size[lane_size] = {0};
+  for (uint32_t lane = 0; lane < lane_size; ++lane) {
+    if (!format[lane])
+      continue;
+
+    WriteBuffer wb(nullptr, 0);
+    Writer writer(&wb);
+
+    internal::StructArgList printf_args(args[lane]);
+    Parser<internal::StructArgList> parser(
+        reinterpret_cast<const char *>(format[lane]), printf_args);
+
+    for (FormatSection cur_section = parser.get_next_section();
+         !cur_section.raw_string.empty();
+         cur_section = parser.get_next_section()) {
+      if (cur_section.has_conv && cur_section.conv_name == 's') {
+        strs_to_copy[lane].emplace_back(cur_section.conv_val_ptr);
+      } else if (cur_section.has_conv) {
+        convert(&writer, cur_section);
+      } else {
+        writer.write(cur_section.raw_string);
+      }
+    }
+    buffer_size[lane] = writer.get_chars_written();
+  }
+
+  // Send the number of string arguments we wish to copy back.
+  port.send([&](rpc::Buffer *buffer, uint32_t lane) {
+    buffer->data[0] = strs_to_copy[lane].size();
+  });
+
+  // Recieve the strings from the client and push them into a new buffer.
+  std::vector<void *> copied_strs[lane_size];
+  while (std::any_of(std::begin(strs_to_copy), std::end(strs_to_copy),
+                     [](const auto &v) { return !v.empty(); })) {
+    port.send([&](rpc::Buffer *buffer, uint32_t id) {
+      void *ptr = !strs_to_copy[id].empty() ? strs_to_copy[id].back() : nullptr;
+      buffer->data[0] = reinterpret_cast<uintptr_t>(ptr);
+      if (!strs_to_copy[id].empty())
+        strs_to_copy[id].pop_back();
+    });
+    uint64_t str_sizes[lane_size] = {0};
+    void *strs[lane_size] = {nullptr};
+    port.recv_n(strs, str_sizes, [](uint64_t size) { return new char[size]; });
+    for (uint32_t lane = 0; lane < lane_size; ++lane) {
+      if (!strs[lane])
+        continue;
+
+      copied_strs[lane].emplace_back(strs[lane]);
+      buffer_size[lane] += str_sizes[lane];
+    }
+  }
+
+  // Perform the final formatting and printing using the LLVM C library support.
+  int results[lane_size] = {0};
+  for (uint32_t lane = 0; lane < lane_size; ++lane) {
+    if (!format[lane])
+      continue;
+
+    std::unique_ptr<char[]> buffer(new char[buffer_size[lane]]);
+    WriteBuffer wb(buffer.get(), buffer_size[lane]);
+    Writer writer(&wb);
+
+    internal::StructArgList printf_args(args[lane]);
+    Parser<internal::StructArgList> parser(
+        reinterpret_cast<const char *>(format[lane]), printf_args);
+
+    // Parse and print the format string using the arguments we copied from
+    // the client.
+    for (FormatSection cur_section = parser.get_next_section();
+         !cur_section.raw_string.empty();
+         cur_section = parser.get_next_section()) {
+      // If this argument was a string we use the memory buffer we copied from
+      // the client by replacing the raw pointer with the copied one.
+      if (cur_section.has_conv && cur_section.conv_name == 's' &&
+          !copied_strs[lane].empty()) {
+        cur_section.conv_val_ptr = copied_strs[lane].back();
+        delete[] reinterpret_cast<char *>(copied_strs[lane].back());
+        copied_strs[lane].pop_back();
+      }
+      if (cur_section.has_conv) {
+        convert(&writer, cur_section);
+      } else {
+        writer.write(cur_section.raw_string);
+      }
+    }
+    results[lane] =
+        fwrite(buffer.get(), 1, writer.get_chars_written(), files[lane]);
+    if (results[lane] != writer.get_chars_written())
+      results[lane] = -1;
+  }
+
+  port.send([&](rpc::Buffer *buffer, uint32_t id) {
+    buffer->data[0] = static_cast<uint64_t>(results[id]);
+    delete[] reinterpret_cast<char *>(format[id]);
+    delete[] reinterpret_cast<char *>(args[id]);
+  });
+}
+
 template <uint32_t lane_size>
 rpc_status_t handle_server_impl(
     rpc::Server &server,
@@ -190,6 +322,12 @@ rpc_status_t handle_server_impl(
     });
     break;
   }
+  case RPC_PRINTF_TO_STREAM:
+  case RPC_PRINTF_TO_STDOUT:
+  case RPC_PRINTF_TO_STDERR: {
+    handle_printf<lane_size>(*port);
+    break;
+  }
   case RPC_NOOP: {
     port->recv([](rpc::Buffer *) {});
     break;

michaelrj-google

This mostly LGTM with a minor comment.

michaelrj-google · 2024-03-14T23:40:24Z

libc/utils/gpu/server/rpc_server.cpp

+      if (cur_section.has_conv && cur_section.conv_name == 's') {
+        strs_to_copy[lane].emplace_back(cur_section.conv_val_ptr);
+      } else if (cur_section.has_conv) {
+        convert(&writer, cur_section);


you should probably store the result of convert here and below for error checking purposes. It returns a negative value if it fails.

So, if it returns some value we should set the return value to -1?

If it returns a negative number then you should return that value immediately, if possible. I don't think you'll actually run into any errors (there's a list of error values I've defined at the bottom of printf_core/core_structs.h) but if you do then something has gone seriously wrong.

Alright, made it break the loop and return negative.

it doesn't seem like you're checking the return value of convert here.

This is just to get the size required for the buffer. If there's a conversion error it will show up below.

Added a comment to say that this is intentional.

libc/src/__support/arg_list.h

lntue · 2024-03-15T01:47:09Z

libc/utils/gpu/server/rpc_server.cpp

+    for (FormatSection cur_section = parser.get_next_section();
+         !cur_section.raw_string.empty();
+         cur_section = parser.get_next_section()) {
+      if (cur_section.has_conv && cur_section.conv_name == 's') {


nit: Remove curly braces

arsenm · 2024-03-15T06:49:05Z

libc/test/integration/src/stdio/gpu/printf.cpp

+
+using namespace LIBC_NAMESPACE;
+
+FILE *file = LIBC_NAMESPACE::fopen("testdata/test_data.txt", "w");


Why bother doing this in a static constructor

The static constructor is executed by a single thread in a separate kernel. That means that it will only be done once and it guaranteed to be done before all the threads and blocks of the main test.

arsenm · 2024-03-15T06:50:00Z

libc/test/integration/src/stdio/gpu/printf.cpp

+FILE *file = LIBC_NAMESPACE::fopen("testdata/test_data.txt", "w");
+
+TEST_MAIN(int argc, char **argv, char **envp) {
+  ASSERT_TRUE(file && "failed to open file");


ASSERT_TRUE() << filename?

Just a sanity check to make sure that the file was actually created, the filename isn't important.

Summary: This patch adds an implementation of `printf` that's provided by the GPU C library runtime. This `pritnf` currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This `printf` differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the `%n` option. Depends on llvm#85331

JonChesterfield · 2024-03-19T13:48:08Z

I haven't decoded the implementation.

A stop-gap implementation that hangs off the openmp special casing of printf seems broadly reasonable.

I'd suggest going much heavier on testing though. Especially since said tests will remain valid and useful if this implementation is replaced. Perhaps libc already has some large number of printf style tests that will run on the GPU (successfully) with this shim in place?

jhuber6 · 2024-03-19T13:54:55Z

I haven't decoded the implementation.

Broadly speaking we copy over the string and arguments from the GPU. The CPU then parses the string to get the needed buffer size and to identify any %s arguments. If it was %s the next packet sent will contain a pointer for the client to copy back. If it wasn't the pointer will be null and we will have a return value. Once all that information is on the server it just uses the printf utilities to emit the string.

I'd suggest going much heavier on testing though. Especially since said tests will remain valid and useful if this implementation is replaced. Perhaps libc already has some large number of printf style tests that will run on the GPU (successfully) with this shim in place?

I can add some more tests, however since all this amounts to is passing data to the CPU version of libc's printf I was primarily just concerned with whether or not it handles situations relating to the GPU correctly. Evenutally when this is "real" printf we'll get those tests on the GPU as well, but for now I'm just relying on CPU testing to verify stuff like %0.03-f works.

Summary: This patch adds an implementation of `printf` that's provided by the GPU C library runtime. This `pritnf` currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This `printf` differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the `%n` option. Depends on llvm#85331

jhuber6 · 2024-04-01T12:27:33Z

ping

michaelrj-google

Overall LGTM with a comment

michaelrj-google · 2024-04-02T16:57:19Z

libc/utils/gpu/server/rpc_server.cpp

+      if (cur_section.has_conv && cur_section.conv_name == 's') {
+        strs_to_copy[lane].emplace_back(cur_section.conv_val_ptr);
+      } else if (cur_section.has_conv) {
+        convert(&writer, cur_section);


it doesn't seem like you're checking the return value of convert here.

michaelrj-google

LGTM

Summary: This patch adds a temporary implementation that uses a struct-based interface in lieu of varargs support. Once varargs support exists we will move this implementation to the "real" printf implementation. Conceptually, this patch has the client copy over its format string and arguments to the server. The server will then scan the format string searching for any specifiers that are actually a string. If it is a string then we will send the pointer back to the server to tell it to copy it back. This copied value will then replace the pointer when the final formatting is done. This will require a built-in extension to the varargs support to get access to the underlying struct. The varargs used on the GPU will simply be a struct wrapped in a varargs ABI.

Summary: This patch adds an implementation of `printf` that's provided by the GPU C library runtime. This `pritnf` currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This `printf` differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the `%n` option. Depends on llvm#85331

Summary: This patch adds an implementation of `printf` that's provided by the GPU C library runtime. This `pritnf` currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This `printf` differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the `%n` option. Depends on #85331

Summary: Relanding after reverting, only applies to AMDGPU for now. This patch adds an implementation of printf that's provided by the GPU C library runtime. This pritnf currently implemented using the same wrapper handling that OpenMP sets up. This will be removed once we have proper varargs support. This printf differs from the one CUDA offers in that it is synchronous and uses a finite size. Additionally we support pretty much every format specifier except the %n option. Depends on #85331

jhuber6 requested review from arsenm, Artem-B, jdoerfert, JonChesterfield, lntue, michaelrj-google and shiltian March 14, 2024 23:17

llvmbot added the libc label Mar 14, 2024

michaelrj-google reviewed Mar 14, 2024

View reviewed changes

jhuber6 force-pushed the NewPrintf branch from 402ef75 to 5513bec Compare March 15, 2024 00:03

lntue reviewed Mar 15, 2024

View reviewed changes

libc/src/__support/arg_list.h Outdated Show resolved Hide resolved

jhuber6 force-pushed the NewPrintf branch from 5513bec to 9c14590 Compare March 15, 2024 00:07

lntue reviewed Mar 15, 2024

View reviewed changes

jhuber6 force-pushed the NewPrintf branch from 9c14590 to 3fb6efc Compare March 15, 2024 01:49

arsenm reviewed Mar 15, 2024

View reviewed changes

jhuber6 force-pushed the NewPrintf branch 3 times, most recently from ca2efd5 to f417b33 Compare March 17, 2024 20:45

jhuber6 mentioned this pull request Mar 18, 2024

[Libomptarget] Add RPC-based printf implementation for OpenMP #85638

Merged

jhuber6 force-pushed the NewPrintf branch from f417b33 to c2c9aa2 Compare March 19, 2024 14:31

jhuber6 mentioned this pull request Mar 25, 2024

[libc] Remove obsolete LIBC_HAS_BUILTIN macro #86554

Merged

michaelrj-google reviewed Apr 2, 2024

View reviewed changes

jhuber6 force-pushed the NewPrintf branch from c2c9aa2 to 3d15f3f Compare April 2, 2024 17:33

jhuber6 force-pushed the NewPrintf branch from 3d15f3f to 3ee4c8c Compare April 2, 2024 19:19

michaelrj-google approved these changes Apr 2, 2024

View reviewed changes

jhuber6 force-pushed the NewPrintf branch from 3ee4c8c to 0281f9c Compare April 2, 2024 21:16

jhuber6 merged commit 7327014 into llvm:main Apr 2, 2024


		using namespace LIBC_NAMESPACE;

		FILE *file = LIBC_NAMESPACE::fopen("testdata/test_data.txt", "w");

[libc] Implement temporary printf on the GPU #85331

[libc] Implement temporary printf on the GPU #85331

Uh oh!

Conversation

jhuber6 commented Mar 14, 2024

Uh oh!

llvmbot commented Mar 14, 2024

Uh oh!

michaelrj-google left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

jhuber6 Apr 2, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

lntue Mar 15, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

JonChesterfield commented Mar 19, 2024

Uh oh!

jhuber6 commented Mar 19, 2024

Uh oh!

jhuber6 commented Apr 1, 2024

Uh oh!

michaelrj-google left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

michaelrj-google left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

[libc] Implement temporary `printf` on the GPU #85331

[libc] Implement temporary `printf` on the GPU #85331

jhuber6 Apr 2, 2024 •

edited

Loading

lntue Mar 15, 2024 •

edited

Loading