diff --git a/impeller/renderer/backend/vulkan/command_encoder_vk.cc b/impeller/renderer/backend/vulkan/command_encoder_vk.cc index d963d8519f12a..4bd8f69391eb3 100644 --- a/impeller/renderer/backend/vulkan/command_encoder_vk.cc +++ b/impeller/renderer/backend/vulkan/command_encoder_vk.cc @@ -16,8 +16,9 @@ class TrackedObjectsVK { public: explicit TrackedObjectsVK( const std::weak_ptr& device_holder, - const std::shared_ptr& pool) - : desc_pool_(device_holder) { + const std::shared_ptr& pool, + std::unique_ptr probe) + : desc_pool_(device_holder), probe_(std::move(probe)) { if (!pool) { return; } @@ -78,6 +79,8 @@ class TrackedObjectsVK { DescriptorPoolVK& GetDescriptorPool() { return desc_pool_; } + GPUProbe& GetGPUProbe() const { return *probe_.get(); } + private: DescriptorPoolVK desc_pool_; // `shared_ptr` since command buffers have a link to the command pool. @@ -86,6 +89,7 @@ class TrackedObjectsVK { std::set> tracked_objects_; std::set> tracked_buffers_; std::set> tracked_textures_; + std::unique_ptr probe_; bool is_valid_ = false; FML_DISALLOW_COPY_AND_ASSIGN(TrackedObjectsVK); @@ -115,7 +119,8 @@ std::shared_ptr CommandEncoderFactoryVK::Create() { } auto tracked_objects = std::make_shared( - context_vk.GetDeviceHolder(), tls_pool); + context_vk.GetDeviceHolder(), tls_pool, + context->GetGPUTracer()->CreateGPUProbe()); auto queue = context_vk.GetGraphicsQueue(); if (!tracked_objects || !tracked_objects->IsValid() || !queue) { @@ -134,25 +139,23 @@ std::shared_ptr CommandEncoderFactoryVK::Create() { context_vk.SetDebugName(tracked_objects->GetCommandBuffer(), label_.value()); } - context->GetGPUTracer()->RecordCmdBufferStart( + tracked_objects->GetGPUProbe().RecordCmdBufferStart( tracked_objects->GetCommandBuffer()); - return std::make_shared( - context_vk.GetDeviceHolder(), tracked_objects, queue, - context_vk.GetFenceWaiter(), context->GetGPUTracer()); + return std::make_shared(context_vk.GetDeviceHolder(), + tracked_objects, queue, + context_vk.GetFenceWaiter()); } CommandEncoderVK::CommandEncoderVK( std::weak_ptr device_holder, std::shared_ptr tracked_objects, const std::shared_ptr& queue, - std::shared_ptr fence_waiter, - const std::shared_ptr& gpu_tracer) + std::shared_ptr fence_waiter) : device_holder_(std::move(device_holder)), tracked_objects_(std::move(tracked_objects)), queue_(queue), - fence_waiter_(std::move(fence_waiter)), - gpu_tracer_(gpu_tracer) {} + fence_waiter_(std::move(fence_waiter)) {} CommandEncoderVK::~CommandEncoderVK() = default; @@ -183,23 +186,20 @@ bool CommandEncoderVK::Submit(SubmitCallback callback) { auto command_buffer = GetCommandBuffer(); - auto end_frame = gpu_tracer_->RecordCmdBufferEnd(command_buffer); + tracked_objects_->GetGPUProbe().RecordCmdBufferEnd(command_buffer); auto status = command_buffer.end(); if (status != vk::Result::eSuccess) { - gpu_tracer_->OnFenceComplete(end_frame, false); VALIDATION_LOG << "Failed to end command buffer: " << vk::to_string(status); return false; } std::shared_ptr strong_device = device_holder_.lock(); if (!strong_device) { - gpu_tracer_->OnFenceComplete(end_frame, false); VALIDATION_LOG << "Device lost."; return false; } auto [fence_result, fence] = strong_device->GetDevice().createFenceUnique({}); if (fence_result != vk::Result::eSuccess) { - gpu_tracer_->OnFenceComplete(end_frame, false); VALIDATION_LOG << "Failed to create fence: " << vk::to_string(fence_result); return false; } @@ -209,7 +209,6 @@ bool CommandEncoderVK::Submit(SubmitCallback callback) { submit_info.setCommandBuffers(buffers); status = queue_->Submit(submit_info, *fence); if (status != vk::Result::eSuccess) { - gpu_tracer_->OnFenceComplete(end_frame, false); VALIDATION_LOG << "Failed to submit queue: " << vk::to_string(status); return false; } @@ -217,14 +216,12 @@ bool CommandEncoderVK::Submit(SubmitCallback callback) { // Submit will proceed, call callback with true when it is done and do not // call when `reset` is collected. fail_callback = false; - auto gpu_tracer = gpu_tracer_; return fence_waiter_->AddFence( std::move(fence), - [callback, tracked_objects = std::move(tracked_objects_), gpu_tracer, - end_frame] { - if (end_frame.has_value()) { - gpu_tracer->OnFenceComplete(end_frame, true); - } + [callback, tracked_objects = std::move(tracked_objects_)]() mutable { + // Ensure tracked objects are destructed before calling any final + // callbacks. + tracked_objects.reset(); if (callback) { callback(true); } diff --git a/impeller/renderer/backend/vulkan/command_encoder_vk.h b/impeller/renderer/backend/vulkan/command_encoder_vk.h index 5a1e7d9d72948..641a776853abc 100644 --- a/impeller/renderer/backend/vulkan/command_encoder_vk.h +++ b/impeller/renderer/backend/vulkan/command_encoder_vk.h @@ -6,7 +6,6 @@ #include #include -#include #include "flutter/fml/macros.h" #include "impeller/renderer/backend/vulkan/command_pool_vk.h" @@ -26,6 +25,7 @@ class Texture; class TextureSourceVK; class TrackedObjectsVK; class FenceWaiterVK; +class GPUProbe; class CommandEncoderFactoryVK { public: @@ -51,8 +51,7 @@ class CommandEncoderVK { CommandEncoderVK(std::weak_ptr device_holder, std::shared_ptr tracked_objects, const std::shared_ptr& queue, - std::shared_ptr fence_waiter, - const std::shared_ptr& gpu_tracer); + std::shared_ptr fence_waiter); ~CommandEncoderVK(); @@ -91,7 +90,6 @@ class CommandEncoderVK { std::shared_ptr tracked_objects_; std::shared_ptr queue_; const std::shared_ptr fence_waiter_; - std::shared_ptr gpu_tracer_; bool is_valid_ = true; void Reset(); diff --git a/impeller/renderer/backend/vulkan/gpu_tracer_vk.cc b/impeller/renderer/backend/vulkan/gpu_tracer_vk.cc index 1d16f90661cf5..ea738ad269ce4 100644 --- a/impeller/renderer/backend/vulkan/gpu_tracer_vk.cc +++ b/impeller/renderer/backend/vulkan/gpu_tracer_vk.cc @@ -4,6 +4,8 @@ #include "impeller/renderer/backend/vulkan/gpu_tracer_vk.h" +#include +#include #include #include #include "fml/logging.h" @@ -47,7 +49,7 @@ void GPUTracerVK::MarkFrameEnd() { } Lock lock(trace_state_mutex_); - current_state_ = (current_state_ + 1) % 16; + current_state_ = (current_state_ + 1) % kTraceStatesSize; auto& state = trace_states_[current_state_]; // If there are still pending buffers on the trace state we're switching to, @@ -59,11 +61,15 @@ void GPUTracerVK::MarkFrameEnd() { state.pending_buffers = 0; state.current_index = 0; - state.contains_failure = false; in_frame_ = false; } -void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer) { +std::unique_ptr GPUTracerVK::CreateGPUProbe() { + return std::make_unique(weak_from_this()); +} + +void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer, + GPUProbe& probe) { if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ || !in_frame_) { return; @@ -98,45 +104,47 @@ void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer) { buffer.writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe, trace_states_[current_state_].query_pool.get(), state.current_index); - state.pending_buffers += 1; state.current_index += 1; + if (!probe.index_.has_value()) { + state.pending_buffers += 1; + probe.index_ = current_state_; + } } -std::optional GPUTracerVK::RecordCmdBufferEnd( - const vk::CommandBuffer& buffer) { +void GPUTracerVK::RecordCmdBufferEnd(const vk::CommandBuffer& buffer, + GPUProbe& probe) { if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ || !in_frame_) { - return std::nullopt; + return; } Lock lock(trace_state_mutex_); GPUTraceState& state = trace_states_[current_state_]; if (state.current_index >= kPoolSize) { - return current_state_; + return; } buffer.writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe, state.query_pool.get(), state.current_index); state.current_index += 1; - return current_state_; + if (!probe.index_.has_value()) { + state.pending_buffers += 1; + probe.index_ = current_state_; + } } -void GPUTracerVK::OnFenceComplete(std::optional maybe_frame_index, - bool success) { - if (!enabled_ || !maybe_frame_index.has_value()) { +void GPUTracerVK::OnFenceComplete(size_t frame_index) { + if (!enabled_) { return; } - auto frame_index = maybe_frame_index.value(); Lock lock(trace_state_mutex_); GPUTraceState& state = trace_states_[frame_index]; - if (state.pending_buffers == 0) { - return; - } - state.contains_failure = !success; + + FML_DCHECK(state.pending_buffers > 0); state.pending_buffers -= 1; - if (state.pending_buffers == 0 && !state.contains_failure) { + if (state.pending_buffers == 0) { auto buffer_count = state.current_index; std::vector bits(buffer_count); @@ -171,4 +179,34 @@ void GPUTracerVK::OnFenceComplete(std::optional maybe_frame_index, } } +GPUProbe::GPUProbe(const std::weak_ptr& tracer) + : tracer_(tracer) {} + +GPUProbe::~GPUProbe() { + if (!index_.has_value()) { + return; + } + auto tracer = tracer_.lock(); + if (!tracer) { + return; + } + tracer->OnFenceComplete(index_.value()); +} + +void GPUProbe::RecordCmdBufferStart(const vk::CommandBuffer& buffer) { + auto tracer = tracer_.lock(); + if (!tracer) { + return; + } + tracer->RecordCmdBufferStart(buffer, *this); +} + +void GPUProbe::RecordCmdBufferEnd(const vk::CommandBuffer& buffer) { + auto tracer = tracer_.lock(); + if (!tracer) { + return; + } + tracer->RecordCmdBufferEnd(buffer, *this); +} + } // namespace impeller diff --git a/impeller/renderer/backend/vulkan/gpu_tracer_vk.h b/impeller/renderer/backend/vulkan/gpu_tracer_vk.h index 238bf618f199e..e1fd7150f8355 100644 --- a/impeller/renderer/backend/vulkan/gpu_tracer_vk.h +++ b/impeller/renderer/backend/vulkan/gpu_tracer_vk.h @@ -11,28 +11,19 @@ namespace impeller { +class GPUProbe; + /// @brief A class that uses timestamp queries to record the approximate GPU /// execution time. -class GPUTracerVK { +class GPUTracerVK : public std::enable_shared_from_this { public: explicit GPUTracerVK(const std::shared_ptr& device_holder); ~GPUTracerVK() = default; - /// @brief Record a timestamp query into the provided cmd buffer to record - /// start time. - void RecordCmdBufferStart(const vk::CommandBuffer& buffer); - - /// @brief Record a timestamp query into the provided cmd buffer to record end - /// time. - /// - /// Returns the index that should be passed to [OnFenceComplete]. - std::optional RecordCmdBufferEnd(const vk::CommandBuffer& buffer); - - /// @brief Signal that the cmd buffer is completed. - /// - /// If [frame_index] is std::nullopt, this frame recording is ignored. - void OnFenceComplete(std::optional frame_index, bool success); + /// @brief Create a GPUProbe to trace the execution of a command buffer on the + /// GPU. + std::unique_ptr CreateGPUProbe(); /// @brief Signal the start of a frame workload. /// @@ -47,20 +38,34 @@ class GPUTracerVK { bool IsEnabled() const; private: + friend class GPUProbe; + + static const constexpr size_t kTraceStatesSize = 32u; + + /// @brief Signal that the cmd buffer is completed. + /// + /// If [frame_index] is std::nullopt, this frame recording is ignored. + void OnFenceComplete(size_t frame); + + /// @brief Record a timestamp query into the provided cmd buffer to record + /// start time. + void RecordCmdBufferStart(const vk::CommandBuffer& buffer, GPUProbe& probe); + + /// @brief Record a timestamp query into the provided cmd buffer to record end + /// time. + void RecordCmdBufferEnd(const vk::CommandBuffer& buffer, GPUProbe& probe); + const std::shared_ptr device_holder_; struct GPUTraceState { size_t current_index = 0; size_t pending_buffers = 0; - // If a cmd buffer submission fails for any reason, this field is used - // to indicate that the query pool results may be incomplete and this - // frame should be discarded. - bool contains_failure = false; vk::UniqueQueryPool query_pool; }; mutable Mutex trace_state_mutex_; - GPUTraceState trace_states_[16] IPLR_GUARDED_BY(trace_state_mutex_); + GPUTraceState trace_states_[kTraceStatesSize] IPLR_GUARDED_BY( + trace_state_mutex_); size_t current_state_ IPLR_GUARDED_BY(trace_state_mutex_) = 0u; // The number of nanoseconds for each timestamp unit. @@ -81,4 +86,28 @@ class GPUTracerVK { bool enabled_ = false; }; +class GPUProbe { + public: + explicit GPUProbe(const std::weak_ptr& tracer); + + GPUProbe(GPUProbe&&) = delete; + GPUProbe& operator=(GPUProbe&&) = delete; + + ~GPUProbe(); + + /// @brief Record a timestamp query into the provided cmd buffer to record + /// start time. + void RecordCmdBufferStart(const vk::CommandBuffer& buffer); + + /// @brief Record a timestamp query into the provided cmd buffer to record end + /// time. + void RecordCmdBufferEnd(const vk::CommandBuffer& buffer); + + private: + friend class GPUTracerVK; + + std::weak_ptr tracer_; + std::optional index_ = std::nullopt; +}; + } // namespace impeller diff --git a/impeller/renderer/backend/vulkan/swapchain_impl_vk.cc b/impeller/renderer/backend/vulkan/swapchain_impl_vk.cc index 7d0f9823457de..eb089e44ec45d 100644 --- a/impeller/renderer/backend/vulkan/swapchain_impl_vk.cc +++ b/impeller/renderer/backend/vulkan/swapchain_impl_vk.cc @@ -428,10 +428,6 @@ bool SwapchainImplVK::Present(const std::shared_ptr& image, const auto& context = ContextVK::Cast(*context_strong); const auto& sync = synchronizers_[current_frame_]; - /// Record the approximate end of the GPU workload. This is intentionally - /// done before creating the final cmd buffer as that is not tracked. - context.GetGPUTracer()->MarkFrameEnd(); - //---------------------------------------------------------------------------- /// Transition the image to color-attachment-optimal. /// @@ -481,6 +477,8 @@ bool SwapchainImplVK::Present(const std::shared_ptr& image, } } + context.GetGPUTracer()->MarkFrameEnd(); + auto task = [&, index, current_frame = current_frame_] { auto context_strong = context_.lock(); if (!context_strong) { diff --git a/impeller/renderer/backend/vulkan/test/gpu_tracer_unittests.cc b/impeller/renderer/backend/vulkan/test/gpu_tracer_unittests.cc index 1311347257654..8afa8832f72c9 100644 --- a/impeller/renderer/backend/vulkan/test/gpu_tracer_unittests.cc +++ b/impeller/renderer/backend/vulkan/test/gpu_tracer_unittests.cc @@ -2,7 +2,9 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include #include "flutter/testing/testing.h" // IWYU pragma: keep +#include "fml/synchronization/count_down_latch.h" #include "gtest/gtest.h" #include "impeller/renderer//backend/vulkan/command_encoder_vk.h" #include "impeller/renderer/backend/vulkan/command_buffer_vk.h" @@ -16,51 +18,51 @@ namespace testing { #ifdef IMPELLER_DEBUG TEST(GPUTracerVK, CanTraceCmdBuffer) { auto const context = MockVulkanContextBuilder().Build(); - - auto tracer = std::make_shared(context->GetDeviceHolder()); + auto tracer = context->GetGPUTracer(); ASSERT_TRUE(tracer->IsEnabled()); + tracer->MarkFrameStart(); auto cmd_buffer = context->CreateCommandBuffer(); - auto vk_cmd_buffer = CommandBufferVK::Cast(cmd_buffer.get()); auto blit_pass = cmd_buffer->CreateBlitPass(); + blit_pass->EncodeCommands(context->GetResourceAllocator()); + + auto latch = std::make_shared(1u); + + if (!cmd_buffer->SubmitCommands( + [latch](CommandBuffer::Status status) { latch->CountDown(); })) { + GTEST_FAIL() << "Failed to submit cmd buffer"; + } - tracer->MarkFrameStart(); - tracer->RecordCmdBufferStart(vk_cmd_buffer->GetEncoder()->GetCommandBuffer()); - auto frame_id = tracer->RecordCmdBufferEnd( - vk_cmd_buffer->GetEncoder()->GetCommandBuffer()); tracer->MarkFrameEnd(); + latch->Wait(); - ASSERT_EQ(frame_id, 0u); auto called = GetMockVulkanFunctions(context->GetDevice()); ASSERT_NE(called, nullptr); ASSERT_TRUE(std::find(called->begin(), called->end(), "vkCreateQueryPool") != called->end()); - ASSERT_TRUE(std::find(called->begin(), called->end(), - "vkGetQueryPoolResults") == called->end()); - - tracer->OnFenceComplete(frame_id, true); - ASSERT_TRUE(std::find(called->begin(), called->end(), "vkGetQueryPoolResults") != called->end()); } TEST(GPUTracerVK, DoesNotTraceOutsideOfFrameWorkload) { auto const context = MockVulkanContextBuilder().Build(); - - auto tracer = std::make_shared(context->GetDeviceHolder()); + auto tracer = context->GetGPUTracer(); ASSERT_TRUE(tracer->IsEnabled()); auto cmd_buffer = context->CreateCommandBuffer(); - auto vk_cmd_buffer = CommandBufferVK::Cast(cmd_buffer.get()); auto blit_pass = cmd_buffer->CreateBlitPass(); + blit_pass->EncodeCommands(context->GetResourceAllocator()); + + auto latch = std::make_shared(1u); + if (!cmd_buffer->SubmitCommands( + [latch](CommandBuffer::Status status) { latch->CountDown(); })) { + GTEST_FAIL() << "Failed to submit cmd buffer"; + } - tracer->RecordCmdBufferStart(vk_cmd_buffer->GetEncoder()->GetCommandBuffer()); - auto frame_id = tracer->RecordCmdBufferEnd( - vk_cmd_buffer->GetEncoder()->GetCommandBuffer()); + latch->Wait(); - ASSERT_TRUE(!frame_id.has_value()); auto called = GetMockVulkanFunctions(context->GetDevice()); ASSERT_NE(called, nullptr); @@ -68,13 +70,36 @@ TEST(GPUTracerVK, DoesNotTraceOutsideOfFrameWorkload) { called->end()); ASSERT_TRUE(std::find(called->begin(), called->end(), "vkGetQueryPoolResults") == called->end()); +} - tracer->OnFenceComplete(frame_id, true); +// This cmd buffer starts when there is a frame but finishes when there is none. +// This should result in the same recorded work. +TEST(GPUTracerVK, TracesWithPartialFrameOverlap) { + auto const context = MockVulkanContextBuilder().Build(); + auto tracer = context->GetGPUTracer(); - ASSERT_TRUE(std::find(called->begin(), called->end(), "vkCreateQueryPool") == + ASSERT_TRUE(tracer->IsEnabled()); + tracer->MarkFrameStart(); + + auto cmd_buffer = context->CreateCommandBuffer(); + auto blit_pass = cmd_buffer->CreateBlitPass(); + blit_pass->EncodeCommands(context->GetResourceAllocator()); + tracer->MarkFrameEnd(); + + auto latch = std::make_shared(1u); + if (!cmd_buffer->SubmitCommands( + [latch](CommandBuffer::Status status) { latch->CountDown(); })) { + GTEST_FAIL() << "Failed to submit cmd buffer"; + } + + latch->Wait(); + + auto called = GetMockVulkanFunctions(context->GetDevice()); + ASSERT_NE(called, nullptr); + ASSERT_TRUE(std::find(called->begin(), called->end(), "vkCreateQueryPool") != called->end()); ASSERT_TRUE(std::find(called->begin(), called->end(), - "vkGetQueryPoolResults") == called->end()); + "vkGetQueryPoolResults") != called->end()); } #endif // IMPELLER_DEBUG