41 changes: 19 additions & 22 deletions impeller/renderer/backend/vulkan/command_encoder_vk.cc
@@ -16,8 +16,9 @@ class TrackedObjectsVK {
 public:
  explicit TrackedObjectsVK(
      const std::weak_ptr<const DeviceHolder>& device_holder,
-      const std::shared_ptr<CommandPoolVK>& pool)
-      : desc_pool_(device_holder) {
+      const std::shared_ptr<CommandPoolVK>& pool,
+      std::unique_ptr<GPUProbe> probe)
+      : desc_pool_(device_holder), probe_(std::move(probe)) {
    if (!pool) {
      return;
    }
@@ -78,6 +79,8 @@ class TrackedObjectsVK {

  DescriptorPoolVK& GetDescriptorPool() { return desc_pool_; }

+  GPUProbe& GetGPUProbe() const { return *probe_.get(); }
+
 private:
  DescriptorPoolVK desc_pool_;
  // `shared_ptr` since command buffers have a link to the command pool.
@@ -86,6 +89,7 @@
  std::set<std::shared_ptr<SharedObjectVK>> tracked_objects_;
  std::set<std::shared_ptr<const Buffer>> tracked_buffers_;
  std::set<std::shared_ptr<const TextureSourceVK>> tracked_textures_;
+  std::unique_ptr<GPUProbe> probe_;
  bool is_valid_ = false;

  FML_DISALLOW_COPY_AND_ASSIGN(TrackedObjectsVK);
@@ -115,7 +119,8 @@ std::shared_ptr<CommandEncoderVK> CommandEncoderFactoryVK::Create() {
  }

  auto tracked_objects = std::make_shared<TrackedObjectsVK>(
-      context_vk.GetDeviceHolder(), tls_pool);
+      context_vk.GetDeviceHolder(), tls_pool,
+      context->GetGPUTracer()->CreateGPUProbe());
  auto queue = context_vk.GetGraphicsQueue();

  if (!tracked_objects || !tracked_objects->IsValid() || !queue) {
@@ -134,25 +139,23 @@ std::shared_ptr<CommandEncoderVK> CommandEncoderFactoryVK::Create() {
    context_vk.SetDebugName(tracked_objects->GetCommandBuffer(),
                            label_.value());
  }
-  context->GetGPUTracer()->RecordCmdBufferStart(
+  tracked_objects->GetGPUProbe().RecordCmdBufferStart(
      tracked_objects->GetCommandBuffer());

-  return std::make_shared<CommandEncoderVK>(
-      context_vk.GetDeviceHolder(), tracked_objects, queue,
-      context_vk.GetFenceWaiter(), context->GetGPUTracer());
+  return std::make_shared<CommandEncoderVK>(context_vk.GetDeviceHolder(),
+                                            tracked_objects, queue,
+                                            context_vk.GetFenceWaiter());
}

CommandEncoderVK::CommandEncoderVK(
    std::weak_ptr<const DeviceHolder> device_holder,
    std::shared_ptr<TrackedObjectsVK> tracked_objects,
    const std::shared_ptr<QueueVK>& queue,
-    std::shared_ptr<FenceWaiterVK> fence_waiter,
-    const std::shared_ptr<GPUTracerVK>& gpu_tracer)
+    std::shared_ptr<FenceWaiterVK> fence_waiter)
    : device_holder_(std::move(device_holder)),
      tracked_objects_(std::move(tracked_objects)),
      queue_(queue),
-      fence_waiter_(std::move(fence_waiter)),
-      gpu_tracer_(gpu_tracer) {}
+      fence_waiter_(std::move(fence_waiter)) {}

CommandEncoderVK::~CommandEncoderVK() = default;

@@ -183,23 +186,20 @@ bool CommandEncoderVK::Submit(SubmitCallback callback) {

  auto command_buffer = GetCommandBuffer();

-  auto end_frame = gpu_tracer_->RecordCmdBufferEnd(command_buffer);
+  tracked_objects_->GetGPUProbe().RecordCmdBufferEnd(command_buffer);

  auto status = command_buffer.end();
  if (status != vk::Result::eSuccess) {
-    gpu_tracer_->OnFenceComplete(end_frame, false);
    VALIDATION_LOG << "Failed to end command buffer: " << vk::to_string(status);
    return false;
  }
  std::shared_ptr<const DeviceHolder> strong_device = device_holder_.lock();
  if (!strong_device) {
-    gpu_tracer_->OnFenceComplete(end_frame, false);
    VALIDATION_LOG << "Device lost.";
    return false;
  }
  auto [fence_result, fence] = strong_device->GetDevice().createFenceUnique({});
  if (fence_result != vk::Result::eSuccess) {
-    gpu_tracer_->OnFenceComplete(end_frame, false);
    VALIDATION_LOG << "Failed to create fence: " << vk::to_string(fence_result);
    return false;
  }
@@ -209,22 +209,19 @@ bool CommandEncoderVK::Submit(SubmitCallback callback) {
  submit_info.setCommandBuffers(buffers);
  status = queue_->Submit(submit_info, *fence);
  if (status != vk::Result::eSuccess) {
-    gpu_tracer_->OnFenceComplete(end_frame, false);
    VALIDATION_LOG << "Failed to submit queue: " << vk::to_string(status);
    return false;
  }

  // Submit will proceed, call callback with true when it is done and do not
  // call when `reset` is collected.
  fail_callback = false;
-  auto gpu_tracer = gpu_tracer_;
  return fence_waiter_->AddFence(
      std::move(fence),
-      [callback, tracked_objects = std::move(tracked_objects_), gpu_tracer,
-       end_frame] {
-        if (end_frame.has_value()) {
-          gpu_tracer->OnFenceComplete(end_frame, true);
-        }
+      [callback, tracked_objects = std::move(tracked_objects_)]() mutable {
+        // Ensure tracked objects are destructed before calling any final
+        // callbacks.
+        tracked_objects.reset();
        if (callback) {
          callback(true);
        }
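
With this change, the submit path no longer threads `end_frame` and `gpu_tracer_` through the fence-waiter callback: the `GPUProbe` owned by `TrackedObjectsVK` reports completion from its own destructor when `tracked_objects.reset()` runs. Below is a minimal, self-contained sketch of that RAII hand-off; `Tracer` and `Probe` are illustrative stand-ins, not the Impeller classes.

#include <cstdio>
#include <memory>
#include <optional>

// Stand-in for GPUTracerVK: only the completion callback matters here.
class Tracer {
 public:
  void OnFenceComplete(size_t frame_index) {
    std::printf("frame state %zu complete\n", frame_index);
  }
};

// Stand-in for GPUProbe: remembers which frame state it recorded into and
// notifies the tracer (if still alive) exactly once, on destruction.
class Probe {
 public:
  explicit Probe(std::weak_ptr<Tracer> tracer) : tracer_(std::move(tracer)) {}

  void Record(size_t frame_index) {
    if (!index_.has_value()) {
      index_ = frame_index;
    }
  }

  ~Probe() {
    if (!index_.has_value()) {
      return;
    }
    if (auto tracer = tracer_.lock()) {
      tracer->OnFenceComplete(index_.value());
    }
  }

 private:
  std::weak_ptr<Tracer> tracer_;
  std::optional<size_t> index_;
};

int main() {
  auto tracer = std::make_shared<Tracer>();
  auto probe = std::make_unique<Probe>(tracer);
  probe->Record(0);
  // In the real code the probe lives inside TrackedObjectsVK; resetting the
  // tracked objects in the fence-waiter callback is what destroys it.
  probe.reset();  // -> Tracer::OnFenceComplete(0)
  return 0;
}

This is why the new fence-waiter lambda only needs to reset `tracked_objects` before invoking the caller's callback.
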
6 changes: 2 additions & 4 deletions impeller/renderer/backend/vulkan/command_encoder_vk.h
@@ -6,7 +6,6 @@

#include <functional>
#include <optional>
-#include <set>

#include "flutter/fml/macros.h"
#include "impeller/renderer/backend/vulkan/command_pool_vk.h"
@@ -26,6 +25,7 @@ class Texture;
class TextureSourceVK;
class TrackedObjectsVK;
class FenceWaiterVK;
+class GPUProbe;

class CommandEncoderFactoryVK {
 public:
@@ -51,8 +51,7 @@ class CommandEncoderVK {
  CommandEncoderVK(std::weak_ptr<const DeviceHolder> device_holder,
                   std::shared_ptr<TrackedObjectsVK> tracked_objects,
                   const std::shared_ptr<QueueVK>& queue,
-                  std::shared_ptr<FenceWaiterVK> fence_waiter,
-                  const std::shared_ptr<GPUTracerVK>& gpu_tracer);
+                  std::shared_ptr<FenceWaiterVK> fence_waiter);

  ~CommandEncoderVK();

@@ -91,7 +90,6 @@ class CommandEncoderVK {
  std::shared_ptr<TrackedObjectsVK> tracked_objects_;
  std::shared_ptr<QueueVK> queue_;
  const std::shared_ptr<FenceWaiterVK> fence_waiter_;
-  std::shared_ptr<GPUTracerVK> gpu_tracer_;
  bool is_valid_ = true;

  void Reset();
74 changes: 56 additions & 18 deletions impeller/renderer/backend/vulkan/gpu_tracer_vk.cc
@@ -4,6 +4,8 @@

#include "impeller/renderer/backend/vulkan/gpu_tracer_vk.h"

+#include <memory>
+#include <optional>
#include <thread>
#include <utility>
#include "fml/logging.h"
@@ -47,7 +49,7 @@ void GPUTracerVK::MarkFrameEnd() {
  }

  Lock lock(trace_state_mutex_);
-  current_state_ = (current_state_ + 1) % 16;
+  current_state_ = (current_state_ + 1) % kTraceStatesSize;

  auto& state = trace_states_[current_state_];
  // If there are still pending buffers on the trace state we're switching to,
@@ -59,11 +61,15 @@

  state.pending_buffers = 0;
  state.current_index = 0;
-  state.contains_failure = false;
  in_frame_ = false;
}

-void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
+std::unique_ptr<GPUProbe> GPUTracerVK::CreateGPUProbe() {
+  return std::make_unique<GPUProbe>(weak_from_this());
+}
+
+void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer,
+                                       GPUProbe& probe) {
  if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
      !in_frame_) {
    return;
@@ -98,45 +104,47 @@ void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
  buffer.writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe,
                        trace_states_[current_state_].query_pool.get(),
                        state.current_index);
-  state.pending_buffers += 1;
  state.current_index += 1;
+  if (!probe.index_.has_value()) {
+    state.pending_buffers += 1;
+    probe.index_ = current_state_;
+  }
}

-std::optional<size_t> GPUTracerVK::RecordCmdBufferEnd(
-    const vk::CommandBuffer& buffer) {
+void GPUTracerVK::RecordCmdBufferEnd(const vk::CommandBuffer& buffer,
+                                     GPUProbe& probe) {
  if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
      !in_frame_) {
-    return std::nullopt;
+    return;
  }
  Lock lock(trace_state_mutex_);
  GPUTraceState& state = trace_states_[current_state_];

  if (state.current_index >= kPoolSize) {
-    return current_state_;
+    return;
  }

  buffer.writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe,
                        state.query_pool.get(), state.current_index);

  state.current_index += 1;
-  return current_state_;
+  if (!probe.index_.has_value()) {
+    state.pending_buffers += 1;
+    probe.index_ = current_state_;
+  }
}

-void GPUTracerVK::OnFenceComplete(std::optional<size_t> maybe_frame_index,
-                                  bool success) {
-  if (!enabled_ || !maybe_frame_index.has_value()) {
+void GPUTracerVK::OnFenceComplete(size_t frame_index) {
+  if (!enabled_) {
    return;
  }
-  auto frame_index = maybe_frame_index.value();
  Lock lock(trace_state_mutex_);
  GPUTraceState& state = trace_states_[frame_index];
-  if (state.pending_buffers == 0) {
-    return;
-  }
-  state.contains_failure = !success;

+  FML_DCHECK(state.pending_buffers > 0);
  state.pending_buffers -= 1;

-  if (state.pending_buffers == 0 && !state.contains_failure) {
+  if (state.pending_buffers == 0) {
    auto buffer_count = state.current_index;
    std::vector<uint64_t> bits(buffer_count);

@@ -171,4 +179,34 @@ void GPUTracerVK::OnFenceComplete(std::optional<size_t> maybe_frame_index,
  }
}

+GPUProbe::GPUProbe(const std::weak_ptr<GPUTracerVK>& tracer)
+    : tracer_(tracer) {}
+
+GPUProbe::~GPUProbe() {
+  if (!index_.has_value()) {
+    return;
+  }
+  auto tracer = tracer_.lock();
+  if (!tracer) {
+    return;
+  }
+  tracer->OnFenceComplete(index_.value());
+}
+
+void GPUProbe::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
+  auto tracer = tracer_.lock();
+  if (!tracer) {
+    return;
+  }
+  tracer->RecordCmdBufferStart(buffer, *this);
+}
+
+void GPUProbe::RecordCmdBufferEnd(const vk::CommandBuffer& buffer) {
+  auto tracer = tracer_.lock();
+  if (!tracer) {
+    return;
+  }
+  tracer->RecordCmdBufferEnd(buffer, *this);
+}
+
}  // namespace impeller
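
The per-frame accounting that replaces the old `contains_failure` bookkeeping works as follows: the first `RecordCmdBufferStart`/`RecordCmdBufferEnd` call made through a probe claims the current trace state and increments its `pending_buffers`, and each probe later decrements it exactly once via `OnFenceComplete`; the query pool is read back when the count drains to zero. A rough, self-contained sketch of that counting scheme (simplified, with illustrative names; not the Impeller implementation):

#include <array>
#include <cstdio>
#include <optional>

constexpr size_t kStates = 32;  // mirrors kTraceStatesSize

struct TraceState {
  size_t pending_buffers = 0;  // probes still in flight against this state
};

struct ProbeRecord {
  std::optional<size_t> index;  // trace state claimed on first record
};

std::array<TraceState, kStates> states;
size_t current_state = 0;

void Record(ProbeRecord& probe) {
  // Only the first record for a probe claims a slot and bumps the count, so
  // start + end timestamps on the same command buffer are not double-counted.
  if (!probe.index.has_value()) {
    states[current_state].pending_buffers += 1;
    probe.index = current_state;
  }
}

void OnFenceComplete(ProbeRecord& probe) {
  TraceState& state = states[probe.index.value()];
  state.pending_buffers -= 1;
  if (state.pending_buffers == 0) {
    std::printf("state %zu drained; query pool can be read back\n",
                probe.index.value());
  }
}

int main() {
  ProbeRecord a, b;
  Record(a);
  Record(a);  // second record on the same probe: no double count
  Record(b);
  current_state = (current_state + 1) % kStates;  // MarkFrameEnd-style advance
  OnFenceComplete(a);
  OnFenceComplete(b);  // count reaches zero for the old state
  return 0;
}
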
69 changes: 49 additions & 20 deletions impeller/renderer/backend/vulkan/gpu_tracer_vk.h
@@ -11,28 +11,19 @@

namespace impeller {

+class GPUProbe;
+
/// @brief A class that uses timestamp queries to record the approximate GPU
///        execution time.
-class GPUTracerVK {
+class GPUTracerVK : public std::enable_shared_from_this<GPUTracerVK> {
 public:
  explicit GPUTracerVK(const std::shared_ptr<DeviceHolder>& device_holder);

  ~GPUTracerVK() = default;

-  /// @brief Record a timestamp query into the provided cmd buffer to record
-  ///        start time.
-  void RecordCmdBufferStart(const vk::CommandBuffer& buffer);
-
-  /// @brief Record a timestamp query into the provided cmd buffer to record end
-  ///        time.
-  ///
-  /// Returns the index that should be passed to [OnFenceComplete].
-  std::optional<size_t> RecordCmdBufferEnd(const vk::CommandBuffer& buffer);
-
-  /// @brief Signal that the cmd buffer is completed.
-  ///
-  /// If [frame_index] is std::nullopt, this frame recording is ignored.
-  void OnFenceComplete(std::optional<size_t> frame_index, bool success);
+  /// @brief Create a GPUProbe to trace the execution of a command buffer on the
+  ///        GPU.
+  std::unique_ptr<GPUProbe> CreateGPUProbe();

  /// @brief Signal the start of a frame workload.
  ///
@@ -47,20 +38,34 @@ class GPUTracerVK {
  bool IsEnabled() const;

 private:
+  friend class GPUProbe;
+
+  static const constexpr size_t kTraceStatesSize = 32u;

Author's review comment on the line above: I increased the number of pending query states because, on very slow apps, I observed that we would get backed up beyond the previous 16.


+  /// @brief Signal that the cmd buffer is completed.
+  ///
+  /// If [frame_index] is std::nullopt, this frame recording is ignored.
+  void OnFenceComplete(size_t frame);
+
+  /// @brief Record a timestamp query into the provided cmd buffer to record
+  ///        start time.
+  void RecordCmdBufferStart(const vk::CommandBuffer& buffer, GPUProbe& probe);
+
+  /// @brief Record a timestamp query into the provided cmd buffer to record end
+  ///        time.
+  void RecordCmdBufferEnd(const vk::CommandBuffer& buffer, GPUProbe& probe);

  const std::shared_ptr<DeviceHolder> device_holder_;

  struct GPUTraceState {
    size_t current_index = 0;
    size_t pending_buffers = 0;
-    // If a cmd buffer submission fails for any reason, this field is used
-    // to indicate that the query pool results may be incomplete and this
-    // frame should be discarded.
-    bool contains_failure = false;
    vk::UniqueQueryPool query_pool;
  };

  mutable Mutex trace_state_mutex_;
-  GPUTraceState trace_states_[16] IPLR_GUARDED_BY(trace_state_mutex_);
+  GPUTraceState trace_states_[kTraceStatesSize] IPLR_GUARDED_BY(
+      trace_state_mutex_);
  size_t current_state_ IPLR_GUARDED_BY(trace_state_mutex_) = 0u;

  // The number of nanoseconds for each timestamp unit.
@@ -81,4 +86,28 @@ class GPUTracerVK {
  bool enabled_ = false;
};

+class GPUProbe {
+ public:
+  explicit GPUProbe(const std::weak_ptr<GPUTracerVK>& tracer);
+
+  GPUProbe(GPUProbe&&) = delete;
+  GPUProbe& operator=(GPUProbe&&) = delete;
+
+  ~GPUProbe();
+
+  /// @brief Record a timestamp query into the provided cmd buffer to record
+  ///        start time.
+  void RecordCmdBufferStart(const vk::CommandBuffer& buffer);
+
+  /// @brief Record a timestamp query into the provided cmd buffer to record end
+  ///        time.
+  void RecordCmdBufferEnd(const vk::CommandBuffer& buffer);
+
+ private:
+  friend class GPUTracerVK;
+
+  std::weak_ptr<GPUTracerVK> tracer_;
+  std::optional<size_t> index_ = std::nullopt;
+};
+
}  // namespace impeller