From 93cf5494a4dae2e56092eb29857bb60cc2994b1e Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 15 Dec 2022 14:00:04 +0000 Subject: [PATCH 1/5] Change record & replay queue / graph relationship Better aligns the queue record graph creation mechansism with the [kernel fusion extension](https://github.com/intel/llvm/pull/7098) ```cpp ext::codeplay::experimental::fusion_wrapper w{q}; w.start_fusion(); // 'q' submissions w.complete_fusion() ``` By changing the relationship between a queue and a graph so that recording starts and finishes on a graph we better match kernel fusion. This design is also more exception safe as `end_recording()` can be called in a RAII approach when a graph is destroyed. As a result a graph is now created from queue recording like: ```cpp ext::oneapi::experimental::command_graph graph; graph.begin_recording({q}); // 'q' submissions graph.end_recording(); ``` Addresses Issue https://github.com/reble/llvm/issues/53 --- .../proposed/sycl_ext_oneapi_graph.asciidoc | 134 ++++++++++-------- 1 file changed, 78 insertions(+), 56 deletions(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc index 35e41c73d5483..29eda2be14a6b 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc @@ -321,6 +321,9 @@ public: command_graph(const property_list& propList = {}); command_graph finalize(const context& syclContext) const; + bool begin_recording(const std::vector& queues); + bool end_recording(const std::vector& queues = {}); + node add(const property_list& propList = {}); template @@ -344,9 +347,6 @@ public: using namespace ext::oneapi::experimental; class queue { public: - bool begin_recording(command_graph& graph); - bool end_recording(); - /* -- graph convenience shortcuts -- */ event graph(command_graph graph); @@ -681,7 +681,62 @@ Exceptions: |=== -Table 8. Member functions of the `command_graph` class (executable graph update). +Table 8. Member functions of the `command_graph` class for queue recording. +[cols="2a,a"] +|=== +|Member function|Description + +| +[source, c++] +---- +using namespace ext::oneapi::experimental; +bool begin_recording(const std::vector& queues) +---- + +|Synchronously changes the state of each queue in `queues` to the +`queue_state::recording` state. + +Parameters: + +* `queues` - List of `sycl::queue` objects to change to the + `queue_state::recording` state and start recording commands to the graph + instance. + +Returns: `true` if any queue in `queues` has its state changed from +`queue_state::executing` to `queue_state::recording`, `false` otherwise. + +Exceptions: + +* Throws synchronously with error code `invalid` if the any queue in `queues` is + already recording to a different graph. + +| +[source, c++] +---- +using namespace ext::oneapi::experimental; +bool end_recording(const std::vector& queues = {}) +---- + +|Synchronously changes the state of each queue in `queues` to the +`queue_state::executing` state. If the list of queues is empty, recording +finishes on all queues that are being recorded to the graph and their queue +state is set to `queue_state::executing`. + +Parameters: + +* `queues` - List of `sycl::queue` objects to change to the executing state. + +Returns: `true` if any queue recording to the graph has its state changed from +`queue_state::recording` to `queue_state::executing`, `false` otherwise. + +Exceptions: + +* Throws synchronously with error code `invalid` if any queue in `queues` is + recording to a different graph. + +|=== + +Table 9. Member functions of the `command_graph` class (executable graph update). [cols="2a,a"] |=== |Member function|Description @@ -727,8 +782,8 @@ put into a mode where command-groups are recorded to a graph rather than submitted immediately for execution. <> are also added to the -`sycl::queue` class with this extension. Two functions for selecting the state -of the queue, and another function for submitting a graph to the queue. +`sycl::queue` class in this extension as queue shortcuts for +`handler::exec_graph()`. ==== Queue State @@ -756,7 +811,7 @@ The state of a queue can be queried with `queue::get_info` using template parameter `info::queue::state`. The following entry is added to the {queue-info-table}[queue info table] to define this query: -Table 9. Queue info query +Table 10. Queue info query [cols="2a,a,a"] |=== | Queue Descriptors | Return Type | Description @@ -799,46 +854,11 @@ property and this graph extension. ==== New Queue Member Functions -Table 8. Additional member functions of the `sycl::queue` class. +Table 11. Additional member functions of the `sycl::queue` class. [cols="2a,a"] |=== |Member function|Description -| -[source, c++] ----- -using namespace ext::oneapi::experimental; -bool queue::begin_recording(command_graph& graph) ----- - -|Synchronously changes the state of the queue to the `queue_state::recording` -state. - -Parameters: - -* `graph` - Graph object to start recording commands to. - -Returns: `true` if the queue was previously in the `queue_state::executing` -state, `false` otherwise. - -Exceptions: - -* Throws synchronously with error code `invalid` if the queue is already - recording to a different graph. - -| -[source, c++] ----- -using namespace ext::oneapi::experimental; -bool queue::end_recording() ----- - -|Synchronously changes the state of the queue to the `queue_state::executing` -state. - -Returns: `true` if the queue was previously in the `queue_state::recording` -state, `false` otherwise. - | [source,c++] ---- @@ -874,7 +894,7 @@ containing `handler::depends_on(depEvents)` and `handler::graph(graph)`. ==== New Handler Member Functions -Table 10. Additional member functions of the `sycl::handler` class. +Table 12. Additional member functions of the `sycl::handler` class. [cols="2a,a"] |=== |Member function|Description @@ -917,20 +937,21 @@ preemptively changed the state of the queue. Errors are reported through exceptions, as usual in the SYCL API. For new APIs, submitting a graph for execution can generate unspecified asynchronous errors, while `command_graph::finalize()` may throw unspecified synchronous exceptions. -Synchronous exception errors codes are defined for both -`queue::begin_recording()` and `command_graph::update()`. +Synchronous exception errors codes are defined for all of +`command_graph::begin_recording()`, `command_graph::end_recording()` and +`command_graph::update()`. When a queue is in recording mode asynchronous exceptions will not be -generated, as no device execution is occuring. Synchronous errors specified as +generated, as no device execution is occurring. Synchronous errors specified as being thrown in the default queue executing state, will still be thrown when a queue is in the recording state. -The `queue::begin_recording` and `queue::end_recording` entry-points return a -`bool` value informing the user whether a state change occurred. False is -returned rather than throwing an exception when state isn't changed. This design -is because the queue is already in the state the user desires, so if the -function threw an exception in this case, the application would likely swallow -it and then proceed. +The `command_graph::begin_recording` and `command_graph::end_recording` +entry-points return a `bool` value informing the user whether a related queue +state change occurred. False is returned rather than throwing an exception when +no queue state is changed. This design is because the queues are already in +the state the user desires, so if the function threw an exception in this case, +the application would likely swallow it and then proceed. While a queue is in the recording state, methods performed on that queue which are not command submissions behave as normal. This includes waits, throws, and @@ -1092,7 +1113,7 @@ submitted in its entirety for execution via // `q` will be put in the recording state where commands are recorded to // `graph` rather than submitted for execution immediately. - q.begin_recording(graph); + graph.begin_recording({q}); // Record commands to `graph` with the following topology. // @@ -1135,9 +1156,9 @@ submitted in its entirety for execution via }); }); - // queue will be returned to the executing state where commands are + // queue `q` will be returned to the executing state where commands are // submitted immediately for extension. - q.end_recording(); + graph.end_recording(); } // Finalize the modifiable graph to create an executable graph that can be @@ -1188,4 +1209,5 @@ this feature in the extension. |4|2022-08-10|Pablo Reble|Adding USM shortcuts |5|2022-10-21|Ewan Crawford|Merge in Codeplay vendor extension |6|2022-11-14|Ewan Crawford|Change graph execution to be a function on the handler +|7|2022-12-15|Ewan Crawford|Change record & replay relationship between graph and queue. |======================================== From 2dfe1e0be660dc5b6be0f4ad2ed179c42dc0eaa0 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Mon, 19 Dec 2022 14:45:05 +0000 Subject: [PATCH 2/5] Add section on exception safety This specifies the behaviour on destruction of a modifiable `command_graph` to end recording of queues which are recording to the graph. --- .../proposed/sycl_ext_oneapi_graph.asciidoc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc index 29eda2be14a6b..2b7d86a5625ba 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc @@ -932,6 +932,18 @@ The returned value from the `info::queue::state` should be considered immediately stale in multi-threaded usage, as another thread could have preemptively changed the state of the queue. +=== Exception Safety + +In additional to the destruction semantics provided by the SYCL +{crs}[common reference semantics], when a modifiable `command_graph` is +destroyed recording is ended on any queues that are being recorded to that +graph, equivalent to `this->end_recording()`. + +As a result users don't need to manually wrap queue recording code in a +`try` / `catch` block to reset the state of recording queues on an exception +back to the executing state. Instead, an uncaught exception destroying the +modifiable graph will perform this action, useful in RAII pattern usage. + === Error Handling Errors are reported through exceptions, as usual in the SYCL API. For new APIs, From 559c7f65f3f0eb8793da136aad43a34f0c384d95 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 20 Dec 2022 15:15:14 +0000 Subject: [PATCH 3/5] Provide multiple overloads for begin/end recording Overload the `command_graph::begin_recording()` and `command_graph::end_recording()` functions with variants for both a single queue and a list of queues. --- .../proposed/sycl_ext_oneapi_graph.asciidoc | 96 +++++++++++++++---- 1 file changed, 79 insertions(+), 17 deletions(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc index 2b7d86a5625ba..3a4821e89578a 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc @@ -321,8 +321,12 @@ public: command_graph(const property_list& propList = {}); command_graph finalize(const context& syclContext) const; - bool begin_recording(const std::vector& queues); - bool end_recording(const std::vector& queues = {}); + bool begin_recording(queue recordingQueue); + bool begin_recording(const std::vector& recordingQueues); + + bool end_recording(); + bool end_recording(queue recordingQueue); + bool end_recording(const std::vector& recordingQueues); node add(const property_list& propList = {}); @@ -690,50 +694,108 @@ Table 8. Member functions of the `command_graph` class for queue recording. [source, c++] ---- using namespace ext::oneapi::experimental; -bool begin_recording(const std::vector& queues) +bool begin_recording(queue recordingQueue) ---- -|Synchronously changes the state of each queue in `queues` to the +|Synchronously changes the state of `recordingQueue` to the `queue_state::recording` state. Parameters: -* `queues` - List of `sycl::queue` objects to change to the +* `recordingQueue` - A `sycl::queue` object to change to the `queue_state::recording` state and start recording commands to the graph instance. -Returns: `true` if any queue in `queues` has its state changed from +Returns: `true` if `recordingQueue` has its state changed from `queue_state::executing` to `queue_state::recording`, `false` otherwise. Exceptions: -* Throws synchronously with error code `invalid` if the any queue in `queues` is +* Throws synchronously with error code `invalid` if `recordingQueue` is already recording to a different graph. | [source, c++] ---- using namespace ext::oneapi::experimental; -bool end_recording(const std::vector& queues = {}) +bool begin_recording(const std::vector& recordingQueues) ---- -|Synchronously changes the state of each queue in `queues` to the -`queue_state::executing` state. If the list of queues is empty, recording -finishes on all queues that are being recorded to the graph and their queue -state is set to `queue_state::executing`. +|Synchronously changes the state of each queue in `recordingQueues` to the +`queue_state::recording` state. Parameters: -* `queues` - List of `sycl::queue` objects to change to the executing state. +* `recordingQueues` - List of `sycl::queue` objects to change to the + `queue_state::recording` state and start recording commands to the graph + instance. + +Returns: `true` if any queue in `recordingQueues` has its state changed from +`queue_state::executing` to `queue_state::recording`, `false` otherwise. + +Exceptions: + +* Throws synchronously with error code `invalid` if the any queue in + `recordingQueues` is already recording to a different graph. + +| +[source, c++] +---- +using namespace ext::oneapi::experimental; +bool end_recording() +---- + +|Synchronously finishes recording on all queues that are being recorded to the +graph and their queue state is set to `queue_state::executing`. Returns: `true` if any queue recording to the graph has its state changed from `queue_state::recording` to `queue_state::executing`, `false` otherwise. +| +[source, c++] +---- +using namespace ext::oneapi::experimental; +bool end_recording(queue recordingQueue) +---- + +|Synchronously changes the state of `recordingQueue` to the +`queue_state::executing` state. + +Parameters: + +* `recordingQueue` - A `sycl::queue` object to change to the executing state. + +Returns: `true` if `recordingQueue` has its state changed from +`queue_state::recording` to `queue_state::executing`, `false` otherwise. + Exceptions: -* Throws synchronously with error code `invalid` if any queue in `queues` is +* Throws synchronously with error code `invalid` if `recordingQueue` is recording to a different graph. +| +[source, c++] +---- +using namespace ext::oneapi::experimental; +bool end_recording(const std::vector& recordingQueues) +---- + +|Synchronously changes the state of each queue in `recordingQueues` to the +`queue_state::executing` state. + +Parameters: + +* `recordingQueues` - List of `sycl::queue` objects to change to the executing + state. + +Returns: `true` if any queue in `recordingQueues` has its state changed from +`queue_state::recording` to `queue_state::executing`, `false` otherwise. + +Exceptions: + +* Throws synchronously with error code `invalid` if any queue in + `recordingQueues` is recording to a different graph. + |=== Table 9. Member functions of the `command_graph` class (executable graph update). @@ -934,9 +996,9 @@ preemptively changed the state of the queue. === Exception Safety -In additional to the destruction semantics provided by the SYCL +In addition to the destruction semantics provided by the SYCL {crs}[common reference semantics], when a modifiable `command_graph` is -destroyed recording is ended on any queues that are being recorded to that +destroyed recording is ended on any queues that are recording to that graph, equivalent to `this->end_recording()`. As a result users don't need to manually wrap queue recording code in a @@ -1125,7 +1187,7 @@ submitted in its entirety for execution via // `q` will be put in the recording state where commands are recorded to // `graph` rather than submitted for execution immediately. - graph.begin_recording({q}); + graph.begin_recording(q); // Record commands to `graph` with the following topology. // From 88633e60750934f24371e2495494f27814f4b37b Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 21 Dec 2022 09:53:44 +0000 Subject: [PATCH 4/5] Make wording consistent Co-authored-by: Ben Tracy --- sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc index 3a4821e89578a..5f387b55ecefc 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc @@ -745,7 +745,7 @@ using namespace ext::oneapi::experimental; bool end_recording() ---- -|Synchronously finishes recording on all queues that are being recorded to the +|Synchronously finishes recording on all queues that are recording to the graph and their queue state is set to `queue_state::executing`. Returns: `true` if any queue recording to the graph has its state changed from From 778112b8bf09fe3a41461ffc75e14712275a5511 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 21 Dec 2022 11:41:06 +0000 Subject: [PATCH 5/5] Update after rebase on minor spec clarifications --- .../extensions/proposed/sycl_ext_oneapi_graph.asciidoc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc index 5f387b55ecefc..62d92486e133c 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_graph.asciidoc @@ -163,9 +163,9 @@ for example: [source, c++] ---- -queue.begin_recording(graph); +graph.begin_recording(queue); graph.add(/*command group*/); // Invalid as graph is being recorded to -queue.end_recording(); +graph.end_recording(); ---- == Specification @@ -746,7 +746,7 @@ bool end_recording() ---- |Synchronously finishes recording on all queues that are recording to the -graph and their queue state is set to `queue_state::executing`. +graph and sets their state to `queue_state::executing`. Returns: `true` if any queue recording to the graph has its state changed from `queue_state::recording` to `queue_state::executing`, `false` otherwise. @@ -844,8 +844,7 @@ put into a mode where command-groups are recorded to a graph rather than submitted immediately for execution. <> are also added to the -`sycl::queue` class in this extension as queue shortcuts for -`handler::exec_graph()`. +`sycl::queue` class in this extension as queue shortcuts for `handler::graph()`. ==== Queue State