Skip to content

Commit 2f94fcf

Browse files
authored
[PyOV] Fix hanging on infer request destruction (#24722)
### Details: - Initial problem: `test_custom_op` hung on destruction because it was waiting for a thread that was trying to acquire the GIL. - The second problem is that pybind11 only allows the GIL to be managed within the current scope, so it is impossible to release the GIL for destructors (see pybind/pybind11#1446). - The current solution releases the GIL for the InferRequest destructor and for all destructors called in its chain. ### Tickets: - CVS-141744
1 parent 65b5b8e commit 2f94fcf

File tree

8 files changed

+90
-61
lines changed

8 files changed

+90
-61
lines changed

src/bindings/python/src/pyopenvino/core/async_infer_queue.cpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "pyopenvino/core/common.hpp"
1717
#include "pyopenvino/core/infer_request.hpp"
18+
#include "pyopenvino/utils/utils.hpp"
1819

1920
namespace py = pybind11;
2021

@@ -64,7 +65,7 @@ class AsyncInferQueue {
6465
});
6566
size_t idle_handle = m_idle_handles.front();
6667
// wait for request to make sure it returned from callback
67-
m_requests[idle_handle].m_request.wait();
68+
m_requests[idle_handle].m_request->wait();
6869
if (m_errors.size() > 0)
6970
throw m_errors.front();
7071
return idle_handle;
@@ -75,7 +76,7 @@ class AsyncInferQueue {
7576
// release GIL to avoid deadlock on python callback
7677
py::gil_scoped_release release;
7778
for (auto&& request : m_requests) {
78-
request.m_request.wait();
79+
request.m_request->wait();
7980
}
8081
// acquire the mutex to access m_errors
8182
std::lock_guard<std::mutex> lock(m_mutex);
@@ -87,7 +88,7 @@ class AsyncInferQueue {
8788
for (size_t handle = 0; handle < m_requests.size(); handle++) {
8889
// auto end_time = m_requests[handle].m_end_time; // TODO: pass it bellow? like in InferRequestWrapper
8990

90-
m_requests[handle].m_request.set_callback([this, handle /* ... */](std::exception_ptr exception_ptr) {
91+
m_requests[handle].m_request->set_callback([this, handle /* ... */](std::exception_ptr exception_ptr) {
9192
*m_requests[handle].m_end_time = Time::now();
9293
{
9394
// acquire the mutex to access m_idle_handles
@@ -110,14 +111,17 @@ class AsyncInferQueue {
110111
}
111112

112113
void set_custom_callbacks(py::function f_callback) {
114+
// need to acquire GIL before py::function deletion
115+
auto callback_sp = Common::utils::wrap_pyfunction(std::move(f_callback));
116+
113117
for (size_t handle = 0; handle < m_requests.size(); handle++) {
114-
m_requests[handle].m_request.set_callback([this, f_callback, handle](std::exception_ptr exception_ptr) {
118+
m_requests[handle].m_request->set_callback([this, callback_sp, handle](std::exception_ptr exception_ptr) {
115119
*m_requests[handle].m_end_time = Time::now();
116120
if (exception_ptr == nullptr) {
117121
// Acquire GIL, execute Python function
118122
py::gil_scoped_acquire acquire;
119123
try {
120-
f_callback(m_requests[handle], m_user_ids[handle]);
124+
(*callback_sp)(m_requests[handle], m_user_ids[handle]);
121125
} catch (const py::error_already_set& py_error) {
122126
// This should behave the same as assert(!PyErr_Occurred())
123127
// since constructor for pybind11's error_already_set is
@@ -193,13 +197,13 @@ void regclass_AsyncInferQueue(py::module m) {
193197
// Set new inputs label/id from user
194198
self.m_user_ids[handle] = userdata;
195199
// Update inputs if there are any
196-
self.m_requests[handle].m_request.set_input_tensor(inputs);
200+
self.m_requests[handle].m_request->set_input_tensor(inputs);
197201
// Now GIL can be released - we are NOT working with Python objects in this block
198202
{
199203
py::gil_scoped_release release;
200204
*self.m_requests[handle].m_start_time = Time::now();
201205
// Start InferRequest in asynchronus mode
202-
self.m_requests[handle].m_request.start_async();
206+
self.m_requests[handle].m_request->start_async();
203207
}
204208
},
205209
py::arg("inputs"),
@@ -239,13 +243,13 @@ void regclass_AsyncInferQueue(py::module m) {
239243
// Set new inputs label/id from user
240244
self.m_user_ids[handle] = userdata;
241245
// Update inputs if there are any
242-
Common::set_request_tensors(self.m_requests[handle].m_request, inputs);
246+
Common::set_request_tensors(*self.m_requests[handle].m_request, inputs);
243247
// Now GIL can be released - we are NOT working with Python objects in this block
244248
{
245249
py::gil_scoped_release release;
246250
*self.m_requests[handle].m_start_time = Time::now();
247251
// Start InferRequest in asynchronus mode
248-
self.m_requests[handle].m_request.start_async();
252+
self.m_requests[handle].m_request->start_async();
249253
}
250254
},
251255
py::arg("inputs"),

src/bindings/python/src/pyopenvino/core/common.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual) {
614614
py::dict outputs_to_dict(InferRequestWrapper& request, bool share_outputs, bool decode_strings) {
615615
py::dict res;
616616
for (const auto& out : request.m_outputs) {
617-
auto t = request.m_request.get_tensor(out);
617+
auto t = request.m_request->get_tensor(out);
618618
if (t.get_element_type() == ov::element::string) {
619619
if (share_outputs) {
620620
PyErr_WarnEx(PyExc_RuntimeWarning, "Result of a string type will be copied to OVDict!", 1);

0 commit comments

Comments
 (0)