openvinotoolkit
diff --git a/‎src/bindings/python/src/pyopenvino/core/async_infer_queue.cpp
Lines changed: 13 additions & 9 deletions b/‎src/bindings/python/src/pyopenvino/core/async_infer_queue.cpp
Lines changed: 13 additions & 9 deletions
diff --git a/‎src/bindings/python/src/pyopenvino/core/common.cpp
Lines changed: 1 addition & 1 deletion b/‎src/bindings/python/src/pyopenvino/core/common.cpp
Lines changed: 1 addition & 1 deletion
@@ -15,6 +15,7 @@
 
 #include "pyopenvino/core/common.hpp"
 #include "pyopenvino/core/infer_request.hpp"
+#include "pyopenvino/utils/utils.hpp"
 
 namespace py = pybind11;
 
@@ -64,7 +65,7 @@ class AsyncInferQueue {
         });
         size_t idle_handle = m_idle_handles.front();
         // wait for request to make sure it returned from callback
-        m_requests[idle_handle].m_request.wait();
+        m_requests[idle_handle].m_request->wait();
         if (m_errors.size() > 0)
             throw m_errors.front();
         return idle_handle;
@@ -75,7 +76,7 @@ class AsyncInferQueue {
         // release GIL to avoid deadlock on python callback
         py::gil_scoped_release release;
         for (auto&& request : m_requests) {
-            request.m_request.wait();
+            request.m_request->wait();
         }
         // acquire the mutex to access m_errors
         std::lock_guard<std::mutex> lock(m_mutex);
@@ -87,7 +88,7 @@ class AsyncInferQueue {
         for (size_t handle = 0; handle < m_requests.size(); handle++) {
             // auto end_time = m_requests[handle].m_end_time; // TODO: pass it bellow? like in InferRequestWrapper
 
-            m_requests[handle].m_request.set_callback([this, handle /* ... */](std::exception_ptr exception_ptr) {
+            m_requests[handle].m_request->set_callback([this, handle /* ... */](std::exception_ptr exception_ptr) {
                 *m_requests[handle].m_end_time = Time::now();
                 {
                     // acquire the mutex to access m_idle_handles
@@ -110,14 +111,17 @@ class AsyncInferQueue {
     }
 
     void set_custom_callbacks(py::function f_callback) {
+        // need to acquire GIL before py::function deletion
+        auto callback_sp = Common::utils::wrap_pyfunction(std::move(f_callback));
+
         for (size_t handle = 0; handle < m_requests.size(); handle++) {
-            m_requests[handle].m_request.set_callback([this, f_callback, handle](std::exception_ptr exception_ptr) {
+            m_requests[handle].m_request->set_callback([this, callback_sp, handle](std::exception_ptr exception_ptr) {
                 *m_requests[handle].m_end_time = Time::now();
                 if (exception_ptr == nullptr) {
                     // Acquire GIL, execute Python function
                     py::gil_scoped_acquire acquire;
                     try {
-                        f_callback(m_requests[handle], m_user_ids[handle]);
+                        (*callback_sp)(m_requests[handle], m_user_ids[handle]);
                     } catch (const py::error_already_set& py_error) {
                         // This should behave the same as assert(!PyErr_Occurred())
                         // since constructor for pybind11's error_already_set is
@@ -193,13 +197,13 @@ void regclass_AsyncInferQueue(py::module m) {
             // Set new inputs label/id from user
             self.m_user_ids[handle] = userdata;
             // Update inputs if there are any
-            self.m_requests[handle].m_request.set_input_tensor(inputs);
+            self.m_requests[handle].m_request->set_input_tensor(inputs);
             // Now GIL can be released - we are NOT working with Python objects in this block
             {
                 py::gil_scoped_release release;
                 *self.m_requests[handle].m_start_time = Time::now();
                 // Start InferRequest in asynchronus mode
-                self.m_requests[handle].m_request.start_async();
+                self.m_requests[handle].m_request->start_async();
             }
         },
         py::arg("inputs"),
@@ -239,13 +243,13 @@ void regclass_AsyncInferQueue(py::module m) {
             // Set new inputs label/id from user
             self.m_user_ids[handle] = userdata;
             // Update inputs if there are any
-            Common::set_request_tensors(self.m_requests[handle].m_request, inputs);
+            Common::set_request_tensors(*self.m_requests[handle].m_request, inputs);
             // Now GIL can be released - we are NOT working with Python objects in this block
             {
                 py::gil_scoped_release release;
                 *self.m_requests[handle].m_start_time = Time::now();
                 // Start InferRequest in asynchronus mode
-                self.m_requests[handle].m_request.start_async();
+                self.m_requests[handle].m_request->start_async();
             }
         },
         py::arg("inputs"),
 
@@ -614,7 +614,7 @@ uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual) {
 py::dict outputs_to_dict(InferRequestWrapper& request, bool share_outputs, bool decode_strings) {
     py::dict res;
     for (const auto& out : request.m_outputs) {
-        auto t = request.m_request.get_tensor(out);
+        auto t = request.m_request->get_tensor(out);
         if (t.get_element_type() == ov::element::string) {
             if (share_outputs) {
                 PyErr_WarnEx(PyExc_RuntimeWarning, "Result of a string type will be copied to OVDict!", 1);