Fix for deadlock in python callback (#3073)

sgonorov · web-flow · commit 22a69791b185 · 2025-12-03T14:32:29.000Z
Fix update from release branch
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
@@ -522,8 +522,8 @@ jobs:
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }}
             timeout: 360
           - name: 'LLM & VLM'
-            cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/'
-            run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
+            cmd: 'python -m pytest -v ./tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py ./tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py --override-ini cache_dir=/mount/caches/pytest/'
+            run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
             timeout: 180
           - name: 'GGUF Reader tests'
             cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py'
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
@@ -447,8 +447,8 @@ jobs:
           #   timeout: 240
           # Only supported on X64 or ARM with SVE support
           # - name: 'LLM & VLM'
-          #   cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py'
-          #   run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
+          #   cmd: 'tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py'
+          #   run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
           #   timeout: 180
           - name: 'GGUF Reader tests'
             cmd: 'python -m pytest -v ./tests/python_tests/test_gguf_reader.py'
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
@@ -611,8 +611,8 @@ jobs:
             run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching.test }}
             timeout: 360
           - name: 'LLM & VLM'
-            cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py --override-ini cache_dir=/mount/caches/pytest/'
-            run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
+            cmd: 'python -m pytest -s -v tests/python_tests/test_llm_pipeline.py tests/python_tests/test_llm_pipeline_static.py tests/python_tests/test_vlm_pipeline.py tests/python_tests/test_structured_output.py tests/python_tests/test_image_generation.py --override-ini cache_dir=/mount/caches/pytest/'
+            run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).visual_language.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test || fromJSON(needs.smart_ci.outputs.affected_components).Image_generation.test }}
             timeout: 180
           - name: 'GGUF Reader tests'
             cmd: 'python -m pytest -s -v tests/python_tests/test_gguf_reader.py'
diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp
@@ -180,10 +180,12 @@ class TorchGenerator : public ov::genai::CppStdGenerator {
     }
 
     float next() override {
+        py::gil_scoped_acquire acquire;  // hold the GIL while the torch call on the next line runs
         return m_torch.attr("randn")(1, "generator"_a=m_torch_generator, "dtype"_a=m_float32).attr("item")().cast<float>();
     }
 
     ov::Tensor randn_tensor(const ov::Shape& shape) override {
+        py::gil_scoped_acquire acquire;
         py::object torch_tensor = m_torch.attr("randn")(to_py_list(shape), "generator"_a=m_torch_generator, "dtype"_a=m_float32);
         py::object numpy_tensor = torch_tensor.attr("numpy")();
         py::array numpy_array = py::cast<py::array>(numpy_tensor);
@@ -201,6 +203,32 @@ class TorchGenerator : public ov::genai::CppStdGenerator {
             TorchTensorAllocator(size_t total_size, void * mutable_data, py::object torch_tensor) :
                 m_total_size(total_size), m_mutable_data(mutable_data), m_torch_tensor(torch_tensor) { }
 
+            ~TorchTensorAllocator() {  // Clears the held py::object while the GIL is held.
+                if (m_torch_tensor && Py_IsInitialized()) {  // only when an object is held and the interpreter is initialized
+                    py::gil_scoped_acquire acquire;
+                    m_torch_tensor = py::object();  // assign an empty py::object so the old reference is dropped inside this scope
+                }  // NOTE(review): if the check fails with an object still held, the member's own destructor runs without this guard - confirm that is intended
+            }
+
+            TorchTensorAllocator(const TorchTensorAllocator& other)  // Copy constructor: plain fields in the initializer list, Python handle in the body.
+                : m_total_size(other.m_total_size), m_mutable_data(other.m_mutable_data) {
+                py::gil_scoped_acquire acquire;  // acquired before the py::object copy on the next line
+                m_torch_tensor = other.m_torch_tensor;
+            }
+
+            TorchTensorAllocator& operator=(const TorchTensorAllocator& other) {  // Copy assignment: plain fields first, then the Python handle under the GIL.
+                if (this != &other) {  // self-assignment changes nothing
+                    m_total_size = other.m_total_size;
+                    m_mutable_data = other.m_mutable_data;
+                    py::gil_scoped_acquire acquire;  // acquired only for the py::object assignment below
+                    m_torch_tensor = other.m_torch_tensor;
+                }
+                return *this;
+            }
+
+            TorchTensorAllocator(TorchTensorAllocator&&) = default;  // Move operations are compiler-generated.
+            TorchTensorAllocator& operator=(TorchTensorAllocator&&) = default;  // NOTE(review): no gil_scoped_acquire here, unlike the copy operations - confirm a move never releases a held object without the GIL
+
             void* allocate(size_t bytes, size_t) const {
                 if (m_total_size == bytes) {
                     return m_mutable_data;
@@ -221,6 +249,7 @@ class TorchGenerator : public ov::genai::CppStdGenerator {
     }
 
     void seed(size_t new_seed) override {
+        py::gil_scoped_acquire acquire;  // taken before create_torch_generator(); presumably that helper touches Python objects - confirm
         create_torch_generator(new_seed);
     }
 };
@@ -448,12 +477,7 @@ void init_image_generation_pipelines(py::module_& m) {
             ) -> py::typing::Union<ov::Tensor> {
                 ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs);
                 ov::Tensor res;
-                if (params_have_torch_generator(params)) {
-                    // TorchGenerator stores python object which causes segfault after gil_scoped_release
-                    // so if it was passed, we don't release GIL
-                    res = pipe.generate(prompt, params);
-                }
-                else {
+                {
                     py::gil_scoped_release rel;
                     res = pipe.generate(prompt, params);
                 }
@@ -565,12 +589,7 @@ void init_image_generation_pipelines(py::module_& m) {
             ) -> py::typing::Union<ov::Tensor> {
                 ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs);
                 ov::Tensor res;
-                if (params_have_torch_generator(params)) {
-                    // TorchGenerator stores python object which causes segfault after gil_scoped_release
-                    // so if it was passed, we don't release GIL
-                    res = pipe.generate(prompt, image, params);
-                }
-                else {
+                {
                     py::gil_scoped_release rel;
                     res = pipe.generate(prompt, image, params);
                 }
@@ -676,12 +695,7 @@ void init_image_generation_pipelines(py::module_& m) {
             ) -> py::typing::Union<ov::Tensor> {
                 ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs);
                 ov::Tensor res;
-                if (params_have_torch_generator(params)) {
-                    // TorchGenerator stores python object which causes segfault after gil_scoped_release
-                    // so if it was passed, we don't release GIL
-                    res = pipe.generate(prompt, image, mask_image, params);
-                }
-                else {
+                {
                     py::gil_scoped_release rel;
                     res = pipe.generate(prompt, image, mask_image, params);
                 }
diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp
@@ -374,10 +374,22 @@ ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) {
         return py::cast<std::shared_ptr<ov::genai::Generator>>(py_obj);
     } else if (py::isinstance<py::function>(py_obj) && property_name == "callback") {
         auto py_callback = py::cast<py::function>(py_obj);
+        auto shared_callback = std::shared_ptr<py::function>(
+            new py::function(py_callback),
+            [](py::function* f) {
+                if (Py_IsInitialized()) {
+                    py::gil_scoped_acquire acquire;
+                    delete f;
+                } else {
+                    delete f;
+                }
+            }
+        );
+
         return std::function<bool(size_t, size_t, ov::Tensor&)>(
-            [py_callback](size_t step, size_t num_steps, ov::Tensor& latent) -> bool {
+            [shared_callback](size_t step, size_t num_steps, ov::Tensor& latent) -> bool {
                 py::gil_scoped_acquire acquire;
-                return py_callback(step, num_steps, latent).cast<bool>();
+                return (*shared_callback)(step, num_steps, latent).cast<bool>();
             }
         );
     } else if ((py::isinstance<py::function>(py_obj) || py::isinstance<ov::genai::StreamerBase>(py_obj) || py::isinstance<std::monostate>(py_obj)) && property_name == "streamer") {
@@ -443,21 +455,40 @@ ov::genai::StreamerVariant pystreamer_to_streamer(const PyBindStreamerVariant& p
 
     std::visit(overloaded {
         [&streamer](const std::function<std::optional<uint16_t>(py::str)>& py_callback){
-            // Wrap python streamer with manual utf-8 decoding. Do not rely
-            // on pybind automatic decoding since it raises exceptions on incomplete strings.
-            auto callback_wrapped = [py_callback](std::string subword) -> ov::genai::StreamingStatus {
+            auto shared_callback = std::shared_ptr<std::function<std::optional<uint16_t>(py::str)>>(
+                new std::function<std::optional<uint16_t>(py::str)>(py_callback),
+                [](std::function<std::optional<uint16_t>(py::str)>* f) {
+                    if (Py_IsInitialized()) {
+                        py::gil_scoped_acquire acquire;
+                        delete f;
+                    } else {
+                        delete f;
+                    }
+                }
+            );
+
+            auto callback_wrapped = [shared_callback](std::string subword) -> ov::genai::StreamingStatus {
                 py::gil_scoped_acquire acquire;
-                auto py_str = PyUnicode_DecodeUTF8(subword.data(), subword.length(), "replace");
-                std::optional<uint16_t> callback_output = py_callback(py::reinterpret_borrow<py::str>(py_str));
+                PyObject* py_str = PyUnicode_DecodeUTF8(subword.data(), subword.length(), "replace");
+                if (!py_str) {
+                    PyErr_WriteUnraisable(nullptr);
+                    return StreamingStatus::RUNNING;
+                }
+                auto py_str_obj = py::reinterpret_steal<py::str>(py_str);
+                std::optional<uint16_t> callback_output;
+                try {
+                    callback_output = (*shared_callback)(py_str_obj);
+                } catch (const py::error_already_set&) {
+                    return StreamingStatus::RUNNING;
+                }
                 if (callback_output.has_value()) {
-                    if (*callback_output == (uint16_t)StreamingStatus::RUNNING)
+                    if (*callback_output == static_cast<uint16_t>(StreamingStatus::RUNNING))
                         return StreamingStatus::RUNNING;
-                    else if (*callback_output == (uint16_t)StreamingStatus::CANCEL)
+                    else if (*callback_output == static_cast<uint16_t>(StreamingStatus::CANCEL))
                         return StreamingStatus::CANCEL;
                     return StreamingStatus::STOP;
-                } else {
-                    return StreamingStatus::RUNNING;
                 }
+                return StreamingStatus::RUNNING;
             };
             streamer = callback_wrapped;
         },
diff --git a/src/python/py_utils.hpp b/src/python/py_utils.hpp
@@ -19,7 +19,7 @@ namespace ov::genai::pybind::utils {
 // When StreamerVariant is used utf-8 decoding is done by pybind and can lead to exception on incomplete texts.
 // Therefore strings decoding should be handled with PyUnicode_DecodeUTF8(..., "replace") to not throw errors.
 using PyBindStreamerVariant = std::variant<
-    std::function<std::optional<uint16_t>(std::string)>,
+    std::function<std::optional<uint16_t>(py::str)>,
     std::shared_ptr<StreamerBase>,
     std::monostate>;
 
diff --git a/tests/python_tests/test_image_generation.py b/tests/python_tests/test_image_generation.py