diff --git a/newsfragments/5242.fixed.md b/newsfragments/5242.fixed.md new file mode 100644 index 00000000000..04210360e0c --- /dev/null +++ b/newsfragments/5242.fixed.md @@ -0,0 +1 @@ +Fix segfault when dropping `PyBuffer` after the Python interpreter has been finalized. diff --git a/src/buffer.rs b/src/buffer.rs index 6bef44cfa21..fe824b69450 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -622,7 +622,19 @@ impl PyBuffer { impl Drop for PyBuffer { fn drop(&mut self) { - Python::attach(|_| unsafe { ffi::PyBuffer_Release(&mut *self.0) }); + fn inner(buf: &mut Pin>) { + if Python::try_attach(|_| unsafe { ffi::PyBuffer_Release(&mut **buf) }).is_none() + && crate::internal::state::is_in_gc_traversal() + { + eprintln!("Warning: PyBuffer dropped while in GC traversal, this is a bug and will leak memory."); + } + // If `try_attach` failed and `is_in_gc_traversal()` is false, then probably the interpreter has + // already finalized and we can just assume that the underlying memory has already been freed. + // + // So we don't handle that case here. + } + + inner(&mut self.0); } } diff --git a/src/internal/state.rs b/src/internal/state.rs index 912434cb7c9..8095071af88 100644 --- a/src/internal/state.rs +++ b/src/internal/state.rs @@ -61,6 +61,30 @@ impl AttachGuard { unsafe { Self::acquire_unchecked() } } + /// Variant of the above which will will return `None` if the interpreter cannot be attached to. + #[cfg(any(not(Py_LIMITED_API), Py_3_11, test))] // see Python::try_attach + pub(crate) fn try_acquire() -> Option { + match ATTACH_COUNT.try_with(|c| c.get()) { + Ok(i) if i > 0 => { + // SAFETY: We just checked that the thread is already attached. + return Some(unsafe { Self::assume() }); + } + // Cannot attach during GC traversal. + Ok(ATTACH_FORBIDDEN_DURING_TRAVERSE) => return None, + // other cases handled below + _ => {} + } + + // SAFETY: This API is always sound to call + if unsafe { ffi::Py_IsInitialized() } == 0 { + // If the interpreter is not initialized, we cannot attach. + return None; + } + + // SAFETY: We have ensured the Python interpreter is initialized. + Some(unsafe { Self::acquire_unchecked() }) + } + /// Acquires the `AttachGuard` without performing any state checking. /// /// This can be called in "unsafe" contexts where the normal interpreter state @@ -275,6 +299,14 @@ pub unsafe fn register_decref(obj: NonNull) { } } +/// Private helper function to check if we are currently in a GC traversal (as detected by PyO3). +#[cfg(any(not(Py_LIMITED_API), Py_3_11))] +pub(crate) fn is_in_gc_traversal() -> bool { + ATTACH_COUNT + .try_with(|c| c.get() == ATTACH_FORBIDDEN_DURING_TRAVERSE) + .unwrap_or(false) +} + /// Increments pyo3's internal attach count - to be called whenever an AttachGuard is created. #[inline(always)] fn increment_attach_count() { diff --git a/src/marker.rs b/src/marker.rs index f68d6161419..c36cd7111e8 100644 --- a/src/marker.rs +++ b/src/marker.rs @@ -419,6 +419,22 @@ impl Python<'_> { f(guard.python()) } + /// Variant of [`Python::attach`] which will do no work if the interpreter is in a + /// state where it cannot be attached to: + /// - in the middle of GC traversal + /// - not initialized + #[inline] + #[track_caller] + #[cfg(any(not(Py_LIMITED_API), Py_3_11, test))] // only used in buffer.rs for now, allow in test cfg for simplicity + // TODO: make this API public? + pub(crate) fn try_attach(f: F) -> Option + where + F: for<'py> FnOnce(Python<'py>) -> R, + { + let guard = AttachGuard::try_acquire()?; + Some(f(guard.python())) + } + /// Prepares the use of Python. /// /// If the Python interpreter is not already initialized, this function will initialize it with @@ -805,7 +821,10 @@ impl<'unbound> Python<'unbound> { #[cfg(test)] mod tests { use super::*; - use crate::types::{IntoPyDict, PyList}; + use crate::{ + internal::state::ForbidAttaching, + types::{IntoPyDict, PyList}, + }; #[test] fn test_eval() { @@ -1012,4 +1031,26 @@ cls.func() fn python_is_zst() { assert_eq!(std::mem::size_of::>(), 0); } + + #[test] + fn test_try_attach_fail_during_gc() { + Python::attach(|_| { + assert!(Python::try_attach(|_| {}).is_some()); + + let guard = ForbidAttaching::during_traverse(); + assert!(Python::try_attach(|_| {}).is_none()); + drop(guard); + + assert!(Python::try_attach(|_| {}).is_some()); + }) + } + + #[test] + fn test_try_attach_ok_when_detached() { + Python::attach(|py| { + py.detach(|| { + assert!(Python::try_attach(|_| {}).is_some()); + }); + }); + } } diff --git a/tests/test_gc.rs b/tests/test_gc.rs index e8a608ff00c..284b1eae863 100644 --- a/tests/test_gc.rs +++ b/tests/test_gc.rs @@ -739,3 +739,55 @@ extern "C" fn visit_error( ) -> std::os::raw::c_int { -1 } + +#[test] +#[cfg(any(not(Py_LIMITED_API), Py_3_11))] // buffer availability +fn test_drop_buffer_during_traversal_without_gil() { + use pyo3::buffer::PyBuffer; + use pyo3::types::PyBytes; + + // `PyBuffer` has a drop method which attempts to attach to the Python interpreter, + // if the thread is during traverse we leak it for safety. This should _never_ be happening + // so it's purely a user bug, but we leak to be safe. + + #[pyclass] + struct BufferDropDuringTraversal { + inner: Mutex)>>, + cycle: Option, + } + + #[pymethods] + impl BufferDropDuringTraversal { + #[allow(clippy::unnecessary_wraps)] + fn __traverse__(&self, _visit: PyVisit<'_>) -> Result<(), PyTraverseError> { + self.inner.lock().unwrap().take(); + Ok(()) + } + + fn __clear__(&mut self) { + self.cycle = None; + } + } + + let (guard, check) = drop_check(); + Python::attach(|py| { + let obj = Py::new( + py, + BufferDropDuringTraversal { + inner: Mutex::new(Some(( + guard, + PyBuffer::get(&PyBytes::new(py, b"test")).unwrap(), + ))), + cycle: None, + }, + ) + .unwrap(); + + obj.borrow_mut(py).cycle = Some(obj.clone_ref(py).into_any()); + + let ptr = obj.as_ptr(); + drop(obj); + + check.assert_drops_with_gc(ptr); + }); +} diff --git a/tests/test_pybuffer_drop_without_interpreter.rs b/tests/test_pybuffer_drop_without_interpreter.rs new file mode 100644 index 00000000000..bd1521ae573 --- /dev/null +++ b/tests/test_pybuffer_drop_without_interpreter.rs @@ -0,0 +1,34 @@ +#![cfg(any(not(Py_LIMITED_API), Py_3_11))] // buffer availability +#![cfg(not(any(PyPy, GraalPy)))] // cannot control interpreter lifecycle in PyPy or GraalPy + +//! Dropping `Py` after the interpreter has been finalized should be sound. +//! +//! See e.g. https://github.com/PyO3/pyo3/issues/4632 for an extension of this problem +//! where the interpreter was finalized before `PyBuffer` was dropped. +//! +//! This test runs in its own process to control the interpreter lifecycle. + +use pyo3::{buffer::PyBuffer, types::PyBytes}; + +#[test] +fn test_pybuffer_drop_without_interpreter() { + // SAFETY: this is knowingly unsafe as we're preserving the `Py` object + // after the Python interpreter has been finalized. + // + // However we should still be able to drop it without causing undefined behavior, + // so that process shutdown is sound. + let obj: PyBuffer = unsafe { + pyo3::with_embedded_python_interpreter(|py| { + PyBuffer::get(&PyBytes::new(py, b"abcdef")).unwrap() + }) + }; + + // there should be no interpreter outside of the `with_embedded_python_interpreter` block + assert_eq!(unsafe { pyo3_ffi::Py_IsInitialized() }, 0); + + // dropping object should be sound + drop(obj); + + // dropping object should not re-initialize the interpreter + assert_eq!(unsafe { pyo3_ffi::Py_IsInitialized() }, 0); +}