From 8e0398c060ba50bde4fe47a3685c0857285001bd Mon Sep 17 00:00:00 2001 From: Hrvoje Niksic Date: Sun, 1 Mar 2020 21:31:08 +0100 Subject: [PATCH 1/4] Clarify the relationship between `forget()` and `ManuallyDrop`. As discussed on reddit, this commit addresses two issues with the documentation of `mem::forget()`: * The documentation of `mem::forget()` can confuse the reader because of the discrepancy between usage examples that show correct usage and the accompanying text which speaks of the possibility of double-free. The text that says "if the panic occurs before `mem::forget` was called" refers to a variant of the second example that was never shown, modified to use `mem::forget` instead of `ManuallyDrop`. Ideally the documentation should show both variants, so it's clear what it's talking about. Also, the double free could be fixed just by placing `mem::forget(v)` before the construction of `s`. Since the lifetimes of `s` and `v` wouldn't overlap, there would be no point where panic could cause a double free. This could be mentioned, and contrasted against the more robust fix of using `ManuallyDrop`. * This sentence seems unjustified: "For some types, operations such as passing ownership (to a funcion like `mem::forget`) requires them to actually be fully owned right now [...]". Unlike C++, Rust has no move constructors, its moves are (possibly elided) bitwise copies. Even if you pass an invalid object to `mem::forget`, no harm should come to pass because `mem::forget` consumes the object and exists solely to prevent drop, so there no one left to observe the invalid state state. --- src/libcore/mem/mod.rs | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/libcore/mem/mod.rs b/src/libcore/mem/mod.rs index 1cf2b40e93068..19e3b2a8bb922 100644 --- a/src/libcore/mem/mod.rs +++ b/src/libcore/mem/mod.rs @@ -69,8 +69,26 @@ pub use crate::intrinsics::transmute; /// ``` /// /// The practical use cases for `forget` are rather specialized and mainly come -/// up in unsafe or FFI code. However, [`ManuallyDrop`] is usually preferred -/// for such cases, e.g.: +/// up in unsafe or FFI code. For example: +/// +/// ``` +/// use std::mem; +/// +/// let mut v = vec![65, 122]; +/// // Build a `String` using the contents of `v` +/// let s = unsafe { String::from_raw_parts(v.as_mut_ptr(), 2, v.capacity()) }; +/// // immediately leak `v` because its memory is now managed by `s` +/// mem::forget(v); +/// assert_eq!(s, "Az"); +/// // `s` is implicitly dropped and its memory deallocated. +/// ``` +/// +/// The above is correct, but brittle. If code gets added between the construction of +/// `String` and the invocation of `mem::forget()`, a panic within it will cause a double +/// free because the same memory is handled by both `v` and `s`. This can be fixed by +/// storing the result of `v.as_mut_ptr()` in a local variable and calling `mem::forget()` +/// before `String::from_raw_parts`. This kind of issue can be more robustly prevented by +/// using [`ManuallyDrop`], which is usually preferred for such cases: /// /// ``` /// use std::mem::ManuallyDrop; @@ -88,16 +106,14 @@ pub use crate::intrinsics::transmute; /// // `s` is implicitly dropped and its memory deallocated. /// ``` /// -/// Using `ManuallyDrop` here has two advantages: +/// `ManuallyDrop` robustly prevents double-free because we disable `v`'s destructor +/// before doing anything else. `mem::forget()` doesn't allow this because it consumes its +/// argument, forcing us to call it only after extracting anything we need from `v`. /// -/// * We do not "touch" `v` after disassembling it. For some types, operations -/// such as passing ownership (to a function like `mem::forget`) requires them to actually -/// be fully owned right now; that is a promise we do not want to make here as we are -/// in the process of transferring ownership to the new `String` we are building. -/// * In case of an unexpected panic, `ManuallyDrop` is not dropped, but if the panic -/// occurs before `mem::forget` was called we might end up dropping invalid data, -/// or double-dropping. In other words, `ManuallyDrop` errs on the side of leaking -/// instead of erring on the side of dropping. +/// Note that the above code cannot panic between construction of `ManuallyDrop` and +/// building the string. But even if it could (after a modification), a panic there would +/// result in a leak and not a double free. In other words, `ManuallyDrop` errs on the +/// side of leaking instead of erring on the side of dropping. /// /// [drop]: fn.drop.html /// [uninit]: fn.uninitialized.html From 2a08b0e3006aeb997e42d5df135eea63b071fa75 Mon Sep 17 00:00:00 2001 From: Hrvoje Niksic Date: Wed, 4 Mar 2020 22:12:53 +0100 Subject: [PATCH 2/4] Restore (and reword) the warning against passing invalid values to mem::forget. As pointed out by Ralf Jung, dangling references and boxes are undefined behavior as per https://doc.rust-lang.org/reference/behavior-considered-undefined.html and the Miri checker. --- src/libcore/mem/mod.rs | 52 ++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/src/libcore/mem/mod.rs b/src/libcore/mem/mod.rs index 19e3b2a8bb922..7297f66970de5 100644 --- a/src/libcore/mem/mod.rs +++ b/src/libcore/mem/mod.rs @@ -58,7 +58,9 @@ pub use crate::intrinsics::transmute; /// /// # Examples /// -/// Leak an I/O object, never closing the file: +/// The canonical safe use of `mem::forget` is to circumvent a value's destructor +/// implemented by the `Drop` trait. For example, this will leak a `File`, i.e. reclaim +/// the space taken by the variable but never close the underlying system resource: /// /// ```no_run /// use std::mem; @@ -68,8 +70,14 @@ pub use crate::intrinsics::transmute; /// mem::forget(file); /// ``` /// -/// The practical use cases for `forget` are rather specialized and mainly come -/// up in unsafe or FFI code. For example: +/// This is useful when the ownership of the underlying was previously +/// transferred to code outside of Rust, for example by transmitting the raw +/// file descriptor to C code. +/// +/// # Relationship with `ManuallyDrop` +/// +/// Using `mem::forget` to transmit memory ownership is error-prone and is best +/// replaced with `ManuallyDrop`. Consider, for example, this code: /// /// ``` /// use std::mem; @@ -77,18 +85,25 @@ pub use crate::intrinsics::transmute; /// let mut v = vec![65, 122]; /// // Build a `String` using the contents of `v` /// let s = unsafe { String::from_raw_parts(v.as_mut_ptr(), 2, v.capacity()) }; -/// // immediately leak `v` because its memory is now managed by `s` -/// mem::forget(v); +/// // leak `v` because its memory is now managed by `s` +/// mem::forget(v); // ERROR - v is invalid and must not be passed to a function /// assert_eq!(s, "Az"); /// // `s` is implicitly dropped and its memory deallocated. /// ``` /// -/// The above is correct, but brittle. If code gets added between the construction of -/// `String` and the invocation of `mem::forget()`, a panic within it will cause a double -/// free because the same memory is handled by both `v` and `s`. This can be fixed by -/// storing the result of `v.as_mut_ptr()` in a local variable and calling `mem::forget()` -/// before `String::from_raw_parts`. This kind of issue can be more robustly prevented by -/// using [`ManuallyDrop`], which is usually preferred for such cases: +/// There are two issues with the above example: +/// +/// * If more code were added between the construction of `String` and the invocation of +/// `mem::forget()`, a panic within it would cause a double free because the same memory +/// is handled by both `v` and `s`. +/// * After calling `v.as_mut_ptr()` and transmitting the ownership of the data to `s`, +/// the `v` value is invalid. Although moving a value to `mem::forget` (which won't +/// inspect it) seems safe, some types have strict requirements on their values that +/// make them invalid when dangling or no longer owned. Using invalid values in any +/// way, including passing them to or returning them from functions, constitutes +/// undefined behavior and may break the assumptions made by the compiler. +/// +/// Switching to `ManuallyDrop` avoids both issues: /// /// ``` /// use std::mem::ManuallyDrop; @@ -108,12 +123,15 @@ pub use crate::intrinsics::transmute; /// /// `ManuallyDrop` robustly prevents double-free because we disable `v`'s destructor /// before doing anything else. `mem::forget()` doesn't allow this because it consumes its -/// argument, forcing us to call it only after extracting anything we need from `v`. -/// -/// Note that the above code cannot panic between construction of `ManuallyDrop` and -/// building the string. But even if it could (after a modification), a panic there would -/// result in a leak and not a double free. In other words, `ManuallyDrop` errs on the -/// side of leaking instead of erring on the side of dropping. +/// argument, forcing us to call it only after extracting anything we need from `v`. Even +/// if a panic were introduced between construction of `ManuallyDrop` and building the +/// string (which cannot happen in the code as shown), it would result in a leak and not a +/// double free. In other words, `ManuallyDrop` errs on the side of leaking instead of +/// erring on the side of dropping. +/// +/// Also, `ManuallyDrop` prevents us from having to "touch" `v` after transferring the +/// ownership to `s` - the final step of interacting with `v` to dispoe of it without +/// running its destructor is entirely avoided. /// /// [drop]: fn.drop.html /// [uninit]: fn.uninitialized.html From 755434121cda7d21d301592cf6fbddb7042e59a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hrvoje=20Nik=C5=A1i=C4=87?= Date: Wed, 18 Mar 2020 11:30:39 +0100 Subject: [PATCH 3/4] Minor re-wordings and typo fixes. Co-Authored-By: Ralf Jung --- src/libcore/mem/mod.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/libcore/mem/mod.rs b/src/libcore/mem/mod.rs index 7297f66970de5..253847612adad 100644 --- a/src/libcore/mem/mod.rs +++ b/src/libcore/mem/mod.rs @@ -70,14 +70,14 @@ pub use crate::intrinsics::transmute; /// mem::forget(file); /// ``` /// -/// This is useful when the ownership of the underlying was previously +/// This is useful when the ownership of the underlying resource was previously /// transferred to code outside of Rust, for example by transmitting the raw /// file descriptor to C code. /// /// # Relationship with `ManuallyDrop` /// -/// Using `mem::forget` to transmit memory ownership is error-prone and is best -/// replaced with `ManuallyDrop`. Consider, for example, this code: +/// While `mem::forget` can also be used to transfer *memory* ownership, doing so is error-prone. +/// [`ManuallyDrop`] should be used instead. Consider, for example, this code: /// /// ``` /// use std::mem; @@ -97,9 +97,9 @@ pub use crate::intrinsics::transmute; /// `mem::forget()`, a panic within it would cause a double free because the same memory /// is handled by both `v` and `s`. /// * After calling `v.as_mut_ptr()` and transmitting the ownership of the data to `s`, -/// the `v` value is invalid. Although moving a value to `mem::forget` (which won't -/// inspect it) seems safe, some types have strict requirements on their values that -/// make them invalid when dangling or no longer owned. Using invalid values in any +/// the `v` value is invalid. Even when a value is just moved to `mem::forget` (which won't +/// inspect it), some types have strict requirements on their values that +/// make them invalid when dangling or no longer owned. Using invalid values in any /// way, including passing them to or returning them from functions, constitutes /// undefined behavior and may break the assumptions made by the compiler. /// @@ -123,11 +123,11 @@ pub use crate::intrinsics::transmute; /// /// `ManuallyDrop` robustly prevents double-free because we disable `v`'s destructor /// before doing anything else. `mem::forget()` doesn't allow this because it consumes its -/// argument, forcing us to call it only after extracting anything we need from `v`. Even +/// argument, forcing us to call it only after extracting anything we need from `v`. Even /// if a panic were introduced between construction of `ManuallyDrop` and building the /// string (which cannot happen in the code as shown), it would result in a leak and not a /// double free. In other words, `ManuallyDrop` errs on the side of leaking instead of -/// erring on the side of dropping. +/// erring on the side of (double-)dropping. /// /// Also, `ManuallyDrop` prevents us from having to "touch" `v` after transferring the /// ownership to `s` - the final step of interacting with `v` to dispoe of it without From 2bebe8d87111b544b8f5600fe93cc96391d5c91e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hrvoje=20Nik=C5=A1i=C4=87?= Date: Thu, 19 Mar 2020 13:56:48 +0100 Subject: [PATCH 4/4] Don't hard-code the vector length in the examples. Co-Authored-By: lzutao --- src/libcore/mem/mod.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/libcore/mem/mod.rs b/src/libcore/mem/mod.rs index 253847612adad..dac9ee6a5d91e 100644 --- a/src/libcore/mem/mod.rs +++ b/src/libcore/mem/mod.rs @@ -84,7 +84,7 @@ pub use crate::intrinsics::transmute; /// /// let mut v = vec![65, 122]; /// // Build a `String` using the contents of `v` -/// let s = unsafe { String::from_raw_parts(v.as_mut_ptr(), 2, v.capacity()) }; +/// let s = unsafe { String::from_raw_parts(v.as_mut_ptr(), v.len(), v.capacity()) }; /// // leak `v` because its memory is now managed by `s` /// mem::forget(v); // ERROR - v is invalid and must not be passed to a function /// assert_eq!(s, "Az"); @@ -113,10 +113,9 @@ pub use crate::intrinsics::transmute; /// // does not get dropped! /// let mut v = ManuallyDrop::new(v); /// // Now disassemble `v`. These operations cannot panic, so there cannot be a leak. -/// let ptr = v.as_mut_ptr(); -/// let cap = v.capacity(); +/// let (ptr, len, cap) = (v.as_mut_ptr(), v.len(), v.capacity()); /// // Finally, build a `String`. -/// let s = unsafe { String::from_raw_parts(ptr, 2, cap) }; +/// let s = unsafe { String::from_raw_parts(ptr, len, cap) }; /// assert_eq!(s, "Az"); /// // `s` is implicitly dropped and its memory deallocated. /// ```