From 0f7e0931d0ea2b98f106a10faacf6d44309044de Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Tue, 1 Jul 2014 15:10:22 +0200 Subject: [PATCH 1/4] Add support for in-place map for `Vec`s of types with same size This is implemented using a new struct `PartialVec` which implements the proper drop semantics in case the conversion is interrupted by an unwind. --- src/libcollections/vec.rs | 259 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs index 39fe57038b0b2..e6e25b2f03a7a 100644 --- a/src/libcollections/vec.rs +++ b/src/libcollections/vec.rs @@ -1690,6 +1690,252 @@ pub mod raw { } } +// TODO: Find some way to statically assert that `T` and `U` have the same +// size. +// +/// An owned, partially type-converted vector. +/// +/// No allocations are performed by usage, only a deallocation happens in the +/// destructor which should only run when unwinding. +/// +/// It can be used to convert a vector of `T`s into a vector of `U`s, by +/// converting the individual elements one-by-one. +/// +/// You may call the `push` method as often as you get a `Some(t)` from `pop`. +/// After pushing the same number of `U`s as you got `T`s, you can `unwrap` the +/// vector. +/// +/// # Example +/// +/// ```rust +/// let pv = PartialVec::new(vec![0u, 1]); +/// assert_eq!(pv.pop(), Some(0)); +/// assert_eq!(pv.pop(), Some(1)); +/// assert_eq!(pv.pop(), None); +/// pv.push(2u); +/// pv.push(3); +/// assert_eq!(pv.unwrap(), vec![2, 3]); +/// ``` +// +// Upheld invariants: +// +// (a) `vec` isn't modified except when the `PartialVec` goes out of scope, the +// only thing it is used for is keeping the memory which the `PartialVec` +// uses for the inplace conversion. +// +// (b) `start_u` points to the start of the vector. +// +// (c) `end_t` points to one element beyond the vector. +// +// (d) `start_u` <= `end_u` <= `start_t` <= `end_t`. +// +// (e) From `start_u` (incl.) to `end_u` (excl.) 
there are sequential instances +// of type `U`. +// +// (f) From `start_t` (incl.) to `end_t` (excl.) there are sequential instances +// of type `T`. + +pub struct PartialVec { + vec: Vec, + + start_u: *mut U, + end_u: *mut U, + start_t: *mut T, + end_t: *mut T, +} + +impl PartialVec { + /// Creates a `PartialVec` from a `Vec`. + pub fn new(mut vec: Vec) -> PartialVec { + // TODO: do this statically + assert!(mem::size_of::() != 0); + assert!(mem::size_of::() != 0); + assert!(mem::size_of::() == mem::size_of::()); + + let start = vec.as_mut_ptr(); + + // This `as int` cast is safe, because the size of the elements of the + // vector is not 0, and: + // + // 1) If the size of the elements in the vector is 1, the `int` may + // overflow, but it has the correct bit pattern so that the + // `.offset()` function will work. + // + // Example: + // Address space 0x0-0xF. + // `u8` array at: 0x1. + // Size of `u8` array: 0x8. + // Calculated `offset`: -0x8. + // After `array.offset(offset)`: 0x9. + // (0x1 + 0x8 = 0x1 - 0x8) + // + // 2) If the size of the elements in the vector is >1, the `uint` -> + // `int` conversion can't overflow. + let offset = vec.len() as int; + + let start_u = start as *mut U; + let end_u = start as *mut U; + let start_t = start; + let end_t = unsafe { start_t.offset(offset) }; + + // (b) is satisfied, `start_u` points to the start of `vec`. + + // (c) is also satisfied, `end_t` points to the end of `vec`. + + // `start_u == end_u == start_t <= end_t`, so also `start_u <= end_u <= + // start_t <= end_t`, thus (b). + + // As `start_u == end_u`, it is represented correctly that there are no + // instances of `U` in `vec`, thus (e) is satisfied. + + // At start, there are only elements of type `T` in `vec`, so (f) is + // satisfied, as `start_t` points to the start of `vec` and `end_t` to + // the end of it. + + // This points inside the vector, as the vector has length `offset`. 
+ + PartialVec { + // (a) is satisfied, `vec` isn't modified in the function. + vec: vec, + start_u: start_u, + end_u: end_u, + start_t: start_t, + end_t: end_t, + } + } + + /// Pops a `T` from the `PartialVec`. + /// + /// Returns `Some(t)` if there are more `T`s in the vector, otherwise + /// `None`. + fn pop(&mut self) -> Option { + // The `if` ensures that there are more `T`s in `vec`. + if self.start_t < self.end_t { + let result; + unsafe { + // (f) is satisfied before, so in this if branch there actually + // is a `T` at `start_t`. After shifting the pointer by one, + // (f) is again satisfied. + result = ptr::read(self.start_t as *const T); + self.start_t = self.start_t.offset(1); + } + Some(result) + } else { + None + } + } + + /// Pushes a new `U` to the `PartialVec`. + /// + /// # Failure + /// + /// Fails if not enough `T`s were popped to have enough space for the new + /// `U`. + pub fn push(&mut self, value: U) { + // The assert assures that still `end_u <= start_t` (d) after + // the function. + assert!(self.end_u as *const () < self.start_t as *const (), + "writing more elements to PartialVec than reading from it") + unsafe { + // (e) is satisfied before, and after writing one `U` + // to `end_u` and shifting it by one, it's again + // satisfied. + ptr::write(self.end_u, value); + self.end_u = self.end_u.offset(1); + } + } + + /// Unwraps the new `Vec` of `U`s after having pushed enough `U`s and + /// popped all `T`s. + /// + /// # Failure + /// + /// Fails if not all `T`s were popped, also fails if not the same amount of + /// `U`s was pushed before calling `unwrap`. + pub fn unwrap(self) -> Vec { + // If `self.end_u == self.end_t`, we know from (e) that there are no + // more `T`s in `vec`, we also know that the whole length of `vec` is + // now used by `U`s, thus we can just transmute `vec` from a vector of + // `T`s to a vector of `U`s safely. 
+ + assert!(self.end_u as *const () == self.end_t as *const (), + "trying to unwrap a PartialVec before completing the writes to it"); + + // Extract `vec` and prevent the destructor of `PartialVec` from + // running. + unsafe { + let vec = ptr::read(&self.vec); + mem::forget(self); + mem::transmute(vec) + } + } +} + +#[unsafe_destructor] +impl Drop for PartialVec { + fn drop(&mut self) { + unsafe { + // As per (a) `vec` hasn't been modified until now. As it has a + // length currently, this would run destructors of `T`s which might + // not be there. So at first, set `vec`s length to `0`. This must + // be done at first to remain memory-safe as the destructors of `U` + // or `T` might cause unwinding where `vec`s destructor would be + // executed. + self.vec.set_len(0); + + // As per (e) and (f) we have instances of `U`s and `T`s in `vec`. + // Destruct them. + while self.start_u < self.end_u { + let _ = ptr::read(self.start_u as *const U); // Run a `U` destructor. + self.start_u = self.start_u.offset(1); + } + while self.start_t < self.end_t { + let _ = ptr::read(self.start_t as *const T); // Run a `T` destructor. + self.start_t = self.start_t.offset(1); + } + // After this destructor ran, the destructor of `vec` will run, + // deallocating the underlying memory. + } + } +} + +impl Iterator for PartialVec { + fn next(&mut self) -> Option { + self.pop() + } +} + +impl Vec { + /// Converts a `Vec` to a `Vec` where `T` and `U` have the same size. 
+ /// + /// # Example + /// + /// ```rust + /// let v = vec![0u, 1, 2]; + /// let w = v.map_inplace(|i| i + 3); + /// assert_eq!(w.as_slice() == &[3, 4, 5]); + /// + /// let big_endian_u16s = vec![0x1122u16, 0x3344]; + /// let u8s = big_endian_u16s.map_inplace(|x| [ + /// ((x >> 8) & 0xff) as u8, + /// (x & 0xff) as u8 + /// ]); + /// assert_eq!(u8s.as_slice() == &[[0x11, 0x22], [0x33, 0x44]]); + /// ``` + pub fn map_inplace(self, f: |T| -> U) -> Vec { + let mut pv = PartialVec::new(self); + loop { + // TODO: need this extra assignment for borrowck to pass + let maybe_t = pv.pop(); + match maybe_t { + Some(t) => pv.push(f(t)), + None => return pv.unwrap(), + }; + } + } +} + + #[cfg(test)] mod tests { extern crate test; @@ -2009,6 +2255,19 @@ mod tests { assert_eq!(vec.swap_remove(0), None); } + #[test] + #[should_fail] + fn test_map_inplace_incompatible_types_fail() { + let v = vec![0u, 1, 2]; + v.map_inplace(|_| ()); + } + + #[test] + fn test_map_inplace() { + let v = vec![0u, 1, 2]; + assert_eq!(v.map_inplace(|i: uint| i as int - 1).as_slice, &[-1i, 0, 1]); + } + #[bench] fn bench_new(b: &mut Bencher) { b.iter(|| { From 3032fc18ca6e60056e8e242022d96f48c61e42cb Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Wed, 2 Jul 2014 00:17:28 +0200 Subject: [PATCH 2/4] PartialVec: Remove TODOs and rename `unwrap` to `into_vec` --- src/libcollections/vec.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs index e6e25b2f03a7a..a91b91da65cf3 100644 --- a/src/libcollections/vec.rs +++ b/src/libcollections/vec.rs @@ -1690,9 +1690,6 @@ pub mod raw { } } -// TODO: Find some way to statically assert that `T` and `U` have the same -// size. -// /// An owned, partially type-converted vector. 
/// /// No allocations are performed by usage, only a deallocation happens in the @@ -1714,7 +1711,7 @@ pub mod raw { /// assert_eq!(pv.pop(), None); /// pv.push(2u); /// pv.push(3); -/// assert_eq!(pv.unwrap(), vec![2, 3]); +/// assert_eq!(pv.into_vec(), vec![2, 3]); /// ``` // // Upheld invariants: @@ -1747,7 +1744,7 @@ pub struct PartialVec { impl PartialVec { /// Creates a `PartialVec` from a `Vec`. pub fn new(mut vec: Vec) -> PartialVec { - // TODO: do this statically + // FIXME: Assert that the types `T` and `U` have the same size. assert!(mem::size_of::() != 0); assert!(mem::size_of::() != 0); assert!(mem::size_of::() == mem::size_of::()); @@ -1852,7 +1849,7 @@ impl PartialVec { /// /// Fails if not all `T`s were popped, also fails if not the same amount of /// `U`s was pushed before calling `unwrap`. - pub fn unwrap(self) -> Vec { + pub fn into_vec(self) -> Vec { // If `self.end_u == self.end_t`, we know from (e) that there are no // more `T`s in `vec`, we also know that the whole length of `vec` is // now used by `U`s, thus we can just transmute `vec` from a vector of @@ -1925,11 +1922,10 @@ impl Vec { pub fn map_inplace(self, f: |T| -> U) -> Vec { let mut pv = PartialVec::new(self); loop { - // TODO: need this extra assignment for borrowck to pass let maybe_t = pv.pop(); match maybe_t { Some(t) => pv.push(f(t)), - None => return pv.unwrap(), + None => return pv.into_vec(), }; } } From 371bea7d853553ea53d030d83a88294bc6c8e130 Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Sun, 3 Aug 2014 14:16:10 +0200 Subject: [PATCH 3/4] Fix some of the issues mentioned in the PR on Github This specifically includes: - Fix of the tests - Remove `transmute` between `Vec`s of different types --- src/libcollections/vec.rs | 65 ++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs index a91b91da65cf3..f6011190d40ac 100644 --- a/src/libcollections/vec.rs +++ 
b/src/libcollections/vec.rs @@ -1692,6 +1692,9 @@ pub mod raw { /// An owned, partially type-converted vector. /// +/// This struct takes two type parameters `T` and `U` which must be of the +/// same, non-zero size. +/// /// No allocations are performed by usage, only a deallocation happens in the /// destructor which should only run when unwinding. /// @@ -1705,13 +1708,13 @@ pub mod raw { /// # Example /// /// ```rust -/// let pv = PartialVec::new(vec![0u, 1]); +/// let pv = PartialVec::from_vec(vec![0u, 1]); /// assert_eq!(pv.pop(), Some(0)); /// assert_eq!(pv.pop(), Some(1)); /// assert_eq!(pv.pop(), None); /// pv.push(2u); /// pv.push(3); -/// assert_eq!(pv.into_vec(), vec![2, 3]); +/// assert_eq!(pv.into_vec().as_slice(), &[2, 3]); /// ``` // // Upheld invariants: @@ -1731,6 +1734,8 @@ pub mod raw { // // (f) From `start_t` (incl.) to `end_t` (excl.) there are sequential instances // of type `T`. +// +// (g) The size of `T` and `U` is equal and non-zero. pub struct PartialVec { vec: Vec, @@ -1743,8 +1748,14 @@ pub struct PartialVec { impl PartialVec { /// Creates a `PartialVec` from a `Vec`. - pub fn new(mut vec: Vec) -> PartialVec { + /// + /// # Failure + /// + /// Fails if `T` and `U` have differing sizes or are zero-sized. + pub fn from_vec(mut vec: Vec) -> PartialVec { // FIXME: Assert that the types `T` and `U` have the same size. + // + // These asserts make sure (g) is satisfied. assert!(mem::size_of::() != 0); assert!(mem::size_of::() != 0); assert!(mem::size_of::() == mem::size_of::()); @@ -1773,24 +1784,24 @@ impl PartialVec { let start_u = start as *mut U; let end_u = start as *mut U; let start_t = start; + + // This points inside the vector, as the vector has length `offset`. let end_t = unsafe { start_t.offset(offset) }; // (b) is satisfied, `start_u` points to the start of `vec`. - + // // (c) is also satisfied, `end_t` points to the end of `vec`. 
- + // // `start_u == end_u == start_t <= end_t`, so also `start_u <= end_u <= // start_t <= end_t`, thus (b). - + // // As `start_u == end_u`, it is represented correctly that there are no // instances of `U` in `vec`, thus (e) is satisfied. - + // // At start, there are only elements of type `T` in `vec`, so (f) is // satisfied, as `start_t` points to the start of `vec` and `end_t` to // the end of it. - // This points inside the vector, as the vector has length `offset`. - PartialVec { // (a) is satisfied, `vec` isn't modified in the function. vec: vec, @@ -1803,8 +1814,8 @@ impl PartialVec { /// Pops a `T` from the `PartialVec`. /// - /// Returns `Some(t)` if there are more `T`s in the vector, otherwise - /// `None`. + /// Removes the next `T` from the vector and returns it as `Some(T)`, or + /// `None` if there are none left. fn pop(&mut self) -> Option { // The `if` ensures that there are more `T`s in `vec`. if self.start_t < self.end_t { @@ -1849,21 +1860,26 @@ impl PartialVec { /// /// Fails if not all `T`s were popped, also fails if not the same amount of /// `U`s was pushed before calling `unwrap`. - pub fn into_vec(self) -> Vec { + pub fn into_vec(mut self) -> Vec { // If `self.end_u == self.end_t`, we know from (e) that there are no // more `T`s in `vec`, we also know that the whole length of `vec` is - // now used by `U`s, thus we can just transmute `vec` from a vector of - // `T`s to a vector of `U`s safely. + // now used by `U`s, thus we can just interpret `vec` as a vector of + // `U` safely. assert!(self.end_u as *const () == self.end_t as *const (), "trying to unwrap a PartialVec before completing the writes to it"); // Extract `vec` and prevent the destructor of `PartialVec` from - // running. + // running. Note that none of the function calls can fail, thus no + // resources can be leaked (as the `vec` member of `PartialVec` is the + // only one which holds allocations -- and it is returned from this + // function. 
+ unsafe { - let vec = ptr::read(&self.vec); + let vec_len = self.vec.len(); + let vec_cap = self.vec.capacity(); + let vec_ptr = self.vec.as_mut_ptr() as *mut U; mem::forget(self); - mem::transmute(vec) + Vec::from_raw_parts(vec_len, vec_cap, vec_ptr) } } } @@ -1903,24 +1919,29 @@ impl Iterator for PartialVec { } impl Vec { - /// Converts a `Vec` to a `Vec` where `T` and `U` have the same size. + /// Converts a `Vec` to a `Vec` where `T` and `U` have the same + /// non-zero size. + /// + /// # Failure + /// + /// Fails if `T` and `U` have differing sizes or are zero-sized. /// /// # Example /// /// ```rust /// let v = vec![0u, 1, 2]; /// let w = v.map_inplace(|i| i + 3); - /// assert_eq!(w.as_slice() == &[3, 4, 5]); + /// assert_eq!(w.as_slice(), &[3, 4, 5]); /// /// let big_endian_u16s = vec![0x1122u16, 0x3344]; /// let u8s = big_endian_u16s.map_inplace(|x| [ /// ((x >> 8) & 0xff) as u8, /// (x & 0xff) as u8 /// ]); - /// assert_eq!(u8s.as_slice() == &[[0x11, 0x22], [0x33, 0x44]]); + /// assert_eq!(u8s.as_slice(), &[[0x11, 0x22], [0x33, 0x44]]); /// ``` pub fn map_inplace(self, f: |T| -> U) -> Vec { - let mut pv = PartialVec::new(self); + let mut pv = PartialVec::from_vec(self); loop { let maybe_t = pv.pop(); match maybe_t { @@ -2261,7 +2282,7 @@ mod tests { #[test] fn test_map_inplace() { let v = vec![0u, 1, 2]; - assert_eq!(v.map_inplace(|i: uint| i as int - 1).as_slice, &[-1i, 0, 1]); + assert_eq!(v.map_inplace(|i: uint| i as int - 1).as_slice(), &[-1i, 0, 1]); } #[bench] From e009e49af316a495ea5fe603b2f68bb16d701b2b Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Fri, 8 Aug 2014 23:52:15 +0200 Subject: [PATCH 4/4] Check that the `min_align_of` of both types in a `PartialVec` matches This is important because the underlying allocator of the `Vec` passes that information to the deallocator which needs the guarantee that these are the same parameters that were also passed to the allocation function. 
--- src/libcollections/vec.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs index f6011190d40ac..fa30caa0fd0cd 100644 --- a/src/libcollections/vec.rs +++ b/src/libcollections/vec.rs @@ -1736,6 +1736,8 @@ pub mod raw { // of type `T`. // // (g) The size of `T` and `U` is equal and non-zero. +// +// (h) The `min_align_of` of `T` and `U` is equal. pub struct PartialVec { vec: Vec, @@ -1753,12 +1755,14 @@ impl PartialVec { /// /// Fails if `T` and `U` have differing sizes or are zero-sized. pub fn from_vec(mut vec: Vec) -> PartialVec { - // FIXME: Assert that the types `T` and `U` have the same size. + // FIXME: Assert statically that the types `T` and `U` have the same + // size. // - // These asserts make sure (g) is satisfied. + // These asserts make sure (g) and (h) are satisfied. assert!(mem::size_of::() != 0); assert!(mem::size_of::() != 0); assert!(mem::size_of::() == mem::size_of::()); + assert!(mem::min_align_of::() == mem::min_align_of::()); let start = vec.as_mut_ptr();