diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index cfad111aa546f..d0bfa038aa13d 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -120,6 +120,7 @@ #![feature(receiver_trait)] #![cfg_attr(bootstrap, feature(min_const_generics))] #![feature(min_specialization)] +#![feature(set_ptr_value)] #![feature(slice_ptr_get)] #![feature(slice_ptr_len)] #![feature(staged_api)] diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index 0973a6e362bc2..0e3bab6b20a0a 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -829,8 +829,8 @@ impl Rc { let offset = unsafe { data_offset(ptr) }; // Reverse the offset to find the original RcBox. - let fake_ptr = ptr as *mut RcBox; - let rc_ptr = unsafe { set_data_ptr(fake_ptr, (ptr as *mut u8).offset(-offset)) }; + let rc_ptr = + unsafe { (ptr as *mut RcBox).set_ptr_value((ptr as *mut u8).offset(-offset)) }; unsafe { Self::from_ptr(rc_ptr) } } @@ -850,7 +850,7 @@ impl Rc { pub fn downgrade(this: &Self) -> Weak { this.inner().inc_weak(); // Make sure we do not create a dangling Weak - debug_assert!(!is_dangling(this.ptr)); + debug_assert!(!is_dangling(this.ptr.as_ptr())); Weak { ptr: this.ptr } } @@ -1164,7 +1164,7 @@ impl Rc { Self::allocate_for_layout( Layout::for_value(&*ptr), |layout| Global.allocate(layout), - |mem| set_data_ptr(ptr as *mut T, mem) as *mut RcBox, + |mem| (ptr as *mut RcBox).set_ptr_value(mem), ) } } @@ -1203,20 +1203,7 @@ impl Rc<[T]> { ) } } -} - -/// Sets the data pointer of a `?Sized` raw pointer. -/// -/// For a slice/trait object, this sets the `data` field and leaves the rest -/// unchanged. For a sized raw pointer, this simply sets the pointer. -unsafe fn set_data_ptr(mut ptr: *mut T, data: *mut U) -> *mut T { - unsafe { - ptr::write(&mut ptr as *mut _ as *mut *mut u8, data as *mut u8); - } - ptr -} -impl Rc<[T]> { /// Copy elements from slice into newly allocated Rc<\[T\]> /// /// Unsafe because the caller must either take ownership or bind `T: Copy` @@ -1860,8 +1847,8 @@ impl Weak { } } -pub(crate) fn is_dangling(ptr: NonNull) -> bool { - let address = ptr.as_ptr() as *mut () as usize; +pub(crate) fn is_dangling(ptr: *mut T) -> bool { + let address = ptr as *mut () as usize; address == usize::MAX } @@ -1872,7 +1859,7 @@ struct WeakInner<'a> { strong: &'a Cell, } -impl Weak { +impl Weak { /// Returns a raw pointer to the object `T` pointed to by this `Weak`. /// /// The pointer is valid only if there are some strong references. The pointer may be dangling, @@ -1902,15 +1889,15 @@ impl Weak { pub fn as_ptr(&self) -> *const T { let ptr: *mut RcBox = NonNull::as_ptr(self.ptr); - // SAFETY: we must offset the pointer manually, and said pointer may be - // a dangling weak (usize::MAX) if T is sized. data_offset is safe to call, - // because we know that a pointer to unsized T was derived from a real - // unsized T, as dangling weaks are only created for sized T. wrapping_offset - // is used so that we can use the same code path for the non-dangling - // unsized case and the potentially dangling sized case. - unsafe { - let offset = data_offset(ptr as *mut T); - set_data_ptr(ptr as *mut T, (ptr as *mut u8).wrapping_offset(offset)) + if is_dangling(ptr) { + // If the pointer is dangling, we return the sentinel directly. This cannot be + // a valid payload address, as the payload is at least as aligned as RcBox (usize). + ptr as *const T + } else { + // SAFETY: if is_dangling returns false, then the pointer is dereferencable. + // The payload may be dropped at this point, and we have to maintain provenance, + // so use raw pointer manipulation. + unsafe { &raw const (*ptr).value } } } @@ -1992,22 +1979,24 @@ impl Weak { /// [`new`]: Weak::new #[stable(feature = "weak_into_raw", since = "1.45.0")] pub unsafe fn from_raw(ptr: *const T) -> Self { - // SAFETY: data_offset is safe to call, because this pointer originates from a Weak. // See Weak::as_ptr for context on how the input pointer is derived. - let offset = unsafe { data_offset(ptr) }; - // Reverse the offset to find the original RcBox. - // SAFETY: we use wrapping_offset here because the pointer may be dangling (but only if T: Sized). - let ptr = unsafe { - set_data_ptr(ptr as *mut RcBox, (ptr as *mut u8).wrapping_offset(-offset)) + let ptr = if is_dangling(ptr as *mut T) { + // This is a dangling Weak. + ptr as *mut RcBox + } else { + // Otherwise, we're guaranteed the pointer came from a nondangling Weak. + // SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T. + let offset = unsafe { data_offset(ptr) }; + // Thus, we reverse the offset to get the whole RcBox. + // SAFETY: the pointer originated from a Weak, so this offset is safe. + unsafe { (ptr as *mut RcBox).set_ptr_value((ptr as *mut u8).offset(-offset)) } }; // SAFETY: we now have recovered the original Weak pointer, so can create the Weak. Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } } } -} -impl Weak { /// Attempts to upgrade the `Weak` pointer to an [`Rc`], delaying /// dropping of the inner value if successful. /// @@ -2070,7 +2059,7 @@ impl Weak { /// (i.e., when this `Weak` was created by `Weak::new`). #[inline] fn inner(&self) -> Option> { - if is_dangling(self.ptr) { + if is_dangling(self.ptr.as_ptr()) { None } else { // We are careful to *not* create a reference covering the "data" field, as @@ -2325,21 +2314,19 @@ impl AsRef for Rc { #[stable(feature = "pin", since = "1.33.0")] impl Unpin for Rc {} -/// Get the offset within an `RcBox` for -/// a payload of type described by a pointer. +/// Get the offset within an `RcBox` for the payload behind a pointer. /// /// # Safety /// -/// This has the same safety requirements as `align_of_val_raw`. In effect: -/// -/// - This function is safe for any argument if `T` is sized, and -/// - if `T` is unsized, the pointer must have appropriate pointer metadata -/// acquired from the real instance that you are getting this offset for. +/// The pointer must point to (and have valid metadata for) a previously +/// valid instance of T, but the T is allowed to be dropped. unsafe fn data_offset(ptr: *const T) -> isize { - // Align the unsized value to the end of the `RcBox`. - // Because it is ?Sized, it will always be the last field in memory. - // Note: This is a detail of the current implementation of the compiler, - // and is not a guaranteed language detail. Do not rely on it outside of std. + // Align the unsized value to the end of the RcBox. + // Because RcBox is repr(C), it will always be the last field in memory. + // SAFETY: since the only unsized types possible are slices, trait objects, + // and extern types, the input safety requirement is currently enough to + // satisfy the requirements of align_of_val_raw; this is an implementation + // detail of the language that may not be relied upon outside of std. unsafe { data_offset_align(align_of_val_raw(ptr)) } } diff --git a/library/alloc/src/rc/tests.rs b/library/alloc/src/rc/tests.rs index 2d183a8c88c64..843a9b07fa934 100644 --- a/library/alloc/src/rc/tests.rs +++ b/library/alloc/src/rc/tests.rs @@ -208,6 +208,30 @@ fn into_from_weak_raw() { } } +#[test] +fn test_into_from_weak_raw_unsized() { + use std::fmt::Display; + use std::string::ToString; + + let arc: Rc = Rc::from("foo"); + let weak: Weak = Rc::downgrade(&arc); + + let ptr = Weak::into_raw(weak.clone()); + let weak2 = unsafe { Weak::from_raw(ptr) }; + + assert_eq!(unsafe { &*ptr }, "foo"); + assert!(weak.ptr_eq(&weak2)); + + let arc: Rc = Rc::new(123); + let weak: Weak = Rc::downgrade(&arc); + + let ptr = Weak::into_raw(weak.clone()); + let weak2 = unsafe { Weak::from_raw(ptr) }; + + assert_eq!(unsafe { &*ptr }.to_string(), "123"); + assert!(weak.ptr_eq(&weak2)); +} + #[test] fn get_mut() { let mut x = Rc::new(3); @@ -294,6 +318,23 @@ fn test_unsized() { assert_eq!(foo, foo.clone()); } +#[test] +fn test_maybe_thin_unsized() { + // If/when custom thin DSTs exist, this test should be updated to use one + use std::ffi::{CStr, CString}; + + let x: Rc = Rc::from(CString::new("swordfish").unwrap().into_boxed_c_str()); + assert_eq!(format!("{:?}", x), "\"swordfish\""); + let y: Weak = Rc::downgrade(&x); + drop(x); + + // At this point, the weak points to a dropped DST + assert!(y.upgrade().is_none()); + // But we still need to be able to get the alloc layout to drop. + // CStr has no drop glue, but custom DSTs might, and need to work. + drop(y); +} + #[test] fn test_from_owned() { let foo = 123; diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index 05bfeccbda132..c0d684fbb4573 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -846,8 +846,7 @@ impl Arc { let offset = data_offset(ptr); // Reverse the offset to find the original ArcInner. - let fake_ptr = ptr as *mut ArcInner; - let arc_ptr = set_data_ptr(fake_ptr, (ptr as *mut u8).offset(-offset)); + let arc_ptr = (ptr as *mut ArcInner).set_ptr_value((ptr as *mut u8).offset(-offset)); Self::from_ptr(arc_ptr) } @@ -888,7 +887,7 @@ impl Arc { match this.inner().weak.compare_exchange_weak(cur, cur + 1, Acquire, Relaxed) { Ok(_) => { // Make sure we do not create a dangling Weak - debug_assert!(!is_dangling(this.ptr)); + debug_assert!(!is_dangling(this.ptr.as_ptr())); return Weak { ptr: this.ptr }; } Err(old) => cur = old, @@ -1131,7 +1130,7 @@ impl Arc { Self::allocate_for_layout( Layout::for_value(&*ptr), |layout| Global.allocate(layout), - |mem| set_data_ptr(ptr as *mut T, mem) as *mut ArcInner, + |mem| (ptr as *mut ArcInner).set_ptr_value(mem) as *mut ArcInner, ) } } @@ -1170,20 +1169,7 @@ impl Arc<[T]> { ) } } -} - -/// Sets the data pointer of a `?Sized` raw pointer. -/// -/// For a slice/trait object, this sets the `data` field and leaves the rest -/// unchanged. For a sized raw pointer, this simply sets the pointer. -unsafe fn set_data_ptr(mut ptr: *mut T, data: *mut U) -> *mut T { - unsafe { - ptr::write(&mut ptr as *mut _ as *mut *mut u8, data as *mut u8); - } - ptr -} -impl Arc<[T]> { /// Copy elements from slice into newly allocated Arc<\[T\]> /// /// Unsafe because the caller must either take ownership or bind `T: Copy`. @@ -1653,7 +1639,7 @@ struct WeakInner<'a> { strong: &'a atomic::AtomicUsize, } -impl Weak { +impl Weak { /// Returns a raw pointer to the object `T` pointed to by this `Weak`. /// /// The pointer is valid only if there are some strong references. The pointer may be dangling, @@ -1683,15 +1669,15 @@ impl Weak { pub fn as_ptr(&self) -> *const T { let ptr: *mut ArcInner = NonNull::as_ptr(self.ptr); - // SAFETY: we must offset the pointer manually, and said pointer may be - // a dangling weak (usize::MAX) if T is sized. data_offset is safe to call, - // because we know that a pointer to unsized T was derived from a real - // unsized T, as dangling weaks are only created for sized T. wrapping_offset - // is used so that we can use the same code path for the non-dangling - // unsized case and the potentially dangling sized case. - unsafe { - let offset = data_offset(ptr as *mut T); - set_data_ptr(ptr as *mut T, (ptr as *mut u8).wrapping_offset(offset)) + if is_dangling(ptr) { + // If the pointer is dangling, we return the sentinel directly. This cannot be + // a valid payload address, as the payload is at least as aligned as ArcInner (usize). + ptr as *const T + } else { + // SAFETY: if is_dangling returns false, then the pointer is dereferencable. + // The payload may be dropped at this point, and we have to maintain provenance, + // so use raw pointer manipulation. + unsafe { &raw mut (*ptr).data } } } @@ -1773,18 +1759,22 @@ impl Weak { /// [`forget`]: std::mem::forget #[stable(feature = "weak_into_raw", since = "1.45.0")] pub unsafe fn from_raw(ptr: *const T) -> Self { - // SAFETY: data_offset is safe to call, because this pointer originates from a Weak. // See Weak::as_ptr for context on how the input pointer is derived. - let offset = unsafe { data_offset(ptr) }; - // Reverse the offset to find the original ArcInner. - // SAFETY: we use wrapping_offset here because the pointer may be dangling (but only if T: Sized) - let ptr = unsafe { - set_data_ptr(ptr as *mut ArcInner, (ptr as *mut u8).wrapping_offset(-offset)) + let ptr = if is_dangling(ptr as *mut T) { + // This is a dangling Weak. + ptr as *mut ArcInner + } else { + // Otherwise, we're guaranteed the pointer came from a nondangling Weak. + // SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T. + let offset = unsafe { data_offset(ptr) }; + // Thus, we reverse the offset to get the whole RcBox. + // SAFETY: the pointer originated from a Weak, so this offset is safe. + unsafe { (ptr as *mut ArcInner).set_ptr_value((ptr as *mut u8).offset(-offset)) } }; // SAFETY: we now have recovered the original Weak pointer, so can create the Weak. - unsafe { Weak { ptr: NonNull::new_unchecked(ptr) } } + Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } } } } @@ -1889,7 +1879,7 @@ impl Weak { /// (i.e., when this `Weak` was created by `Weak::new`). #[inline] fn inner(&self) -> Option> { - if is_dangling(self.ptr) { + if is_dangling(self.ptr.as_ptr()) { None } else { // We are careful to *not* create a reference covering the "data" field, as @@ -2469,21 +2459,19 @@ impl AsRef for Arc { #[stable(feature = "pin", since = "1.33.0")] impl Unpin for Arc {} -/// Get the offset within an `ArcInner` for -/// a payload of type described by a pointer. +/// Get the offset within an `ArcInner` for the payload behind a pointer. /// /// # Safety /// -/// This has the same safety requirements as `align_of_val_raw`. In effect: -/// -/// - This function is safe for any argument if `T` is sized, and -/// - if `T` is unsized, the pointer must have appropriate pointer metadata -/// acquired from the real instance that you are getting this offset for. +/// The pointer must point to (and have valid metadata for) a previously +/// valid instance of T, but the T is allowed to be dropped. unsafe fn data_offset(ptr: *const T) -> isize { - // Align the unsized value to the end of the `ArcInner`. - // Because it is `?Sized`, it will always be the last field in memory. - // Note: This is a detail of the current implementation of the compiler, - // and is not a guaranteed language detail. Do not rely on it outside of std. + // Align the unsized value to the end of the ArcInner. + // Because RcBox is repr(C), it will always be the last field in memory. + // SAFETY: since the only unsized types possible are slices, trait objects, + // and extern types, the input safety requirement is currently enough to + // satisfy the requirements of align_of_val_raw; this is an implementation + // detail of the language that may not be relied upon outside of std. unsafe { data_offset_align(align_of_val_raw(ptr)) } } diff --git a/library/alloc/src/sync/tests.rs b/library/alloc/src/sync/tests.rs index 5067af1d4ff68..4ccb32fbbf63d 100644 --- a/library/alloc/src/sync/tests.rs +++ b/library/alloc/src/sync/tests.rs @@ -158,6 +158,30 @@ fn into_from_weak_raw() { } } +#[test] +fn test_into_from_weak_raw_unsized() { + use std::fmt::Display; + use std::string::ToString; + + let arc: Arc = Arc::from("foo"); + let weak: Weak = Arc::downgrade(&arc); + + let ptr = Weak::into_raw(weak.clone()); + let weak2 = unsafe { Weak::from_raw(ptr) }; + + assert_eq!(unsafe { &*ptr }, "foo"); + assert!(weak.ptr_eq(&weak2)); + + let arc: Arc = Arc::new(123); + let weak: Weak = Arc::downgrade(&arc); + + let ptr = Weak::into_raw(weak.clone()); + let weak2 = unsafe { Weak::from_raw(ptr) }; + + assert_eq!(unsafe { &*ptr }.to_string(), "123"); + assert!(weak.ptr_eq(&weak2)); +} + #[test] fn test_cowarc_clone_make_mut() { let mut cow0 = Arc::new(75); @@ -329,6 +353,23 @@ fn test_unsized() { assert!(y.upgrade().is_none()); } +#[test] +fn test_maybe_thin_unsized() { + // If/when custom thin DSTs exist, this test should be updated to use one + use std::ffi::{CStr, CString}; + + let x: Arc = Arc::from(CString::new("swordfish").unwrap().into_boxed_c_str()); + assert_eq!(format!("{:?}", x), "\"swordfish\""); + let y: Weak = Arc::downgrade(&x); + drop(x); + + // At this point, the weak points to a dropped DST + assert!(y.upgrade().is_none()); + // But we still need to be able to get the alloc layout to drop. + // CStr has no drop glue, but custom DSTs might, and need to work. + drop(y); +} + #[test] fn test_from_owned() { let foo = 123;