Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-stabilize Weak::as_ptr and friends for unsized T #80764

Merged
merged 8 commits into from
Jan 16, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/alloc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
#![feature(receiver_trait)]
#![cfg_attr(bootstrap, feature(min_const_generics))]
#![feature(min_specialization)]
#![feature(set_ptr_value)]
#![feature(slice_ptr_get)]
#![feature(slice_ptr_len)]
#![feature(staged_api)]
Expand Down
85 changes: 36 additions & 49 deletions library/alloc/src/rc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -827,8 +827,8 @@ impl<T: ?Sized> Rc<T> {
let offset = unsafe { data_offset(ptr) };

// Reverse the offset to find the original RcBox.
let fake_ptr = ptr as *mut RcBox<T>;
let rc_ptr = unsafe { set_data_ptr(fake_ptr, (ptr as *mut u8).offset(-offset)) };
let rc_ptr =
unsafe { (ptr as *mut RcBox<T>).set_ptr_value((ptr as *mut u8).offset(-offset)) };

unsafe { Self::from_ptr(rc_ptr) }
}
Expand All @@ -848,7 +848,7 @@ impl<T: ?Sized> Rc<T> {
pub fn downgrade(this: &Self) -> Weak<T> {
this.inner().inc_weak();
// Make sure we do not create a dangling Weak
debug_assert!(!is_dangling(this.ptr));
debug_assert!(!is_dangling(this.ptr.as_ptr()));
Weak { ptr: this.ptr }
}

Expand Down Expand Up @@ -1154,7 +1154,7 @@ impl<T: ?Sized> Rc<T> {
Self::allocate_for_layout(
Layout::for_value(&*ptr),
|layout| Global.allocate(layout),
|mem| set_data_ptr(ptr as *mut T, mem) as *mut RcBox<T>,
|mem| (ptr as *mut RcBox<T>).set_ptr_value(mem),
)
}
}
Expand Down Expand Up @@ -1193,20 +1193,7 @@ impl<T> Rc<[T]> {
)
}
}
}

/// Sets the data pointer of a `?Sized` raw pointer.
///
/// For a slice/trait object, this sets the `data` field and leaves the rest
/// unchanged. For a sized raw pointer, this simply sets the pointer.
unsafe fn set_data_ptr<T: ?Sized, U>(mut ptr: *mut T, data: *mut U) -> *mut T {
unsafe {
ptr::write(&mut ptr as *mut _ as *mut *mut u8, data as *mut u8);
}
ptr
}

impl<T> Rc<[T]> {
/// Copy elements from slice into newly allocated Rc<\[T\]>
///
/// Unsafe because the caller must either take ownership or bind `T: Copy`
Expand Down Expand Up @@ -1850,8 +1837,8 @@ impl<T> Weak<T> {
}
}

pub(crate) fn is_dangling<T: ?Sized>(ptr: NonNull<T>) -> bool {
let address = ptr.as_ptr() as *mut () as usize;
pub(crate) fn is_dangling<T: ?Sized>(ptr: *mut T) -> bool {
let address = ptr as *mut () as usize;
address == usize::MAX
}

Expand All @@ -1862,7 +1849,7 @@ struct WeakInner<'a> {
strong: &'a Cell<usize>,
}

impl<T> Weak<T> {
impl<T: ?Sized> Weak<T> {
/// Returns a raw pointer to the object `T` pointed to by this `Weak<T>`.
///
/// The pointer is valid only if there are some strong references. The pointer may be dangling,
Expand Down Expand Up @@ -1892,15 +1879,15 @@ impl<T> Weak<T> {
pub fn as_ptr(&self) -> *const T {
let ptr: *mut RcBox<T> = NonNull::as_ptr(self.ptr);

// SAFETY: we must offset the pointer manually, and said pointer may be
// a dangling weak (usize::MAX) if T is sized. data_offset is safe to call,
// because we know that a pointer to unsized T was derived from a real
// unsized T, as dangling weaks are only created for sized T. wrapping_offset
// is used so that we can use the same code path for the non-dangling
// unsized case and the potentially dangling sized case.
unsafe {
let offset = data_offset(ptr as *mut T);
set_data_ptr(ptr as *mut T, (ptr as *mut u8).wrapping_offset(offset))
RalfJung marked this conversation as resolved.
Show resolved Hide resolved
if is_dangling(ptr) {
// If the pointer is dangling, we return the sentinel directly. This cannot be
// a valid payload address, as it is at least as aligned as RcBox (usize).
CAD97 marked this conversation as resolved.
Show resolved Hide resolved
ptr as *const T
} else {
// SAFETY: if is_dangling returns false, then the pointer is dereferencable.
// The payload may be dropped at this point, and we have to maintain provenance,
// so use raw pointer manipulation.
unsafe { &raw const (*ptr).value }
}
}

Expand Down Expand Up @@ -1982,22 +1969,24 @@ impl<T> Weak<T> {
/// [`new`]: Weak::new
#[stable(feature = "weak_into_raw", since = "1.45.0")]
pub unsafe fn from_raw(ptr: *const T) -> Self {
// SAFETY: data_offset is safe to call, because this pointer originates from a Weak.
// See Weak::as_ptr for context on how the input pointer is derived.
let offset = unsafe { data_offset(ptr) };

// Reverse the offset to find the original RcBox.
// SAFETY: we use wrapping_offset here because the pointer may be dangling (but only if T: Sized).
let ptr = unsafe {
set_data_ptr(ptr as *mut RcBox<T>, (ptr as *mut u8).wrapping_offset(-offset))
let ptr = if is_dangling(ptr as *mut T) {
// This is a dangling Weak.
ptr as *mut RcBox<T>
} else {
// Otherwise, we're guaranteed the pointer came from a nondangling Weak.
// SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T.
let offset = unsafe { data_offset(ptr) };
CAD97 marked this conversation as resolved.
Show resolved Hide resolved
// Thus, we reverse the offset to get the whole RcBox.
// SAFETY: the pointer originated from a Weak, so this offset is safe.
unsafe { (ptr as *mut RcBox<T>).set_ptr_value((ptr as *mut u8).offset(-offset)) }
};

// SAFETY: we now have recovered the original Weak pointer, so can create the Weak.
Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } }
}
}

impl<T: ?Sized> Weak<T> {
/// Attempts to upgrade the `Weak` pointer to an [`Rc`], delaying
/// dropping of the inner value if successful.
///
Expand Down Expand Up @@ -2060,7 +2049,7 @@ impl<T: ?Sized> Weak<T> {
/// (i.e., when this `Weak` was created by `Weak::new`).
#[inline]
fn inner(&self) -> Option<WeakInner<'_>> {
if is_dangling(self.ptr) {
if is_dangling(self.ptr.as_ptr()) {
None
} else {
// We are careful to *not* create a reference covering the "data" field, as
Expand Down Expand Up @@ -2315,21 +2304,19 @@ impl<T: ?Sized> AsRef<T> for Rc<T> {
#[stable(feature = "pin", since = "1.33.0")]
impl<T: ?Sized> Unpin for Rc<T> {}

/// Get the offset within an `RcBox` for
/// a payload of type described by a pointer.
/// Get the offset within an `RcBox` for the payload behind a pointer.
///
/// # Safety
///
/// This has the same safety requirements as `align_of_val_raw`. In effect:
///
/// - This function is safe for any argument if `T` is sized, and
/// - if `T` is unsized, the pointer must have appropriate pointer metadata
/// acquired from the real instance that you are getting this offset for.
/// The pointer must point to (and have valid metadata for) a previously
/// valid instance of T, but the T is allowed to be dropped.
unsafe fn data_offset<T: ?Sized>(ptr: *const T) -> isize {
// Align the unsized value to the end of the `RcBox`.
// Because it is ?Sized, it will always be the last field in memory.
// Note: This is a detail of the current implementation of the compiler,
// and is not a guaranteed language detail. Do not rely on it outside of std.
// Align the unsized value to the end of the RcBox.
// Because RcBox is repr(C), it will always be the last field in memory.
// SAFETY: since the only unsized types possible are slices, trait objects,
// and extern types, the input safety requirement is currently enough to
// satisfy the requirements of align_of_val_raw; this is an implementation
// detail of the language that may not be relied upon outside of std.
unsafe { data_offset_align(align_of_val_raw(ptr)) }
}

Expand Down
41 changes: 41 additions & 0 deletions library/alloc/src/rc/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,30 @@ fn into_from_weak_raw() {
}
}

#[test]
fn test_into_from_weak_raw_unsized() {
use std::fmt::Display;
use std::string::ToString;

let arc: Rc<str> = Rc::from("foo");
let weak: Weak<str> = Rc::downgrade(&arc);

let ptr = Weak::into_raw(weak.clone());
let weak2 = unsafe { Weak::from_raw(ptr) };

assert_eq!(unsafe { &*ptr }, "foo");
assert!(weak.ptr_eq(&weak2));

let arc: Rc<dyn Display> = Rc::new(123);
let weak: Weak<dyn Display> = Rc::downgrade(&arc);

let ptr = Weak::into_raw(weak.clone());
let weak2 = unsafe { Weak::from_raw(ptr) };

assert_eq!(unsafe { &*ptr }.to_string(), "123");
assert!(weak.ptr_eq(&weak2));
}

#[test]
fn get_mut() {
let mut x = Rc::new(3);
Expand Down Expand Up @@ -294,6 +318,23 @@ fn test_unsized() {
assert_eq!(foo, foo.clone());
}

#[test]
fn test_maybe_thin_unsized() {
// If/when custom thin DSTs exist, this test should be updated to use one
use std::ffi::{CStr, CString};

let x: Rc<CStr> = Rc::from(CString::new("swordfish").unwrap().into_boxed_c_str());
assert_eq!(format!("{:?}", x), "\"swordfish\"");
let y: Weak<CStr> = Rc::downgrade(&x);
drop(x);

// At this point, the weak points to a dropped DST
assert!(y.upgrade().is_none());
// But we still need to be able to get the alloc layout to drop.
// CStr has no drop glue, but custom DSTs might, and need to work.
drop(y);
}

#[test]
fn test_from_owned() {
let foo = 123;
Expand Down
80 changes: 34 additions & 46 deletions library/alloc/src/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -844,8 +844,7 @@ impl<T: ?Sized> Arc<T> {
let offset = data_offset(ptr);

// Reverse the offset to find the original ArcInner.
let fake_ptr = ptr as *mut ArcInner<T>;
let arc_ptr = set_data_ptr(fake_ptr, (ptr as *mut u8).offset(-offset));
let arc_ptr = (ptr as *mut ArcInner<T>).set_ptr_value((ptr as *mut u8).offset(-offset));

Self::from_ptr(arc_ptr)
}
Expand Down Expand Up @@ -886,7 +885,7 @@ impl<T: ?Sized> Arc<T> {
match this.inner().weak.compare_exchange_weak(cur, cur + 1, Acquire, Relaxed) {
Ok(_) => {
// Make sure we do not create a dangling Weak
debug_assert!(!is_dangling(this.ptr));
debug_assert!(!is_dangling(this.ptr.as_ptr()));
return Weak { ptr: this.ptr };
}
Err(old) => cur = old,
Expand Down Expand Up @@ -1129,7 +1128,7 @@ impl<T: ?Sized> Arc<T> {
Self::allocate_for_layout(
Layout::for_value(&*ptr),
|layout| Global.allocate(layout),
|mem| set_data_ptr(ptr as *mut T, mem) as *mut ArcInner<T>,
|mem| (ptr as *mut ArcInner<T>).set_ptr_value(mem) as *mut ArcInner<T>,
)
}
}
Expand Down Expand Up @@ -1168,20 +1167,7 @@ impl<T> Arc<[T]> {
)
}
}
}

/// Sets the data pointer of a `?Sized` raw pointer.
///
/// For a slice/trait object, this sets the `data` field and leaves the rest
/// unchanged. For a sized raw pointer, this simply sets the pointer.
unsafe fn set_data_ptr<T: ?Sized, U>(mut ptr: *mut T, data: *mut U) -> *mut T {
unsafe {
ptr::write(&mut ptr as *mut _ as *mut *mut u8, data as *mut u8);
}
ptr
}

impl<T> Arc<[T]> {
/// Copy elements from slice into newly allocated Arc<\[T\]>
///
/// Unsafe because the caller must either take ownership or bind `T: Copy`.
Expand Down Expand Up @@ -1648,7 +1634,7 @@ struct WeakInner<'a> {
strong: &'a atomic::AtomicUsize,
}

impl<T> Weak<T> {
impl<T: ?Sized> Weak<T> {
/// Returns a raw pointer to the object `T` pointed to by this `Weak<T>`.
///
/// The pointer is valid only if there are some strong references. The pointer may be dangling,
Expand Down Expand Up @@ -1678,15 +1664,15 @@ impl<T> Weak<T> {
pub fn as_ptr(&self) -> *const T {
let ptr: *mut ArcInner<T> = NonNull::as_ptr(self.ptr);

// SAFETY: we must offset the pointer manually, and said pointer may be
// a dangling weak (usize::MAX) if T is sized. data_offset is safe to call,
// because we know that a pointer to unsized T was derived from a real
// unsized T, as dangling weaks are only created for sized T. wrapping_offset
// is used so that we can use the same code path for the non-dangling
// unsized case and the potentially dangling sized case.
unsafe {
let offset = data_offset(ptr as *mut T);
set_data_ptr(ptr as *mut T, (ptr as *mut u8).wrapping_offset(offset))
if is_dangling(ptr) {
// If the pointer is dangling, we return the sentinel directly. This cannot be
// a valid payload address, as it is at least as aligned as ArcInner (usize).
CAD97 marked this conversation as resolved.
Show resolved Hide resolved
ptr as *const T
} else {
// SAFETY: if is_dangling returns false, then the pointer is dereferencable.
// The payload may be dropped at this point, and we have to maintain provenance,
// so use raw pointer manipulation.
unsafe { &raw mut (*ptr).data }
}
}

Expand Down Expand Up @@ -1768,18 +1754,22 @@ impl<T> Weak<T> {
/// [`forget`]: std::mem::forget
#[stable(feature = "weak_into_raw", since = "1.45.0")]
pub unsafe fn from_raw(ptr: *const T) -> Self {
// SAFETY: data_offset is safe to call, because this pointer originates from a Weak.
// See Weak::as_ptr for context on how the input pointer is derived.
let offset = unsafe { data_offset(ptr) };

// Reverse the offset to find the original ArcInner.
// SAFETY: we use wrapping_offset here because the pointer may be dangling (but only if T: Sized)
let ptr = unsafe {
set_data_ptr(ptr as *mut ArcInner<T>, (ptr as *mut u8).wrapping_offset(-offset))
let ptr = if is_dangling(ptr as *mut T) {
// This is a dangling Weak.
ptr as *mut ArcInner<T>
} else {
// Otherwise, we're guaranteed the pointer came from a nondangling Weak.
// SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T.
let offset = unsafe { data_offset(ptr) };
// Thus, we reverse the offset to get the whole RcBox.
// SAFETY: the pointer originated from a Weak, so this offset is safe.
unsafe { (ptr as *mut ArcInner<T>).set_ptr_value((ptr as *mut u8).offset(-offset)) }
};

// SAFETY: we now have recovered the original Weak pointer, so can create the Weak.
unsafe { Weak { ptr: NonNull::new_unchecked(ptr) } }
Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } }
}
}

Expand Down Expand Up @@ -1884,7 +1874,7 @@ impl<T: ?Sized> Weak<T> {
/// (i.e., when this `Weak` was created by `Weak::new`).
#[inline]
fn inner(&self) -> Option<WeakInner<'_>> {
if is_dangling(self.ptr) {
if is_dangling(self.ptr.as_ptr()) {
None
} else {
// We are careful to *not* create a reference covering the "data" field, as
Expand Down Expand Up @@ -2464,21 +2454,19 @@ impl<T: ?Sized> AsRef<T> for Arc<T> {
#[stable(feature = "pin", since = "1.33.0")]
impl<T: ?Sized> Unpin for Arc<T> {}

/// Get the offset within an `ArcInner` for
/// a payload of type described by a pointer.
/// Get the offset within an `ArcInner` for the payload behind a pointer.
///
/// # Safety
///
/// This has the same safety requirements as `align_of_val_raw`. In effect:
///
/// - This function is safe for any argument if `T` is sized, and
/// - if `T` is unsized, the pointer must have appropriate pointer metadata
/// acquired from the real instance that you are getting this offset for.
/// The pointer must point to (and have valid metadata for) a previously
/// valid instance of T, but the T is allowed to be dropped.
unsafe fn data_offset<T: ?Sized>(ptr: *const T) -> isize {
// Align the unsized value to the end of the `ArcInner`.
// Because it is `?Sized`, it will always be the last field in memory.
// Note: This is a detail of the current implementation of the compiler,
// and is not a guaranteed language detail. Do not rely on it outside of std.
// Align the unsized value to the end of the ArcInner.
// Because RcBox is repr(C), it will always be the last field in memory.
// SAFETY: since the only unsized types possible are slices, trait objects,
// and extern types, the input safety requirement is currently enough to
// satisfy the requirements of align_of_val_raw; this is an implementation
// detail of the language that may not be relied upon outside of std.
unsafe { data_offset_align(align_of_val_raw(ptr)) }
}

Expand Down
Loading