Skip to content

Commit d160774

Browse files
committed
Add types for inspecting control messages in Unix socket ancillary data
1 parent 7ac9a3a commit d160774

File tree

3 files changed

+434
-0
lines changed

3 files changed

+434
-0
lines changed

library/std/src/os/unix/net/cmsg.rs

+300
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
use crate::ffi::c_int;
2+
use crate::mem::{size_of, MaybeUninit};
3+
4+
// Wrapper around `libc::CMSG_LEN` to safely decouple from OS-specific ints.
5+
//
6+
// https://github.com/rust-lang/libc/issues/3240
7+
#[inline]
8+
const fn CMSG_LEN(len: usize) -> usize {
9+
let c_len = len & 0x7FFFFFFF;
10+
let padding = (unsafe { libc::CMSG_LEN(c_len as _) } as usize) - c_len;
11+
len + padding
12+
}
13+
14+
// Wrapper around `libc::CMSG_SPACE` to safely decouple from OS-specific ints.
15+
//
16+
// https://github.com/rust-lang/libc/issues/3240
17+
#[inline]
18+
const fn CMSG_SPACE(len: usize) -> usize {
19+
let c_len = len & 0x7FFFFFFF;
20+
let padding = (unsafe { libc::CMSG_SPACE(c_len as _) } as usize) - c_len;
21+
len + padding
22+
}
23+
24+
/// A socket control message with borrowed data.
25+
///
26+
/// This type is semantically equivalent to POSIX `struct cmsghdr`, but is
27+
/// not guaranteed to have the same internal representation.
28+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
29+
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
30+
pub struct ControlMessage<'a> {
31+
cmsg_len: usize,
32+
cmsg_level: c_int,
33+
cmsg_type: c_int,
34+
data: &'a [u8],
35+
}
36+
37+
impl<'a> ControlMessage<'a> {
38+
/// Creates a `ControlMessage` with the given level, type, and data.
39+
///
40+
/// The semantics of a control message "level" and "type" are OS-specific,
41+
/// but generally the level is a sort of general category of socket and the
42+
/// type identifies a specific control message data layout.
43+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
44+
pub fn new(cmsg_level: c_int, cmsg_type: c_int, data: &'a [u8]) -> ControlMessage<'a> {
45+
let cmsg_len = CMSG_LEN(data.len());
46+
ControlMessage { cmsg_len, cmsg_level, cmsg_type, data }
47+
}
48+
}
49+
50+
impl ControlMessage<'_> {
51+
/// Returns the control message's level, an OS-specific value.
52+
///
53+
/// POSIX describes this field as the "originating protocol".
54+
#[inline]
55+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
56+
pub fn cmsg_level(&self) -> c_int {
57+
self.cmsg_level
58+
}
59+
60+
/// Returns the control message's type, an OS-specific value.
61+
///
62+
/// POSIX describes this field as the "protocol-specific type".
63+
#[inline]
64+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
65+
pub fn cmsg_type(&self) -> c_int {
66+
self.cmsg_type
67+
}
68+
69+
/// Returns the control message's type-specific data.
70+
///
71+
/// The returned slice is equivalent to the result of C macro `CMSG_DATA()`.
72+
/// Control message data is not guaranteed to be aligned, so code that needs
73+
/// to inspect it should first copy the data to a properly-aligned location.
74+
#[inline]
75+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
76+
pub fn data(&self) -> &[u8] {
77+
self.data
78+
}
79+
80+
/// Returns the size (in bytes) of the encoded control message, including
81+
/// post-data padding.
82+
///
83+
/// This is the same value returned by the C macro `CMSG_SPACE()`.
84+
#[inline]
85+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
86+
pub fn cmsg_space(&self) -> usize {
87+
CMSG_SPACE(self.data.len())
88+
}
89+
90+
/// Returns `true` if the control message data is truncated.
91+
///
92+
/// The kernel may truncate a control message if its data is too large to
93+
/// fit into the capacity of the userspace buffer.
94+
///
95+
/// The semantics of truncated control messages are OS- and type-specific.
96+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
97+
pub fn truncated(&self) -> bool {
98+
self.cmsg_len > CMSG_LEN(self.data.len())
99+
}
100+
101+
/// Encodes this control message into the provided byte buffer, returning
102+
/// the initialized bytes.
103+
///
104+
/// # Panics
105+
///
106+
/// Panics if `dst.len() != self.cmsg_space()`
107+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
108+
pub fn copy_to_slice<'a>(&self, dst: &'a mut [MaybeUninit<u8>]) -> &'a [u8] {
109+
assert_eq!(dst.len(), self.cmsg_space());
110+
111+
// SAFETY: C type `struct cmsghdr` is safe to zero-initialize.
112+
let mut hdr: libc::cmsghdr = unsafe { core::mem::zeroed() };
113+
114+
// Write `cmsg.cmsg_len` instead of `CMSG_LEN(data.len())` so that
115+
// truncated control messages are preserved as-is.
116+
hdr.cmsg_len = self.cmsg_len as _;
117+
hdr.cmsg_level = self.cmsg_level;
118+
hdr.cmsg_type = self.cmsg_type;
119+
120+
#[inline]
121+
unsafe fn sized_to_slice<T: Sized>(t: &T) -> &[u8] {
122+
let t_ptr = (t as *const T).cast::<u8>();
123+
crate::slice::from_raw_parts(t_ptr, size_of::<T>())
124+
}
125+
126+
let (hdr_dst, after_hdr) = dst.split_at_mut(size_of::<libc::cmsghdr>());
127+
let (data_dst, padding_dst) = after_hdr.split_at_mut(self.data.len());
128+
129+
// SAFETY: C type `struct cmsghdr` is safe to bitwise-copy from.
130+
MaybeUninit::write_slice(hdr_dst, unsafe { sized_to_slice(&hdr) });
131+
132+
// See comment in `ControlMessagesIter` regarding `CMSG_DATA()`.
133+
MaybeUninit::write_slice(data_dst, self.data());
134+
135+
if padding_dst.len() > 0 {
136+
for byte in padding_dst.iter_mut() {
137+
byte.write(0);
138+
}
139+
}
140+
141+
// SAFETY: Every byte in `dst` has been initialized.
142+
unsafe { MaybeUninit::slice_assume_init_ref(dst) }
143+
}
144+
}
145+
146+
/// A borrowed reference to a `&[u8]` slice containing control messages.
147+
///
148+
/// Note that this type does not guarantee the control messages are valid, or
149+
/// even well-formed. Code that uses control messages to implement (for example)
150+
/// access control or file descriptor passing should maintain a chain of custody
151+
/// to verify that the `&ControlMessages` came from a trusted source, such as
152+
/// a syscall.
153+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
154+
pub struct ControlMessages {
155+
bytes: [u8],
156+
}
157+
158+
impl ControlMessages {
159+
/// Creates a `ControlMessages` wrapper from a `&[u8]` slice containing
160+
/// encoded control messages.
161+
///
162+
/// This method does not attempt to verify that the provided bytes represent
163+
/// valid control messages.
164+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
165+
pub fn from_bytes(bytes: &[u8]) -> &ControlMessages {
166+
// SAFETY: casting `&[u8]` to `&ControlMessages` is safe because its
167+
// internal representation is `[u8]`.
168+
unsafe { &*(bytes as *const [u8] as *const ControlMessages) }
169+
}
170+
171+
/// Returns a `&[u8]` slice containing encoded control messages.
172+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
173+
pub fn as_bytes(&self) -> &[u8] {
174+
&self.bytes
175+
}
176+
177+
/// Returns `true` if `self.as_bytes()` is an empty slice.
178+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
179+
pub fn is_empty(&self) -> bool {
180+
self.bytes.is_empty()
181+
}
182+
183+
/// Returns an iterator over the control messages.
184+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
185+
pub fn iter(&self) -> ControlMessagesIter<'_> {
186+
ControlMessagesIter { bytes: &self.bytes }
187+
}
188+
}
189+
190+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
191+
impl<'a> IntoIterator for &'a ControlMessages {
192+
type Item = ControlMessage<'a>;
193+
type IntoIter = ControlMessagesIter<'a>;
194+
195+
fn into_iter(self) -> ControlMessagesIter<'a> {
196+
self.iter()
197+
}
198+
}
199+
200+
/// An iterator over the content of a [`ControlMessages`].
201+
///
202+
/// Each control message starts with a header describing its own length. This
203+
/// iterator is safe even if the header lengths are incorrect, but the returned
204+
/// control messages may contain incorrect data.
205+
///
206+
/// Iteration ends when the remaining data is smaller than the size of a single
207+
/// control message header.
208+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
209+
pub struct ControlMessagesIter<'a> {
210+
bytes: &'a [u8],
211+
}
212+
213+
impl<'a> ControlMessagesIter<'a> {
214+
/// Returns a `&[u8]` slice containing any remaining data.
215+
///
216+
/// Even if `next()` returns `None`, this method may return a non-empty
217+
/// slice if the original `ControlMessages` was truncated in the middle
218+
/// of a control message header.
219+
#[inline]
220+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
221+
pub fn into_bytes(self) -> &'a [u8] {
222+
self.bytes
223+
}
224+
}
225+
226+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
227+
impl<'a> Iterator for ControlMessagesIter<'a> {
228+
type Item = ControlMessage<'a>;
229+
230+
fn next(&mut self) -> Option<ControlMessage<'a>> {
231+
const CMSGHDR_SIZE: usize = size_of::<libc::cmsghdr>();
232+
233+
if CMSGHDR_SIZE > self.bytes.len() {
234+
return None;
235+
}
236+
237+
// SAFETY: C type `struct cmsghdr` is safe to bitwise-copy from.
238+
let hdr = unsafe {
239+
let mut hdr = MaybeUninit::<libc::cmsghdr>::uninit();
240+
hdr.as_mut_ptr().cast::<u8>().copy_from(self.bytes.as_ptr(), CMSGHDR_SIZE);
241+
hdr.assume_init()
242+
};
243+
244+
// `cmsg_bytes` contains the full content of the control message,
245+
// which may have been truncated if there was insufficient capacity.
246+
let cmsg_bytes;
247+
let hdr_cmsg_len = hdr.cmsg_len as usize;
248+
if hdr_cmsg_len >= self.bytes.len() {
249+
cmsg_bytes = self.bytes;
250+
} else {
251+
cmsg_bytes = &self.bytes[..hdr_cmsg_len];
252+
}
253+
254+
// `cmsg_data` is the portion of the control message that contains
255+
// type-specific content (file descriptors, etc).
256+
//
257+
// POSIX specifies that a pointer to this data should be obtained with
258+
// macro `CMSG_DATA()`, but its definition is problematic for Rust:
259+
//
260+
// 1. The macro may in principle read fields of `cmsghdr`. To avoid
261+
// unaligned reads this code would call it as `CMSG_DATA(&hdr)`.
262+
// But the resulting pointer would be relative to the stack value
263+
// `hdr`, not the actual message data contained in `cmsg_bytes`.
264+
//
265+
// 2. `CMSG_DATA()` is implemented with `pointer::offset()`, which
266+
// causes undefined behavior if its result is outside the original
267+
// allocated object. The POSIX spec allows control messages to
268+
// have padding between the header and data, in which case
269+
// `CMSG_DATA(&hdr)` is UB.
270+
//
271+
// 3. The control message may have been truncated. We know there's
272+
// at least `CMSGHDR_SIZE` bytes available, but anything past that
273+
// isn't guaranteed. Again, possible UB in the presence of padding.
274+
//
275+
// Therefore, this code obtains `cmsg_data` by assuming it directly
276+
// follows the header (with no padding, and no header field dependency).
277+
// This is true on all target OSes currently supported by Rust.
278+
//
279+
// If in the future support is added for an OS with cmsg data padding,
280+
// then this implementation will cause unit test failures rather than
281+
// risking silent UB.
282+
let cmsg_data = &cmsg_bytes[CMSGHDR_SIZE..];
283+
284+
// `cmsg_space` is the length of the control message plus any padding
285+
// necessary to align the next message.
286+
let cmsg_space = CMSG_SPACE(cmsg_data.len());
287+
if cmsg_space >= self.bytes.len() {
288+
self.bytes = &[];
289+
} else {
290+
self.bytes = &self.bytes[cmsg_space..];
291+
}
292+
293+
Some(ControlMessage {
294+
cmsg_len: hdr_cmsg_len,
295+
cmsg_level: hdr.cmsg_level,
296+
cmsg_type: hdr.cmsg_type,
297+
data: cmsg_data,
298+
})
299+
}
300+
}

library/std/src/os/unix/net/mod.rs

+3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ mod addr;
77
#[doc(cfg(any(target_os = "android", target_os = "linux")))]
88
#[cfg(any(doc, target_os = "android", target_os = "linux"))]
99
mod ancillary;
10+
mod cmsg;
1011
mod datagram;
1112
mod listener;
1213
mod stream;
@@ -18,6 +19,8 @@ pub use self::addr::*;
1819
#[cfg(any(doc, target_os = "android", target_os = "linux"))]
1920
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
2021
pub use self::ancillary::*;
22+
#[unstable(feature = "unix_socket_ancillary_data", issue = "76915")]
23+
pub use self::cmsg::*;
2124
#[stable(feature = "unix_socket", since = "1.10.0")]
2225
pub use self::datagram::*;
2326
#[stable(feature = "unix_socket", since = "1.10.0")]

0 commit comments

Comments
 (0)