// cap_primitives/rustix/linux/fs/open_impl.rs
//! Linux 5.6 and later have a syscall `openat2`, with flags that allow it to
//! enforce the sandboxing property we want. See the [LWN article] for an
//! overview and the [`openat2` documentation] for details.
//!
//! [LWN article]: https://lwn.net/Articles/796868/
//! [`openat2` documentation]: https://man7.org/linux/man-pages/man2/openat2.2.html
//!
//! On older Linux, fall back to `manually::open`.
#[cfg(racy_asserts)]
use crate::fs::is_same_file;
use crate::fs::{manually, OpenOptions};
use std::path::Path;
use std::{fs, io};
#[cfg(target_os = "linux")]
use {
super::super::super::fs::compute_oflags,
crate::fs::errors,
io_lifetimes::FromFd,
rustix::fs::{openat2, Mode, OFlags, RawMode, ResolveFlags},
rustix::path::Arg,
std::sync::atomic::AtomicBool,
std::sync::atomic::Ordering::Relaxed,
};
/// Open `path` relative to `start`, preferring the `openat2` fast path.
///
/// On Linux, `open_beneath` is tried first. Its outcome is returned as-is
/// unless it is an `ENOSYS` error, in which case (and on every other target)
/// the open is delegated to `manually::open`.
pub(crate) fn open_impl(
    start: &fs::File,
    path: &Path,
    options: &OpenOptions,
) -> io::Result<fs::File> {
    // Only regular Linux takes the `openat2` route. On Android, the
    // [seccomp policy] prevents us from even probing for `openat2`, so the
    // attempt is compiled out there.
    //
    // [seccomp policy]: https://android-developers.googleblog.com/2017/07/seccomp-filter-in-android-o.html
    #[cfg(target_os = "linux")]
    {
        let attempt = open_beneath(start, path, options);

        // `ENOSYS` is the one error that means "use the fallback"; every
        // other outcome, success or failure, is final.
        let needs_fallback = matches!(
            &attempt,
            Err(err) if err.raw_os_error() == Some(rustix::io::Errno::NOSYS.raw_os_error())
        );
        if !needs_fallback {
            return attempt;
        }
    }

    manually::open(start, path, options)
}
/// Attempt a sandboxed open through the `openat2` system call, using
/// `ResolveFlags::BENEATH` and `ResolveFlags::NO_MAGICLINKS`.
///
/// # Errors
///
/// Returns `ENOSYS` whenever the caller should use a fallback instead: when
/// `openat2` reported `ENOSYS` (on this call or an earlier one, which is
/// remembered in a static flag), when it reported `EPERM`, or when every
/// attempt ended in `EAGAIN`. An `EXDEV` failure is reported as
/// `errors::escape_attempt()`. Any other failure, including one from
/// `compute_oflags`, is passed through to the caller.
#[cfg(target_os = "linux")]
pub(crate) fn open_beneath(
    start: &fs::File,
    path: &Path,
    options: &OpenOptions,
) -> io::Result<fs::File> {
    // How many times to call `openat2` while it keeps answering `EAGAIN`.
    // The value is an arbitrarily chosen guess.
    const MAX_ATTEMPTS: u32 = 4;

    // Set once `openat2` has answered `ENOSYS`, so later calls skip it.
    static UNSUPPORTED: AtomicBool = AtomicBool::new(false);

    if UNSUPPORTED.load(Relaxed) {
        // `openat2` is permanently unavailable.
        return Err(rustix::io::Errno::NOSYS.into());
    }

    let oflags = compute_oflags(options)?;

    // `TMPFILE` may be represented with multiple flags, all of which must be
    // set, so it gets its own `contains` check rather than sharing one with
    // `CREATE`.
    let wants_mode = oflags.contains(OFlags::CREATE) || oflags.contains(OFlags::TMPFILE);
    let mode = if wants_mode {
        Mode::from_bits((options.ext.mode & 0o7777) as RawMode).unwrap()
    } else {
        Mode::empty()
    };

    let resolve = ResolveFlags::BENEATH | ResolveFlags::NO_MAGICLINKS;

    // `openat2` needs a `&CStr` internally, so do the conversion once here
    // rather than on each pass through the retry loop.
    let outcome = path.into_with_c_str(|c_path| {
        // `openat2` fails with `EAGAIN` if a rename happens anywhere on the
        // host while it's running. Retry a few times, but keep the count
        // bounded since there's no limit on how often that can happen.
        for _ in 0..MAX_ATTEMPTS {
            match openat2(start, c_path, oflags, mode, resolve) {
                Ok(fd) => {
                    let opened = fs::File::from_into_fd(fd);

                    #[cfg(racy_asserts)]
                    check_open(start, path, options, &opened);

                    return Ok(opened);
                }

                // A rename or similar happened. Try again.
                Err(rustix::io::Errno::AGAIN) => continue,

                // Some `seccomp` sandboxes use `EPERM` to indicate that
                // `openat2` is unimplemented:
                // <https://github.com/systemd/systemd/blob/e2357b1c8a87b610066b8b2a59517bcfb20b832e/src/shared/seccomp-util.c#L2066>
                //
                // `EPERM` can also come from a failed `O_NOATIME` or from a
                // file seal, and telling these apart is complex, so stop
                // retrying and let the caller use the fallback.
                Err(rustix::io::Errno::PERM) => break,

                // `ENOSYS` means `openat2` is permanently unavailable;
                // remember that and stop retrying.
                Err(rustix::io::Errno::NOSYS) => {
                    UNSUPPORTED.store(true, Relaxed);
                    break;
                }

                Err(other) => return Err(other),
            }
        }

        // Reached on `break` or when the attempts ran out: ask for the
        // fallback.
        Err(rustix::io::Errno::NOSYS)
    });

    outcome.map_err(|errno| {
        if errno == rustix::io::Errno::XDEV {
            errors::escape_attempt()
        } else {
            errno.into()
        }
    })
}
/// Cross-check a file opened via `openat2` against `manually::open`.
///
/// Reopens `path` under `start` using a copy of `options` with `create`,
/// `create_new` and `truncate` all switched off, then compares the result
/// with `file` using `is_same_file`.
///
/// # Panics
///
/// Panics if the manual open fails, if `is_same_file` returns an error, or
/// if it reports that the two files differ.
#[cfg(racy_asserts)]
fn check_open(start: &fs::File, path: &Path, options: &OpenOptions, file: &fs::File) {
    // Work on a private copy so the caller's options stay untouched.
    let mut reopen_options = options.clone();
    reopen_options
        .create(false)
        .create_new(false)
        .truncate(false);

    let reference = manually::open(start, path, &reopen_options)
        .expect("manually::open failed when open_openat2 succeeded");

    let same_inode = is_same_file(file, &reference).unwrap();
    assert!(
        same_inode,
        "manually::open should open the same inode as open_openat2"
    );
}