cap_primitives/rustix/linux/fs/open_impl.rs
1//! Linux 5.6 and later have a syscall `openat2`, with flags that allow it to
2//! enforce the sandboxing property we want. See the [LWN article] for an
3//! overview and the [`openat2` documentation] for details.
4//!
5//! [LWN article]: https://lwn.net/Articles/796868/
6//! [`openat2` documentation]: https://man7.org/linux/man-pages/man2/openat2.2.html
7//!
8//! On older Linux, fall back to `manually::open`.
9
10#[cfg(racy_asserts)]
11use crate::fs::is_same_file;
12use crate::fs::{manually, OpenOptions};
13use std::path::Path;
14use std::{fs, io};
15#[cfg(target_os = "linux")]
16use {
17 super::super::super::fs::compute_oflags,
18 crate::fs::errors,
19 io_lifetimes::FromFd,
20 rustix::fs::{openat2, Mode, OFlags, RawMode, ResolveFlags},
21 rustix::path::Arg,
22 std::sync::atomic::AtomicBool,
23 std::sync::atomic::Ordering::Relaxed,
24};
25
26/// Call the `openat2` system call, or use a fallback if that's unavailable.
27pub(crate) fn open_impl(
28 start: &fs::File,
29 path: &Path,
30 options: &OpenOptions,
31) -> io::Result<fs::File> {
32 // On regular Linux, attempt to use `openat2` to accelerate sandboxed
33 // lookups. On Android, the [seccomp policy] prevents us from even
34 // detecting whether `openat2` is supported, so don't even try.
35 //
36 // [seccomp policy]: https://android-developers.googleblog.com/2017/07/seccomp-filter-in-android-o.html
37 #[cfg(target_os = "linux")]
38 {
39 let result = open_beneath(start, path, options);
40
41 // If we got anything other than a `ENOSYS` error, that's our result.
42 match result {
43 Err(err) if err.raw_os_error() == Some(rustix::io::Errno::NOSYS.raw_os_error()) => {}
44 Err(err) => return Err(err),
45 Ok(fd) => return Ok(fd),
46 }
47 }
48
49 manually::open(start, path, options)
50}
51
52/// Call the `openat2` system call with `RESOLVE_BENEATH`. If the syscall is
53/// unavailable, mark it so for future calls. If `openat2` is unavailable
54/// either permanently or temporarily, return `ENOSYS`.
55#[cfg(target_os = "linux")]
56pub(crate) fn open_beneath(
57 start: &fs::File,
58 path: &Path,
59 options: &OpenOptions,
60) -> io::Result<fs::File> {
61 static INVALID: AtomicBool = AtomicBool::new(false);
62 if INVALID.load(Relaxed) {
63 // `openat2` is permanently unavailable.
64 return Err(rustix::io::Errno::NOSYS.into());
65 }
66
67 let oflags = compute_oflags(options)?;
68
69 // Do two `contains` checks because `TMPFILE` may be represented with
70 // multiple flags and we need to ensure they're all set.
71 let mode = if oflags.contains(OFlags::CREATE) || oflags.contains(OFlags::TMPFILE) {
72 Mode::from_bits((options.ext.mode & 0o7777) as RawMode).unwrap()
73 } else {
74 Mode::empty()
75 };
76
77 // We know `openat2` needs a `&CStr` internally; to avoid allocating on
78 // each iteration of the loop below, allocate the `CString` now.
79 path.into_with_c_str(|path_c_str| {
80 // `openat2` fails with `EAGAIN` if a rename happens anywhere on the host
81 // while it's running, so use a loop to retry it a few times. But not too many
82 // times, because there's no limit on how often this can happen. The actual
83 // number here is currently an arbitrarily chosen guess.
84 for _ in 0..4 {
85 match openat2(
86 start,
87 path_c_str,
88 oflags,
89 mode,
90 ResolveFlags::BENEATH | ResolveFlags::NO_MAGICLINKS,
91 ) {
92 Ok(file) => {
93 let file = fs::File::from_into_fd(file);
94
95 #[cfg(racy_asserts)]
96 check_open(start, path, options, &file);
97
98 return Ok(file);
99 }
100 Err(err) => match err {
101 // A rename or similar happened. Try again.
102 rustix::io::Errno::AGAIN => continue,
103
104 // `EPERM` is used by some `seccomp` sandboxes to indicate
105 // that `openat2` is unimplemented:
106 // <https://github.com/systemd/systemd/blob/e2357b1c8a87b610066b8b2a59517bcfb20b832e/src/shared/seccomp-util.c#L2066>
107 //
108 // However, `EPERM` may also indicate a failed `O_NOATIME`
109 // or a file seal prevented the operation, and it's complex
110 // to detect those cases, so exit the loop and use the
111 // fallback.
112 rustix::io::Errno::PERM => break,
113
114 // `ENOSYS` means `openat2` is permanently unavailable;
115 // mark it so and exit the loop.
116 rustix::io::Errno::NOSYS => {
117 INVALID.store(true, Relaxed);
118 break;
119 }
120
121 _ => return Err(err),
122 },
123 }
124 }
125
126 Err(rustix::io::Errno::NOSYS)
127 })
128 .map_err(|err| match err {
129 rustix::io::Errno::XDEV => errors::escape_attempt(),
130 err => err.into(),
131 })
132}
133
/// Debug cross-check, compiled only under `racy_asserts`: re-open `path`
/// with `manually::open` and assert that it refers to the same file as
/// `file`.
///
/// Panics if the manual open fails, if the comparison itself fails, or if
/// the two handles are not the same file.
#[cfg(racy_asserts)]
fn check_open(start: &fs::File, path: &Path, options: &OpenOptions, file: &fs::File) {
    // Re-open with the creating and truncating options switched off.
    let mut reopen_options = options.clone();
    reopen_options
        .create(false)
        .create_new(false)
        .truncate(false);

    let reopened = manually::open(start, path, &reopen_options)
        .expect("manually::open failed when open_openat2 succeeded");

    let same_file = is_same_file(file, &reopened).unwrap();
    assert!(
        same_file,
        "manually::open should open the same inode as open_openat2"
    );
}
150}