tokio_tar/entry.rs
1use crate::fs::normalize;
2use crate::{
3 error::TarError, header::bytes2path, other, pax::pax_extensions, Archive, Header, PaxExtensions,
4};
5use filetime::{self, FileTime};
6use rustc_hash::FxHashSet;
7use std::{
8 borrow::Cow,
9 cmp,
10 collections::VecDeque,
11 convert::TryFrom,
12 fmt,
13 io::{Error, ErrorKind, SeekFrom},
14 marker,
15 path::{Component, Path, PathBuf},
16 pin::Pin,
17 task::{Context, Poll},
18};
19use tokio::{
20 fs,
21 fs::{remove_file, OpenOptions},
22 io::{self, AsyncRead as Read, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
23};
24
/// A read-only view into an entry of an archive.
///
/// This structure is a window into a portion of a borrowed archive which can
/// be inspected. It acts as a file handle by implementing tokio's `AsyncRead`
/// trait (imported in this module under the name `Read`). An entry cannot be
/// rewritten once inserted into an archive.
pub struct Entry<R: Read + Unpin> {
    // All per-entry state; the methods on `Entry` delegate to this value.
    fields: EntryFields<R>,
    // Zero-sized marker naming `Archive<R>`; it stores no data.
    _ignored: marker::PhantomData<Archive<R>>,
}
34
35impl<R: Read + Unpin> fmt::Debug for Entry<R> {
36 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37 f.debug_struct("Entry")
38 .field("fields", &self.fields)
39 .finish()
40 }
41}
42
// Implementation detail of `Entry` that holds all per-entry state. It is
// generic over the reader `R`, and its fields are public (`read_state` is
// crate-visible) so that it can be constructed from other modules.
pub struct EntryFields<R: Read + Unpin> {
    // Long path name bytes, if any. Preferred over every other source by
    // `path_bytes`, which strips one trailing NUL byte.
    pub long_pathname: Option<Vec<u8>>,
    // Long link name bytes, if any. Preferred over every other source by
    // `link_name_bytes`, which strips one trailing NUL byte.
    pub long_linkname: Option<Vec<u8>>,
    // Raw pax extension bytes. Consulted for the `path` / `linkpath` keys and
    // filled lazily by `pax_extensions()` for pax entries.
    pub pax_extensions: Option<Vec<u8>>,
    // The header of this entry.
    pub header: Header,
    // Entry size in bytes; used here to size read and write buffers.
    pub size: u64,
    // Value reported by `Entry::raw_header_position`.
    pub header_pos: u64,
    // Value reported by `Entry::raw_file_position`.
    pub file_pos: u64,
    // Queue of content segments, consumed front-to-back when reading.
    pub data: VecDeque<EntryIo<R>>,
    // When set, `unpack` applies `SCHILY.xattr.*` pax records to the file.
    pub unpack_xattrs: bool,
    // When set, `unpack` applies the header mode to the unpacked path.
    pub preserve_permissions: bool,
    // When set, `unpack` applies the header mtime to the unpacked path.
    pub preserve_mtime: bool,
    // When set, an existing file or symlink at the target is removed and the
    // creation retried instead of failing with `AlreadyExists`.
    pub overwrite: bool,
    // When unset, absolute symlink targets and targets that normalize to a
    // location outside the unpack root are rejected.
    pub allow_external_symlinks: bool,
    // Segment currently being read by `poll_read`, if any.
    pub(crate) read_state: Option<EntryIo<R>>,
}
61
62impl<R: Read + Unpin> fmt::Debug for EntryFields<R> {
63 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64 f.debug_struct("EntryFields")
65 .field("long_pathname", &self.long_pathname)
66 .field("long_linkname", &self.long_linkname)
67 .field("pax_extensions", &self.pax_extensions)
68 .field("header", &self.header)
69 .field("size", &self.size)
70 .field("header_pos", &self.header_pos)
71 .field("file_pos", &self.file_pos)
72 .field("data", &self.data)
73 .field("unpack_xattrs", &self.unpack_xattrs)
74 .field("preserve_permissions", &self.preserve_permissions)
75 .field("preserve_mtime", &self.preserve_mtime)
76 .field("overwrite", &self.overwrite)
77 .field("allow_external_symlinks", &self.allow_external_symlinks)
78 .field("read_state", &self.read_state)
79 .finish()
80 }
81}
82
/// One segment of an entry's contents, as queued in `EntryFields::data`.
pub enum EntryIo<R: Read + Unpin> {
    /// A bounded run of bytes produced by `io::Repeat` rather than read from
    /// the archive. When unpacking to a file this segment is not written out:
    /// the file is extended by the segment's length instead.
    Pad(io::Take<io::Repeat>),
    /// A bounded run of bytes read from the underlying reader `R`.
    Data(io::Take<R>),
}
87
88impl<R: Read + Unpin> fmt::Debug for EntryIo<R> {
89 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90 match self {
91 EntryIo::Pad(t) => write!(f, "EntryIo::Pad({})", t.limit()),
92 EntryIo::Data(t) => write!(f, "EntryIo::Data({})", t.limit()),
93 }
94 }
95}
96
/// The result of unpacking a single entry.
///
/// When unpacking items the unpacked thing is returned to allow custom
/// additional handling by users. Today only regular files return a handle;
/// in future the enum may be extended with kinds for links, directories etc.,
/// which is why it is marked `#[non_exhaustive]`.
#[derive(Debug)]
#[non_exhaustive]
pub enum Unpacked {
    /// A file was unpacked; the handle is the file that was just written.
    File(fs::File),
    /// A directory, hardlink, symlink, or other node was unpacked.
    Other,
}
108
109impl<R: Read + Unpin> Entry<R> {
110 /// Returns the path name for this entry.
111 ///
112 /// This method may fail if the pathname is not valid Unicode and this is
113 /// called on a Windows platform.
114 ///
115 /// Note that this function will convert any `\` characters to directory
116 /// separators, and it will not always return the same value as
117 /// `self.header().path()` as some archive formats have support for longer
118 /// path names described in separate entries.
119 ///
120 /// It is recommended to use this method instead of inspecting the `header`
121 /// directly to ensure that various archive formats are handled correctly.
122 pub fn path(&self) -> io::Result<Cow<'_, Path>> {
123 self.fields.path()
124 }
125
126 /// Returns the raw bytes listed for this entry.
127 ///
128 /// Note that this function will convert any `\` characters to directory
129 /// separators, and it will not always return the same value as
130 /// `self.header().path_bytes()` as some archive formats have support for
131 /// longer path names described in separate entries.
132 pub fn path_bytes(&self) -> Cow<'_, [u8]> {
133 self.fields.path_bytes()
134 }
135
136 /// Returns the link name for this entry, if any is found.
137 ///
138 /// This method may fail if the pathname is not valid Unicode and this is
139 /// called on a Windows platform. `Ok(None)` being returned, however,
140 /// indicates that the link name was not present.
141 ///
142 /// Note that this function will convert any `\` characters to directory
143 /// separators, and it will not always return the same value as
144 /// `self.header().link_name()` as some archive formats have support for
145 /// longer path names described in separate entries.
146 ///
147 /// It is recommended to use this method instead of inspecting the `header`
148 /// directly to ensure that various archive formats are handled correctly.
149 pub fn link_name(&self) -> io::Result<Option<Cow<'_, Path>>> {
150 self.fields.link_name()
151 }
152
153 /// Returns the link name for this entry, in bytes, if listed.
154 ///
155 /// Note that this will not always return the same value as
156 /// `self.header().link_name_bytes()` as some archive formats have support for
157 /// longer path names described in separate entries.
158 pub fn link_name_bytes(&self) -> Option<Cow<'_, [u8]>> {
159 self.fields.link_name_bytes()
160 }
161
162 /// Returns an iterator over the pax extensions contained in this entry.
163 ///
164 /// Pax extensions are a form of archive where extra metadata is stored in
165 /// key/value pairs in entries before the entry they're intended to
166 /// describe. For example this can be used to describe long file name or
167 /// other metadata like atime/ctime/mtime in more precision.
168 ///
169 /// The returned iterator will yield key/value pairs for each extension.
170 ///
171 /// `None` will be returned if this entry does not indicate that it itself
172 /// contains extensions, or if there were no previous extensions describing
173 /// it.
174 ///
175 /// Note that global pax extensions are intended to be applied to all
176 /// archive entries.
177 ///
178 /// Also note that this function will read the entire entry if the entry
179 /// itself is a list of extensions.
180 pub async fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions<'_>>> {
181 self.fields.pax_extensions().await
182 }
183
184 /// Returns access to the header of this entry in the archive.
185 ///
186 /// This provides access to the metadata for this entry in the archive.
187 pub fn header(&self) -> &Header {
188 &self.fields.header
189 }
190
191 /// Returns the starting position, in bytes, of the header of this entry in
192 /// the archive.
193 ///
194 /// The header is always a contiguous section of 512 bytes, so if the
195 /// underlying reader implements `Seek`, then the slice from `header_pos` to
196 /// `header_pos + 512` contains the raw header bytes.
197 pub fn raw_header_position(&self) -> u64 {
198 self.fields.header_pos
199 }
200
201 /// Returns the starting position, in bytes, of the file of this entry in
202 /// the archive.
203 ///
204 /// If the file of this entry is continuous (e.g. not a sparse file), and
205 /// if the underlying reader implements `Seek`, then the slice from
206 /// `file_pos` to `file_pos + entry_size` contains the raw file bytes.
207 pub fn raw_file_position(&self) -> u64 {
208 self.fields.file_pos
209 }
210
211 /// Writes this file to the specified location.
212 ///
213 /// This function will write the entire contents of this file into the
214 /// location specified by `dst`. Metadata will also be propagated to the
215 /// path `dst`.
216 ///
217 /// This function will create a file at the path `dst`, and it is required
218 /// that the intermediate directories are created. Any existing file at the
219 /// location `dst` will be overwritten.
220 ///
221 /// > **Note**: This function does not have as many sanity checks as
222 /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're
223 /// > thinking of unpacking untrusted tarballs you may want to review the
224 /// > implementations of the previous two functions and perhaps implement
225 /// > similar logic yourself.
226 ///
227 /// # Examples
228 ///
229 /// ```no_run
230 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
231 /// #
232 /// use tokio::fs::File;
233 /// use tokio_tar::Archive;
234 /// use tokio_stream::*;
235 ///
236 /// let mut ar = Archive::new(File::open("foo.tar").await?);
237 /// let mut entries = ar.entries()?;
238 /// let mut i = 0;
239 /// while let Some(file) = entries.next().await {
240 /// let mut file = file?;
241 /// file.unpack(format!("file-{}", i)).await?;
242 /// i += 1;
243 /// }
244 /// #
245 /// # Ok(()) }) }
246 /// ```
247 pub async fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Unpacked> {
248 self.fields.unpack(None, dst.as_ref()).await
249 }
250
251 /// Extracts this file under the specified path, avoiding security issues.
252 ///
253 /// This function will write the entire contents of this file into the
254 /// location obtained by appending the path of this file in the archive to
255 /// `dst`, creating any intermediate directories if needed. Metadata will
256 /// also be propagated to the path `dst`. Any existing file at the location
257 /// `dst` will be overwritten.
258 ///
259 /// This function carefully avoids writing outside of `dst`. If the file has
260 /// a '..' in its path, this function will skip it and return false.
261 ///
262 /// # Examples
263 ///
264 /// ```no_run
265 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
266 /// #
267 /// use tokio::{fs::File, stream::*};
268 /// use tokio_tar::Archive;
269 /// use tokio_stream::*;
270 ///
271 /// let mut ar = Archive::new(File::open("foo.tar").await?);
272 /// let mut entries = ar.entries()?;
273 /// let mut i = 0;
274 /// while let Some(file) = entries.next().await {
275 /// let mut file = file.unwrap();
276 /// file.unpack_in("target").await?;
277 /// i += 1;
278 /// }
279 /// #
280 /// # Ok(()) }) }
281 /// ```
282 pub async fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Option<PathBuf>> {
283 let dst = dst.as_ref().canonicalize()?;
284 let mut memo = FxHashSet::default();
285 self.fields.unpack_in(&dst, &mut memo).await
286 }
287
288 /// Extracts this file under the specified path, avoiding security issues.
289 ///
290 /// Like [`unpack_in`], but memoizes the set of validated paths to avoid
291 /// redundant filesystem operations and assumes that the destination path
292 /// is already canonicalized.
293 pub async fn unpack_in_raw<P: AsRef<Path>>(
294 &mut self,
295 dst: P,
296 memo: &mut FxHashSet<PathBuf>,
297 ) -> io::Result<Option<PathBuf>> {
298 self.fields.unpack_in(dst.as_ref(), memo).await
299 }
300
301 /// Indicate whether extended file attributes (xattrs on Unix) are preserved
302 /// when unpacking this entry.
303 ///
304 /// This flag is disabled by default and is currently only implemented on
305 /// Unix using xattr support. This may eventually be implemented for
306 /// Windows, however, if other archive implementations are found which do
307 /// this as well.
308 pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
309 self.fields.unpack_xattrs = unpack_xattrs;
310 }
311
312 /// Indicate whether extended permissions (like suid on Unix) are preserved
313 /// when unpacking this entry.
314 ///
315 /// This flag is disabled by default and is currently only implemented on
316 /// Unix.
317 pub fn set_preserve_permissions(&mut self, preserve: bool) {
318 self.fields.preserve_permissions = preserve;
319 }
320
321 /// Indicate whether access time information is preserved when unpacking
322 /// this entry.
323 ///
324 /// This flag is enabled by default.
325 pub fn set_preserve_mtime(&mut self, preserve: bool) {
326 self.fields.preserve_mtime = preserve;
327 }
328
329 /// Indicate whether to deny symlinks that point outside the destination
330 /// directory when unpacking this entry. (Writing to locations outside the
331 /// destination directory is _always_ forbidden.)
332 ///
333 /// This flag is enabled by default.
334 pub fn set_allow_external_symlinks(&mut self, allow_external_symlinks: bool) {
335 self.fields.allow_external_symlinks = allow_external_symlinks;
336 }
337}
338
339impl<R: Read + Unpin> Read for Entry<R> {
340 fn poll_read(
341 mut self: Pin<&mut Self>,
342 cx: &mut Context<'_>,
343 into: &mut io::ReadBuf<'_>,
344 ) -> Poll<io::Result<()>> {
345 Pin::new(&mut self.as_mut().fields).poll_read(cx, into)
346 }
347}
348
349impl<R: Read + Unpin> EntryFields<R> {
350 pub fn from(entry: Entry<R>) -> Self {
351 entry.fields
352 }
353
354 pub fn into_entry(self) -> Entry<R> {
355 Entry {
356 fields: self,
357 _ignored: marker::PhantomData,
358 }
359 }
360
361 pub(crate) fn poll_read_all(
362 self: Pin<&mut Self>,
363 cx: &mut Context<'_>,
364 out: &mut Vec<u8>,
365 ) -> Poll<io::Result<()>> {
366 // Copied from futures::ReadToEnd
367 match poll_read_all_internal(self, cx, out) {
368 Poll::Ready(t) => Poll::Ready(t.map(|_| ())),
369 Poll::Pending => Poll::Pending,
370 }
371 }
372
373 pub async fn read_all(&mut self) -> io::Result<Vec<u8>> {
374 // Preallocate some data but don't let ourselves get too crazy now.
375 let cap = cmp::min(self.size, 128 * 1024);
376 let mut v = Vec::with_capacity(cap as usize);
377 self.read_to_end(&mut v).await.map(|_| v)
378 }
379
380 fn path(&self) -> io::Result<Cow<'_, Path>> {
381 bytes2path(self.path_bytes())
382 }
383
384 fn path_bytes(&self) -> Cow<'_, [u8]> {
385 match self.long_pathname {
386 Some(ref bytes) => {
387 if let Some(&0) = bytes.last() {
388 Cow::Borrowed(&bytes[..bytes.len() - 1])
389 } else {
390 Cow::Borrowed(bytes)
391 }
392 }
393 None => {
394 if let Some(ref pax) = self.pax_extensions {
395 let pax = pax_extensions(pax)
396 .filter_map(|f| f.ok())
397 .find(|f| f.key_bytes() == b"path")
398 .map(|f| f.value_bytes());
399 if let Some(field) = pax {
400 return Cow::Borrowed(field);
401 }
402 }
403 self.header.path_bytes()
404 }
405 }
406 }
407
408 /// Gets the path in a "lossy" way, used for error reporting ONLY.
409 fn path_lossy(&self) -> String {
410 String::from_utf8_lossy(&self.path_bytes()).to_string()
411 }
412
413 fn link_name(&self) -> io::Result<Option<Cow<'_, Path>>> {
414 match self.link_name_bytes() {
415 Some(bytes) => bytes2path(bytes).map(Some),
416 None => Ok(None),
417 }
418 }
419
420 fn link_name_bytes(&self) -> Option<Cow<'_, [u8]>> {
421 match self.long_linkname {
422 Some(ref bytes) => {
423 if let Some(&0) = bytes.last() {
424 Some(Cow::Borrowed(&bytes[..bytes.len() - 1]))
425 } else {
426 Some(Cow::Borrowed(bytes))
427 }
428 }
429 None => {
430 if let Some(ref pax) = self.pax_extensions {
431 let pax = pax_extensions(pax)
432 .filter_map(|f| f.ok())
433 .find(|f| f.key_bytes() == b"linkpath")
434 .map(|f| f.value_bytes());
435 if let Some(field) = pax {
436 return Some(Cow::Borrowed(field));
437 }
438 }
439 self.header.link_name_bytes()
440 }
441 }
442 }
443
444 async fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions<'_>>> {
445 if self.pax_extensions.is_none() {
446 if !self.header.entry_type().is_pax_global_extensions()
447 && !self.header.entry_type().is_pax_local_extensions()
448 {
449 return Ok(None);
450 }
451 self.pax_extensions = Some(self.read_all().await?);
452 }
453 Ok(Some(pax_extensions(self.pax_extensions.as_ref().unwrap())))
454 }
455
    /// Unpack the [`Entry`] into the specified destination.
    ///
    /// It's assumed that `dst` is already canonicalized, and that the memoized set of validated
    /// paths are tied to `dst`.
    ///
    /// Returns `Ok(Some(path))` with the path that was written, or `Ok(None)` when the entry is
    /// skipped: its path contains a `..` component, it reduces to `dst` itself, or the resulting
    /// path has no parent.
    async fn unpack_in(
        &mut self,
        dst: &Path,
        memo: &mut FxHashSet<PathBuf>,
    ) -> io::Result<Option<PathBuf>> {
        // It's assumed that `dst` is already canonicalized. Debug builds verify
        // the assumption; release builds skip the extra canonicalization.
        if cfg!(debug_assertions) {
            let canon_target = dst.canonicalize()?;
            assert_eq!(canon_target, dst, "Destination path must be canonicalized");
        }

        // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3:
        // * Leading '/'s are trimmed. For example, `///test` is treated as
        //   `test`.
        // * If the filename contains '..', then the file is skipped when
        //   extracting the tarball.
        // * '//' within a filename is effectively skipped. An error is
        //   logged, but otherwise the effect is as if any two or more
        //   adjacent '/'s within the filename were consolidated into one
        //   '/'.
        //
        // Most of this is handled by the `path` module of the standard
        // library, but we specially handle a few cases here as well.

        // Build the target path by appending only the normal components of
        // the entry path to `dst`.
        let mut file_dst = dst.to_path_buf();
        {
            let path = self.path().map_err(|e| {
                TarError::new(
                    format!("invalid path in entry header: {}", self.path_lossy()),
                    e,
                )
            })?;
            for part in path.components() {
                match part {
                    // Leading '/' characters, root paths, and '.'
                    // components are just ignored and treated as "empty
                    // components"
                    Component::Prefix(..) | Component::RootDir | Component::CurDir => continue,

                    // If any part of the filename is '..', then skip over
                    // unpacking the file to prevent directory traversal
                    // security issues. See, e.g.: CVE-2001-1267,
                    // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
                    Component::ParentDir => return Ok(None),

                    Component::Normal(part) => file_dst.push(part),
                }
            }
        }

        // Skip cases where only slashes or '.' parts were seen, because
        // this is effectively an empty filename.
        if *dst == *file_dst {
            return Ok(None);
        }

        // Skip entries without a parent (i.e. outside of FS root)
        let parent = match file_dst.parent() {
            Some(p) => p,
            None => return Ok(None),
        };

        // If the target is a link, clear the memoized set entirely. If we don't clear the set, then
        // a malicious tarball could create a symlink to change the effective parent directory
        // of an unpacked file _after_ it has been validated.
        if self.header.entry_type().is_symlink() || self.header.entry_type().is_hard_link() {
            memo.clear();
        }

        // Validate the parent, if we haven't seen it yet. The parent is only
        // recorded in `memo` after both creation and validation succeeded.
        if !memo.contains(parent) {
            self.ensure_dir_created(dst, parent).await.map_err(|e| {
                TarError::new(format!("failed to create `{}`", parent.display()), e)
            })?;
            self.validate_inside_dst(dst, parent).await?;
            memo.insert(parent.to_path_buf());
        }

        // Passing `Some(dst)` lets `unpack` check link targets against the
        // unpack root.
        self.unpack(Some(dst), &file_dst)
            .await
            .map_err(|e| TarError::new(format!("failed to unpack `{}`", file_dst.display()), e))?;

        Ok(Some(file_dst))
    }
544
545 /// Unpack as destination directory `dst`.
546 async fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> {
547 // If the directory already exists just let it slide
548 match fs::create_dir(dst).await {
549 Ok(()) => Ok(()),
550 Err(err) => {
551 if err.kind() == ErrorKind::AlreadyExists {
552 let prev = fs::metadata(dst).await;
553 if prev.map(|m| m.is_dir()).unwrap_or(false) {
554 return Ok(());
555 }
556 }
557 Err(Error::new(
558 err.kind(),
559 format!("{} when creating dir {}", err, dst.display()),
560 ))
561 }
562 }
563 }
564
565 /// Returns access to the header of this entry in the archive.
566 async fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result<Unpacked> {
567 fn get_mtime(header: &Header) -> Option<FileTime> {
568 header.mtime().ok().map(|mtime| {
569 // For some more information on this see the comments in
570 // `Header::fill_platform_from`, but the general idea is that
571 // we're trying to avoid 0-mtime files coming out of archives
572 // since some tools don't ingest them well. Perhaps one day
573 // when Cargo stops working with 0-mtime archives we can remove
574 // this.
575 let mtime = if mtime == 0 { 1 } else { mtime };
576 FileTime::from_unix_time(mtime as i64, 0)
577 })
578 }
579
580 let kind = self.header.entry_type();
581
582 if kind.is_dir() {
583 self.unpack_dir(dst).await?;
584 if self.preserve_permissions {
585 if let Ok(mode) = self.header.mode() {
586 set_perms(dst, None, mode).await?;
587 }
588 }
589 return Ok(Unpacked::Other);
590 } else if kind.is_hard_link() || kind.is_symlink() {
591 let link_name = match self.link_name()? {
592 Some(name) => name,
593 None => {
594 return Err(other("hard link listed but no link name found"));
595 }
596 };
597
598 // Reject absolute paths entirely.
599 if !self.allow_external_symlinks && link_name.is_absolute() {
600 return Err(other(&format!(
601 "symlink path `{}` is absolute, but external symlinks are not allowed",
602 link_name.display()
603 )));
604 }
605
606 if link_name.iter().count() == 0 {
607 return Err(other(&format!(
608 "symlink destination for {} is empty",
609 link_name.display()
610 )));
611 }
612
613 if kind.is_hard_link() {
614 let link_src = match target_base {
615 // If we're unpacking within a directory then ensure that
616 // the destination of this hard link is both present and
617 // inside our own directory. This is needed because we want
618 // to make sure to not overwrite anything outside the root.
619 //
620 // Note that this logic is only needed for hard links
621 // currently. With symlinks the `validate_inside_dst` which
622 // happens before this method as part of `unpack_in` will
623 // use canonicalization to ensure this guarantee. For hard
624 // links though they're canonicalized to their existing path
625 // so we need to validate at this time.
626 Some(p) => {
627 let link_src = p.join(link_name);
628 self.validate_inside_dst(p, &link_src).await?;
629 link_src
630 }
631 None => link_name.into_owned(),
632 };
633 fs::hard_link(&link_src, dst).await.map_err(|err| {
634 Error::new(
635 err.kind(),
636 format!(
637 "{} when hard linking {} to {}",
638 err,
639 link_src.display(),
640 dst.display()
641 ),
642 )
643 })?;
644 } else {
645 let normalized_src = if self.allow_external_symlinks {
646 // If external symlinks are allowed, use the source path as is.
647 link_name
648 } else {
649 // Ensure that we were able to normalize the path (e.g., `a/b/../c` to `a/c`).
650 let Some(normalized_src) = normalize(&link_name) else {
651 return Err(other(&format!(
652 "symlink destination for {} is not a valid path",
653 link_name.display()
654 )));
655 };
656
657 // Join the normalized path with the parent of `dst`.
658 let Some(absolute_normalized_path) = dst
659 .parent()
660 .map(|parent| parent.join(&normalized_src))
661 .and_then(|path| normalize(&path))
662 else {
663 return Err(other(&format!(
664 "symlink destination for {} lacks a parent path",
665 link_name.display()
666 )));
667 };
668
669 // If the normalized path points outside the target directory, reject it.
670 if !target_base
671 .is_some_and(|target| absolute_normalized_path.starts_with(target))
672 {
673 return Err(other(&format!(
674 "symlink destination for {} is outside of the target directory",
675 link_name.display()
676 )));
677 }
678
679 Cow::Owned(normalized_src)
680 };
681
682 match symlink(&normalized_src, dst).await {
683 Ok(()) => Ok(()),
684 Err(err) => {
685 if err.kind() == io::ErrorKind::AlreadyExists && self.overwrite {
686 match remove_file(dst).await {
687 Ok(()) => symlink(&normalized_src, dst).await,
688 Err(ref e) if e.kind() == io::ErrorKind::NotFound => {
689 symlink(&normalized_src, dst).await
690 }
691 Err(e) => Err(e),
692 }
693 } else {
694 Err(err)
695 }
696 }
697 }?;
698 if self.preserve_mtime {
699 if let Some(mtime) = get_mtime(&self.header) {
700 filetime::set_symlink_file_times(dst, mtime, mtime).map_err(|e| {
701 TarError::new(format!("failed to set mtime for `{}`", dst.display()), e)
702 })?;
703 }
704 }
705 };
706 return Ok(Unpacked::Other);
707
708 #[cfg(target_arch = "wasm32")]
709 #[allow(unused_variables)]
710 async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
711 Err(io::Error::new(io::ErrorKind::Other, "Not implemented"))
712 }
713
714 #[cfg(windows)]
715 async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
716 let (src, dst) = (src.to_owned(), dst.to_owned());
717 tokio::task::spawn_blocking(|| std::os::windows::fs::symlink_file(src, dst))
718 .await
719 .unwrap()
720 }
721
722 #[cfg(unix)]
723 async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
724 tokio::fs::symlink(src, dst).await
725 }
726 } else if kind.is_pax_global_extensions()
727 || kind.is_pax_local_extensions()
728 || kind.is_gnu_longname()
729 || kind.is_gnu_longlink()
730 {
731 return Ok(Unpacked::Other);
732 };
733
734 // Old BSD-tar compatibility.
735 // Names that have a trailing slash should be treated as a directory.
736 // Only applies to old headers.
737 if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/") {
738 self.unpack_dir(dst).await?;
739 if self.preserve_permissions {
740 if let Ok(mode) = self.header.mode() {
741 set_perms(dst, None, mode).await?;
742 }
743 }
744 return Ok(Unpacked::Other);
745 }
746
747 // Note the lack of `else` clause above. According to the FreeBSD
748 // documentation:
749 //
750 // > A POSIX-compliant implementation must treat any unrecognized
751 // > typeflag value as a regular file.
752 //
753 // As a result if we don't recognize the kind we just write out the file
754 // as we would normally.
755
756 // Ensure we write a new file rather than overwriting in-place which
757 // is attackable; if an existing file is found unlink it.
758 async fn open(dst: &Path) -> io::Result<fs::File> {
759 OpenOptions::new()
760 .write(true)
761 .create_new(true)
762 .open(dst)
763 .await
764 }
765
766 let mut f = async {
767 let mut f = match open(dst).await {
768 Ok(f) => Ok(f),
769 Err(err) => {
770 if err.kind() == ErrorKind::AlreadyExists && self.overwrite {
771 match fs::remove_file(dst).await {
772 Ok(()) => open(dst).await,
773 Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst).await,
774 Err(e) => Err(e),
775 }
776 } else {
777 Err(err)
778 }
779 }
780 }?;
781
782 let size = usize::try_from(self.size).unwrap_or(usize::MAX);
783 let capacity = cmp::min(size, 128 * 1024);
784 let mut writer = io::BufWriter::with_capacity(capacity, &mut f);
785 for io in self.data.drain(..) {
786 match io {
787 EntryIo::Data(mut d) => {
788 let expected = d.limit();
789 if io::copy(&mut d, &mut writer).await? != expected {
790 return Err(other("failed to write entire file"));
791 }
792 }
793 EntryIo::Pad(d) => {
794 // TODO: checked cast to i64
795 let pad_len = d.limit() as i64;
796 writer.flush().await?;
797 let f = writer.get_mut();
798 let new_size = f.seek(SeekFrom::Current(pad_len)).await?;
799 f.set_len(new_size).await?;
800 }
801 }
802 }
803 writer.flush().await?;
804 Ok::<fs::File, io::Error>(f)
805 }
806 .await
807 .map_err(|e| {
808 let header = self.header.path_bytes();
809 TarError::new(
810 format!(
811 "failed to unpack `{}` into `{}`",
812 String::from_utf8_lossy(&header),
813 dst.display()
814 ),
815 e,
816 )
817 })?;
818
819 if self.preserve_mtime {
820 if let Some(mtime) = get_mtime(&self.header) {
821 filetime::set_file_times(dst, mtime, mtime).map_err(|e| {
822 TarError::new(format!("failed to set mtime for `{}`", dst.display()), e)
823 })?;
824 }
825 }
826 if self.preserve_permissions {
827 if let Ok(mode) = self.header.mode() {
828 set_perms(dst, Some(&mut f), mode).await?;
829 }
830 }
831 if self.unpack_xattrs {
832 set_xattrs(self, dst).await?;
833 }
834 return Ok(Unpacked::File(f));
835
836 async fn set_perms(
837 dst: &Path,
838 f: Option<&mut fs::File>,
839 mode: u32,
840 ) -> Result<(), TarError> {
841 _set_perms(dst, f, mode).await.map_err(|e| {
842 TarError::new(
843 format!(
844 "failed to set permissions to {:o} \
845 for `{}`",
846 mode,
847 dst.display()
848 ),
849 e,
850 )
851 })
852 }
853
854 #[cfg(unix)]
855 async fn _set_perms(dst: &Path, f: Option<&mut fs::File>, mode: u32) -> io::Result<()> {
856 use std::os::unix::prelude::*;
857
858 let perm = std::fs::Permissions::from_mode(mode as _);
859 match f {
860 Some(f) => f.set_permissions(perm).await,
861 None => fs::set_permissions(dst, perm).await,
862 }
863 }
864
865 #[cfg(windows)]
866 async fn _set_perms(dst: &Path, f: Option<&mut fs::File>, mode: u32) -> io::Result<()> {
867 if mode & 0o200 == 0o200 {
868 return Ok(());
869 }
870 match f {
871 Some(f) => {
872 let mut perm = f.metadata().await?.permissions();
873 perm.set_readonly(true);
874 f.set_permissions(perm).await
875 }
876 None => {
877 let mut perm = fs::metadata(dst).await?.permissions();
878 perm.set_readonly(true);
879 fs::set_permissions(dst, perm).await
880 }
881 }
882 }
883
884 #[cfg(target_arch = "wasm32")]
885 #[allow(unused_variables)]
886 async fn _set_perms(dst: &Path, f: Option<&mut fs::File>, mode: u32) -> io::Result<()> {
887 Err(io::Error::new(io::ErrorKind::Other, "Not implemented"))
888 }
889
890 #[cfg(all(unix, feature = "xattr"))]
891 async fn set_xattrs<R: Read + Unpin>(
892 me: &mut EntryFields<R>,
893 dst: &Path,
894 ) -> io::Result<()> {
895 use std::{ffi::OsStr, os::unix::prelude::*};
896
897 let exts = match me.pax_extensions().await {
898 Ok(Some(e)) => e,
899 _ => return Ok(()),
900 };
901 let exts = exts
902 .filter_map(|e| e.ok())
903 .filter_map(|e| {
904 let key = e.key_bytes();
905 let prefix = b"SCHILY.xattr.";
906 key.strip_prefix(prefix).map(|rest| (rest, e))
907 })
908 .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes()));
909
910 for (key, value) in exts {
911 xattr::set(dst, key, value).map_err(|e| {
912 TarError::new(
913 format!(
914 "failed to set extended \
915 attributes to {}. \
916 Xattrs: key={:?}, value={:?}.",
917 dst.display(),
918 key,
919 String::from_utf8_lossy(value)
920 ),
921 e,
922 )
923 })?;
924 }
925
926 Ok(())
927 }
928 // Windows does not completely support posix xattrs
929 // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT
930 #[cfg(any(windows, not(feature = "xattr"), target_arch = "wasm32"))]
931 async fn set_xattrs<R: Read + Unpin>(_: &mut EntryFields<R>, _: &Path) -> io::Result<()> {
932 Ok(())
933 }
934 }
935
936 async fn ensure_dir_created(&self, dst: &Path, dir: &Path) -> io::Result<()> {
937 let mut ancestor = dir;
938 let mut dirs_to_create = Vec::new();
939 while tokio::fs::symlink_metadata(ancestor).await.is_err() {
940 dirs_to_create.push(ancestor);
941 if let Some(parent) = ancestor.parent() {
942 ancestor = parent;
943 } else {
944 break;
945 }
946 }
947 for ancestor in dirs_to_create.into_iter().rev() {
948 if let Some(parent) = ancestor.parent() {
949 self.validate_inside_dst(dst, parent).await?;
950 }
951 fs::create_dir_all(ancestor).await?;
952 }
953 Ok(())
954 }
955
956 async fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<()> {
957 // Abort if target (canonical) parent is outside of `dst`
958 let canon_parent = file_dst.canonicalize().map_err(|err| {
959 Error::new(
960 err.kind(),
961 format!("{} while canonicalizing {}", err, file_dst.display()),
962 )
963 })?;
964 if !canon_parent.starts_with(dst) {
965 let err = TarError::new(
966 format!(
967 "trying to unpack outside of destination path: {}",
968 dst.display()
969 ),
970 // TODO: use ErrorKind::InvalidInput here? (minor breaking change)
971 Error::other("Invalid argument"),
972 );
973 return Err(err.into());
974 }
975 Ok(())
976 }
977}
978
979impl<R: Read + Unpin> Read for EntryFields<R> {
980 fn poll_read(
981 self: Pin<&mut Self>,
982 cx: &mut Context<'_>,
983 into: &mut io::ReadBuf<'_>,
984 ) -> Poll<io::Result<()>> {
985 let this = self.get_mut();
986 loop {
987 if this.read_state.is_none() {
988 this.read_state = this.data.pop_front();
989 }
990
991 if let Some(ref mut io) = &mut this.read_state {
992 let start = into.filled().len();
993 let ret = Pin::new(io).poll_read(cx, into);
994 match ret {
995 Poll::Ready(Ok(())) if into.filled().len() == start => {
996 this.read_state = None;
997 if this.data.is_empty() {
998 return Poll::Ready(Ok(()));
999 }
1000 continue;
1001 }
1002 Poll::Ready(Ok(())) => {
1003 return Poll::Ready(Ok(()));
1004 }
1005 Poll::Ready(Err(err)) => {
1006 return Poll::Ready(Err(err));
1007 }
1008 Poll::Pending => {
1009 return Poll::Pending;
1010 }
1011 }
1012 } else {
1013 // Unable to pull another value from `data`, so we are done.
1014 return Poll::Ready(Ok(()));
1015 }
1016 }
1017 }
1018}
1019
1020impl<R: Read + Unpin> Read for EntryIo<R> {
1021 fn poll_read(
1022 self: Pin<&mut Self>,
1023 cx: &mut Context<'_>,
1024 into: &mut io::ReadBuf<'_>,
1025 ) -> Poll<io::Result<()>> {
1026 match self.get_mut() {
1027 EntryIo::Pad(ref mut io) => Pin::new(io).poll_read(cx, into),
1028 EntryIo::Data(ref mut io) => Pin::new(io).poll_read(cx, into),
1029 }
1030 }
1031}
1032
/// Scope guard that cuts a vector back to a recorded length when dropped.
///
/// The owner temporarily grows `buf` to use as scratch space and keeps `len`
/// equal to the number of bytes that hold real data. Whatever way the scope
/// is left, the drop discards the scratch tail.
struct Guard<'a> {
    // Vector being filled; may currently be longer than `len`.
    buf: &'a mut Vec<u8>,
    // Number of leading bytes of `buf` that must survive the drop.
    len: usize,
}

impl Drop for Guard<'_> {
    fn drop(&mut self) {
        // `truncate` is the safe equivalent of `set_len` for shrinking. It is
        // a no-op when `len` exceeds the current length, so the guard can
        // never expose bytes beyond what the vector already holds.
        self.buf.truncate(self.len);
    }
}
1045
1046fn poll_read_all_internal<R: Read + ?Sized>(
1047 mut rd: Pin<&mut R>,
1048 cx: &mut Context<'_>,
1049 buf: &mut Vec<u8>,
1050) -> Poll<io::Result<usize>> {
1051 let mut g = Guard {
1052 len: buf.len(),
1053 buf,
1054 };
1055 let ret;
1056 loop {
1057 if g.len == g.buf.len() {
1058 unsafe {
1059 g.buf.reserve(32);
1060 let capacity = g.buf.capacity();
1061 g.buf.set_len(capacity);
1062
1063 let buf = &mut g.buf[g.len..];
1064 std::ptr::write_bytes(buf.as_mut_ptr(), 0, buf.len());
1065 }
1066 }
1067
1068 let mut read_buf = io::ReadBuf::new(&mut g.buf[g.len..]);
1069 match futures_core::ready!(rd.as_mut().poll_read(cx, &mut read_buf)) {
1070 Ok(()) if read_buf.filled().is_empty() => {
1071 ret = Poll::Ready(Ok(g.len));
1072 break;
1073 }
1074 Ok(()) => g.len += read_buf.filled().len(),
1075 Err(e) => {
1076 ret = Poll::Ready(Err(e));
1077 break;
1078 }
1079 }
1080 }
1081
1082 ret
1083}