tokio_tar/
entry.rs

1use crate::fs::normalize;
2use crate::{
3    error::TarError, header::bytes2path, other, pax::pax_extensions, Archive, Header, PaxExtensions,
4};
5use filetime::{self, FileTime};
6use rustc_hash::FxHashSet;
7use std::{
8    borrow::Cow,
9    cmp,
10    collections::VecDeque,
11    convert::TryFrom,
12    fmt,
13    io::{Error, ErrorKind, SeekFrom},
14    marker,
15    path::{Component, Path, PathBuf},
16    pin::Pin,
17    task::{Context, Poll},
18};
19use tokio::{
20    fs,
21    fs::{remove_file, OpenOptions},
22    io::{self, AsyncRead as Read, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
23};
24
25/// A read-only view into an entry of an archive.
26///
27/// This structure is a window into a portion of a borrowed archive which can
28/// be inspected. It acts as a file handle by implementing the Reader trait. An
29/// entry cannot be rewritten once inserted into an archive.
30pub struct Entry<R: Read + Unpin> {
31    fields: EntryFields<R>,
32    _ignored: marker::PhantomData<Archive<R>>,
33}
34
35impl<R: Read + Unpin> fmt::Debug for Entry<R> {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        f.debug_struct("Entry")
38            .field("fields", &self.fields)
39            .finish()
40    }
41}
42
43// private implementation detail of `Entry`, but concrete (no type parameters)
44// and also all-public to be constructed from other modules.
45pub struct EntryFields<R: Read + Unpin> {
46    pub long_pathname: Option<Vec<u8>>,
47    pub long_linkname: Option<Vec<u8>>,
48    pub pax_extensions: Option<Vec<u8>>,
49    pub header: Header,
50    pub size: u64,
51    pub header_pos: u64,
52    pub file_pos: u64,
53    pub data: VecDeque<EntryIo<R>>,
54    pub unpack_xattrs: bool,
55    pub preserve_permissions: bool,
56    pub preserve_mtime: bool,
57    pub overwrite: bool,
58    pub allow_external_symlinks: bool,
59    pub(crate) read_state: Option<EntryIo<R>>,
60}
61
62impl<R: Read + Unpin> fmt::Debug for EntryFields<R> {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        f.debug_struct("EntryFields")
65            .field("long_pathname", &self.long_pathname)
66            .field("long_linkname", &self.long_linkname)
67            .field("pax_extensions", &self.pax_extensions)
68            .field("header", &self.header)
69            .field("size", &self.size)
70            .field("header_pos", &self.header_pos)
71            .field("file_pos", &self.file_pos)
72            .field("data", &self.data)
73            .field("unpack_xattrs", &self.unpack_xattrs)
74            .field("preserve_permissions", &self.preserve_permissions)
75            .field("preserve_mtime", &self.preserve_mtime)
76            .field("overwrite", &self.overwrite)
77            .field("allow_external_symlinks", &self.allow_external_symlinks)
78            .field("read_state", &self.read_state)
79            .finish()
80    }
81}
82
83pub enum EntryIo<R: Read + Unpin> {
84    Pad(io::Take<io::Repeat>),
85    Data(io::Take<R>),
86}
87
88impl<R: Read + Unpin> fmt::Debug for EntryIo<R> {
89    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90        match self {
91            EntryIo::Pad(t) => write!(f, "EntryIo::Pad({})", t.limit()),
92            EntryIo::Data(t) => write!(f, "EntryIo::Data({})", t.limit()),
93        }
94    }
95}
96
/// The result of unpacking a single entry.
///
/// The unpacked item is handed back so that callers can apply additional,
/// custom handling. Regular files yield their open handle; everything else is
/// currently reported as [`Unpacked::Other`]. The enum is non-exhaustive so
/// that dedicated variants for links, directories and so on can be added later.
#[derive(Debug)]
#[non_exhaustive]
pub enum Unpacked {
    /// A regular file was written; this is its handle.
    File(fs::File),
    /// A directory, hardlink, symlink, or other node was unpacked.
    Other,
}
108
109impl<R: Read + Unpin> Entry<R> {
110    /// Returns the path name for this entry.
111    ///
112    /// This method may fail if the pathname is not valid Unicode and this is
113    /// called on a Windows platform.
114    ///
115    /// Note that this function will convert any `\` characters to directory
116    /// separators, and it will not always return the same value as
117    /// `self.header().path()` as some archive formats have support for longer
118    /// path names described in separate entries.
119    ///
120    /// It is recommended to use this method instead of inspecting the `header`
121    /// directly to ensure that various archive formats are handled correctly.
122    pub fn path(&self) -> io::Result<Cow<'_, Path>> {
123        self.fields.path()
124    }
125
126    /// Returns the raw bytes listed for this entry.
127    ///
128    /// Note that this function will convert any `\` characters to directory
129    /// separators, and it will not always return the same value as
130    /// `self.header().path_bytes()` as some archive formats have support for
131    /// longer path names described in separate entries.
132    pub fn path_bytes(&self) -> Cow<'_, [u8]> {
133        self.fields.path_bytes()
134    }
135
136    /// Returns the link name for this entry, if any is found.
137    ///
138    /// This method may fail if the pathname is not valid Unicode and this is
139    /// called on a Windows platform. `Ok(None)` being returned, however,
140    /// indicates that the link name was not present.
141    ///
142    /// Note that this function will convert any `\` characters to directory
143    /// separators, and it will not always return the same value as
144    /// `self.header().link_name()` as some archive formats have support for
145    /// longer path names described in separate entries.
146    ///
147    /// It is recommended to use this method instead of inspecting the `header`
148    /// directly to ensure that various archive formats are handled correctly.
149    pub fn link_name(&self) -> io::Result<Option<Cow<'_, Path>>> {
150        self.fields.link_name()
151    }
152
153    /// Returns the link name for this entry, in bytes, if listed.
154    ///
155    /// Note that this will not always return the same value as
156    /// `self.header().link_name_bytes()` as some archive formats have support for
157    /// longer path names described in separate entries.
158    pub fn link_name_bytes(&self) -> Option<Cow<'_, [u8]>> {
159        self.fields.link_name_bytes()
160    }
161
162    /// Returns an iterator over the pax extensions contained in this entry.
163    ///
164    /// Pax extensions are a form of archive where extra metadata is stored in
165    /// key/value pairs in entries before the entry they're intended to
166    /// describe. For example this can be used to describe long file name or
167    /// other metadata like atime/ctime/mtime in more precision.
168    ///
169    /// The returned iterator will yield key/value pairs for each extension.
170    ///
171    /// `None` will be returned if this entry does not indicate that it itself
172    /// contains extensions, or if there were no previous extensions describing
173    /// it.
174    ///
175    /// Note that global pax extensions are intended to be applied to all
176    /// archive entries.
177    ///
178    /// Also note that this function will read the entire entry if the entry
179    /// itself is a list of extensions.
180    pub async fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions<'_>>> {
181        self.fields.pax_extensions().await
182    }
183
184    /// Returns access to the header of this entry in the archive.
185    ///
186    /// This provides access to the metadata for this entry in the archive.
187    pub fn header(&self) -> &Header {
188        &self.fields.header
189    }
190
191    /// Returns the starting position, in bytes, of the header of this entry in
192    /// the archive.
193    ///
194    /// The header is always a contiguous section of 512 bytes, so if the
195    /// underlying reader implements `Seek`, then the slice from `header_pos` to
196    /// `header_pos + 512` contains the raw header bytes.
197    pub fn raw_header_position(&self) -> u64 {
198        self.fields.header_pos
199    }
200
201    /// Returns the starting position, in bytes, of the file of this entry in
202    /// the archive.
203    ///
204    /// If the file of this entry is continuous (e.g. not a sparse file), and
205    /// if the underlying reader implements `Seek`, then the slice from
206    /// `file_pos` to `file_pos + entry_size` contains the raw file bytes.
207    pub fn raw_file_position(&self) -> u64 {
208        self.fields.file_pos
209    }
210
211    /// Writes this file to the specified location.
212    ///
213    /// This function will write the entire contents of this file into the
214    /// location specified by `dst`. Metadata will also be propagated to the
215    /// path `dst`.
216    ///
217    /// This function will create a file at the path `dst`, and it is required
218    /// that the intermediate directories are created. Any existing file at the
219    /// location `dst` will be overwritten.
220    ///
221    /// > **Note**: This function does not have as many sanity checks as
222    /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're
223    /// > thinking of unpacking untrusted tarballs you may want to review the
224    /// > implementations of the previous two functions and perhaps implement
225    /// > similar logic yourself.
226    ///
227    /// # Examples
228    ///
229    /// ```no_run
230    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
231    /// #
232    /// use tokio::fs::File;
233    /// use tokio_tar::Archive;
234    /// use tokio_stream::*;
235    ///
236    /// let mut ar = Archive::new(File::open("foo.tar").await?);
237    /// let mut entries = ar.entries()?;
238    /// let mut i = 0;
239    /// while let Some(file) = entries.next().await {
240    ///     let mut file = file?;
241    ///     file.unpack(format!("file-{}", i)).await?;
242    ///     i += 1;
243    /// }
244    /// #
245    /// # Ok(()) }) }
246    /// ```
247    pub async fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Unpacked> {
248        self.fields.unpack(None, dst.as_ref()).await
249    }
250
251    /// Extracts this file under the specified path, avoiding security issues.
252    ///
253    /// This function will write the entire contents of this file into the
254    /// location obtained by appending the path of this file in the archive to
255    /// `dst`, creating any intermediate directories if needed. Metadata will
256    /// also be propagated to the path `dst`. Any existing file at the location
257    /// `dst` will be overwritten.
258    ///
259    /// This function carefully avoids writing outside of `dst`. If the file has
260    /// a '..' in its path, this function will skip it and return false.
261    ///
262    /// # Examples
263    ///
264    /// ```no_run
265    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
266    /// #
267    /// use tokio::{fs::File, stream::*};
268    /// use tokio_tar::Archive;
269    /// use tokio_stream::*;
270    ///
271    /// let mut ar = Archive::new(File::open("foo.tar").await?);
272    /// let mut entries = ar.entries()?;
273    /// let mut i = 0;
274    /// while let Some(file) = entries.next().await {
275    ///     let mut file = file.unwrap();
276    ///     file.unpack_in("target").await?;
277    ///     i += 1;
278    /// }
279    /// #
280    /// # Ok(()) }) }
281    /// ```
282    pub async fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Option<PathBuf>> {
283        let dst = dst.as_ref().canonicalize()?;
284        let mut memo = FxHashSet::default();
285        self.fields.unpack_in(&dst, &mut memo).await
286    }
287
288    /// Extracts this file under the specified path, avoiding security issues.
289    ///
290    /// Like [`unpack_in`], but memoizes the set of validated paths to avoid
291    /// redundant filesystem operations and assumes that the destination path
292    /// is already canonicalized.
293    pub async fn unpack_in_raw<P: AsRef<Path>>(
294        &mut self,
295        dst: P,
296        memo: &mut FxHashSet<PathBuf>,
297    ) -> io::Result<Option<PathBuf>> {
298        self.fields.unpack_in(dst.as_ref(), memo).await
299    }
300
301    /// Indicate whether extended file attributes (xattrs on Unix) are preserved
302    /// when unpacking this entry.
303    ///
304    /// This flag is disabled by default and is currently only implemented on
305    /// Unix using xattr support. This may eventually be implemented for
306    /// Windows, however, if other archive implementations are found which do
307    /// this as well.
308    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
309        self.fields.unpack_xattrs = unpack_xattrs;
310    }
311
312    /// Indicate whether extended permissions (like suid on Unix) are preserved
313    /// when unpacking this entry.
314    ///
315    /// This flag is disabled by default and is currently only implemented on
316    /// Unix.
317    pub fn set_preserve_permissions(&mut self, preserve: bool) {
318        self.fields.preserve_permissions = preserve;
319    }
320
321    /// Indicate whether access time information is preserved when unpacking
322    /// this entry.
323    ///
324    /// This flag is enabled by default.
325    pub fn set_preserve_mtime(&mut self, preserve: bool) {
326        self.fields.preserve_mtime = preserve;
327    }
328
329    /// Indicate whether to deny symlinks that point outside the destination
330    /// directory when unpacking this entry. (Writing to locations outside the
331    /// destination directory is _always_ forbidden.)
332    ///
333    /// This flag is enabled by default.
334    pub fn set_allow_external_symlinks(&mut self, allow_external_symlinks: bool) {
335        self.fields.allow_external_symlinks = allow_external_symlinks;
336    }
337}
338
339impl<R: Read + Unpin> Read for Entry<R> {
340    fn poll_read(
341        mut self: Pin<&mut Self>,
342        cx: &mut Context<'_>,
343        into: &mut io::ReadBuf<'_>,
344    ) -> Poll<io::Result<()>> {
345        Pin::new(&mut self.as_mut().fields).poll_read(cx, into)
346    }
347}
348
349impl<R: Read + Unpin> EntryFields<R> {
350    pub fn from(entry: Entry<R>) -> Self {
351        entry.fields
352    }
353
354    pub fn into_entry(self) -> Entry<R> {
355        Entry {
356            fields: self,
357            _ignored: marker::PhantomData,
358        }
359    }
360
361    pub(crate) fn poll_read_all(
362        self: Pin<&mut Self>,
363        cx: &mut Context<'_>,
364        out: &mut Vec<u8>,
365    ) -> Poll<io::Result<()>> {
366        // Copied from futures::ReadToEnd
367        match poll_read_all_internal(self, cx, out) {
368            Poll::Ready(t) => Poll::Ready(t.map(|_| ())),
369            Poll::Pending => Poll::Pending,
370        }
371    }
372
373    pub async fn read_all(&mut self) -> io::Result<Vec<u8>> {
374        // Preallocate some data but don't let ourselves get too crazy now.
375        let cap = cmp::min(self.size, 128 * 1024);
376        let mut v = Vec::with_capacity(cap as usize);
377        self.read_to_end(&mut v).await.map(|_| v)
378    }
379
380    fn path(&self) -> io::Result<Cow<'_, Path>> {
381        bytes2path(self.path_bytes())
382    }
383
384    fn path_bytes(&self) -> Cow<'_, [u8]> {
385        match self.long_pathname {
386            Some(ref bytes) => {
387                if let Some(&0) = bytes.last() {
388                    Cow::Borrowed(&bytes[..bytes.len() - 1])
389                } else {
390                    Cow::Borrowed(bytes)
391                }
392            }
393            None => {
394                if let Some(ref pax) = self.pax_extensions {
395                    let pax = pax_extensions(pax)
396                        .filter_map(|f| f.ok())
397                        .find(|f| f.key_bytes() == b"path")
398                        .map(|f| f.value_bytes());
399                    if let Some(field) = pax {
400                        return Cow::Borrowed(field);
401                    }
402                }
403                self.header.path_bytes()
404            }
405        }
406    }
407
408    /// Gets the path in a "lossy" way, used for error reporting ONLY.
409    fn path_lossy(&self) -> String {
410        String::from_utf8_lossy(&self.path_bytes()).to_string()
411    }
412
413    fn link_name(&self) -> io::Result<Option<Cow<'_, Path>>> {
414        match self.link_name_bytes() {
415            Some(bytes) => bytes2path(bytes).map(Some),
416            None => Ok(None),
417        }
418    }
419
420    fn link_name_bytes(&self) -> Option<Cow<'_, [u8]>> {
421        match self.long_linkname {
422            Some(ref bytes) => {
423                if let Some(&0) = bytes.last() {
424                    Some(Cow::Borrowed(&bytes[..bytes.len() - 1]))
425                } else {
426                    Some(Cow::Borrowed(bytes))
427                }
428            }
429            None => {
430                if let Some(ref pax) = self.pax_extensions {
431                    let pax = pax_extensions(pax)
432                        .filter_map(|f| f.ok())
433                        .find(|f| f.key_bytes() == b"linkpath")
434                        .map(|f| f.value_bytes());
435                    if let Some(field) = pax {
436                        return Some(Cow::Borrowed(field));
437                    }
438                }
439                self.header.link_name_bytes()
440            }
441        }
442    }
443
444    async fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions<'_>>> {
445        if self.pax_extensions.is_none() {
446            if !self.header.entry_type().is_pax_global_extensions()
447                && !self.header.entry_type().is_pax_local_extensions()
448            {
449                return Ok(None);
450            }
451            self.pax_extensions = Some(self.read_all().await?);
452        }
453        Ok(Some(pax_extensions(self.pax_extensions.as_ref().unwrap())))
454    }
455
456    /// Unpack the [`Entry`] into the specified destination.
457    ///
458    /// It's assumed that `dst` is already canonicalized, and that the memoized set of validated
459    /// paths are tied to `dst`.
460    async fn unpack_in(
461        &mut self,
462        dst: &Path,
463        memo: &mut FxHashSet<PathBuf>,
464    ) -> io::Result<Option<PathBuf>> {
465        // It's assumed that `dst` is already canonicalized.
466        if cfg!(debug_assertions) {
467            let canon_target = dst.canonicalize()?;
468            assert_eq!(canon_target, dst, "Destination path must be canonicalized");
469        }
470
471        // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3:
472        // * Leading '/'s are trimmed. For example, `///test` is treated as
473        //   `test`.
474        // * If the filename contains '..', then the file is skipped when
475        //   extracting the tarball.
476        // * '//' within a filename is effectively skipped. An error is
477        //   logged, but otherwise the effect is as if any two or more
478        //   adjacent '/'s within the filename were consolidated into one
479        //   '/'.
480        //
481        // Most of this is handled by the `path` module of the standard
482        // library, but we specially handle a few cases here as well.
483
484        let mut file_dst = dst.to_path_buf();
485        {
486            let path = self.path().map_err(|e| {
487                TarError::new(
488                    format!("invalid path in entry header: {}", self.path_lossy()),
489                    e,
490                )
491            })?;
492            for part in path.components() {
493                match part {
494                    // Leading '/' characters, root paths, and '.'
495                    // components are just ignored and treated as "empty
496                    // components"
497                    Component::Prefix(..) | Component::RootDir | Component::CurDir => continue,
498
499                    // If any part of the filename is '..', then skip over
500                    // unpacking the file to prevent directory traversal
501                    // security issues.  See, e.g.: CVE-2001-1267,
502                    // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
503                    Component::ParentDir => return Ok(None),
504
505                    Component::Normal(part) => file_dst.push(part),
506                }
507            }
508        }
509
510        // Skip cases where only slashes or '.' parts were seen, because
511        // this is effectively an empty filename.
512        if *dst == *file_dst {
513            return Ok(None);
514        }
515
516        // Skip entries without a parent (i.e. outside of FS root)
517        let parent = match file_dst.parent() {
518            Some(p) => p,
519            None => return Ok(None),
520        };
521
522        // If the target is a link, clear the memoized set entirely. If we don't clear the set, then
523        // a malicious tarball could create a symlink to change the effective parent directory
524        // of an unpacked file _after_ it has been validated.
525        if self.header.entry_type().is_symlink() || self.header.entry_type().is_hard_link() {
526            memo.clear();
527        }
528
529        // Validate the parent, if we haven't seen it yet.
530        if !memo.contains(parent) {
531            self.ensure_dir_created(dst, parent).await.map_err(|e| {
532                TarError::new(format!("failed to create `{}`", parent.display()), e)
533            })?;
534            self.validate_inside_dst(dst, parent).await?;
535            memo.insert(parent.to_path_buf());
536        }
537
538        self.unpack(Some(dst), &file_dst)
539            .await
540            .map_err(|e| TarError::new(format!("failed to unpack `{}`", file_dst.display()), e))?;
541
542        Ok(Some(file_dst))
543    }
544
545    /// Unpack as destination directory `dst`.
546    async fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> {
547        // If the directory already exists just let it slide
548        match fs::create_dir(dst).await {
549            Ok(()) => Ok(()),
550            Err(err) => {
551                if err.kind() == ErrorKind::AlreadyExists {
552                    let prev = fs::metadata(dst).await;
553                    if prev.map(|m| m.is_dir()).unwrap_or(false) {
554                        return Ok(());
555                    }
556                }
557                Err(Error::new(
558                    err.kind(),
559                    format!("{} when creating dir {}", err, dst.display()),
560                ))
561            }
562        }
563    }
564
565    /// Returns access to the header of this entry in the archive.
566    async fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result<Unpacked> {
567        fn get_mtime(header: &Header) -> Option<FileTime> {
568            header.mtime().ok().map(|mtime| {
569                // For some more information on this see the comments in
570                // `Header::fill_platform_from`, but the general idea is that
571                // we're trying to avoid 0-mtime files coming out of archives
572                // since some tools don't ingest them well. Perhaps one day
573                // when Cargo stops working with 0-mtime archives we can remove
574                // this.
575                let mtime = if mtime == 0 { 1 } else { mtime };
576                FileTime::from_unix_time(mtime as i64, 0)
577            })
578        }
579
580        let kind = self.header.entry_type();
581
582        if kind.is_dir() {
583            self.unpack_dir(dst).await?;
584            if self.preserve_permissions {
585                if let Ok(mode) = self.header.mode() {
586                    set_perms(dst, None, mode).await?;
587                }
588            }
589            return Ok(Unpacked::Other);
590        } else if kind.is_hard_link() || kind.is_symlink() {
591            let link_name = match self.link_name()? {
592                Some(name) => name,
593                None => {
594                    return Err(other("hard link listed but no link name found"));
595                }
596            };
597
598            // Reject absolute paths entirely.
599            if !self.allow_external_symlinks && link_name.is_absolute() {
600                return Err(other(&format!(
601                    "symlink path `{}` is absolute, but external symlinks are not allowed",
602                    link_name.display()
603                )));
604            }
605
606            if link_name.iter().count() == 0 {
607                return Err(other(&format!(
608                    "symlink destination for {} is empty",
609                    link_name.display()
610                )));
611            }
612
613            if kind.is_hard_link() {
614                let link_src = match target_base {
615                    // If we're unpacking within a directory then ensure that
616                    // the destination of this hard link is both present and
617                    // inside our own directory. This is needed because we want
618                    // to make sure to not overwrite anything outside the root.
619                    //
620                    // Note that this logic is only needed for hard links
621                    // currently. With symlinks the `validate_inside_dst` which
622                    // happens before this method as part of `unpack_in` will
623                    // use canonicalization to ensure this guarantee. For hard
624                    // links though they're canonicalized to their existing path
625                    // so we need to validate at this time.
626                    Some(p) => {
627                        let link_src = p.join(link_name);
628                        self.validate_inside_dst(p, &link_src).await?;
629                        link_src
630                    }
631                    None => link_name.into_owned(),
632                };
633                fs::hard_link(&link_src, dst).await.map_err(|err| {
634                    Error::new(
635                        err.kind(),
636                        format!(
637                            "{} when hard linking {} to {}",
638                            err,
639                            link_src.display(),
640                            dst.display()
641                        ),
642                    )
643                })?;
644            } else {
645                let normalized_src = if self.allow_external_symlinks {
646                    // If external symlinks are allowed, use the source path as is.
647                    link_name
648                } else {
649                    // Ensure that we were able to normalize the path (e.g., `a/b/../c` to `a/c`).
650                    let Some(normalized_src) = normalize(&link_name) else {
651                        return Err(other(&format!(
652                            "symlink destination for {} is not a valid path",
653                            link_name.display()
654                        )));
655                    };
656
657                    // Join the normalized path with the parent of `dst`.
658                    let Some(absolute_normalized_path) = dst
659                        .parent()
660                        .map(|parent| parent.join(&normalized_src))
661                        .and_then(|path| normalize(&path))
662                    else {
663                        return Err(other(&format!(
664                            "symlink destination for {} lacks a parent path",
665                            link_name.display()
666                        )));
667                    };
668
669                    // If the normalized path points outside the target directory, reject it.
670                    if !target_base
671                        .is_some_and(|target| absolute_normalized_path.starts_with(target))
672                    {
673                        return Err(other(&format!(
674                            "symlink destination for {} is outside of the target directory",
675                            link_name.display()
676                        )));
677                    }
678
679                    Cow::Owned(normalized_src)
680                };
681
682                match symlink(&normalized_src, dst).await {
683                    Ok(()) => Ok(()),
684                    Err(err) => {
685                        if err.kind() == io::ErrorKind::AlreadyExists && self.overwrite {
686                            match remove_file(dst).await {
687                                Ok(()) => symlink(&normalized_src, dst).await,
688                                Err(ref e) if e.kind() == io::ErrorKind::NotFound => {
689                                    symlink(&normalized_src, dst).await
690                                }
691                                Err(e) => Err(e),
692                            }
693                        } else {
694                            Err(err)
695                        }
696                    }
697                }?;
698                if self.preserve_mtime {
699                    if let Some(mtime) = get_mtime(&self.header) {
700                        filetime::set_symlink_file_times(dst, mtime, mtime).map_err(|e| {
701                            TarError::new(format!("failed to set mtime for `{}`", dst.display()), e)
702                        })?;
703                    }
704                }
705            };
706            return Ok(Unpacked::Other);
707
708            #[cfg(target_arch = "wasm32")]
709            #[allow(unused_variables)]
710            async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
711                Err(io::Error::new(io::ErrorKind::Other, "Not implemented"))
712            }
713
714            #[cfg(windows)]
715            async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
716                let (src, dst) = (src.to_owned(), dst.to_owned());
717                tokio::task::spawn_blocking(|| std::os::windows::fs::symlink_file(src, dst))
718                    .await
719                    .unwrap()
720            }
721
722            #[cfg(unix)]
723            async fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
724                tokio::fs::symlink(src, dst).await
725            }
726        } else if kind.is_pax_global_extensions()
727            || kind.is_pax_local_extensions()
728            || kind.is_gnu_longname()
729            || kind.is_gnu_longlink()
730        {
731            return Ok(Unpacked::Other);
732        };
733
734        // Old BSD-tar compatibility.
735        // Names that have a trailing slash should be treated as a directory.
736        // Only applies to old headers.
737        if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/") {
738            self.unpack_dir(dst).await?;
739            if self.preserve_permissions {
740                if let Ok(mode) = self.header.mode() {
741                    set_perms(dst, None, mode).await?;
742                }
743            }
744            return Ok(Unpacked::Other);
745        }
746
747        // Note the lack of `else` clause above. According to the FreeBSD
748        // documentation:
749        //
750        // > A POSIX-compliant implementation must treat any unrecognized
751        // > typeflag value as a regular file.
752        //
753        // As a result if we don't recognize the kind we just write out the file
754        // as we would normally.
755
756        // Ensure we write a new file rather than overwriting in-place which
757        // is attackable; if an existing file is found unlink it.
758        async fn open(dst: &Path) -> io::Result<fs::File> {
759            OpenOptions::new()
760                .write(true)
761                .create_new(true)
762                .open(dst)
763                .await
764        }
765
766        let mut f = async {
767            let mut f = match open(dst).await {
768                Ok(f) => Ok(f),
769                Err(err) => {
770                    if err.kind() == ErrorKind::AlreadyExists && self.overwrite {
771                        match fs::remove_file(dst).await {
772                            Ok(()) => open(dst).await,
773                            Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst).await,
774                            Err(e) => Err(e),
775                        }
776                    } else {
777                        Err(err)
778                    }
779                }
780            }?;
781
782            let size = usize::try_from(self.size).unwrap_or(usize::MAX);
783            let capacity = cmp::min(size, 128 * 1024);
784            let mut writer = io::BufWriter::with_capacity(capacity, &mut f);
785            for io in self.data.drain(..) {
786                match io {
787                    EntryIo::Data(mut d) => {
788                        let expected = d.limit();
789                        if io::copy(&mut d, &mut writer).await? != expected {
790                            return Err(other("failed to write entire file"));
791                        }
792                    }
793                    EntryIo::Pad(d) => {
794                        // TODO: checked cast to i64
795                        let pad_len = d.limit() as i64;
796                        writer.flush().await?;
797                        let f = writer.get_mut();
798                        let new_size = f.seek(SeekFrom::Current(pad_len)).await?;
799                        f.set_len(new_size).await?;
800                    }
801                }
802            }
803            writer.flush().await?;
804            Ok::<fs::File, io::Error>(f)
805        }
806        .await
807        .map_err(|e| {
808            let header = self.header.path_bytes();
809            TarError::new(
810                format!(
811                    "failed to unpack `{}` into `{}`",
812                    String::from_utf8_lossy(&header),
813                    dst.display()
814                ),
815                e,
816            )
817        })?;
818
819        if self.preserve_mtime {
820            if let Some(mtime) = get_mtime(&self.header) {
821                filetime::set_file_times(dst, mtime, mtime).map_err(|e| {
822                    TarError::new(format!("failed to set mtime for `{}`", dst.display()), e)
823                })?;
824            }
825        }
826        if self.preserve_permissions {
827            if let Ok(mode) = self.header.mode() {
828                set_perms(dst, Some(&mut f), mode).await?;
829            }
830        }
831        if self.unpack_xattrs {
832            set_xattrs(self, dst).await?;
833        }
834        return Ok(Unpacked::File(f));
835
836        async fn set_perms(
837            dst: &Path,
838            f: Option<&mut fs::File>,
839            mode: u32,
840        ) -> Result<(), TarError> {
841            _set_perms(dst, f, mode).await.map_err(|e| {
842                TarError::new(
843                    format!(
844                        "failed to set permissions to {:o} \
845                         for `{}`",
846                        mode,
847                        dst.display()
848                    ),
849                    e,
850                )
851            })
852        }
853
854        #[cfg(unix)]
855        async fn _set_perms(dst: &Path, f: Option<&mut fs::File>, mode: u32) -> io::Result<()> {
856            use std::os::unix::prelude::*;
857
858            let perm = std::fs::Permissions::from_mode(mode as _);
859            match f {
860                Some(f) => f.set_permissions(perm).await,
861                None => fs::set_permissions(dst, perm).await,
862            }
863        }
864
865        #[cfg(windows)]
866        async fn _set_perms(dst: &Path, f: Option<&mut fs::File>, mode: u32) -> io::Result<()> {
867            if mode & 0o200 == 0o200 {
868                return Ok(());
869            }
870            match f {
871                Some(f) => {
872                    let mut perm = f.metadata().await?.permissions();
873                    perm.set_readonly(true);
874                    f.set_permissions(perm).await
875                }
876                None => {
877                    let mut perm = fs::metadata(dst).await?.permissions();
878                    perm.set_readonly(true);
879                    fs::set_permissions(dst, perm).await
880                }
881            }
882        }
883
884        #[cfg(target_arch = "wasm32")]
885        #[allow(unused_variables)]
886        async fn _set_perms(dst: &Path, f: Option<&mut fs::File>, mode: u32) -> io::Result<()> {
887            Err(io::Error::new(io::ErrorKind::Other, "Not implemented"))
888        }
889
890        #[cfg(all(unix, feature = "xattr"))]
891        async fn set_xattrs<R: Read + Unpin>(
892            me: &mut EntryFields<R>,
893            dst: &Path,
894        ) -> io::Result<()> {
895            use std::{ffi::OsStr, os::unix::prelude::*};
896
897            let exts = match me.pax_extensions().await {
898                Ok(Some(e)) => e,
899                _ => return Ok(()),
900            };
901            let exts = exts
902                .filter_map(|e| e.ok())
903                .filter_map(|e| {
904                    let key = e.key_bytes();
905                    let prefix = b"SCHILY.xattr.";
906                    key.strip_prefix(prefix).map(|rest| (rest, e))
907                })
908                .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes()));
909
910            for (key, value) in exts {
911                xattr::set(dst, key, value).map_err(|e| {
912                    TarError::new(
913                        format!(
914                            "failed to set extended \
915                             attributes to {}. \
916                             Xattrs: key={:?}, value={:?}.",
917                            dst.display(),
918                            key,
919                            String::from_utf8_lossy(value)
920                        ),
921                        e,
922                    )
923                })?;
924            }
925
926            Ok(())
927        }
928        // Windows does not completely support posix xattrs
929        // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT
930        #[cfg(any(windows, not(feature = "xattr"), target_arch = "wasm32"))]
931        async fn set_xattrs<R: Read + Unpin>(_: &mut EntryFields<R>, _: &Path) -> io::Result<()> {
932            Ok(())
933        }
934    }
935
936    async fn ensure_dir_created(&self, dst: &Path, dir: &Path) -> io::Result<()> {
937        let mut ancestor = dir;
938        let mut dirs_to_create = Vec::new();
939        while tokio::fs::symlink_metadata(ancestor).await.is_err() {
940            dirs_to_create.push(ancestor);
941            if let Some(parent) = ancestor.parent() {
942                ancestor = parent;
943            } else {
944                break;
945            }
946        }
947        for ancestor in dirs_to_create.into_iter().rev() {
948            if let Some(parent) = ancestor.parent() {
949                self.validate_inside_dst(dst, parent).await?;
950            }
951            fs::create_dir_all(ancestor).await?;
952        }
953        Ok(())
954    }
955
956    async fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<()> {
957        // Abort if target (canonical) parent is outside of `dst`
958        let canon_parent = file_dst.canonicalize().map_err(|err| {
959            Error::new(
960                err.kind(),
961                format!("{} while canonicalizing {}", err, file_dst.display()),
962            )
963        })?;
964        if !canon_parent.starts_with(dst) {
965            let err = TarError::new(
966                format!(
967                    "trying to unpack outside of destination path: {}",
968                    dst.display()
969                ),
970                // TODO: use ErrorKind::InvalidInput here? (minor breaking change)
971                Error::other("Invalid argument"),
972            );
973            return Err(err.into());
974        }
975        Ok(())
976    }
977}
978
979impl<R: Read + Unpin> Read for EntryFields<R> {
980    fn poll_read(
981        self: Pin<&mut Self>,
982        cx: &mut Context<'_>,
983        into: &mut io::ReadBuf<'_>,
984    ) -> Poll<io::Result<()>> {
985        let this = self.get_mut();
986        loop {
987            if this.read_state.is_none() {
988                this.read_state = this.data.pop_front();
989            }
990
991            if let Some(ref mut io) = &mut this.read_state {
992                let start = into.filled().len();
993                let ret = Pin::new(io).poll_read(cx, into);
994                match ret {
995                    Poll::Ready(Ok(())) if into.filled().len() == start => {
996                        this.read_state = None;
997                        if this.data.is_empty() {
998                            return Poll::Ready(Ok(()));
999                        }
1000                        continue;
1001                    }
1002                    Poll::Ready(Ok(())) => {
1003                        return Poll::Ready(Ok(()));
1004                    }
1005                    Poll::Ready(Err(err)) => {
1006                        return Poll::Ready(Err(err));
1007                    }
1008                    Poll::Pending => {
1009                        return Poll::Pending;
1010                    }
1011                }
1012            } else {
1013                // Unable to pull another value from `data`, so we are done.
1014                return Poll::Ready(Ok(()));
1015            }
1016        }
1017    }
1018}
1019
1020impl<R: Read + Unpin> Read for EntryIo<R> {
1021    fn poll_read(
1022        self: Pin<&mut Self>,
1023        cx: &mut Context<'_>,
1024        into: &mut io::ReadBuf<'_>,
1025    ) -> Poll<io::Result<()>> {
1026        match self.get_mut() {
1027            EntryIo::Pad(ref mut io) => Pin::new(io).poll_read(cx, into),
1028            EntryIo::Data(ref mut io) => Pin::new(io).poll_read(cx, into),
1029        }
1030    }
1031}
1032
/// Drop guard that trims a byte buffer back to its valid length.
///
/// `poll_read_all_internal` temporarily grows `buf` to its full capacity so it
/// can be handed out as read scratch space; when the guard is dropped the
/// buffer is shortened so that only the first `len` bytes remain.
struct Guard<'a> {
    /// Buffer being filled.
    buf: &'a mut Vec<u8>,
    /// Number of leading bytes in `buf` that hold real data.
    len: usize,
}

impl Drop for Guard<'_> {
    fn drop(&mut self) {
        // `len` starts at `buf.len()` and only advances by bytes written into
        // `buf`, so it never exceeds the buffer length. Shrinking is therefore
        // all that is ever needed, and the safe `truncate` replaces the former
        // `unsafe { set_len(..) }`.
        self.buf.truncate(self.len);
    }
}
1045
1046fn poll_read_all_internal<R: Read + ?Sized>(
1047    mut rd: Pin<&mut R>,
1048    cx: &mut Context<'_>,
1049    buf: &mut Vec<u8>,
1050) -> Poll<io::Result<usize>> {
1051    let mut g = Guard {
1052        len: buf.len(),
1053        buf,
1054    };
1055    let ret;
1056    loop {
1057        if g.len == g.buf.len() {
1058            unsafe {
1059                g.buf.reserve(32);
1060                let capacity = g.buf.capacity();
1061                g.buf.set_len(capacity);
1062
1063                let buf = &mut g.buf[g.len..];
1064                std::ptr::write_bytes(buf.as_mut_ptr(), 0, buf.len());
1065            }
1066        }
1067
1068        let mut read_buf = io::ReadBuf::new(&mut g.buf[g.len..]);
1069        match futures_core::ready!(rd.as_mut().poll_read(cx, &mut read_buf)) {
1070            Ok(()) if read_buf.filled().is_empty() => {
1071                ret = Poll::Ready(Ok(g.len));
1072                break;
1073            }
1074            Ok(()) => g.len += read_buf.filled().len(),
1075            Err(e) => {
1076                ret = Poll::Ready(Err(e));
1077                break;
1078            }
1079        }
1080    }
1081
1082    ret
1083}