tokio_tar/
header.rs

1#[cfg(unix)]
2use std::os::unix::prelude::*;
3#[cfg(windows)]
4use std::os::windows::prelude::*;
5
6use std::{
7    borrow::Cow,
8    fmt,
9    fs::Metadata,
10    iter,
11    iter::{once, repeat},
12    mem,
13    path::{Component, Path, PathBuf},
14    str,
15};
16use tokio::io;
17
18use crate::{other, EntryType};
19
/// A deterministic, arbitrary, non-zero timestamp that is used as the `mtime`
/// of headers when [`HeaderMode::Deterministic`] is used.
///
/// This value, chosen after careful deliberation, corresponds to _Jul 23, 2006_,
/// which is the date of the first commit for what would become Rust.
///
/// Only compiled on unix and windows, the two platforms whose
/// `fill_platform_from` implementations reference it.
#[cfg(any(unix, windows))]
const DETERMINISTIC_TIMESTAMP: u64 = 1153704088;
27
/// Size in bytes of one header block; [`Header`] stores exactly this many
/// bytes.
pub(crate) const BLOCK_SIZE: u64 = 512;
29
/// Representation of the header of an entry in an archive.
///
/// The header is kept as an opaque block of `BLOCK_SIZE` (512) bytes. The
/// typed views ([`OldHeader`], [`UstarHeader`], [`GnuHeader`]) are obtained by
/// reinterpreting these bytes through the `as_*` methods.
#[repr(C)]
#[allow(missing_docs)]
pub struct Header {
    // Raw contents of the header block.
    bytes: [u8; BLOCK_SIZE as usize],
}
36
/// Declares the information that should be included when filling a Header
/// from filesystem metadata.
///
/// Passed to [`Header::set_metadata_in_mode`]; [`Header::set_metadata`] always
/// uses [`HeaderMode::Complete`].
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum HeaderMode {
    /// All supported metadata, including the modification time and ownership
    /// (as far as the platform-specific fill code provides them), will be
    /// included.
    Complete,

    /// Only metadata that is directly relevant to the identity of a file will
    /// be included. In particular, ownership and the real modification time
    /// are excluded.
    ///
    /// The platform-specific fill code in this file writes `0` for uid and
    /// gid, a fixed constant for the mtime, and a mode of either `0o755` or
    /// `0o644`.
    Deterministic,
}
50
/// Raw field layout of the original ("old") archive header format.
///
/// Every field is a fixed-size byte array and the sizes add up to 512 bytes,
/// the same size as [`Header`]. Any header can be viewed through this layout
/// with [`Header::as_old`] / [`Header::as_old_mut`].
#[repr(C)]
#[allow(missing_docs)]
pub struct OldHeader {
    /// Path name bytes; `Header::path_bytes` reads this for non-UStar headers.
    pub name: [u8; 100],
    /// File mode; decoded as octal by `Header::mode`.
    pub mode: [u8; 8],
    /// Owner user ID; decoded by `Header::uid`.
    pub uid: [u8; 8],
    /// Owner group ID; decoded by `Header::gid`.
    pub gid: [u8; 8],
    /// Size of the entry data; decoded by `Header::entry_size`.
    pub size: [u8; 12],
    /// Modification time; decoded by `Header::mtime`.
    pub mtime: [u8; 12],
    /// Header checksum; decoded as octal by `Header::cksum`.
    pub cksum: [u8; 8],
    /// Entry type byte; interpreted by `Header::entry_type`.
    pub linkflag: [u8; 1],
    /// Link name bytes; a leading zero byte means "no link name"
    /// (see `Header::link_name_bytes`).
    pub linkname: [u8; 100],
    /// Remainder of the 512-byte block.
    pub pad: [u8; 255],
}
66
/// Raw field layout of a UStar archive header.
///
/// The first nine fields mirror [`OldHeader`]; the UStar-specific fields
/// follow. All fields are byte arrays and the sizes add up to 512 bytes.
/// Obtain this view with [`Header::as_ustar`], which checks `magic` and
/// `version` first.
#[repr(C)]
#[allow(missing_docs)]
pub struct UstarHeader {
    pub name: [u8; 100],
    pub mode: [u8; 8],
    pub uid: [u8; 8],
    pub gid: [u8; 8],
    pub size: [u8; 12],
    pub mtime: [u8; 12],
    pub cksum: [u8; 8],
    pub typeflag: [u8; 1],
    pub linkname: [u8; 100],

    // UStar format
    /// `b"ustar\0"` for headers created by `Header::new_ustar`.
    pub magic: [u8; 6],
    /// `b"00"` for headers created by `Header::new_ustar`.
    pub version: [u8; 2],
    pub uname: [u8; 32],
    pub gname: [u8; 32],
    pub dev_major: [u8; 8],
    pub dev_minor: [u8; 8],
    pub prefix: [u8; 155],
    /// Remainder of the 512-byte block.
    pub pad: [u8; 12],
}
91
/// Raw field layout of a GNU archive header.
///
/// The first nine fields mirror [`OldHeader`]; the GNU-specific fields
/// follow. All fields are byte arrays (or arrays of [`GnuSparseHeader`]) and
/// the sizes add up to 512 bytes. Obtain this view with [`Header::as_gnu`],
/// which checks `magic` and `version` first.
#[repr(C)]
#[allow(missing_docs)]
pub struct GnuHeader {
    pub name: [u8; 100],
    pub mode: [u8; 8],
    pub uid: [u8; 8],
    pub gid: [u8; 8],
    pub size: [u8; 12],
    pub mtime: [u8; 12],
    pub cksum: [u8; 8],
    pub typeflag: [u8; 1],
    pub linkname: [u8; 100],

    // GNU format
    /// `b"ustar "` for headers created by `Header::new_gnu`.
    pub magic: [u8; 6],
    /// `b" \0"` for headers created by `Header::new_gnu`.
    pub version: [u8; 2],
    pub uname: [u8; 32],
    pub gname: [u8; 32],
    pub dev_major: [u8; 8],
    pub dev_minor: [u8; 8],
    pub atime: [u8; 12],
    pub ctime: [u8; 12],
    pub offset: [u8; 12],
    pub longnames: [u8; 4],
    pub unused: [u8; 1],
    /// Up to four sparse chunk descriptors stored inline in the header.
    pub sparse: [GnuSparseHeader; 4],
    /// See [`GnuExtSparseHeader`] for the meaning of a set flag.
    pub isextended: [u8; 1],
    // NOTE(review): presumably what `real_size()` (used by `Header::size` for
    // sparse entries) decodes; that method is not visible here, so confirm.
    pub realsize: [u8; 12],
    /// Remainder of the 512-byte block.
    pub pad: [u8; 17],
}
123
/// Description of the header of a sparse entry.
///
/// Specifies the offset/number of bytes of a chunk of data in octal.
/// Each descriptor occupies 24 bytes.
#[repr(C)]
#[allow(missing_docs)]
pub struct GnuSparseHeader {
    /// Offset of the chunk.
    pub offset: [u8; 12],
    /// Number of bytes in the chunk.
    pub numbytes: [u8; 12],
}
133
/// Representation of the entry found to represent extended GNU sparse files.
///
/// When a `GnuHeader` has the `isextended` flag set to `1` then the contents of
/// the next entry will be one of these headers.
///
/// The field sizes add up to 512 bytes (21 descriptors of 24 bytes, one flag
/// byte and seven padding bytes).
#[repr(C)]
#[allow(missing_docs)]
pub struct GnuExtSparseHeader {
    /// Further sparse chunk descriptors.
    pub sparse: [GnuSparseHeader; 21],
    /// Continuation flag, analogous to `GnuHeader::isextended`.
    pub isextended: [u8; 1],
    /// Remainder of the 512-byte block.
    pub padding: [u8; 7],
}
145
146impl Header {
147    /// Creates a new blank GNU header.
148    ///
149    /// The GNU style header is the default for this library and allows various
150    /// extensions such as long path names, long link names, and setting the
151    /// atime/ctime metadata attributes of files.
152    pub fn new_gnu() -> Header {
153        let mut header = Header {
154            bytes: [0; BLOCK_SIZE as usize],
155        };
156        unsafe {
157            let gnu = cast_mut::<_, GnuHeader>(&mut header);
158            gnu.magic = *b"ustar ";
159            gnu.version = *b" \0";
160        }
161        header.set_mtime(0);
162        header
163    }
164
165    /// Creates a new blank UStar header.
166    ///
167    /// The UStar style header is an extension of the original archive header
168    /// which enables some extra metadata along with storing a longer (but not
169    /// too long) path name.
170    ///
171    /// UStar is also the basis used for pax archives.
172    pub fn new_ustar() -> Header {
173        let mut header = Header {
174            bytes: [0; BLOCK_SIZE as usize],
175        };
176        unsafe {
177            let gnu = cast_mut::<_, UstarHeader>(&mut header);
178            gnu.magic = *b"ustar\0";
179            gnu.version = *b"00";
180        }
181        header.set_mtime(0);
182        header
183    }
184
185    /// Creates a new blank old header.
186    ///
187    /// This header format is the original archive header format which all other
188    /// versions are compatible with (e.g. they are a superset). This header
189    /// format limits the path name limit and isn't able to contain extra
190    /// metadata like atime/ctime.
191    pub fn new_old() -> Header {
192        let mut header = Header {
193            bytes: [0; BLOCK_SIZE as usize],
194        };
195        header.set_mtime(0);
196        header
197    }
198
199    fn is_ustar(&self) -> bool {
200        let ustar = unsafe { cast::<_, UstarHeader>(self) };
201        ustar.magic[..] == b"ustar\0"[..] && ustar.version[..] == b"00"[..]
202    }
203
204    fn is_gnu(&self) -> bool {
205        let ustar = unsafe { cast::<_, UstarHeader>(self) };
206        ustar.magic[..] == b"ustar "[..] && ustar.version[..] == b" \0"[..]
207    }
208
209    /// View this archive header as a raw "old" archive header.
210    ///
211    /// This view will always succeed as all archive header formats will fill
212    /// out at least the fields specified in the old header format.
213    pub fn as_old(&self) -> &OldHeader {
214        unsafe { cast(self) }
215    }
216
217    /// Same as `as_old`, but the mutable version.
218    pub fn as_old_mut(&mut self) -> &mut OldHeader {
219        unsafe { cast_mut(self) }
220    }
221
222    /// View this archive header as a raw UStar archive header.
223    ///
224    /// The UStar format is an extension to the tar archive format which enables
225    /// longer pathnames and a few extra attributes such as the group and user
226    /// name.
227    ///
228    /// This cast may not succeed as this function will test whether the
229    /// magic/version fields of the UStar format have the appropriate values,
230    /// returning `None` if they aren't correct.
231    pub fn as_ustar(&self) -> Option<&UstarHeader> {
232        if self.is_ustar() {
233            Some(unsafe { cast(self) })
234        } else {
235            None
236        }
237    }
238
239    /// Same as `as_ustar_mut`, but the mutable version.
240    pub fn as_ustar_mut(&mut self) -> Option<&mut UstarHeader> {
241        if self.is_ustar() {
242            Some(unsafe { cast_mut(self) })
243        } else {
244            None
245        }
246    }
247
248    /// View this archive header as a raw GNU archive header.
249    ///
250    /// The GNU format is an extension to the tar archive format which enables
251    /// longer pathnames and a few extra attributes such as the group and user
252    /// name.
253    ///
254    /// This cast may not succeed as this function will test whether the
255    /// magic/version fields of the GNU format have the appropriate values,
256    /// returning `None` if they aren't correct.
257    pub fn as_gnu(&self) -> Option<&GnuHeader> {
258        if self.is_gnu() {
259            Some(unsafe { cast(self) })
260        } else {
261            None
262        }
263    }
264
265    /// Same as `as_gnu`, but the mutable version.
266    pub fn as_gnu_mut(&mut self) -> Option<&mut GnuHeader> {
267        if self.is_gnu() {
268            Some(unsafe { cast_mut(self) })
269        } else {
270            None
271        }
272    }
273
274    /// Treats the given byte slice as a header.
275    ///
276    /// Panics if the length of the passed slice is not equal to 512.
277    pub fn from_byte_slice(bytes: &[u8]) -> &Header {
278        assert_eq!(bytes.len(), mem::size_of::<Header>());
279        assert_eq!(mem::align_of_val(bytes), mem::align_of::<Header>());
280        unsafe { &*(bytes.as_ptr() as *const Header) }
281    }
282
283    /// Returns a view into this header as a byte array.
284    pub fn as_bytes(&self) -> &[u8; BLOCK_SIZE as usize] {
285        &self.bytes
286    }
287
288    /// Returns a view into this header as a byte array.
289    pub fn as_mut_bytes(&mut self) -> &mut [u8; BLOCK_SIZE as usize] {
290        &mut self.bytes
291    }
292
293    /// Blanket sets the metadata in this header from the metadata argument
294    /// provided.
295    ///
296    /// This is useful for initializing a `Header` from the OS's metadata from a
297    /// file. By default, this will use `HeaderMode::Complete` to include all
298    /// metadata.
299    pub fn set_metadata(&mut self, meta: &Metadata) {
300        self.fill_from(meta, HeaderMode::Complete);
301    }
302
303    /// Sets only the metadata relevant to the given HeaderMode in this header
304    /// from the metadata argument provided.
305    pub fn set_metadata_in_mode(&mut self, meta: &Metadata, mode: HeaderMode) {
306        self.fill_from(meta, mode);
307    }
308
309    /// Returns the size of entry's data this header represents.
310    ///
311    /// This is different from `Header::size` for sparse files, which have
312    /// some longer `size()` but shorter `entry_size()`. The `entry_size()`
313    /// listed here should be the number of bytes in the archive this header
314    /// describes.
315    ///
316    /// May return an error if the field is corrupted.
317    pub fn entry_size(&self) -> io::Result<u64> {
318        num_field_wrapper_from(&self.as_old().size).map_err(|err| {
319            io::Error::new(
320                err.kind(),
321                format!("{} when getting size for {}", err, self.path_lossy()),
322            )
323        })
324    }
325
326    /// Returns the file size this header represents.
327    ///
328    /// May return an error if the field is corrupted.
329    pub fn size(&self) -> io::Result<u64> {
330        if self.entry_type().is_gnu_sparse() {
331            self.as_gnu()
332                .ok_or_else(|| other("sparse header was not a gnu header"))
333                .and_then(|h| h.real_size())
334        } else {
335            self.entry_size()
336        }
337    }
338
339    /// Encodes the `size` argument into the size field of this header.
340    pub fn set_size(&mut self, size: u64) {
341        num_field_wrapper_into(&mut self.as_old_mut().size, size);
342    }
343
344    /// Returns the raw path name stored in this header.
345    ///
346    /// This method may fail if the pathname is not valid Unicode and this is
347    /// called on a Windows platform.
348    ///
349    /// Note that this function will convert any `\` characters to directory
350    /// separators.
351    pub fn path(&self) -> io::Result<Cow<'_, Path>> {
352        bytes2path(self.path_bytes())
353    }
354
355    /// Returns the pathname stored in this header as a byte array.
356    ///
357    /// This function is guaranteed to succeed, but you may wish to call the
358    /// `path` method to convert to a `Path`.
359    ///
360    /// Note that this function will convert any `\` characters to directory
361    /// separators.
362    pub fn path_bytes(&self) -> Cow<'_, [u8]> {
363        if let Some(ustar) = self.as_ustar() {
364            ustar.path_bytes()
365        } else {
366            let name = truncate(&self.as_old().name);
367            Cow::Borrowed(name)
368        }
369    }
370
371    /// Gets the path in a "lossy" way, used for error reporting ONLY.
372    fn path_lossy(&self) -> String {
373        String::from_utf8_lossy(&self.path_bytes()).to_string()
374    }
375
376    /// Sets the path name for this header.
377    ///
378    /// This function will set the pathname listed in this header, encoding it
379    /// in the appropriate format. May fail if the path is too long or if the
380    /// path specified is not Unicode and this is a Windows platform. Will
381    /// strip out any "." path component, which signifies the current directory.
382    ///
383    /// Note: This function does not support names over 100 bytes, or paths
384    /// over 255 bytes, even for formats that support longer names. Instead,
385    /// use `Builder` methods to insert a long-name extension at the same time
386    /// as the file content.
387    pub fn set_path<P: AsRef<Path>>(&mut self, p: P) -> io::Result<()> {
388        self.set_path_inner(p.as_ref(), false)
389    }
390
391    // Sets the truncated path for GNU header
392    //
393    // Same as set_path but skips some validations.
394    pub(crate) fn set_truncated_path_for_gnu_header<P: AsRef<Path>>(
395        &mut self,
396        p: P,
397    ) -> std::io::Result<()> {
398        self.set_path_inner(p.as_ref(), true)
399    }
400
401    fn set_path_inner(
402        &mut self,
403        path: &Path,
404        is_truncated_gnu_long_path: bool,
405    ) -> std::io::Result<()> {
406        if let Some(ustar) = self.as_ustar_mut() {
407            return ustar.set_path(path);
408        }
409        if is_truncated_gnu_long_path {
410            copy_path_into_gnu_long(&mut self.as_old_mut().name, path, false)
411        } else {
412            copy_path_into(&mut self.as_old_mut().name, path, false)
413        }
414        .map_err(|err| {
415            io::Error::new(
416                err.kind(),
417                format!("{} when setting path for {}", err, self.path_lossy()),
418            )
419        })
420    }
421
422    /// Returns the link name stored in this header, if any is found.
423    ///
424    /// This method may fail if the pathname is not valid Unicode and this is
425    /// called on a Windows platform. `Ok(None)` being returned, however,
426    /// indicates that the link name was not present.
427    ///
428    /// Note that this function will convert any `\` characters to directory
429    /// separators.
430    pub fn link_name(&self) -> io::Result<Option<Cow<'_, Path>>> {
431        match self.link_name_bytes() {
432            Some(bytes) => bytes2path(bytes).map(Some),
433            None => Ok(None),
434        }
435    }
436
437    /// Returns the link name stored in this header as a byte array, if any.
438    ///
439    /// This function is guaranteed to succeed, but you may wish to call the
440    /// `link_name` method to convert to a `Path`.
441    ///
442    /// Note that this function will convert any `\` characters to directory
443    /// separators.
444    pub fn link_name_bytes(&self) -> Option<Cow<'_, [u8]>> {
445        let old = self.as_old();
446        if old.linkname[0] != 0 {
447            Some(Cow::Borrowed(truncate(&old.linkname)))
448        } else {
449            None
450        }
451    }
452
453    /// Sets the link name for this header.
454    ///
455    /// This function will set the linkname listed in this header, encoding it
456    /// in the appropriate format. May fail if the link name is too long or if
457    /// the path specified is not Unicode and this is a Windows platform. Will
458    /// strip out any "." path component, which signifies the current directory.
459    pub fn set_link_name<P: AsRef<Path>>(&mut self, p: P) -> io::Result<()> {
460        self._set_link_name(p.as_ref())
461    }
462
463    fn _set_link_name(&mut self, path: &Path) -> io::Result<()> {
464        copy_path_into(&mut self.as_old_mut().linkname, path, true).map_err(|err| {
465            io::Error::new(
466                err.kind(),
467                format!("{} when setting link name for {}", err, self.path_lossy()),
468            )
469        })
470    }
471
472    /// Returns the mode bits for this file
473    ///
474    /// May return an error if the field is corrupted.
475    pub fn mode(&self) -> io::Result<u32> {
476        octal_from(&self.as_old().mode)
477            .map(|u| u as u32)
478            .map_err(|err| {
479                io::Error::new(
480                    err.kind(),
481                    format!("{} when getting mode for {}", err, self.path_lossy()),
482                )
483            })
484    }
485
486    /// Encodes the `mode` provided into this header.
487    pub fn set_mode(&mut self, mode: u32) {
488        octal_into(&mut self.as_old_mut().mode, mode);
489    }
490
491    /// Returns the value of the owner's user ID field
492    ///
493    /// May return an error if the field is corrupted.
494    pub fn uid(&self) -> io::Result<u64> {
495        num_field_wrapper_from(&self.as_old().uid).map_err(|err| {
496            io::Error::new(
497                err.kind(),
498                format!("{} when getting uid for {}", err, self.path_lossy()),
499            )
500        })
501    }
502
503    /// Encodes the `uid` provided into this header.
504    pub fn set_uid(&mut self, uid: u64) {
505        num_field_wrapper_into(&mut self.as_old_mut().uid, uid);
506    }
507
508    /// Returns the value of the group's user ID field
509    pub fn gid(&self) -> io::Result<u64> {
510        num_field_wrapper_from(&self.as_old().gid).map_err(|err| {
511            io::Error::new(
512                err.kind(),
513                format!("{} when getting gid for {}", err, self.path_lossy()),
514            )
515        })
516    }
517
518    /// Encodes the `gid` provided into this header.
519    pub fn set_gid(&mut self, gid: u64) {
520        num_field_wrapper_into(&mut self.as_old_mut().gid, gid);
521    }
522
523    /// Returns the last modification time in Unix time format
524    pub fn mtime(&self) -> io::Result<u64> {
525        num_field_wrapper_from(&self.as_old().mtime).map_err(|err| {
526            io::Error::new(
527                err.kind(),
528                format!("{} when getting mtime for {}", err, self.path_lossy()),
529            )
530        })
531    }
532
533    /// Encodes the `mtime` provided into this header.
534    ///
535    /// Note that this time is typically a number of seconds passed since
536    /// January 1, 1970.
537    pub fn set_mtime(&mut self, mtime: u64) {
538        num_field_wrapper_into(&mut self.as_old_mut().mtime, mtime);
539    }
540
541    /// Return the user name of the owner of this file.
542    ///
543    /// A return value of `Ok(Some(..))` indicates that the user name was
544    /// present and was valid utf-8, `Ok(None)` indicates that the user name is
545    /// not present in this archive format, and `Err` indicates that the user
546    /// name was present but was not valid utf-8.
547    pub fn username(&self) -> Result<Option<&str>, str::Utf8Error> {
548        match self.username_bytes() {
549            Some(bytes) => str::from_utf8(bytes).map(Some),
550            None => Ok(None),
551        }
552    }
553
554    /// Returns the user name of the owner of this file, if present.
555    ///
556    /// A return value of `None` indicates that the user name is not present in
557    /// this header format.
558    #[allow(clippy::manual_map)]
559    pub fn username_bytes(&self) -> Option<&[u8]> {
560        if let Some(ustar) = self.as_ustar() {
561            Some(ustar.username_bytes())
562        } else if let Some(gnu) = self.as_gnu() {
563            Some(gnu.username_bytes())
564        } else {
565            None
566        }
567    }
568
569    /// Sets the username inside this header.
570    ///
571    /// This function will return an error if this header format cannot encode a
572    /// user name or the name is too long.
573    pub fn set_username(&mut self, name: &str) -> io::Result<()> {
574        if let Some(ustar) = self.as_ustar_mut() {
575            return ustar.set_username(name);
576        }
577        if let Some(gnu) = self.as_gnu_mut() {
578            gnu.set_username(name)
579        } else {
580            Err(other("not a ustar or gnu archive, cannot set username"))
581        }
582    }
583
584    /// Return the group name of the owner of this file.
585    ///
586    /// A return value of `Ok(Some(..))` indicates that the group name was
587    /// present and was valid utf-8, `Ok(None)` indicates that the group name is
588    /// not present in this archive format, and `Err` indicates that the group
589    /// name was present but was not valid utf-8.
590    pub fn groupname(&self) -> Result<Option<&str>, str::Utf8Error> {
591        match self.groupname_bytes() {
592            Some(bytes) => str::from_utf8(bytes).map(Some),
593            None => Ok(None),
594        }
595    }
596
597    /// Returns the group name of the owner of this file, if present.
598    ///
599    /// A return value of `None` indicates that the group name is not present in
600    /// this header format.
601    #[allow(clippy::manual_map)]
602    pub fn groupname_bytes(&self) -> Option<&[u8]> {
603        if let Some(ustar) = self.as_ustar() {
604            Some(ustar.groupname_bytes())
605        } else if let Some(gnu) = self.as_gnu() {
606            Some(gnu.groupname_bytes())
607        } else {
608            None
609        }
610    }
611
612    /// Sets the group name inside this header.
613    ///
614    /// This function will return an error if this header format cannot encode a
615    /// group name or the name is too long.
616    pub fn set_groupname(&mut self, name: &str) -> io::Result<()> {
617        if let Some(ustar) = self.as_ustar_mut() {
618            return ustar.set_groupname(name);
619        }
620        if let Some(gnu) = self.as_gnu_mut() {
621            gnu.set_groupname(name)
622        } else {
623            Err(other("not a ustar or gnu archive, cannot set groupname"))
624        }
625    }
626
627    /// Returns the device major number, if present.
628    ///
629    /// This field may not be present in all archives, and it may not be
630    /// correctly formed in all archives. `Ok(Some(..))` means it was present
631    /// and correctly decoded, `Ok(None)` indicates that this header format does
632    /// not include the device major number, and `Err` indicates that it was
633    /// present and failed to decode.
634    pub fn device_major(&self) -> io::Result<Option<u32>> {
635        if let Some(ustar) = self.as_ustar() {
636            ustar.device_major().map(Some)
637        } else if let Some(gnu) = self.as_gnu() {
638            gnu.device_major().map(Some)
639        } else {
640            Ok(None)
641        }
642    }
643
644    /// Encodes the value `major` into the dev_major field of this header.
645    ///
646    /// This function will return an error if this header format cannot encode a
647    /// major device number.
648    pub fn set_device_major(&mut self, major: u32) -> io::Result<()> {
649        if let Some(ustar) = self.as_ustar_mut() {
650            ustar.set_device_major(major);
651            Ok(())
652        } else if let Some(gnu) = self.as_gnu_mut() {
653            gnu.set_device_major(major);
654            Ok(())
655        } else {
656            Err(other("not a ustar or gnu archive, cannot set dev_major"))
657        }
658    }
659
660    /// Returns the device minor number, if present.
661    ///
662    /// This field may not be present in all archives, and it may not be
663    /// correctly formed in all archives. `Ok(Some(..))` means it was present
664    /// and correctly decoded, `Ok(None)` indicates that this header format does
665    /// not include the device minor number, and `Err` indicates that it was
666    /// present and failed to decode.
667    pub fn device_minor(&self) -> io::Result<Option<u32>> {
668        if let Some(ustar) = self.as_ustar() {
669            ustar.device_minor().map(Some)
670        } else if let Some(gnu) = self.as_gnu() {
671            gnu.device_minor().map(Some)
672        } else {
673            Ok(None)
674        }
675    }
676
677    /// Encodes the value `minor` into the dev_minor field of this header.
678    ///
679    /// This function will return an error if this header format cannot encode a
680    /// minor device number.
681    pub fn set_device_minor(&mut self, minor: u32) -> io::Result<()> {
682        if let Some(ustar) = self.as_ustar_mut() {
683            ustar.set_device_minor(minor);
684            Ok(())
685        } else if let Some(gnu) = self.as_gnu_mut() {
686            gnu.set_device_minor(minor);
687            Ok(())
688        } else {
689            Err(other("not a ustar or gnu archive, cannot set dev_minor"))
690        }
691    }
692
693    /// Returns the type of file described by this header.
694    pub fn entry_type(&self) -> EntryType {
695        EntryType::new(self.as_old().linkflag[0])
696    }
697
698    /// Sets the type of file that will be described by this header.
699    pub fn set_entry_type(&mut self, ty: EntryType) {
700        self.as_old_mut().linkflag = [ty.as_byte()];
701    }
702
703    /// Returns the checksum field of this header.
704    ///
705    /// May return an error if the field is corrupted.
706    pub fn cksum(&self) -> io::Result<u32> {
707        octal_from(&self.as_old().cksum)
708            .map(|u| u as u32)
709            .map_err(|err| {
710                io::Error::new(
711                    err.kind(),
712                    format!("{} when getting cksum for {}", err, self.path_lossy()),
713                )
714            })
715    }
716
717    /// Sets the checksum field of this header based on the current fields in
718    /// this header.
719    pub fn set_cksum(&mut self) {
720        let cksum = self.calculate_cksum();
721        octal_into(&mut self.as_old_mut().cksum, cksum);
722    }
723
724    fn calculate_cksum(&self) -> u32 {
725        let old = self.as_old();
726        let start = old as *const _ as usize;
727        let cksum_start = old.cksum.as_ptr() as *const _ as usize;
728        let offset = cksum_start - start;
729        let len = old.cksum.len();
730        self.bytes[0..offset]
731            .iter()
732            .chain(iter::repeat_n(&b' ', len))
733            .chain(&self.bytes[offset + len..])
734            .fold(0, |a, b| a + (*b as u32))
735    }
736
737    fn fill_from(&mut self, meta: &Metadata, mode: HeaderMode) {
738        self.fill_platform_from(meta, mode);
739        // Set size of directories to zero
740        self.set_size(if meta.is_dir() || meta.file_type().is_symlink() {
741            0
742        } else {
743            meta.len()
744        });
745        if let Some(ustar) = self.as_ustar_mut() {
746            ustar.set_device_major(0);
747            ustar.set_device_minor(0);
748        }
749        if let Some(gnu) = self.as_gnu_mut() {
750            gnu.set_device_major(0);
751            gnu.set_device_minor(0);
752        }
753    }
754
    /// wasm32 stand-in for the platform-specific metadata fill.
    ///
    /// Not implemented on this target: calling it panics via
    /// `unimplemented!()`.
    #[cfg(target_arch = "wasm32")]
    #[allow(unused_variables)]
    fn fill_platform_from(&mut self, meta: &Metadata, mode: HeaderMode) {
        unimplemented!();
    }
760
761    #[cfg(unix)]
762    fn fill_platform_from(&mut self, meta: &Metadata, mode: HeaderMode) {
763        match mode {
764            HeaderMode::Complete => {
765                self.set_mtime(meta.mtime() as u64);
766                self.set_uid(meta.uid() as u64);
767                self.set_gid(meta.gid() as u64);
768                self.set_mode(meta.mode());
769            }
770            HeaderMode::Deterministic => {
771                // We could in theory set the mtime to zero here, but not all tools seem to behave
772                // well when ingesting files with a 0 timestamp.
773                // For example, rust-lang/cargo#9512 shows that lldb doesn't ingest files with a
774                // zero timestamp correctly.
775                self.set_mtime(DETERMINISTIC_TIMESTAMP);
776
777                self.set_uid(0);
778                self.set_gid(0);
779
780                // Use a default umask value, but propagate the (user) execute bit.
781                let fs_mode = if meta.is_dir() || (0o100 & meta.mode() == 0o100) {
782                    0o755
783                } else {
784                    0o644
785                };
786                self.set_mode(fs_mode);
787            }
788        }
789
790        // Note that if we are a GNU header we *could* set atime/ctime, except
791        // the `tar` utility doesn't do that by default and it causes problems
792        // with 7-zip [1].
793        //
794        // It's always possible to fill them out manually, so we just don't fill
795        // it out automatically here.
796        //
797        // [1]: https://github.com/alexcrichton/tar-rs/issues/70
798
799        // TODO: need to bind more file types
800        self.set_entry_type(entry_type(meta.mode()));
801
802        fn entry_type(mode: u32) -> EntryType {
803            match mode as libc::mode_t & libc::S_IFMT {
804                libc::S_IFREG => EntryType::file(),
805                libc::S_IFLNK => EntryType::symlink(),
806                libc::S_IFCHR => EntryType::character_special(),
807                libc::S_IFBLK => EntryType::block_special(),
808                libc::S_IFDIR => EntryType::dir(),
809                libc::S_IFIFO => EntryType::fifo(),
810                _ => EntryType::new(b' '),
811            }
812        }
813    }
814
/// Fills the platform-dependent header fields from Windows metadata.
///
/// Windows has no file mode or uid/gid, so those fields are approximated:
///
/// * `uid` and `gid` are always written as `0`.
/// * In `HeaderMode::Complete` the mtime comes from the last-write time and
///   the mode is `0o755`/`0o644` (directory/file), or `0o555`/`0o444` when
///   the read-only attribute is set.
/// * In `HeaderMode::Deterministic` the mtime is `DETERMINISTIC_TIMESTAMP`
///   and the mode ignores the read-only attribute.
///
/// The entry type is derived from `meta.file_type()`; anything that is not a
/// directory, regular file or symlink is recorded as `EntryType::new(b' ')`.
#[cfg(windows)]
fn fill_platform_from(&mut self, meta: &Metadata, mode: HeaderMode) {
    // `last_write_time` is expressed in 100ns intervals.
    const INTERVALS_PER_SECOND: u64 = 1_000_000_000 / 100;
    // Seconds between January 1, 1601 and January 1, 1970.
    const SECONDS_FROM_1601_TO_1970: u64 = 11644473600;
    const FILE_ATTRIBUTE_READONLY: u32 = 0x00000001;

    // There's no concept of a file mode on Windows, so do a best approximation here.
    match mode {
        HeaderMode::Complete => {
            self.set_uid(0);
            self.set_gid(0);
            // The dates listed in tarballs are always seconds relative to
            // January 1, 1970. On Windows, however, the timestamps are returned as
            // dates relative to January 1, 1601 (in 100ns intervals), so we need to
            // remove that offset. Timestamps that predate 1970 (including a zero
            // last-write time) are clamped to the epoch: a plain subtraction would
            // underflow, panicking in debug builds and wrapping to a bogus, huge
            // mtime in release builds.
            let seconds_since_1601 = meta.last_write_time() / INTERVALS_PER_SECOND;
            let mtime = seconds_since_1601.saturating_sub(SECONDS_FROM_1601_TO_1970);
            self.set_mtime(mtime);
            let readonly = meta.file_attributes() & FILE_ATTRIBUTE_READONLY;
            let fs_mode = match (meta.is_dir(), readonly != 0) {
                (true, false) => 0o755,
                (true, true) => 0o555,
                (false, false) => 0o644,
                (false, true) => 0o444,
            };
            self.set_mode(fs_mode);
        }
        HeaderMode::Deterministic => {
            self.set_uid(0);
            self.set_gid(0);
            self.set_mtime(DETERMINISTIC_TIMESTAMP); // see above in unix
            let fs_mode = if meta.is_dir() { 0o755 } else { 0o644 };
            self.set_mode(fs_mode);
        }
    }

    let ft = meta.file_type();
    self.set_entry_type(if ft.is_dir() {
        EntryType::dir()
    } else if ft.is_file() {
        EntryType::file()
    } else if ft.is_symlink() {
        EntryType::symlink()
    } else {
        EntryType::new(b' ')
    });
}
860
861    fn debug_fields(&self, b: &mut fmt::DebugStruct) {
862        if let Ok(entry_size) = self.entry_size() {
863            b.field("entry_size", &entry_size);
864        }
865        if let Ok(size) = self.size() {
866            b.field("size", &size);
867        }
868        if let Ok(path) = self.path() {
869            b.field("path", &path);
870        }
871        if let Ok(link_name) = self.link_name() {
872            b.field("link_name", &link_name);
873        }
874        if let Ok(mode) = self.mode() {
875            b.field("mode", &DebugAsOctal(mode));
876        }
877        if let Ok(uid) = self.uid() {
878            b.field("uid", &uid);
879        }
880        if let Ok(gid) = self.gid() {
881            b.field("gid", &gid);
882        }
883        if let Ok(mtime) = self.mtime() {
884            b.field("mtime", &mtime);
885        }
886        if let Ok(username) = self.username() {
887            b.field("username", &username);
888        }
889        if let Ok(groupname) = self.groupname() {
890            b.field("groupname", &groupname);
891        }
892        if let Ok(device_major) = self.device_major() {
893            b.field("device_major", &device_major);
894        }
895        if let Ok(device_minor) = self.device_minor() {
896            b.field("device_minor", &device_minor);
897        }
898        if let Ok(cksum) = self.cksum() {
899            b.field("cksum", &cksum);
900            b.field("cksum_valid", &(cksum == self.calculate_cksum()));
901        }
902    }
903}
904
/// Adapter whose `Debug` output is the wrapped value's `Octal` rendering.
struct DebugAsOctal<T>(T);

impl<T: fmt::Octal> fmt::Debug for DebugAsOctal<T> {
    /// Forwards to `Octal`, reusing the caller's formatter (and thus any
    /// flags or width set on it).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let DebugAsOctal(inner) = self;
        <T as fmt::Octal>::fmt(inner, f)
    }
}
912
/// Reinterprets a shared reference to `T` as a shared reference to `U`.
///
/// Panics if `T` and `U` differ in size or alignment.
///
/// # Safety
///
/// The size and alignment checks are the only ones performed; the caller
/// must ensure that the bytes of `*a` form a valid `U`.
unsafe fn cast<T, U>(a: &T) -> &U {
    assert_eq!(mem::size_of::<T>(), mem::size_of::<U>());
    assert_eq!(mem::align_of::<T>(), mem::align_of::<U>());
    let source: *const T = a;
    &*source.cast::<U>()
}
918
/// Reinterprets an exclusive reference to `T` as an exclusive reference to `U`.
///
/// Panics if `T` and `U` differ in size or alignment.
///
/// # Safety
///
/// The size and alignment checks are the only ones performed; the caller
/// must ensure that the bytes of `*a` form a valid `U`, and that every value
/// written through the result leaves a valid `T` behind.
unsafe fn cast_mut<T, U>(a: &mut T) -> &mut U {
    assert_eq!(mem::size_of::<T>(), mem::size_of::<U>());
    assert_eq!(mem::align_of::<T>(), mem::align_of::<U>());
    let source: *mut T = a;
    &mut *source.cast::<U>()
}
924
925impl Clone for Header {
926    fn clone(&self) -> Header {
927        Header { bytes: self.bytes }
928    }
929}
930
931impl fmt::Debug for Header {
932    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
933        if let Some(me) = self.as_ustar() {
934            me.fmt(f)
935        } else if let Some(me) = self.as_gnu() {
936            me.fmt(f)
937        } else {
938            self.as_old().fmt(f)
939        }
940    }
941}
942
943impl OldHeader {
944    /// Views this as a normal `Header`
945    pub fn as_header(&self) -> &Header {
946        unsafe { cast(self) }
947    }
948
949    /// Views this as a normal `Header`
950    pub fn as_header_mut(&mut self) -> &mut Header {
951        unsafe { cast_mut(self) }
952    }
953}
954
955impl fmt::Debug for OldHeader {
956    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
957        let mut f = f.debug_struct("OldHeader");
958        self.as_header().debug_fields(&mut f);
959        f.finish()
960    }
961}
962
963impl UstarHeader {
964    /// See `Header::path_bytes`
965    pub fn path_bytes(&self) -> Cow<'_, [u8]> {
966        if self.prefix[0] == 0 && !self.name.contains(&b'\\') {
967            Cow::Borrowed(truncate(&self.name))
968        } else {
969            let mut bytes = Vec::new();
970            let prefix = truncate(&self.prefix);
971            if !prefix.is_empty() {
972                bytes.extend_from_slice(prefix);
973                bytes.push(b'/');
974            }
975            bytes.extend_from_slice(truncate(&self.name));
976            Cow::Owned(bytes)
977        }
978    }
979
980    /// Gets the path in a "lossy" way, used for error reporting ONLY.
981    fn path_lossy(&self) -> String {
982        String::from_utf8_lossy(&self.path_bytes()).to_string()
983    }
984
985    /// See `Header::set_path`
986    pub fn set_path<P: AsRef<Path>>(&mut self, p: P) -> io::Result<()> {
987        self._set_path(p.as_ref())
988    }
989
990    fn _set_path(&mut self, path: &Path) -> io::Result<()> {
991        // This can probably be optimized quite a bit more, but for now just do
992        // something that's relatively easy and readable.
993        //
994        // First up, if the path fits within `self.name` then we just shove it
995        // in there. If not then we try to split it between some existing path
996        // components where it can fit in name/prefix. To do that we peel off
997        // enough until the path fits in `prefix`, then we try to put both
998        // halves into their destination.
999        let bytes = path2bytes(path)?;
1000        let (maxnamelen, maxprefixlen) = (self.name.len(), self.prefix.len());
1001        if bytes.len() <= maxnamelen {
1002            copy_path_into(&mut self.name, path, false).map_err(|err| {
1003                io::Error::new(
1004                    err.kind(),
1005                    format!("{} when setting path for {}", err, self.path_lossy()),
1006                )
1007            })?;
1008        } else {
1009            let mut prefix = path;
1010            let mut prefixlen;
1011            loop {
1012                match prefix.parent() {
1013                    Some(parent) => prefix = parent,
1014                    None => {
1015                        return Err(other(&format!(
1016                            "path cannot be split to be inserted into archive: {}",
1017                            path.display()
1018                        )));
1019                    }
1020                }
1021                prefixlen = path2bytes(prefix)?.len();
1022                if prefixlen <= maxprefixlen {
1023                    break;
1024                }
1025            }
1026            copy_path_into(&mut self.prefix, prefix, false).map_err(|err| {
1027                io::Error::new(
1028                    err.kind(),
1029                    format!("{} when setting path for {}", err, self.path_lossy()),
1030                )
1031            })?;
1032            let path = bytes2path(Cow::Borrowed(&bytes[prefixlen + 1..]))?;
1033            copy_path_into(&mut self.name, &path, false).map_err(|err| {
1034                io::Error::new(
1035                    err.kind(),
1036                    format!("{} when setting path for {}", err, self.path_lossy()),
1037                )
1038            })?;
1039        }
1040        Ok(())
1041    }
1042
1043    /// See `Header::username_bytes`
1044    pub fn username_bytes(&self) -> &[u8] {
1045        truncate(&self.uname)
1046    }
1047
1048    /// See `Header::set_username`
1049    pub fn set_username(&mut self, name: &str) -> io::Result<()> {
1050        copy_into(&mut self.uname, name.as_bytes()).map_err(|err| {
1051            io::Error::new(
1052                err.kind(),
1053                format!("{} when setting username for {}", err, self.path_lossy()),
1054            )
1055        })
1056    }
1057
1058    /// See `Header::groupname_bytes`
1059    pub fn groupname_bytes(&self) -> &[u8] {
1060        truncate(&self.gname)
1061    }
1062
1063    /// See `Header::set_groupname`
1064    pub fn set_groupname(&mut self, name: &str) -> io::Result<()> {
1065        copy_into(&mut self.gname, name.as_bytes()).map_err(|err| {
1066            io::Error::new(
1067                err.kind(),
1068                format!("{} when setting groupname for {}", err, self.path_lossy()),
1069            )
1070        })
1071    }
1072
1073    /// See `Header::device_major`
1074    pub fn device_major(&self) -> io::Result<u32> {
1075        octal_from(&self.dev_major)
1076            .map(|u| u as u32)
1077            .map_err(|err| {
1078                io::Error::new(
1079                    err.kind(),
1080                    format!(
1081                        "{} when getting device_major for {}",
1082                        err,
1083                        self.path_lossy()
1084                    ),
1085                )
1086            })
1087    }
1088
1089    /// See `Header::set_device_major`
1090    pub fn set_device_major(&mut self, major: u32) {
1091        octal_into(&mut self.dev_major, major);
1092    }
1093
1094    /// See `Header::device_minor`
1095    pub fn device_minor(&self) -> io::Result<u32> {
1096        octal_from(&self.dev_minor)
1097            .map(|u| u as u32)
1098            .map_err(|err| {
1099                io::Error::new(
1100                    err.kind(),
1101                    format!(
1102                        "{} when getting device_minor for {}",
1103                        err,
1104                        self.path_lossy()
1105                    ),
1106                )
1107            })
1108    }
1109
1110    /// See `Header::set_device_minor`
1111    pub fn set_device_minor(&mut self, minor: u32) {
1112        octal_into(&mut self.dev_minor, minor);
1113    }
1114
1115    /// Views this as a normal `Header`
1116    pub fn as_header(&self) -> &Header {
1117        unsafe { cast(self) }
1118    }
1119
1120    /// Views this as a normal `Header`
1121    pub fn as_header_mut(&mut self) -> &mut Header {
1122        unsafe { cast_mut(self) }
1123    }
1124}
1125
1126impl fmt::Debug for UstarHeader {
1127    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1128        let mut f = f.debug_struct("UstarHeader");
1129        self.as_header().debug_fields(&mut f);
1130        f.finish()
1131    }
1132}
1133
1134impl GnuHeader {
1135    /// See `Header::username_bytes`
1136    pub fn username_bytes(&self) -> &[u8] {
1137        truncate(&self.uname)
1138    }
1139
1140    /// Gets the fullname (group:user) in a "lossy" way, used for error reporting ONLY.
1141    fn fullname_lossy(&self) -> String {
1142        format!(
1143            "{}:{}",
1144            String::from_utf8_lossy(self.groupname_bytes()),
1145            String::from_utf8_lossy(self.username_bytes()),
1146        )
1147    }
1148
1149    /// See `Header::set_username`
1150    pub fn set_username(&mut self, name: &str) -> io::Result<()> {
1151        copy_into(&mut self.uname, name.as_bytes()).map_err(|err| {
1152            io::Error::new(
1153                err.kind(),
1154                format!(
1155                    "{} when setting username for {}",
1156                    err,
1157                    self.fullname_lossy()
1158                ),
1159            )
1160        })
1161    }
1162
1163    /// See `Header::groupname_bytes`
1164    pub fn groupname_bytes(&self) -> &[u8] {
1165        truncate(&self.gname)
1166    }
1167
1168    /// See `Header::set_groupname`
1169    pub fn set_groupname(&mut self, name: &str) -> io::Result<()> {
1170        copy_into(&mut self.gname, name.as_bytes()).map_err(|err| {
1171            io::Error::new(
1172                err.kind(),
1173                format!(
1174                    "{} when setting groupname for {}",
1175                    err,
1176                    self.fullname_lossy()
1177                ),
1178            )
1179        })
1180    }
1181
1182    /// See `Header::device_major`
1183    pub fn device_major(&self) -> io::Result<u32> {
1184        octal_from(&self.dev_major)
1185            .map(|u| u as u32)
1186            .map_err(|err| {
1187                io::Error::new(
1188                    err.kind(),
1189                    format!(
1190                        "{} when getting device_major for {}",
1191                        err,
1192                        self.fullname_lossy()
1193                    ),
1194                )
1195            })
1196    }
1197
1198    /// See `Header::set_device_major`
1199    pub fn set_device_major(&mut self, major: u32) {
1200        octal_into(&mut self.dev_major, major);
1201    }
1202
1203    /// See `Header::device_minor`
1204    pub fn device_minor(&self) -> io::Result<u32> {
1205        octal_from(&self.dev_minor)
1206            .map(|u| u as u32)
1207            .map_err(|err| {
1208                io::Error::new(
1209                    err.kind(),
1210                    format!(
1211                        "{} when getting device_minor for {}",
1212                        err,
1213                        self.fullname_lossy()
1214                    ),
1215                )
1216            })
1217    }
1218
1219    /// See `Header::set_device_minor`
1220    pub fn set_device_minor(&mut self, minor: u32) {
1221        octal_into(&mut self.dev_minor, minor);
1222    }
1223
1224    /// Returns the last modification time in Unix time format
1225    pub fn atime(&self) -> io::Result<u64> {
1226        num_field_wrapper_from(&self.atime).map_err(|err| {
1227            io::Error::new(
1228                err.kind(),
1229                format!("{} when getting atime for {}", err, self.fullname_lossy()),
1230            )
1231        })
1232    }
1233
1234    /// Encodes the `atime` provided into this header.
1235    ///
1236    /// Note that this time is typically a number of seconds passed since
1237    /// January 1, 1970.
1238    pub fn set_atime(&mut self, atime: u64) {
1239        num_field_wrapper_into(&mut self.atime, atime);
1240    }
1241
1242    /// Returns the last modification time in Unix time format
1243    pub fn ctime(&self) -> io::Result<u64> {
1244        num_field_wrapper_from(&self.ctime).map_err(|err| {
1245            io::Error::new(
1246                err.kind(),
1247                format!("{} when getting ctime for {}", err, self.fullname_lossy()),
1248            )
1249        })
1250    }
1251
1252    /// Encodes the `ctime` provided into this header.
1253    ///
1254    /// Note that this time is typically a number of seconds passed since
1255    /// January 1, 1970.
1256    pub fn set_ctime(&mut self, ctime: u64) {
1257        num_field_wrapper_into(&mut self.ctime, ctime);
1258    }
1259
1260    /// Returns the "real size" of the file this header represents.
1261    ///
1262    /// This is applicable for sparse files where the returned size here is the
1263    /// size of the entire file after the sparse regions have been filled in.
1264    pub fn real_size(&self) -> io::Result<u64> {
1265        num_field_wrapper_from(&self.realsize).map_err(|err| {
1266            io::Error::new(
1267                err.kind(),
1268                format!(
1269                    "{} when getting real_size for {}",
1270                    err,
1271                    self.fullname_lossy()
1272                ),
1273            )
1274        })
1275    }
1276
1277    /// Indicates whether this header will be followed by additional
1278    /// sparse-header records.
1279    ///
1280    /// Note that this is handled internally by this library, and is likely only
1281    /// interesting if a `raw` iterator is being used.
1282    pub fn is_extended(&self) -> bool {
1283        self.isextended[0] == 1
1284    }
1285
1286    /// Views this as a normal `Header`
1287    pub fn as_header(&self) -> &Header {
1288        unsafe { cast(self) }
1289    }
1290
1291    /// Views this as a normal `Header`
1292    pub fn as_header_mut(&mut self) -> &mut Header {
1293        unsafe { cast_mut(self) }
1294    }
1295}
1296
1297impl fmt::Debug for GnuHeader {
1298    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1299        let mut f = f.debug_struct("GnuHeader");
1300        self.as_header().debug_fields(&mut f);
1301        if let Ok(atime) = self.atime() {
1302            f.field("atime", &atime);
1303        }
1304        if let Ok(ctime) = self.ctime() {
1305            f.field("ctime", &ctime);
1306        }
1307        f.field("is_extended", &self.is_extended())
1308            .field("sparse", &DebugSparseHeaders(&self.sparse))
1309            .finish()
1310    }
1311}
1312
1313struct DebugSparseHeaders<'a>(&'a [GnuSparseHeader]);
1314
1315impl fmt::Debug for DebugSparseHeaders<'_> {
1316    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1317        let mut f = f.debug_list();
1318        for header in self.0 {
1319            if !header.is_empty() {
1320                f.entry(header);
1321            }
1322        }
1323        f.finish()
1324    }
1325}
1326
1327impl GnuSparseHeader {
1328    /// Returns true if block is empty
1329    pub fn is_empty(&self) -> bool {
1330        self.offset[0] == 0 || self.numbytes[0] == 0
1331    }
1332
1333    /// Offset of the block from the start of the file
1334    ///
1335    /// Returns `Err` for a malformed `offset` field.
1336    pub fn offset(&self) -> io::Result<u64> {
1337        num_field_wrapper_from(&self.offset).map_err(|err| {
1338            io::Error::new(
1339                err.kind(),
1340                format!("{} when getting offset from sparse header", err),
1341            )
1342        })
1343    }
1344
1345    /// Length of the block
1346    ///
1347    /// Returns `Err` for a malformed `numbytes` field.
1348    pub fn length(&self) -> io::Result<u64> {
1349        num_field_wrapper_from(&self.numbytes).map_err(|err| {
1350            io::Error::new(
1351                err.kind(),
1352                format!("{} when getting length from sparse header", err),
1353            )
1354        })
1355    }
1356}
1357
1358impl fmt::Debug for GnuSparseHeader {
1359    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1360        let mut f = f.debug_struct("GnuSparseHeader");
1361        if let Ok(offset) = self.offset() {
1362            f.field("offset", &offset);
1363        }
1364        if let Ok(length) = self.length() {
1365            f.field("length", &length);
1366        }
1367        f.finish()
1368    }
1369}
1370
1371impl GnuExtSparseHeader {
1372    /// Crates a new zero'd out sparse header entry.
1373    pub fn new() -> GnuExtSparseHeader {
1374        unsafe { mem::zeroed() }
1375    }
1376
1377    /// Returns a view into this header as a byte array.
1378    pub fn as_bytes(&self) -> &[u8; BLOCK_SIZE as usize] {
1379        debug_assert_eq!(mem::size_of_val(self), BLOCK_SIZE as usize);
1380        unsafe { &*(self as *const GnuExtSparseHeader as *const [u8; BLOCK_SIZE as usize]) }
1381    }
1382
1383    /// Returns a view into this header as a byte array.
1384    pub fn as_mut_bytes(&mut self) -> &mut [u8; BLOCK_SIZE as usize] {
1385        debug_assert_eq!(mem::size_of_val(self), BLOCK_SIZE as usize);
1386        unsafe { &mut *(self as *mut GnuExtSparseHeader as *mut [u8; BLOCK_SIZE as usize]) }
1387    }
1388
1389    /// Returns a slice of the underlying sparse headers.
1390    ///
1391    /// Some headers may represent empty chunks of both the offset and numbytes
1392    /// fields are 0.
1393    pub fn sparse(&self) -> &[GnuSparseHeader; 21] {
1394        &self.sparse
1395    }
1396
1397    /// Indicates if another sparse header should be following this one.
1398    pub fn is_extended(&self) -> bool {
1399        self.isextended[0] == 1
1400    }
1401}
1402
1403impl Default for GnuExtSparseHeader {
1404    fn default() -> Self {
1405        Self::new()
1406    }
1407}
1408
1409fn octal_from(slice: &[u8]) -> io::Result<u64> {
1410    let trun = truncate(slice);
1411    let num = match str::from_utf8(trun) {
1412        Ok(n) => n,
1413        Err(_) => {
1414            return Err(other(&format!(
1415                "numeric field did not have utf-8 text: {}",
1416                String::from_utf8_lossy(trun)
1417            )));
1418        }
1419    };
1420    match u64::from_str_radix(num.trim(), 8) {
1421        Ok(n) => Ok(n),
1422        Err(_) => Err(other(&format!("numeric field was not a number: {}", num))),
1423    }
1424}
1425
/// Writes `val` into `dst` as nul-terminated, zero-padded octal text.
///
/// The field is filled from its last byte backwards: first the nul
/// terminator, then the digits starting with the least significant one, and
/// finally `'0'` padding. Digits that do not fit are silently dropped.
fn octal_into<T: fmt::Octal>(dst: &mut [u8], val: T) {
    let digits = format!("{:o}", val);
    let padded_digits = digits.bytes().rev().chain(repeat(b'0'));
    let fill = once(b'\0').chain(padded_digits);
    dst.iter_mut()
        .rev()
        .zip(fill)
        .for_each(|(slot, byte)| *slot = byte);
}
1433
1434// Wrapper to figure out if we should fill the header field using tar's numeric
1435// extension (binary) or not (octal).
1436fn num_field_wrapper_into(dst: &mut [u8], src: u64) {
1437    if src >= 8_589_934_592 || (src >= 2_097_152 && dst.len() == 8) {
1438        numeric_extended_into(dst, src);
1439    } else {
1440        octal_into(dst, src);
1441    }
1442}
1443
1444// Wrapper to figure out if we should read the header field in binary (numeric
1445// extension) or octal (standard encoding).
1446fn num_field_wrapper_from(src: &[u8]) -> io::Result<u64> {
1447    if src[0] & 0x80 != 0 {
1448        Ok(numeric_extended_from(src))
1449    } else {
1450        octal_from(src)
1451    }
1452}
1453
// When a numeric field is written in the extended form, the high bit of the
// first byte is set to 1 and the remainder of the field is treated as binary
// instead of octal ascii.
// This handles writing a u64 into an 8 (uid, gid) or 12 (size, *time) byte
// array: leading zero bytes first, then the value in big-endian order.
fn numeric_extended_into(dst: &mut [u8], src: u64) {
    let padding = dst.len() - 8; // bytes to zero in front of the value
    let encoded = iter::repeat_n(0, padding).chain(src.to_be_bytes());
    dst.iter_mut()
        .zip(encoded)
        .for_each(|(slot, byte)| *slot = byte);
    dst[0] |= 0x80;
}
1468
/// Decodes a numeric field stored in the binary (extended) form.
///
/// For an 8-byte field the first byte contributes with its high bit
/// flipped, followed by the remaining seven bytes. For any other length
/// only the last eight bytes are read, most significant byte first.
fn numeric_extended_from(src: &[u8]) -> u64 {
    let (seed, skipped) = if src.len() == 8 {
        // read first byte without extension flag bit
        ((src[0] ^ 0x80) as u64, 1)
    } else {
        // only read last 8 bytes
        (0, src.len() - 8)
    };
    src.iter()
        .skip(skipped)
        .fold(seed, |acc, byte| (acc << 8) | *byte as u64)
}
1485
/// Returns the part of `slice` that precedes its first nul byte, or all of
/// `slice` when it contains none.
fn truncate(slice: &[u8]) -> &[u8] {
    let end = slice
        .iter()
        .position(|byte| *byte == 0)
        .unwrap_or(slice.len());
    &slice[..end]
}
1492
1493/// Copies `bytes` into the `slot` provided, returning an error if the `bytes`
1494/// array is too long or if it contains any nul bytes.
1495fn copy_into(slot: &mut [u8], bytes: &[u8]) -> io::Result<()> {
1496    if bytes.len() > slot.len() {
1497        Err(other("provided value is too long"))
1498    } else if bytes.contains(&0) {
1499        Err(other("provided value contains a nul byte"))
1500    } else {
1501        for (slot, val) in slot.iter_mut().zip(bytes.iter().chain(Some(&0))) {
1502            *slot = *val;
1503        }
1504        Ok(())
1505    }
1506}
1507
1508fn copy_path_into_inner(
1509    mut slot: &mut [u8],
1510    path: &Path,
1511    is_link_name: bool,
1512    is_truncated_gnu_long_path: bool,
1513) -> io::Result<()> {
1514    let mut emitted = false;
1515    let mut needs_slash = false;
1516    let mut iter = path.components().peekable();
1517    while let Some(component) = iter.next() {
1518        let bytes = path2bytes(Path::new(component.as_os_str()))?;
1519        match (component, is_link_name) {
1520            (Component::Prefix(..), false) | (Component::RootDir, false) => {
1521                return Err(other("paths in archives must be relative"));
1522            }
1523            (Component::ParentDir, false) => {
1524                // If it's last component of a gnu long path we know that there might be more
1525                // to the component than .. (the rest is stored elsewhere)
1526                // Otherwise it's a clear error
1527                if !is_truncated_gnu_long_path || iter.peek().is_some() {
1528                    return Err(other("paths in archives must not have `..`"));
1529                }
1530            }
1531            // Allow "./" as the path
1532            (Component::CurDir, false) if path.components().count() == 1 => {}
1533            (Component::CurDir, false) => continue,
1534            (Component::Normal(_), _) | (_, true) => {}
1535        };
1536        if needs_slash {
1537            copy(&mut slot, b"/")?;
1538        }
1539        if bytes.contains(&b'/') {
1540            if let Component::Normal(..) = component {
1541                return Err(other("path component in archive cannot contain `/`"));
1542            }
1543        }
1544        copy(&mut slot, &bytes)?;
1545        if &*bytes != b"/" {
1546            needs_slash = true;
1547        }
1548        emitted = true;
1549    }
1550    if !emitted {
1551        return Err(other("paths in archives must have at least one component"));
1552    }
1553    if ends_with_slash(path) {
1554        copy(&mut slot, b"/")?;
1555    }
1556    return Ok(());
1557
1558    fn copy(slot: &mut &mut [u8], bytes: &[u8]) -> io::Result<()> {
1559        copy_into(slot, bytes)?;
1560        let tmp = std::mem::take(slot);
1561        *slot = &mut tmp[bytes.len()..];
1562        Ok(())
1563    }
1564}
1565
1566/// Copies `path` into the `slot` provided
1567///
1568/// Returns an error if:
1569///
1570/// * the path is too long to fit
1571/// * a nul byte was found
1572/// * an invalid path component is encountered (e.g. a root path or parent dir)
1573/// * the path itself is empty
1574fn copy_path_into(slot: &mut [u8], path: &Path, is_link_name: bool) -> io::Result<()> {
1575    copy_path_into_inner(slot, path, is_link_name, false)
1576}
1577
1578/// Copies `path` into the `slot` provided
1579///
1580/// Returns an error if:
1581///
1582/// * the path is too long to fit
1583/// * a nul byte was found
1584/// * an invalid path component is encountered (e.g. a root path or parent dir)
1585/// * the path itself is empty
1586///
1587/// This is less restrictive version meant to be used for truncated GNU paths.
1588fn copy_path_into_gnu_long(slot: &mut [u8], path: &Path, is_link_name: bool) -> io::Result<()> {
1589    copy_path_into_inner(slot, path, is_link_name, true)
1590}
1591
/// Reports whether the lossy string form of `p` ends with `/`.
#[cfg(target_arch = "wasm32")]
fn ends_with_slash(p: &Path) -> bool {
    let text = p.to_string_lossy();
    text.chars().next_back() == Some('/')
}
1596
/// Reports whether the last UTF-16 unit of `p` is `/` or `\`.
#[cfg(windows)]
fn ends_with_slash(p: &Path) -> bool {
    match p.as_os_str().encode_wide().last() {
        Some(unit) => unit == b'/' as u16 || unit == b'\\' as u16,
        None => false,
    }
}
1602
/// Reports whether the last byte of `p` is `/`.
#[cfg(unix)]
fn ends_with_slash(p: &Path) -> bool {
    let raw = p.as_os_str().as_bytes();
    raw.last() == Some(&b'/')
}
1607
/// Converts `p` to bytes, normalizing `\` to Unix-style `/` separators.
///
/// Borrows from `p` when no backslash is present and allocates otherwise.
/// Returns an error when the path is not valid Unicode.
#[cfg(any(windows, target_arch = "wasm32"))]
pub fn path2bytes(p: &Path) -> io::Result<Cow<'_, [u8]>> {
    let text = p
        .as_os_str()
        .to_str()
        .ok_or_else(|| other(&format!("path {} was not valid Unicode", p.display())))?;

    Ok(if text.contains('\\') {
        // Normalize to Unix-style path separators
        Cow::Owned(text.replace('\\', "/").into_bytes())
    } else {
        Cow::Borrowed(text.as_bytes())
    })
}
1629
#[cfg(unix)]
/// Borrows the raw bytes of `p`.
///
/// On unix this will never fail
pub fn path2bytes(p: &Path) -> io::Result<Cow<'_, [u8]>> {
    let raw: &[u8] = p.as_os_str().as_bytes();
    Ok(Cow::from(raw))
}
1635
#[cfg(windows)]
/// Converts bytes into a path, borrowing when the input is borrowed.
///
/// On windows we cannot accept non-Unicode bytes because it
/// is impossible to convert it to UTF-16.
pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result<Cow<Path>> {
    // Builds the error reported for input that is not valid UTF-8.
    fn not_unicode(v: &[u8]) -> io::Error {
        other(&format!(
            "only Unicode paths are supported on Windows: {}",
            String::from_utf8_lossy(v)
        ))
    }

    match bytes {
        Cow::Borrowed(raw) => match str::from_utf8(raw) {
            Ok(text) => Ok(Cow::Borrowed(Path::new(text))),
            Err(_) => Err(not_unicode(raw)),
        },
        Cow::Owned(raw) => match String::from_utf8(raw) {
            Ok(text) => Ok(Cow::Owned(PathBuf::from(text))),
            Err(failure) => Err(not_unicode(&failure.into_bytes())),
        },
    }
}
1658
#[cfg(unix)]
/// Converts bytes into a path, borrowing when the input is borrowed and
/// reusing the buffer when it is owned.
///
/// On unix this operation can never fail.
pub fn bytes2path(bytes: Cow<'_, [u8]>) -> io::Result<Cow<'_, Path>> {
    use std::ffi::{OsStr, OsString};

    let path = match bytes {
        Cow::Borrowed(raw) => Cow::Borrowed(Path::new(OsStr::from_bytes(raw))),
        Cow::Owned(raw) => Cow::Owned(PathBuf::from(OsString::from_vec(raw))),
    };
    Ok(path)
}
1669
/// Converts bytes into a path, borrowing when the input is borrowed.
///
/// Returns an `InvalidData` error when the bytes are not valid UTF-8.
#[cfg(target_arch = "wasm32")]
pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result<Cow<Path>> {
    match bytes {
        Cow::Borrowed(raw) => {
            let text = str::from_utf8(raw).map_err(invalid_utf8)?;
            Ok(Cow::Borrowed(Path::new(text)))
        }
        Cow::Owned(raw) => {
            let text = String::from_utf8(raw).map_err(invalid_utf8)?;
            Ok(Cow::Owned(PathBuf::from(text)))
        }
    }
}
1681
/// Builds the `InvalidData` error used for non-UTF-8 input; the cause passed
/// in is discarded.
#[cfg(target_arch = "wasm32")]
fn invalid_utf8<T>(_: T) -> io::Error {
    let kind = io::ErrorKind::InvalidData;
    io::Error::new(kind, "Invalid utf-8")
}