
//! Copy-on-write initialization support: creation of backing images for
//! modules, and logic to support mapping these backing images into memory.

use super::sys::DecommitBehavior;
use crate::Engine;
use crate::prelude::*;
use crate::runtime::vm::sys::vm::{self, MemoryImageSource, PageMap, reset_with_pagemap};
use crate::runtime::vm::{
    HostAlignedByteCount, MmapOffset, ModuleMemoryImageSource, host_page_size,
};
use alloc::sync::Arc;
use core::fmt;
use core::ops::Range;
use wasmtime_environ::{DefinedMemoryIndex, MemoryInitialization, Module, PrimaryMap, Tunables};

/// Backing images for memories in a module.
///
/// This is meant to be built once, when a module is first loaded/constructed,
/// and then used many times for instantiation.
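///
/// A minimal usage sketch (illustrative only, not compiled; assumes an
/// `engine`, a compiled `module`, and its backing `source` are already in
/// hand):
///
/// ```ignore
/// // Build the images once when the module is loaded...
/// let images = ModuleMemoryImages::new(&engine, &module, &source)?;
/// // ...then consult them on each instantiation.
/// if let Some(images) = &images {
///     if let Some(image) = images.get_memory_image(DefinedMemoryIndex::from_u32(0)) {
///         // `image` can now be mapped copy-on-write into a new instance's
///         // linear memory via a `MemoryImageSlot`.
///         let _ = image;
///     }
/// }
/// ```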
pub struct ModuleMemoryImages {
    memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryImage>>>,
}

impl ModuleMemoryImages {
    /// Get the MemoryImage for a given memory.
    pub fn get_memory_image(&self, defined_index: DefinedMemoryIndex) -> Option<&Arc<MemoryImage>> {
        self.memories[defined_index].as_ref()
    }
}

/// One backing image for one memory.
pub struct MemoryImage {
    /// The platform-specific source of this image.
    ///
    /// This might be a mapped `*.cwasm` file or, on Linux, a `Memfd`
    /// representing an anonymous in-memory file. In either case this is used
    /// as the backing source for the CoW image.
    source: MemoryImageSource,

    /// Length of image, in bytes.
    ///
    /// Note that initial memory size may be larger; leading and trailing zeroes
    /// are truncated (handled by backing fd).
    ///
    /// Must be a multiple of the system page size.
    len: HostAlignedByteCount,

    /// Image starts this many bytes into `source`.
    ///
    /// This is 0 for anonymous-backed memfd files and is the offset of the
    /// data section in a `*.cwasm` file for `*.cwasm`-backed images.
    ///
    /// Must be a multiple of the system page size.
    ///
    /// ## Notes
    ///
    /// This currently isn't a `HostAlignedByteCount` because that's a usize and
    /// this, being a file offset, is a u64.
    source_offset: u64,

    /// Image starts this many bytes into heap space.
    ///
    /// Must be a multiple of the system page size.
    linear_memory_offset: HostAlignedByteCount,

    /// The original source of data that this image is derived from.
    module_source: Arc<dyn ModuleMemoryImageSource>,

    /// The offset, within `module_source.wasm_data()`, that this image starts
    /// at.
    module_source_offset: usize,
}

impl MemoryImage {
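    /// Attempt to build a single `MemoryImage` covering the page-aligned
    /// `data_range` within `module_source`'s wasm data, to be placed at
    /// `linear_memory_offset` within linear memory.
    ///
    /// Returns `Ok(None)` if no copy-on-write-compatible source is available
    /// for this data (neither a file-backed mmap nor a platform-specific
    /// in-memory source).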
    fn new(
        engine: &Engine,
        page_size: u32,
        linear_memory_offset: HostAlignedByteCount,
        module_source: &Arc<impl ModuleMemoryImageSource>,
        data_range: Range<usize>,
    ) -> Result<Option<MemoryImage>> {
        let assert_page_aligned = |val: usize| {
            assert_eq!(val % (page_size as usize), 0);
        };
        // Sanity-check that various parameters are page-aligned.
        let len =
            HostAlignedByteCount::new(data_range.len()).expect("memory image data is page-aligned");

        // If a backing `mmap` is present then `data` should be a sub-slice of
        // the `mmap`. The sanity-checks here double-check that. Additionally
        // compilation should have ensured that the `data` section is
        // page-aligned within `mmap`, so that's also all double-checked here.
        //
        // Finally if the `mmap` itself comes from a backing file on disk, such
        // as a `*.cwasm` file, then that's a valid source of data for the
        // memory image so we simply return referencing that.
        //
        // Note that this path is platform-agnostic in the sense that all
        // supported platforms can memory-map copy-on-write data from files,
        // but for now this is still a Linux-specific region of Wasmtime. Some
        // work will be needed to get this file compiling for macOS and
        // Windows.
        let data = &module_source.wasm_data()[data_range.clone()];
        if !engine.config().force_memory_init_memfd {
            if let Some(mmap) = module_source.mmap() {
                let start = mmap.as_ptr() as usize;
                let end = start + mmap.len();
                let data_start = data.as_ptr() as usize;
                let data_end = data_start + data.len();
                assert!(start <= data_start && data_end <= end);
                assert_page_aligned(start);
                assert_page_aligned(data_start);
                assert_page_aligned(data_end);

                #[cfg(feature = "std")]
                if let Some(file) = mmap.original_file() {
                    if let Some(source) = MemoryImageSource::from_file(file) {
                        return Ok(Some(MemoryImage {
                            source,
                            source_offset: u64::try_from(data_start - start).unwrap(),
                            linear_memory_offset,
                            len,
                            module_source: module_source.clone(),
                            module_source_offset: data_range.start,
                        }));
                    }
                }
            }
        }

        // If `mmap` doesn't come from a file then platform-specific mechanisms
        // may be used to place the data in a form that's amenable to an mmap.
        if let Some(source) = MemoryImageSource::from_data(data)? {
            return Ok(Some(MemoryImage {
                source,
                source_offset: 0,
                linear_memory_offset,
                len,
                module_source: module_source.clone(),
                module_source_offset: data_range.start,
            }));
        }

        Ok(None)
    }

    unsafe fn map_at(&self, mmap_base: &MmapOffset) -> Result<()> {
        unsafe {
            mmap_base.map_image_at(
                &self.source,
                self.source_offset,
                self.linear_memory_offset,
                self.len,
            )
        }
    }

    unsafe fn remap_as_zeros_at(&self, base: *mut u8) -> Result<()> {
        unsafe {
            self.source.remap_as_zeros_at(
                base.add(self.linear_memory_offset.byte_count()),
                self.len.byte_count(),
            )?;
        }
        Ok(())
    }
}

impl ModuleMemoryImages {
    /// Create a new `ModuleMemoryImages` for the given module. This can be
    /// passed in as part of an `InstanceAllocationRequest` to speed up
    /// instantiation and execution by using copy-on-write-backed memories.
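    ///
    /// Returns `Ok(None)` if copy-on-write images cannot be built for this
    /// module, for example when memory initialization is not static, when a
    /// memory to be initialized is imported, or when an initializer cannot be
    /// represented as a page-aligned image.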
    pub fn new(
        engine: &Engine,
        module: &Module,
        source: &Arc<impl ModuleMemoryImageSource>,
    ) -> Result<Option<ModuleMemoryImages>> {
        let map = match &module.memory_initialization {
            MemoryInitialization::Static { map } => map,
            _ => return Ok(None),
        };
        let mut memories = PrimaryMap::with_capacity(map.len());
        let page_size = crate::runtime::vm::host_page_size();
        let page_size = u32::try_from(page_size).unwrap();
        for (memory_index, init) in map {
            // mmap-based initialization only works for defined memories with a
            // known starting point of all zeros, so bail out if the memory is
            // imported.
            let defined_memory = match module.defined_memory_index(memory_index) {
                Some(idx) => idx,
                None => return Ok(None),
            };

            // If there's no known initialization for this memory then we don't
            // need an image for it, so push `None` and move on.
            let init = match init {
                Some(init) => init,
                None => {
                    memories.push(None);
                    continue;
                }
            };

            let data_range = init.data.start as usize..init.data.end as usize;
            if module.memories[memory_index]
                .minimum_byte_size()
                .map_or(false, |mem_initial_len| {
                    init.offset + u64::try_from(data_range.len()).unwrap() > mem_initial_len
                })
            {
                // The image is rounded up to multiples of the host OS page
                // size. But if Wasm is using a custom page size, the Wasm page
                // size might be smaller than the host OS page size, and that
                // rounding might have made the image larger than the Wasm
                // memory's initial length. This is *probably* okay, since the
                // rounding would have just introduced new runs of zeroes in the
                // image, but out of an abundance of caution we don't generate
                // CoW images in this scenario.
                return Ok(None);
            }

            let offset_usize = match usize::try_from(init.offset) {
                Ok(offset) => offset,
                Err(_) => return Ok(None),
            };
            let offset = HostAlignedByteCount::new(offset_usize)
                .expect("memory init offset is a multiple of the host page size");

            // If this creation fails then we fail creating
            // `ModuleMemoryImages` since this memory couldn't be represented.
            let image = match MemoryImage::new(engine, page_size, offset, source, data_range)? {
                Some(image) => image,
                None => return Ok(None),
            };

            let idx = memories.push(Some(Arc::new(image)));
            assert_eq!(idx, defined_memory);
        }

        Ok(Some(ModuleMemoryImages { memories }))
    }
}

/// Slot management of a copy-on-write image which can be reused for the pooling
/// allocator.
///
/// This data structure manages a slot of linear memory, primarily in the
/// pooling allocator, which optionally has a contiguous memory image in the
/// middle of it. Pictorially this data structure manages a virtual memory
/// region that looks like:
///
/// ```text
///   +--------------------+-------------------+--------------+--------------+
///   |   anonymous        |      optional     |   anonymous  |    PROT_NONE |
///   |     zero           |       memory      |     zero     |     memory   |
///   |    memory          |       image       |    memory    |              |
///   +--------------------+-------------------+--------------+--------------+
///   |                     <------+---------->
///   |<-----+------------>         \
///   |      \                   image.len
///   |       \
///   |  image.linear_memory_offset
///   |
///   \
///  self.base is this virtual address
///
///    <------------------+------------------------------------------------>
///                        \
///                      static_size
///
///    <------------------+---------------------------------->
///                        \
///                      accessible
/// ```
///
/// When a `MemoryImageSlot` is created it's told what the `static_size` and
/// `accessible` limits are. Initially there is assumed to be no image in linear
/// memory.
///
/// When `MemoryImageSlot::instantiate` is called, the method performs a
/// "synchronization" to take the image from its prior state to the new state
/// for the image specified. The first instantiation, for example, will mmap the
/// heap image into place. Upon reuse of a slot nothing happens except possibly
/// shrinking `self.accessible`. When a new image is used then the old image is
/// mapped to anonymous zero memory and then the new image is mapped in place.
///
/// A `MemoryImageSlot` is either `dirty` or it isn't. When a `MemoryImageSlot`
/// is dirty then it is assumed that any memory beneath `self.accessible` could
/// have any value. Instantiation cannot happen into a `dirty` slot, however, so
/// `MemoryImageSlot::clear_and_remain_ready` returns this memory back to
/// its original state to mark `dirty = false`. This is done by resetting all
/// anonymous memory back to zero and the image itself back to its initial
/// contents.
///
/// On Linux this is achieved with the `madvise(MADV_DONTNEED)` syscall. This
/// syscall will release the physical pages back to the OS but retain the
/// original mappings, effectively resetting everything back to its initial
/// state. Non-Linux platforms will replace all memory below `self.accessible`
/// with a fresh zeroed mmap, meaning that reuse is effectively not supported.
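///
/// A rough lifecycle sketch (illustrative only, not compiled; `base`, `image`,
/// `ty`, and `tunables` are assumed to be supplied by the pooling allocator):
///
/// ```ignore
/// let mut slot = MemoryImageSlot::create(base, HostAlignedByteCount::ZERO, static_size);
///
/// // First use: maps the CoW image and makes the initial heap read/write.
/// slot.instantiate(64 << 10, Some(&image), &ty, &tunables)?;
///
/// // ... the instance runs and may dirty pages ...
///
/// // Return the slot to a pristine state before reuse.
/// slot.clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
///     decommit_pages(ptr, len).unwrap()
/// })?;
///
/// // Reusing the same image avoids remapping work entirely.
/// slot.instantiate(64 << 10, Some(&image), &ty, &tunables)?;
/// ```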
pub struct MemoryImageSlot {
    /// The mmap and offset within it that contains the linear memory for this
    /// slot.
    base: MmapOffset,

    /// The maximum static memory size which `self.accessible` can grow to.
    static_size: usize,

    /// An optional image that is currently being used in this linear memory.
    ///
    /// This can be `None` in which case memory is originally all zeros. When
    /// `Some`, the image describes where it is located within linear memory.
    image: Option<Arc<MemoryImage>>,

    /// The size of the heap that is readable and writable.
    ///
    /// Note that this may extend beyond the actual linear memory heap size in
    /// the case of dynamic memories in use. Memory accesses to memory below
    /// `self.accessible` may still page fault as pages are lazily brought in
    /// but the faults will always be resolved by the kernel.
    ///
    /// Also note that this is always page-aligned.
    accessible: HostAlignedByteCount,

    /// Whether this slot may have "dirty" pages (pages written by an
    /// instantiation). Set by `instantiate()` and cleared by
    /// `clear_and_remain_ready()`, and used in assertions to ensure
    /// those methods are called properly.
    ///
    /// Invariant: if !dirty, then this memory slot contains a clean
    /// CoW mapping of `image`, if `Some(..)`, and anonymous-zero
    /// memory beyond the image up to `static_size`. The addresses
    /// from offset 0 to `self.accessible` are R+W and set to zero or the
    /// initial image content, as appropriate. Everything between
    /// `self.accessible` and `self.static_size` is inaccessible.
    dirty: bool,
}

impl fmt::Debug for MemoryImageSlot {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("MemoryImageSlot")
            .field("base", &self.base)
            .field("static_size", &self.static_size)
            .field("accessible", &self.accessible)
            .field("dirty", &self.dirty)
            .finish_non_exhaustive()
    }
}

impl MemoryImageSlot {
    /// Create a new MemoryImageSlot. Assumes that there is an anonymous
    /// mmap backing in the given range to start.
    ///
    /// The `accessible` parameter describes how much of linear memory is
    /// already mapped as R/W with all zero-bytes. The `static_size` value is
    /// the maximum size of this slot which `accessible` cannot grow beyond,
    /// and all memory from `accessible` to `static_size` should be mapped as
    /// `PROT_NONE` backed by zero-bytes.
    pub(crate) fn create(
        base: MmapOffset,
        accessible: HostAlignedByteCount,
        static_size: usize,
    ) -> Self {
        MemoryImageSlot {
            base,
            static_size,
            accessible,
            image: None,
            dirty: false,
        }
    }

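    /// Grow the read/write ("accessible") portion of this slot to at least
    /// `size_bytes`, rounded up to the host page size.
    ///
    /// If the requested limit is already accessible this is a no-op; otherwise
    /// the newly exposed pages are made read/write. Used when a guest grows
    /// its linear memory.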
    pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
        let size_bytes_aligned = HostAlignedByteCount::new_rounded_up(size_bytes)?;
        assert!(size_bytes <= self.static_size);
        assert!(size_bytes_aligned.byte_count() <= self.static_size);

        // If the heap limit already addresses accessible bytes then no syscalls
        // are necessary since the data is already mapped into the process and
        // waiting to go.
        //
        // This is used for "dynamic" memories where memory is not always
        // decommitted during recycling (but it's still always reset).
        if size_bytes_aligned <= self.accessible {
            return Ok(());
        }

        // Otherwise use `mprotect` to make the new pages read/write.
        self.set_protection(self.accessible..size_bytes_aligned, true)?;
        self.accessible = size_bytes_aligned;

        Ok(())
    }

    /// Prepares this slot for the instantiation of a new instance with the
    /// provided linear memory image.
    ///
    /// The `initial_size_bytes` parameter indicates the required initial size
    /// of the heap for the instance. The `maybe_image` is an optional initial
    /// image for linear memory to contain. The `ty` and `tunables` describe
    /// how compiled code will access this memory.
    ///
    /// The purpose of this method is to take a previously pristine slot
    /// (`!self.dirty`) and transform its prior state into state necessary for
    /// the given parameters. This could include, for example:
    ///
    /// * More memory may be made read/write if `initial_size_bytes` is larger
    ///   than `self.accessible`.
    /// * For `MemoryStyle::Static` linear memory may be made `PROT_NONE` if
    ///   `self.accessible` is larger than `initial_size_bytes`.
    /// * If no image was previously in place or if the wrong image was
    ///   previously in place then `mmap` may be used to setup the initial
    ///   image.
    pub(crate) fn instantiate(
        &mut self,
        initial_size_bytes: usize,
        maybe_image: Option<&Arc<MemoryImage>>,
        ty: &wasmtime_environ::Memory,
        tunables: &Tunables,
    ) -> Result<()> {
        assert!(!self.dirty);
        assert!(
            initial_size_bytes <= self.static_size,
            "initial_size_bytes <= self.static_size failed: \
             initial_size_bytes={initial_size_bytes}, self.static_size={}",
            self.static_size
        );
        let initial_size_bytes_page_aligned =
            HostAlignedByteCount::new_rounded_up(initial_size_bytes)?;

        // First order of business is to blow away the previous linear memory
        // image if it doesn't match the image specified here. If one is
        // detected then it's reset with anonymous memory which means that all
        // of memory up to `self.accessible` will now be read/write and zero.
        //
        // Note that this is intentionally a "small mmap" which only covers the
        // extent of the prior initialization image in order to preserve
        // resident memory that might come before or after the image.
        let images_equal = match (self.image.as_ref(), maybe_image) {
            (Some(a), Some(b)) if Arc::ptr_eq(a, b) => true,
            (None, None) => true,
            _ => false,
        };
        if !images_equal {
            self.remove_image()?;
        }

        // The next order of business is to ensure that `self.accessible` is
        // appropriate. First up is to grow the read/write portion of memory if
        // it's not large enough to accommodate `initial_size_bytes`.
        if self.accessible < initial_size_bytes_page_aligned {
            self.set_protection(self.accessible..initial_size_bytes_page_aligned, true)?;
            self.accessible = initial_size_bytes_page_aligned;
        }

        // If (1) the accessible region is not in its initial state, and (2) the
        // memory relies on virtual memory at all (i.e. has offset guard
        // pages), then we need to reset memory protections. Put another way,
        // the only time it is safe to not reset protections is when we are
        // using dynamic memory without any guard pages.
        let host_page_size_log2 = u8::try_from(host_page_size().ilog2()).unwrap();
        if initial_size_bytes_page_aligned < self.accessible
            && (tunables.memory_guard_size > 0
                || ty.can_elide_bounds_check(tunables, host_page_size_log2))
        {
            self.set_protection(initial_size_bytes_page_aligned..self.accessible, false)?;
            self.accessible = initial_size_bytes_page_aligned;
        }

        // Now that memory is sized appropriately the final operation is to
        // place the new image into linear memory. Note that this operation is
        // skipped if `self.image` matches `maybe_image`.
        assert!(initial_size_bytes <= self.accessible.byte_count());
        assert!(initial_size_bytes_page_aligned <= self.accessible);
        if !images_equal {
            if let Some(image) = maybe_image.as_ref() {
                assert!(
                    image
                        .linear_memory_offset
                        .checked_add(image.len)
                        .unwrap()
                        .byte_count()
                        <= initial_size_bytes
                );
                if !image.len.is_zero() {
                    unsafe {
                        image.map_at(&self.base)?;
                    }
                }
            }
            self.image = maybe_image.cloned();
        }

        // Flag ourselves as `dirty` which means that the next operation on this
        // slot is required to be `clear_and_remain_ready`.
        self.dirty = true;

        Ok(())
    }

    pub(crate) fn remove_image(&mut self) -> Result<()> {
        if let Some(image) = &self.image {
            unsafe {
                image.remap_as_zeros_at(self.base.as_mut_ptr())?;
            }
            self.image = None;
        }
        Ok(())
    }

    /// Resets this linear memory slot back to a "pristine state".
    ///
    /// This will reset the memory back to its original contents on Linux or
    /// reset the contents back to zero on other platforms. The `keep_resident`
    /// argument is the maximum amount of memory to keep resident in this
    /// process's memory on Linux. Up to that much memory will be `memset` to
    /// zero, while the rest of it will be reset or released with `madvise`.
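    ///
    /// If a `pagemap` is provided it may be consulted to narrow the reset work
    /// to pages the kernel reports as actually populated (see
    /// `reset_with_pagemap`).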
    #[allow(dead_code, reason = "only used in some cfgs")]
    pub(crate) fn clear_and_remain_ready(
        &mut self,
        pagemap: Option<&PageMap>,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> Result<()> {
        assert!(self.dirty);

        unsafe {
            self.reset_all_memory_contents(pagemap, keep_resident, decommit)?;
        }

        self.dirty = false;
        Ok(())
    }

    #[allow(dead_code, reason = "only used in some cfgs")]
    unsafe fn reset_all_memory_contents(
        &mut self,
        pagemap: Option<&PageMap>,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) -> Result<()> {
        match vm::decommit_behavior() {
            DecommitBehavior::Zero => {
                // If we're not on Linux then there's no generic platform way to
                // reset memory back to its original state, so instead reset memory
                // back to entirely zeros with an anonymous backing.
                //
                // Additionally the previous image, if any, is dropped here
                // since it's no longer applicable to this mapping.
                self.reset_with_anon_memory()
            }
            DecommitBehavior::RestoreOriginalMapping => {
                unsafe {
                    self.reset_with_original_mapping(pagemap, keep_resident, decommit);
                }
                Ok(())
            }
        }
    }

    #[allow(dead_code, reason = "only used in some cfgs")]
    unsafe fn reset_with_original_mapping(
        &mut self,
        pagemap: Option<&PageMap>,
        keep_resident: HostAlignedByteCount,
        decommit: impl FnMut(*mut u8, usize),
    ) {
        assert_eq!(
            vm::decommit_behavior(),
            DecommitBehavior::RestoreOriginalMapping
        );

        unsafe {
            match &self.image {
                // If there's a backing image then manually resetting a region
                // is a bit trickier than without an image, so delegate to the
                // helper function below.
                Some(image) => {
                    reset_with_pagemap(
                        pagemap,
                        self.base.as_mut_ptr(),
                        self.accessible,
                        keep_resident,
                        |region| {
                            manually_reset_region(self.base.as_mut_ptr().addr(), image, region)
                        },
                        decommit,
                    );
                }

                // If there's no memory image for this slot then pages are always
                // manually reset back to zero or given to `decommit`.
                None => reset_with_pagemap(
                    pagemap,
                    self.base.as_mut_ptr(),
                    self.accessible,
                    keep_resident,
                    |region| region.fill(0),
                    decommit,
                ),
            }
        }

        /// Manually resets `region` back to its original contents as specified
        /// in `image`.
        ///
        /// This assumes that the original mmap starts at `base_addr` and
        /// `region` is a subslice within the original mmap.
        ///
        /// # Panics
        ///
        /// Panics if `base_addr` is not the correct base address for `region`,
        /// due to the various indexing calculations below.
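        ///
        /// For example (illustrative numbers, assuming 4 KiB host pages): with
        /// an image at linear memory offset 0x3000 of length 0x3000, a dirty
        /// `region` covering 0x2000..0x7000 is reset in three steps:
        ///
        /// ```text
        /// 0x2000..0x3000  zeroed            (before the image)
        /// 0x3000..0x6000  copied from image (image bytes 0x0000..0x3000)
        /// 0x6000..0x7000  zeroed            (after the image)
        /// ```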
        fn manually_reset_region(base_addr: usize, image: &MemoryImage, mut region: &mut [u8]) {
            let image_start = image.linear_memory_offset.byte_count();
            let image_end = image_start + image.len.byte_count();
            let mut region_start = region.as_ptr().addr() - base_addr;
            let region_end = region_start + region.len();
            let image_bytes = image.module_source.wasm_data();
            let image_bytes = &image_bytes[image.module_source_offset..][..image.len.byte_count()];

            // 1. Zero out the part before the image (if any).
            if let Some(len_before_image) = image_start.checked_sub(region_start) {
                let len = len_before_image.min(region.len());
                let (a, b) = region.split_at_mut(len);
                a.fill(0);
                region = b;
                region_start += len;

                if region.is_empty() {
                    return;
                }
            }

            debug_assert_eq!(region_end - region_start, region.len());
            debug_assert!(region_start >= image_start);

            // 2. Copy the original bytes from the image for the part that
            //    overlaps with the image.
            if let Some(len_in_image) = image_end.checked_sub(region_start) {
                let len = len_in_image.min(region.len());
                let (a, b) = region.split_at_mut(len);
                a.copy_from_slice(&image_bytes[region_start - image_start..][..len]);
                region = b;
                region_start += len;

                if region.is_empty() {
                    return;
                }
            }

            debug_assert_eq!(region_end - region_start, region.len());
            debug_assert!(region_start >= image_end);

            // 3. Zero out the part after the image.
            region.fill(0);
        }
    }

    fn set_protection(&self, range: Range<HostAlignedByteCount>, readwrite: bool) -> Result<()> {
        let len = range
            .end
            .checked_sub(range.start)
            .expect("range.start <= range.end");
        assert!(range.end.byte_count() <= self.static_size);
        if len.is_zero() {
            return Ok(());
        }

        // TODO: use Mmap to change memory permissions instead of these free
        // functions.
        unsafe {
            let start = self.base.as_mut_ptr().add(range.start.byte_count());
            if readwrite {
                vm::expose_existing_mapping(start, len.byte_count())?;
            } else {
                vm::hide_existing_mapping(start, len.byte_count())?;
            }
        }

        Ok(())
    }

    pub(crate) fn has_image(&self) -> bool {
        self.image.is_some()
    }

    #[allow(dead_code, reason = "only used in some cfgs")]
    pub(crate) fn is_dirty(&self) -> bool {
        self.dirty
    }

    /// Map anonymous zeroed memory across the whole slot,
    /// inaccessible. Used both during instantiate and during drop.
    pub(crate) fn reset_with_anon_memory(&mut self) -> Result<()> {
        if self.static_size == 0 {
            assert!(self.image.is_none());
            assert_eq!(self.accessible, 0);
            return Ok(());
        }

        unsafe {
            vm::erase_existing_mapping(self.base.as_mut_ptr(), self.static_size)?;
        }

        self.image = None;
        self.accessible = HostAlignedByteCount::ZERO;

        Ok(())
    }
}

#[cfg(all(test, target_os = "linux", not(miri)))]
mod test {
    use super::*;
    use crate::runtime::vm::mmap::{AlignedLength, Mmap};
    use crate::runtime::vm::sys::vm::decommit_pages;
    use crate::runtime::vm::{HostAlignedByteCount, MmapVec, host_page_size};
    use std::sync::Arc;
    use wasmtime_environ::{IndexType, Limits, Memory};

    fn create_memfd_with_data(offset: usize, data: &[u8]) -> Result<MemoryImage> {
        // offset must be a multiple of the page size.
        let linear_memory_offset =
            HostAlignedByteCount::new(offset).expect("offset is page-aligned");
        // The image length is rounded up to the nearest page size
        let image_len = HostAlignedByteCount::new_rounded_up(data.len()).unwrap();

        let mut source = TestDataSource {
            data: vec![0; image_len.byte_count()],
        };
        source.data[..data.len()].copy_from_slice(data);

        return Ok(MemoryImage {
            source: MemoryImageSource::from_data(data)?.unwrap(),
            len: image_len,
            source_offset: 0,
            linear_memory_offset,
            module_source: Arc::new(source),
            module_source_offset: 0,
        });

        struct TestDataSource {
            data: Vec<u8>,
        }

        impl ModuleMemoryImageSource for TestDataSource {
            fn wasm_data(&self) -> &[u8] {
                &self.data
            }
            fn mmap(&self) -> Option<&MmapVec> {
                None
            }
        }
    }

    fn dummy_memory() -> Memory {
        Memory {
            idx_type: IndexType::I32,
            limits: Limits { min: 0, max: None },
            shared: false,
            page_size_log2: Memory::DEFAULT_PAGE_SIZE_LOG2,
        }
    }

    fn mmap_4mib_inaccessible() -> Arc<Mmap<AlignedLength>> {
        let four_mib = HostAlignedByteCount::new(4 << 20).expect("4 MiB is page aligned");
        Arc::new(Mmap::accessible_reserved(HostAlignedByteCount::ZERO, four_mib).unwrap())
    }

    /// Presents a part of an mmap as a mutable slice within a callback.
    ///
    /// The callback ensures that the reference no longer lives after the
    /// function is done.
    ///
    /// # Safety
    ///
    /// The caller must ensure that, for the duration of this function call,
    /// this region of memory is not accessed (read from or written to) except
    /// via the reference. Making the callback `'static` goes some way towards
    /// ensuring that, but it's still possible to squirrel away a reference into
    /// global state. So don't do that.
    unsafe fn with_slice_mut(
        mmap: &Arc<Mmap<AlignedLength>>,
        range: Range<usize>,
        f: impl FnOnce(&mut [u8]) + 'static,
    ) {
        let ptr = mmap.as_ptr().cast_mut();
        let slice = unsafe {
            core::slice::from_raw_parts_mut(ptr.add(range.start), range.end - range.start)
        };
        f(slice);
    }

    #[test]
    fn instantiate_no_image() {
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 4 << 30,
            ..Tunables::default_miri()
        };
        // 4 MiB mmap'd area, not accessible
        let mmap = mmap_4mib_inaccessible();
        // Create a MemoryImageSlot on top of it
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        assert!(!memfd.is_dirty());
        // instantiate with 64 KiB initial size
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(memfd.is_dirty());

        // We should be able to access this 64 KiB (try both ends) and
        // it should consist of zeroes.
        unsafe {
            with_slice_mut(&mmap, 0..65536, |slice| {
                assert_eq!(0, slice[0]);
                assert_eq!(0, slice[65535]);
                slice[1024] = 42;
                assert_eq!(42, slice[1024]);
            });
        }

        // grow the heap
        memfd.set_heap_limit(128 << 10).unwrap();
        let slice = unsafe { mmap.slice(0..1 << 20) };
        assert_eq!(42, slice[1024]);
        assert_eq!(0, slice[131071]);
        // instantiate again; we should see zeroes, even as the
        // reuse-anon-mmap-opt kicks in
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        assert!(!memfd.is_dirty());
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(0, slice[1024]);
    }

    #[test]
    fn instantiate_image() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 4 << 30,
            ..Tunables::default_miri()
        };
        // 4 MiB mmap'd area, not accessible
        let mmap = mmap_4mib_inaccessible();
        // Create a MemoryImageSlot on top of it
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        // Create an image with some data.
        let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
        // Instantiate with this image
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        assert!(memfd.has_image());

        unsafe {
            with_slice_mut(&mmap, 0..65536, move |slice| {
                assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
                slice[page_size] = 5;
            });
        }

        // Clear and re-instantiate same image
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Clear and re-instantiate no image
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(!memfd.has_image());
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);

        // Clear and re-instantiate image again
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Create another image with different data.
        let image2 = Arc::new(create_memfd_with_data(page_size, &[10, 11, 12, 13]).unwrap());
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(128 << 10, Some(&image2), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[10, 11, 12, 13], &slice[page_size..][..4]);

        // Instantiate the original image again; we should notice it's
        // a different image and not reuse the mappings.
        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        memfd
            .instantiate(64 << 10, Some(&image), &ty, &tunables)
            .unwrap();
        let slice = unsafe { mmap.slice(0..65536) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn memset_instead_of_madvise() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 100 << 16,
            ..Tunables::default_miri()
        };
        let mmap = mmap_4mib_inaccessible();
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);

        // Test basics with the image
        for image_off in [0, page_size, page_size * 2] {
            let image = Arc::new(create_memfd_with_data(image_off, &[1, 2, 3, 4]).unwrap());
            for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
                let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
                memfd
                    .instantiate(64 << 10, Some(&image), &ty, &tunables)
                    .unwrap();
                assert!(memfd.has_image());

                unsafe {
                    with_slice_mut(&mmap, 0..64 << 10, move |slice| {
                        if image_off > 0 {
                            assert_eq!(slice[image_off - 1], 0);
                        }
                        assert_eq!(slice[image_off + 5], 0);
                        assert_eq!(&[1, 2, 3, 4], &slice[image_off..][..4]);
                        slice[image_off] = 5;
                        assert_eq!(&[5, 2, 3, 4], &slice[image_off..][..4]);
                    })
                };

                memfd
                    .clear_and_remain_ready(None, amt_to_memset, |ptr, len| unsafe {
                        decommit_pages(ptr, len).unwrap()
                    })
                    .unwrap();
            }
        }

        // Test without an image
        for amt_to_memset in [0, page_size, page_size * 10, 1 << 20, 10 << 20] {
            let amt_to_memset = HostAlignedByteCount::new(amt_to_memset).unwrap();
            memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();

            unsafe {
                with_slice_mut(&mmap, 0..64 << 10, |slice| {
                    for chunk in slice.chunks_mut(1024) {
                        assert_eq!(chunk[0], 0);
                        chunk[0] = 5;
                    }
                });
            }
            memfd
                .clear_and_remain_ready(None, amt_to_memset, |ptr, len| unsafe {
                    decommit_pages(ptr, len).unwrap()
                })
                .unwrap();
        }
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn dynamic() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 0,
            memory_reservation_for_growth: 200,
            ..Tunables::default_miri()
        };

        let mmap = mmap_4mib_inaccessible();
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, 4 << 20);
        let image = Arc::new(create_memfd_with_data(page_size, &[1, 2, 3, 4]).unwrap());
        let initial = 64 << 10;

        // Instantiate the image and test that memory remains accessible after
        // it's cleared.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert!(memfd.has_image());

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);
                slice[page_size] = 5;
                assert_eq!(&[5, 2, 3, 4], &slice[page_size..][..4]);
            });
        }

        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();
        let slice = unsafe { mmap.slice(0..(64 << 10) + page_size) };
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        // Re-instantiate and make sure it preserves memory. Grow a bit and set
        // data beyond the initial size.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert_eq!(&[1, 2, 3, 4], &slice[page_size..][..4]);

        memfd.set_heap_limit(initial * 2).unwrap();

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[0, 0], &slice[initial..initial + 2]);
                slice[initial] = 100;
                assert_eq!(&[100, 0], &slice[initial..initial + 2]);
            });
        }

        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();

        // Test that memory is still accessible, but it's been reset
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);

        // Instantiate again, and again memory beyond the initial size should
        // still be accessible. Grow into it again and make sure it works.
        memfd
            .instantiate(initial, Some(&image), &ty, &tunables)
            .unwrap();
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
        memfd.set_heap_limit(initial * 2).unwrap();

        unsafe {
            with_slice_mut(&mmap, 0..(64 << 10) + page_size, move |slice| {
                assert_eq!(&[0, 0], &slice[initial..initial + 2]);
                slice[initial] = 100;
                assert_eq!(&[100, 0], &slice[initial..initial + 2]);
            });
        }

        memfd
            .clear_and_remain_ready(None, HostAlignedByteCount::ZERO, |ptr, len| unsafe {
                decommit_pages(ptr, len).unwrap()
            })
            .unwrap();

        // Reset the image to none and double-check everything is back to zero
        memfd.instantiate(64 << 10, None, &ty, &tunables).unwrap();
        assert!(!memfd.has_image());
        assert_eq!(&[0, 0, 0, 0], &slice[page_size..][..4]);
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
    }

    #[test]
    fn reset_with_pagemap() {
        let page_size = host_page_size();
        let ty = dummy_memory();
        let tunables = Tunables {
            memory_reservation: 100 << 16,
            ..Tunables::default_miri()
        };
        let mmap = mmap_4mib_inaccessible();
        let mmap_len = page_size * 9;
        let mut memfd =
            MemoryImageSlot::create(mmap.zero_offset(), HostAlignedByteCount::ZERO, mmap_len);
        let pagemap = PageMap::new();
        let pagemap = pagemap.as_ref();

        let mut data = vec![0; 3 * page_size];
        for (i, chunk) in data.chunks_mut(page_size).enumerate() {
            for slot in chunk {
                *slot = u8::try_from(i + 1).unwrap();
            }
        }
        let image = Arc::new(create_memfd_with_data(3 * page_size, &data).unwrap());

        memfd
            .instantiate(mmap_len, Some(&image), &ty, &tunables)
            .unwrap();

        let keep_resident = HostAlignedByteCount::new(mmap_len).unwrap();
        let assert_pristine_after_reset = |memfd: &mut MemoryImageSlot| unsafe {
            // Wipe the image, keeping some bytes resident.
            memfd
                .clear_and_remain_ready(pagemap, keep_resident, |ptr, len| {
                    decommit_pages(ptr, len).unwrap()
                })
                .unwrap();

            // Double check that the contents of memory are as expected after
            // reset.
            with_slice_mut(&mmap, 0..mmap_len, move |slice| {
                for (i, chunk) in slice.chunks(page_size).enumerate() {
                    let expected = match i {
                        0..3 => 0,
                        3..6 => u8::try_from(i).unwrap() - 2,
                        6..9 => 0,
                        _ => unreachable!(),
                    };
                    for slot in chunk {
                        assert_eq!(*slot, expected);
                    }
                }
            });

            // Re-instantiate, but then wipe the image entirely by keeping
            // nothing resident.
            memfd
                .instantiate(mmap_len, Some(&image), &ty, &tunables)
                .unwrap();
            memfd
                .clear_and_remain_ready(pagemap, HostAlignedByteCount::ZERO, |ptr, len| {
                    decommit_pages(ptr, len).unwrap()
                })
                .unwrap();

            // Next re-instantiate a final time to get used for the next test.
            memfd
                .instantiate(mmap_len, Some(&image), &ty, &tunables)
                .unwrap();
        };

        let write_page = |_memfd: &mut MemoryImageSlot, page: usize| unsafe {
            with_slice_mut(
                &mmap,
                page * page_size..(page + 1) * page_size,
                move |slice| slice.fill(0xff),
            );
        };

        // Test various combinations of dirty pages and regions. For example
        // test a dirty region of memory entirely in the zero-initialized zone
        // before/after the image and also test when the dirty region straddles
        // just the start of the image, just the end of the image, both ends,
        // and is entirely contained in just the image.
        assert_pristine_after_reset(&mut memfd);

        for i in 0..9 {
            write_page(&mut memfd, i);
            assert_pristine_after_reset(&mut memfd);
        }
        write_page(&mut memfd, 0);
        write_page(&mut memfd, 1);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 1);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 2);
        write_page(&mut memfd, 3);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 3);
        write_page(&mut memfd, 4);
        write_page(&mut memfd, 5);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 0);
        write_page(&mut memfd, 1);
        write_page(&mut memfd, 2);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 0);
        write_page(&mut memfd, 3);
        write_page(&mut memfd, 6);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 2);
        write_page(&mut memfd, 3);
        write_page(&mut memfd, 4);
        write_page(&mut memfd, 5);
        write_page(&mut memfd, 6);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 4);
        write_page(&mut memfd, 5);
        write_page(&mut memfd, 6);
        write_page(&mut memfd, 7);
        assert_pristine_after_reset(&mut memfd);
        write_page(&mut memfd, 4);
        write_page(&mut memfd, 5);
        write_page(&mut memfd, 8);
        assert_pristine_after_reset(&mut memfd);
    }
}