wasmtime/runtime/vm/instance/allocator/pooling/memory_pool.rs
//! Implements a memory pool using a single allocated memory slab.
//!
//! The pooling instance allocator maps one large slab of memory in advance and
//! allocates WebAssembly memories from this slab--a [`MemoryPool`]. Each
//! WebAssembly memory is allocated in its own slot (see uses of `index` and
//! [`SlotId`] in this module):
//!
//! ```text
//! ┌──────┬──────┬──────┬──────┬──────┐
//! │Slot 0│Slot 1│Slot 2│Slot 3│......│
//! └──────┴──────┴──────┴──────┴──────┘
//! ```
//!
//! Diving deeper, we note that a [`MemoryPool`] protects Wasmtime from
//! out-of-bounds memory accesses by inserting inaccessible guard regions
//! between memory slots. These guard regions are configured to raise a signal
//! if they are accessed--a WebAssembly out-of-bounds (OOB) memory access. The
//! [`MemoryPool`] documentation has a more detailed chart but one can think of
//! memory slots being laid out like the following:
//!
//! ```text
//! ┌─────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┐
//! │Guard│Mem 0│Guard│Mem 1│Guard│Mem 2│.....│Guard│
//! └─────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┘
//! ```
//!
//! But we can be more efficient about guard regions: with memory protection
//! keys (MPK) enabled, the interleaved guard regions can be smaller. If we
//! surround a memory with memories from other instances and each instance is
//! protected by different protection keys, the guard region can be smaller AND
//! the pool will still raise a signal on an OOB access. This complicates how we
//! lay out memory slots: we must store memories from the same instance in the
//! same "stripe". Each stripe is protected by a different protection key.
//!
//! This concept, dubbed [ColorGuard] in the original paper, relies on careful
//! calculation of the memory sizes to prevent any "overlapping access" (see
//! [`calculate`]): there are limited protection keys available (15) so the next
//! memory using the same key must be at least as far away as the guard region
//! we would insert otherwise. This ends up looking like the following, where a
//! store for instance 0 (`I0`) "stripes" two memories (`M0` and `M1`) with the
//! same protection key 1 and far enough apart to signal an OOB access:
//!
//! ```text
//! ┌─────┬─────┬─────┬─────┬────────────────┬─────┬─────┬─────┐
//! │.....│I0:M0│.....│.....│.<enough slots>.│I0:M1│.....│.....│
//! ├─────┼─────┼─────┼─────┼────────────────┼─────┼─────┼─────┤
//! │.....│key 1│key 2│key 3│..<more keys>...│key 1│key 2│.....│
//! └─────┴─────┴─────┴─────┴────────────────┴─────┴─────┴─────┘
//! ```
//!
//! [ColorGuard]: https://plas2022.github.io/files/pdf/SegueColorGuard.pdf
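//!
//! For intuition, here is an illustrative (not normative) configuration run
//! through the arithmetic in [`calculate`]: with a 4 GiB static memory
//! reservation, a 2 GiB guard region, a 128 MiB maximum memory size, 15
//! protection keys, and at least 15 slots, an unstriped layout needs 6 GiB of
//! address space per slot, while striping shrinks each slot to roughly
//! 410 MiB:
//!
//! ```text
//! faulting region   = max(4 GiB, 128 MiB) + 2 GiB          = 6 GiB
//! stripes           = min(15 keys, ceil(6 GiB / 128 MiB))  = 15
//! slot size         = max(6 GiB / 15, 128 MiB)             ≈ 410 MiB
//! same-key distance = slot size * stripes                  ≥ 6 GiB
//! ```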
use super::{
index_allocator::{MemoryInModule, ModuleAffinityIndexAllocator, SlotId},
MemoryAllocationIndex,
};
use crate::runtime::vm::mpk::{self, ProtectionKey, ProtectionMask};
use crate::runtime::vm::{
CompiledModuleId, InstanceAllocationRequest, InstanceLimits, Memory, MemoryImageSlot, Mmap,
MpkEnabled, PoolingInstanceAllocatorConfig,
};
use crate::{prelude::*, vm::round_usize_up_to_host_pages};
use std::ffi::c_void;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Mutex;
use wasmtime_environ::{DefinedMemoryIndex, MemoryPlan, MemoryStyle, Module, Tunables};
/// A set of allocator slots.
///
/// The allocated slots can be split by striping them: e.g., with two stripe
/// colors 0 and 1, we would allocate all even slots using stripe 0 and all odd
/// slots using stripe 1.
///
/// This is helpful for the use of protection keys: (a) if a request comes to
/// allocate multiple instances, we can allocate them all from the same stripe
/// and (b) if a store wants to allocate more from the same stripe it can.
#[derive(Debug)]
struct Stripe {
allocator: ModuleAffinityIndexAllocator,
pkey: Option<ProtectionKey>,
}
/// Represents a pool of WebAssembly linear memories.
///
/// A linear memory is divided into accessible pages and guard pages. A memory
/// pool contains linear memories: each memory occupies a slot in an
/// allocated slab (i.e., `mapping`):
///
/// ```text
///         layout.max_memory_bytes                 layout.slot_bytes
///                    |                                    |
///              ◄─────┴────►                   ◄───────────┴──────────►
/// ┌───────────┬────────────┬───────────┐     ┌───────────┬───────────┬───────────┐
/// | PROT_NONE |            | PROT_NONE | ... |           | PROT_NONE | PROT_NONE |
/// └───────────┴────────────┴───────────┘     └───────────┴───────────┴───────────┘
/// |           |◄──────────────────┬─────────────────────────────────► ◄────┬────►
/// |           |                   |                                        |
/// mapping     |      `layout.num_slots` memories             layout.post_slab_guard_bytes
///             |
/// layout.pre_slab_guard_bytes
/// ```
#[derive(Debug)]
pub struct MemoryPool {
mapping: Mmap,
/// This memory pool is stripe-aware. If using memory protection keys, this
/// will contain one stripe per available key; otherwise, a single stripe
/// with an empty key.
stripes: Vec<Stripe>,
/// If using a copy-on-write allocation scheme, the slot management. We
/// dynamically transfer ownership of a slot to a Memory when in use.
image_slots: Vec<Mutex<Option<MemoryImageSlot>>>,
/// A description of the various memory sizes used in allocating the
/// `mapping` slab.
layout: SlabLayout,
/// The maximum number of memories that a single core module instance may
/// use.
///
/// NB: this is needed for validation but does not affect the pool's size.
memories_per_instance: usize,
/// How much linear memory, in bytes, to keep resident after resetting for
/// use with the next instance. This much memory will be `memset` to zero
/// when a linear memory is deallocated.
///
/// Memory exceeding this amount in the wasm linear memory will be released
/// with `madvise` back to the kernel.
///
/// Only applicable on Linux.
pub(super) keep_resident: usize,
/// Keep track of protection keys handed out to initialized stores; this
/// allows us to round-robin the assignment of stores to stripes.
next_available_pkey: AtomicUsize,
}
impl MemoryPool {
/// Create a new `MemoryPool`.
pub fn new(config: &PoolingInstanceAllocatorConfig, tunables: &Tunables) -> Result<Self> {
if u64::try_from(config.limits.max_memory_size).unwrap()
> tunables.static_memory_reservation
{
bail!(
"maximum memory size of {:#x} bytes exceeds the configured \
static memory reservation of {:#x} bytes",
config.limits.max_memory_size,
tunables.static_memory_reservation
);
}
let pkeys = match config.memory_protection_keys {
MpkEnabled::Auto => {
if mpk::is_supported() {
mpk::keys(config.max_memory_protection_keys)
} else {
&[]
}
}
MpkEnabled::Enable => {
if mpk::is_supported() {
mpk::keys(config.max_memory_protection_keys)
} else {
bail!("mpk is disabled on this system")
}
}
MpkEnabled::Disable => &[],
};
// This is a tricky bit of global state: when creating a memory pool
// that uses memory protection keys, we ensure here that any host code
// will have access to all keys (i.e., stripes). It's only when we enter
// the WebAssembly guest code (see `StoreInner::call_hook`) that we
// enforce which keys/stripes can be accessed. Be forewarned about the
// assumptions here:
// - we expect this "allow all" configuration to reset the default
// process state (only allow key 0) _before_ any memories are accessed
// - and we expect no other code (e.g., host-side code) to modify this
// global MPK configuration
if !pkeys.is_empty() {
mpk::allow(ProtectionMask::all());
}
// Create a slab layout and allocate it as a completely inaccessible
// region to start--`PROT_NONE`.
let constraints = SlabConstraints::new(&config.limits, tunables, pkeys.len())?;
let layout = calculate(&constraints)?;
log::debug!(
"creating memory pool: {constraints:?} -> {layout:?} (total: {})",
layout.total_slab_bytes()?
);
let mut mapping = Mmap::accessible_reserved(0, layout.total_slab_bytes()?)
.context("failed to create memory pool mapping")?;
// Then, stripe the memory with the available protection keys. This is
// unnecessary if there is only one stripe color.
if layout.num_stripes >= 2 {
let mut cursor = layout.pre_slab_guard_bytes;
let pkeys = &pkeys[..layout.num_stripes];
for i in 0..constraints.num_slots {
let pkey = &pkeys[i % pkeys.len()];
let region = unsafe { mapping.slice_mut(cursor..cursor + layout.slot_bytes) };
pkey.protect(region)?;
cursor += layout.slot_bytes;
}
debug_assert_eq!(
cursor + layout.post_slab_guard_bytes,
layout.total_slab_bytes()?
);
}
let image_slots: Vec<_> = std::iter::repeat_with(|| Mutex::new(None))
.take(constraints.num_slots)
.collect();
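// Divide the slots evenly among the stripes: each stripe gets
// `num_slots / num_stripes` slots, and the first `num_slots % num_stripes`
// stripes each receive one extra. As an illustrative example, 5 slots across
// 2 stripes gives stripe 0 three slots and stripe 1 two.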
let create_stripe = |i| {
let num_slots = constraints.num_slots / layout.num_stripes
+ usize::from(constraints.num_slots % layout.num_stripes > i);
let allocator = ModuleAffinityIndexAllocator::new(
num_slots.try_into().unwrap(),
config.max_unused_warm_slots,
);
Stripe {
allocator,
pkey: pkeys.get(i).cloned(),
}
};
debug_assert!(layout.num_stripes > 0);
let stripes: Vec<_> = (0..layout.num_stripes)
.into_iter()
.map(create_stripe)
.collect();
let pool = Self {
stripes,
mapping,
image_slots,
layout,
memories_per_instance: usize::try_from(config.limits.max_memories_per_module).unwrap(),
keep_resident: round_usize_up_to_host_pages(config.linear_memory_keep_resident)?,
next_available_pkey: AtomicUsize::new(0),
};
Ok(pool)
}
/// Return a protection key that stores can use for requesting new
/// memories.
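///
/// Keys are handed out round-robin over the available stripes (via the
/// `next_available_pkey` counter below); illustratively, with two stripes,
/// successive stores alternate between the two keys.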
pub fn next_available_pkey(&self) -> Option<ProtectionKey> {
let index = self.next_available_pkey.fetch_add(1, Ordering::SeqCst) % self.stripes.len();
debug_assert!(
self.stripes.len() < 2 || self.stripes[index].pkey.is_some(),
"if we are using stripes, we cannot have an empty protection key"
);
self.stripes[index].pkey
}
/// Validate whether this memory pool supports the given module.
pub fn validate(&self, module: &Module) -> Result<()> {
let memories = module.memory_plans.len() - module.num_imported_memories;
if memories > usize::try_from(self.memories_per_instance).unwrap() {
bail!(
"defined memories count of {} exceeds the per-instance limit of {}",
memories,
self.memories_per_instance,
);
}
for (i, plan) in module
.memory_plans
.iter()
.skip(module.num_imported_memories)
{
match plan.style {
MemoryStyle::Static { byte_reservation } => {
if u64::try_from(self.layout.bytes_to_next_stripe_slot()).unwrap()
< byte_reservation
{
bail!(
"memory size allocated per-memory is too small to \
satisfy static bound of {byte_reservation:#x} bytes"
);
}
}
MemoryStyle::Dynamic { .. } => {}
}
let min = plan.memory.minimum_byte_size().with_context(|| {
format!(
"memory index {} has a minimum byte size that cannot be represented in a u64",
i.as_u32()
)
})?;
if min > u64::try_from(self.layout.max_memory_bytes).unwrap() {
bail!(
"memory index {} has a minimum byte size of {} which exceeds the limit of {} bytes",
i.as_u32(),
min,
self.layout.max_memory_bytes,
);
}
}
Ok(())
}
/// Are zero slots in use right now?
#[allow(unused)] // some cfgs don't use this
pub fn is_empty(&self) -> bool {
self.stripes.iter().all(|s| s.allocator.is_empty())
}
/// Allocate a single memory for the given instance allocation request.
pub fn allocate(
&self,
request: &mut InstanceAllocationRequest,
memory_plan: &MemoryPlan,
memory_index: DefinedMemoryIndex,
) -> Result<(MemoryAllocationIndex, Memory)> {
let stripe_index = if let Some(pkey) = &request.pkey {
pkey.as_stripe()
} else {
debug_assert!(self.stripes.len() < 2);
0
};
let striped_allocation_index = self.stripes[stripe_index]
.allocator
.alloc(
request
.runtime_info
.unique_id()
.map(|id| MemoryInModule(id, memory_index)),
)
.map(|slot| StripedAllocationIndex(u32::try_from(slot.index()).unwrap()))
.ok_or_else(|| {
super::PoolConcurrencyLimitError::new(
self.stripes[stripe_index].allocator.len(),
format!("memory stripe {stripe_index}"),
)
})?;
let allocation_index =
striped_allocation_index.as_unstriped_slot_index(stripe_index, self.stripes.len());
match (|| {
// Double-check that the runtime requirements of the memory are
// satisfied by the configuration of this pooling allocator. This
// should be returned as an error through `validate_memory_plans`
// but double-check here to be sure.
match memory_plan.style {
MemoryStyle::Static { byte_reservation } => {
assert!(
byte_reservation
<= u64::try_from(self.layout.bytes_to_next_stripe_slot()).unwrap()
);
}
MemoryStyle::Dynamic { .. } => {}
}
let base_ptr = self.get_base(allocation_index);
let base_capacity = self.layout.max_memory_bytes;
let mut slot = self.take_memory_image_slot(allocation_index);
let image = request.runtime_info.memory_image(memory_index)?;
let initial_size = memory_plan
.memory
.minimum_byte_size()
.expect("min size checked in validation");
// If instantiation fails, we can propagate the error
// upward and drop the slot. This will cause the Drop
// handler to attempt to map the range with PROT_NONE
// memory, to reserve the space while releasing any
// stale mappings. The next use of this slot will then
// create a new slot that will try to map over
// this, returning errors as well if the mapping
// errors persist. The unmap-on-drop is best effort;
// if it fails, then we can still soundly continue
// using the rest of the pool and allowing the rest of
// the process to continue, because we never perform a
// mmap that would leave an open space for someone
// else to come in and map something.
let initial_size = usize::try_from(initial_size).unwrap();
slot.instantiate(initial_size, image, memory_plan)?;
Memory::new_static(
memory_plan,
base_ptr,
base_capacity,
slot,
self.layout.bytes_to_next_stripe_slot(),
unsafe { &mut *request.store.get().unwrap() },
)
})() {
Ok(memory) => Ok((allocation_index, memory)),
Err(e) => {
self.stripes[stripe_index]
.allocator
.free(SlotId(striped_allocation_index.0));
Err(e)
}
}
}
/// Deallocate a previously-allocated memory.
///
/// # Safety
///
/// The memory must have been previously allocated from this pool and
/// assigned the given index, must currently be in an allocated state, and
/// must never be used again.
///
/// The caller must have already called `clear_and_remain_ready` on the
/// memory's image and flushed any enqueued decommits for this memory.
pub unsafe fn deallocate(
&self,
allocation_index: MemoryAllocationIndex,
image: MemoryImageSlot,
) {
self.return_memory_image_slot(allocation_index, image);
let (stripe_index, striped_allocation_index) =
StripedAllocationIndex::from_unstriped_slot_index(allocation_index, self.stripes.len());
self.stripes[stripe_index]
.allocator
.free(SlotId(striped_allocation_index.0));
}
/// Purge everything related to `module`.
pub fn purge_module(&self, module: CompiledModuleId) {
// This primarily means clearing out all of its memory images present in
// the virtual address space. Go through the index allocator for slots
// affine to `module` and reset them, freeing up the index when we're
// done.
//
// Note that this is only called when the specified `module` won't be
// allocated further (the module is being dropped) so this shouldn't hit
// any sort of infinite loop since this should be the final operation
// working with `module`.
//
// TODO: We are given a module id, but key affinity by pair of module id
// and defined memory index. We are missing any defined memory index or
// count of how many memories the module defines here. Therefore, we
// probe up to the maximum number of memories per instance. This is fine
// because that maximum is generally relatively small. If this method
// somehow ever gets hot because of unnecessary probing, we should
// either pass in the actual number of defined memories for the given
// module to this method, or keep a side table of all slots that are
// associated with a module (not just module and memory). The latter
// would require care to make sure that its maintenance wouldn't be too
// expensive for normal allocation/free operations.
for stripe in &self.stripes {
for i in 0..self.memories_per_instance {
use wasmtime_environ::EntityRef;
let memory_index = DefinedMemoryIndex::new(i);
while let Some(id) = stripe
.allocator
.alloc_affine_and_clear_affinity(module, memory_index)
{
// Clear the image from the slot and, if successful, return it back
// to our state. Note that on failure here the whole slot will get
// paved over with an anonymous mapping.
let index = MemoryAllocationIndex(id.0);
let mut slot = self.take_memory_image_slot(index);
if slot.remove_image().is_ok() {
self.return_memory_image_slot(index, slot);
}
stripe.allocator.free(id);
}
}
}
}
fn get_base(&self, allocation_index: MemoryAllocationIndex) -> *mut u8 {
assert!(allocation_index.index() < self.layout.num_slots);
let offset =
self.layout.pre_slab_guard_bytes + allocation_index.index() * self.layout.slot_bytes;
unsafe { self.mapping.as_ptr().offset(offset as isize).cast_mut() }
}
/// Take ownership of the given image slot. Must be returned via
/// `return_memory_image_slot` when the instance is done using it.
fn take_memory_image_slot(&self, allocation_index: MemoryAllocationIndex) -> MemoryImageSlot {
let maybe_slot = self.image_slots[allocation_index.index()]
.lock()
.unwrap()
.take();
maybe_slot.unwrap_or_else(|| {
MemoryImageSlot::create(
self.get_base(allocation_index) as *mut c_void,
0,
self.layout.max_memory_bytes,
)
})
}
/// Return ownership of the given image slot.
fn return_memory_image_slot(
&self,
allocation_index: MemoryAllocationIndex,
slot: MemoryImageSlot,
) {
assert!(!slot.is_dirty());
*self.image_slots[allocation_index.index()].lock().unwrap() = Some(slot);
}
}
impl Drop for MemoryPool {
fn drop(&mut self) {
// Ask all slots to *not* clear their mappings on drop (see
// `no_clear_on_drop`) and then drop them here. This is valid because the
// one `Mmap` that covers the whole region can just do its one munmap.
for mut slot in std::mem::take(&mut self.image_slots) {
if let Some(slot) = slot.get_mut().unwrap() {
slot.no_clear_on_drop();
}
}
}
}
/// The index of a memory allocation within an `InstanceAllocator`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
pub struct StripedAllocationIndex(u32);
impl StripedAllocationIndex {
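/// Split an unstriped slot index into its stripe and its index within that
/// stripe; `as_unstriped_slot_index` is the inverse mapping. As an
/// illustrative example, with 3 stripes, unstriped slot 7 belongs to stripe
/// `7 % 3 = 1` at striped index `7 / 3 = 2`, and `2 * 3 + 1` maps it back to
/// slot 7.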
fn from_unstriped_slot_index(
index: MemoryAllocationIndex,
num_stripes: usize,
) -> (usize, Self) {
let stripe_index = index.index() % num_stripes;
let num_stripes: u32 = num_stripes.try_into().unwrap();
let index_within_stripe = Self(index.0 / num_stripes);
(stripe_index, index_within_stripe)
}
fn as_unstriped_slot_index(self, stripe: usize, num_stripes: usize) -> MemoryAllocationIndex {
let num_stripes: u32 = num_stripes.try_into().unwrap();
let stripe: u32 = stripe.try_into().unwrap();
MemoryAllocationIndex(self.0 * num_stripes + stripe)
}
}
#[derive(Clone, Debug)]
struct SlabConstraints {
/// Essentially, the `static_memory_bound`: this is an assumption that the
/// runtime and JIT compiler make about how much space will be guarded
/// between slots.
expected_slot_bytes: usize,
/// The maximum size of any memory in the pool.
max_memory_bytes: usize,
num_slots: usize,
num_pkeys_available: usize,
guard_bytes: usize,
guard_before_slots: bool,
}
impl SlabConstraints {
fn new(
limits: &InstanceLimits,
tunables: &Tunables,
num_pkeys_available: usize,
) -> Result<Self> {
// `static_memory_reservation` is the configured number of bytes for a
// static memory slot (see `Config::static_memory_maximum_size`); even
// if the memory never grows to this size (e.g., it has a lower memory
// maximum), codegen will assume that this unused memory is mapped
// `PROT_NONE`. Typically `static_memory_bound` is 4GiB which helps
// elide most bounds checks. `MemoryPool` must respect this bound,
// though not explicitly: if we can achieve the same effect via
// MPK-protected stripes, the slot size can be lower than the
// `static_memory_bound`.
let expected_slot_bytes: usize = tunables
.static_memory_reservation
.try_into()
.context("static memory bound is too large")?;
let expected_slot_bytes = round_usize_up_to_host_pages(expected_slot_bytes)?;
let guard_bytes: usize = tunables
.static_memory_offset_guard_size
.try_into()
.context("guard region is too large")?;
let guard_bytes = round_usize_up_to_host_pages(guard_bytes)?;
let num_slots = limits
.total_memories
.try_into()
.context("too many memories")?;
let constraints = SlabConstraints {
max_memory_bytes: limits.max_memory_size,
num_slots,
expected_slot_bytes,
num_pkeys_available,
guard_bytes,
guard_before_slots: tunables.guard_before_linear_memory,
};
Ok(constraints)
}
}
#[derive(Debug)]
struct SlabLayout {
/// The total number of slots available in the memory pool slab.
num_slots: usize,
/// The size of each slot in the memory pool; this contains the maximum
/// memory size (i.e., from WebAssembly or Wasmtime configuration) plus any
/// guard region after the memory to catch OOB access. On these guard
/// regions, note that:
/// - users can configure how aggressively (or not) to elide bounds checks
/// via `Config::static_memory_guard_size` (see also:
/// `memory_and_guard_size`)
/// - memory protection keys can compress the size of the guard region by
/// placing slots from a different key (i.e., a stripe) in the guard
/// region; this means the slot itself can be smaller and we can allocate
/// more of them.
slot_bytes: usize,
/// The maximum size that can become accessible, in bytes, for each linear
/// memory. Guaranteed to be a whole number of Wasm pages.
max_memory_bytes: usize,
/// If necessary, the number of bytes to reserve as a guard region at the
/// beginning of the slab.
pre_slab_guard_bytes: usize,
/// Like `pre_slab_guard_bytes`, but at the end of the slab.
post_slab_guard_bytes: usize,
/// The number of stripes needed in the slab layout.
num_stripes: usize,
}
impl SlabLayout {
/// Return the total size of the slab, using the final layout (where `n =
/// num_slots`):
///
/// ```text
/// ┌────────────────────┬──────┬──────┬───┬──────┬─────────────────────┐
/// │pre_slab_guard_bytes│slot 1│slot 2│...│slot n│post_slab_guard_bytes│
/// └────────────────────┴──────┴──────┴───┴──────┴─────────────────────┘
/// ```
fn total_slab_bytes(&self) -> Result<usize> {
self.slot_bytes
.checked_mul(self.num_slots)
.and_then(|c| c.checked_add(self.pre_slab_guard_bytes))
.and_then(|c| c.checked_add(self.post_slab_guard_bytes))
.ok_or_else(|| anyhow!("total size of memory reservation exceeds addressable memory"))
}
/// Returns the number of Wasm bytes from the beginning of one slot to the
/// next slot in the same stripe--this is the striped equivalent of
/// `static_memory_bound`. Recall that between slots of the same stripe we
/// will see a slot from every other stripe.
///
/// For example, in a 3-stripe pool, this function measures the distance
/// from the beginning of slot 1 to slot 4, which are of the same stripe:
///
/// ```text
///  ◄──────────────────────►
/// ┌────────┬──────┬──────┬────────┬───┐
/// │*slot 1*│slot 2│slot 3│*slot 4*│...|
/// └────────┴──────┴──────┴────────┴───┘
/// ```
fn bytes_to_next_stripe_slot(&self) -> usize {
self.slot_bytes * self.num_stripes
}
}
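/// Compute a [`SlabLayout`] satisfying the given [`SlabConstraints`].
///
/// In broad strokes (the inline comments below walk through the details),
/// this:
/// 1. reserves a `PROT_NONE` region before the slots when a guard is
///    requested in front of linear memories,
/// 2. picks the smallest workable number of stripes (bounded by the
///    available protection keys and the number of slots) so that slots of
///    the same stripe remain at least `faulting_region_bytes` apart, and
/// 3. rounds the slot size up to a host page and sizes the post-slab guard
///    so the final slot keeps that same guarantee.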
fn calculate(constraints: &SlabConstraints) -> Result<SlabLayout> {
let SlabConstraints {
max_memory_bytes,
num_slots,
expected_slot_bytes,
num_pkeys_available,
guard_bytes,
guard_before_slots,
} = *constraints;
// If the user specifies a guard region, we always need to allocate a
// `PROT_NONE` region for it before any memory slots. Recall that we can
// avoid bounds checks for loads and stores with immediates up to
// `guard_bytes`, but we rely on Wasmtime to emit bounds checks for any
// accesses greater than this.
let pre_slab_guard_bytes = if guard_before_slots { guard_bytes } else { 0 };
// To calculate the slot size, we start with the default configured size and
// attempt to chip away at this via MPK protection. Note here how we begin
// to define a slot as "all of the memory and guard region."
let faulting_region_bytes = expected_slot_bytes
.max(max_memory_bytes)
.saturating_add(guard_bytes);
let (num_stripes, slot_bytes) = if guard_bytes == 0 || max_memory_bytes == 0 || num_slots == 0 {
// In the uncommon case where the memory/guard regions are empty or we
// don't need any slots, we will not need any stripes: we just lay out the
// slots back-to-back using a single stripe.
(1, faulting_region_bytes)
} else if num_pkeys_available < 2 {
// If we do not have enough protection keys to stripe the memory, we do
// the same. We can't elide any of the guard bytes because we aren't
// overlapping guard regions with other stripes...
(1, faulting_region_bytes)
} else {
// ...but if we can create at least two stripes, we can use another
// stripe (i.e., a different pkey) as this slot's guard region--this
// reduces the guard bytes each slot has to allocate. We must make
// sure, though, that if the size of that other stripe(s) does not
// fully cover `guard_bytes`, we keep those around to prevent OOB
// access.
// We first calculate the number of stripes we need: we want to
// minimize this so that there is less chance of a single store
// running out of slots with its stripe--we need at least two,
// though. But this is not just an optimization; we need to handle
// the case when there are fewer slots than stripes. E.g., if our
// pool is configured with only three slots (`num_memory_slots =
// 3`), we will run into failures if we attempt to set up more than
// three stripes.
let needed_num_stripes = faulting_region_bytes / max_memory_bytes
+ usize::from(faulting_region_bytes % max_memory_bytes != 0);
assert!(needed_num_stripes > 0);
let num_stripes = num_pkeys_available.min(needed_num_stripes).min(num_slots);
// Next, we try to reduce the slot size by "overlapping" the stripes: we
// can make slot `n` smaller since we know that slot `n+1` and following
// are in different stripes and will look just like `PROT_NONE` memory.
// Recall that codegen expects a guarantee that at least
// `faulting_region_bytes` will catch OOB accesses via segfaults.
let needed_slot_bytes = faulting_region_bytes
.checked_div(num_stripes)
.unwrap_or(faulting_region_bytes)
.max(max_memory_bytes);
assert!(needed_slot_bytes >= max_memory_bytes);
(num_stripes, needed_slot_bytes)
};
// The page-aligned slot size; equivalent to `memory_and_guard_size`.
let page_alignment = crate::runtime::vm::host_page_size() - 1;
let slot_bytes = slot_bytes
.checked_add(page_alignment)
.and_then(|slot_bytes| Some(slot_bytes & !page_alignment))
.ok_or_else(|| anyhow!("slot size is too large"))?;
// We may need another guard region (like `pre_slab_guard_bytes`) at the end
// of our slab to maintain our `faulting_region_bytes` guarantee. We could
// be conservative and just create it as large as `faulting_region_bytes`,
// but because we know that the last slot's `slot_bytes` make up the first
// part of that region, we reduce the final guard region by that much.
let post_slab_guard_bytes = faulting_region_bytes.saturating_sub(slot_bytes);
// Check that we haven't exceeded the slab we can calculate given the limits
// of `usize`.
let layout = SlabLayout {
num_slots,
slot_bytes,
max_memory_bytes,
pre_slab_guard_bytes,
post_slab_guard_bytes,
num_stripes,
};
match layout.total_slab_bytes() {
Ok(_) => Ok(layout),
Err(e) => Err(e),
}
}
#[cfg(test)]
mod tests {
use super::*;
use proptest::prelude::*;
const WASM_PAGE_SIZE: u32 = wasmtime_environ::Memory::DEFAULT_PAGE_SIZE;
#[cfg(target_pointer_width = "64")]
#[test]
fn test_memory_pool() -> Result<()> {
let pool = MemoryPool::new(
&PoolingInstanceAllocatorConfig {
limits: InstanceLimits {
total_memories: 5,
max_tables_per_module: 0,
max_memories_per_module: 3,
table_elements: 0,
max_memory_size: WASM_PAGE_SIZE as usize,
..Default::default()
},
..Default::default()
},
&Tunables {
static_memory_reservation: WASM_PAGE_SIZE as u64,
static_memory_offset_guard_size: 0,
..Tunables::default_host()
},
)?;
assert_eq!(pool.layout.slot_bytes, WASM_PAGE_SIZE as usize);
assert_eq!(pool.layout.num_slots, 5);
assert_eq!(pool.layout.max_memory_bytes, WASM_PAGE_SIZE as usize);
let base = pool.mapping.as_ptr() as usize;
for i in 0..5 {
let index = MemoryAllocationIndex(i);
let ptr = pool.get_base(index);
assert_eq!(ptr as usize - base, i as usize * pool.layout.slot_bytes);
}
Ok(())
}
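// An illustrative round-trip check of the stripe index arithmetic used by
// `allocate` and `deallocate`: converting an unstriped slot index into a
// (stripe, index-within-stripe) pair and back must be lossless.
#[test]
fn test_striped_index_round_trip() {
let num_stripes = 3;
for i in 0..12u32 {
let unstriped = MemoryAllocationIndex(i);
let (stripe, striped) =
StripedAllocationIndex::from_unstriped_slot_index(unstriped, num_stripes);
assert_eq!(stripe, (i as usize) % num_stripes);
assert_eq!(striped.as_unstriped_slot_index(stripe, num_stripes).0, i);
}
}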
#[test]
#[cfg_attr(miri, ignore)]
fn test_pooling_allocator_striping() {
if !mpk::is_supported() {
println!("skipping `test_pooling_allocator_striping` test; mpk is not supported");
return;
}
// Force the use of MPK.
let config = PoolingInstanceAllocatorConfig {
memory_protection_keys: MpkEnabled::Enable,
..PoolingInstanceAllocatorConfig::default()
};
let pool = MemoryPool::new(&config, &Tunables::default_host()).unwrap();
assert!(pool.stripes.len() >= 2);
let max_memory_slots = config.limits.total_memories;
dbg!(pool.stripes[0].allocator.num_empty_slots());
dbg!(pool.stripes[1].allocator.num_empty_slots());
let available_memory_slots: usize = pool
.stripes
.iter()
.map(|s| s.allocator.num_empty_slots())
.sum();
assert_eq!(
max_memory_slots,
u32::try_from(available_memory_slots).unwrap()
);
}
#[test]
fn check_known_layout_calculations() {
for num_pkeys_available in 0..16 {
for num_memory_slots in [0, 1, 10, 64] {
for expected_slot_bytes in [0, 1 << 30 /* 1GB */, 4 << 30 /* 4GB */] {
for max_memory_bytes in
[0, 1 * WASM_PAGE_SIZE as usize, 10 * WASM_PAGE_SIZE as usize]
{
for guard_bytes in [0, 2 << 30 /* 2GB */] {
for guard_before_slots in [true, false] {
let constraints = SlabConstraints {
max_memory_bytes,
num_slots: num_memory_slots,
expected_slot_bytes,
num_pkeys_available,
guard_bytes,
guard_before_slots,
};
let layout = calculate(&constraints);
assert_slab_layout_invariants(constraints, layout.unwrap());
}
}
}
}
}
}
}
proptest! {
#[test]
#[cfg_attr(miri, ignore)]
fn check_random_layout_calculations(c in constraints()) {
if let Ok(l) = calculate(&c) {
assert_slab_layout_invariants(c, l);
}
}
}
fn constraints() -> impl Strategy<Value = SlabConstraints> {
(
any::<usize>(),
any::<usize>(),
any::<usize>(),
any::<usize>(),
any::<usize>(),
any::<bool>(),
)
.prop_map(
|(
max_memory_bytes,
num_memory_slots,
expected_slot_bytes,
num_pkeys_available,
guard_bytes,
guard_before_slots,
)| {
SlabConstraints {
max_memory_bytes,
num_slots: num_memory_slots,
expected_slot_bytes,
num_pkeys_available,
guard_bytes,
guard_before_slots,
}
},
)
}
fn assert_slab_layout_invariants(c: SlabConstraints, s: SlabLayout) {
// Check that all the sizes add up.
assert_eq!(
s.total_slab_bytes().unwrap(),
s.pre_slab_guard_bytes + s.slot_bytes * c.num_slots + s.post_slab_guard_bytes,
"the slab size does not add up: {c:?} => {s:?}"
);
assert!(
s.slot_bytes >= s.max_memory_bytes,
"slot is not big enough: {c:?} => {s:?}"
);
// Check that the various memory values are page-aligned.
assert!(
is_aligned(s.slot_bytes),
"slot is not page-aligned: {c:?} => {s:?}",
);
assert!(
is_aligned(s.max_memory_bytes),
"slot guard region is not page-aligned: {c:?} => {s:?}",
);
assert!(
is_aligned(s.pre_slab_guard_bytes),
"pre-slab guard region is not page-aligned: {c:?} => {s:?}"
);
assert!(
is_aligned(s.post_slab_guard_bytes),
"post-slab guard region is not page-aligned: {c:?} => {s:?}"
);
assert!(
is_aligned(s.total_slab_bytes().unwrap()),
"slab is not page-aligned: {c:?} => {s:?}"
);
// Check that we use no more or less stripes than needed.
assert!(s.num_stripes >= 1, "not enough stripes: {c:?} => {s:?}");
if c.num_pkeys_available == 0 || c.num_slots == 0 {
assert_eq!(
s.num_stripes, 1,
"expected at least one stripe: {c:?} => {s:?}"
);
} else {
assert!(
s.num_stripes <= c.num_pkeys_available,
"layout has more stripes than available pkeys: {c:?} => {s:?}"
);
assert!(
s.num_stripes <= c.num_slots,
"layout has more stripes than memory slots: {c:?} => {s:?}"
);
}
// Check that we use the minimum number of stripes/protection keys.
// - if the next MPK-protected slot is bigger or the same as the
// required guard region, we only need two stripes
// - if the next slot is smaller than the guard region, we only need
// enough stripes to add up to at least that guard region size.
if c.num_pkeys_available > 1 && c.max_memory_bytes > 0 {
assert!(
s.num_stripes <= (c.guard_bytes / c.max_memory_bytes) + 2,
"calculated more stripes than needed: {c:?} => {s:?}"
);
}
// Check that the memory-striping will not allow OOB access.
// - we may have reduced the slot size from `expected_slot_bytes` to
// `slot_bytes` assuming MPK striping; we check that our guaranteed
// "faulting region" is respected
// - the last slot won't have MPK striping after it; we check that the
// `post_slab_guard_bytes` accounts for this
assert!(
s.bytes_to_next_stripe_slot()
>= c.expected_slot_bytes.max(c.max_memory_bytes) + c.guard_bytes,
"faulting region not large enough: {c:?} => {s:?}"
);
assert!(
s.slot_bytes + s.post_slab_guard_bytes >= c.expected_slot_bytes,
"last slot may allow OOB access: {c:?} => {s:?}"
);
}
fn is_aligned(bytes: usize) -> bool {
bytes % crate::runtime::vm::host_page_size() == 0
}
}