wasmtime_internal_cranelift/obj.rs
1//! Object file builder.
2//!
3//! Creates ELF image based on `Compilation` information. The ELF contains
4//! functions and trampolines in the ".text" section. It also contains all
5//! relocation records for the linking stage. If DWARF sections exist, their
6//! content will be written as well.
7//!
8//! The object file has symbols for each function and trampoline, as well as
9//! symbols that refer to libcalls.
10//!
//! The function symbol names have the format "_wasm_function_N", where N is
//! the `FuncIndex`. Symbols for defined wasm functions refer to a JIT-compiled
//! function body; symbols for imported wasm functions do not. The trampoline
//! symbol names have the format "_trampoline_N", where N is the `SignatureIndex`.
15
16use crate::CompiledFunction;
17use anyhow::Result;
18use cranelift_codegen::TextSectionBuilder;
19use cranelift_codegen::isa::unwind::{UnwindInfo, systemv};
20use cranelift_control::ControlPlane;
21use gimli::RunTimeEndian;
22use gimli::write::{Address, EhFrame, EndianVec, FrameTable, Writer};
23use object::write::{Object, SectionId, StandardSegment, Symbol, SymbolId, SymbolSection};
24use object::{Architecture, SectionFlags, SectionKind, SymbolFlags, SymbolKind, SymbolScope};
25use std::ops::Range;
26use wasmtime_environ::{Compiler, TripleExt};
27use wasmtime_environ::{FuncKey, obj};
28
/// Name of the object section that receives all compiled code (functions and
/// trampolines).
const TEXT_SECTION_NAME: &[u8] = b".text";
30
31fn text_align(compiler: &dyn Compiler) -> u64 {
32 // text pages will not be made executable with pulley, so the section
33 // doesn't need to be padded out to page alignment boundaries.
34 if compiler.triple().is_pulley() {
35 0x1
36 } else {
37 compiler.page_size_align()
38 }
39}
40
41/// A helper structure used to assemble the final text section of an executable,
42/// plus unwinding information and other related details.
43///
44/// This builder relies on Cranelift-specific internals but assembles into a
45/// generic `Object` which will get further appended to in a compiler-agnostic
46/// fashion later.
47pub struct ModuleTextBuilder<'a> {
48 /// The target that we're compiling for, used to query target-specific
49 /// information as necessary.
50 compiler: &'a dyn Compiler,
51
52 /// The object file that we're generating code into.
53 obj: &'a mut Object<'static>,
54
55 /// The WebAssembly module we're generating code for.
56 text_section: SectionId,
57
58 unwind_info: UnwindInfoBuilder<'a>,
59
60 /// In-progress text section that we're using cranelift's `MachBuffer` to
61 /// build to resolve relocations (calls) between functions.
62 text: Box<dyn TextSectionBuilder>,
63
64 ctrl_plane: ControlPlane,
65}
66
67impl<'a> ModuleTextBuilder<'a> {
68 /// Creates a new builder for the text section of an executable.
69 ///
70 /// The `.text` section will be appended to the specified `obj` along with
71 /// any unwinding or such information as necessary. The `num_funcs`
72 /// parameter indicates the number of times the `append_func` function will
73 /// be called. The `finish` function will panic if this contract is not met.
74 pub fn new(
75 obj: &'a mut Object<'static>,
76 compiler: &'a dyn Compiler,
77 text: Box<dyn TextSectionBuilder>,
78 ) -> Self {
79 // Entire code (functions and trampolines) will be placed
80 // in the ".text" section.
81 let text_section = obj.add_section(
82 obj.segment_name(StandardSegment::Text).to_vec(),
83 TEXT_SECTION_NAME.to_vec(),
84 SectionKind::Text,
85 );
86
87 // If this target is Pulley then flag the text section as not needing the
88 // executable bit in virtual memory which means that the runtime won't
89 // try to call `Mmap::make_executable`, which makes Pulley more
90 // portable.
91 if compiler.triple().is_pulley() {
92 let section = obj.section_mut(text_section);
93 assert!(matches!(section.flags, SectionFlags::None));
94 section.flags = SectionFlags::Elf {
95 sh_flags: obj::SH_WASMTIME_NOT_EXECUTED,
96 };
97 }
98
99 Self {
100 compiler,
101 obj,
102 text_section,
103 unwind_info: Default::default(),
104 text,
105 ctrl_plane: ControlPlane::default(),
106 }
107 }
108
109 /// Appends the `func` specified named `name` to this object.
110 ///
111 /// The `resolve_reloc_target` closure is used to resolve a relocation
112 /// target to an adjacent function which has already been added or will be
113 /// added to this object. The argument is the relocation target specified
114 /// within `CompiledFunction` and the return value must be an index where
115 /// the target will be defined by the `n`th call to `append_func`.
116 ///
117 /// Returns the symbol associated with the function as well as the range
118 /// that the function resides within the text section.
119 pub fn append_func(
120 &mut self,
121 name: &str,
122 compiled_func: &'a CompiledFunction,
123 resolve_reloc_target: impl Fn(wasmtime_environ::FuncKey) -> usize,
124 ) -> (SymbolId, Range<u64>) {
125 let body = compiled_func.buffer.data();
126 let alignment = compiled_func.alignment;
127 let body_len = body.len() as u64;
128 let off = self
129 .text
130 .append(true, &body, alignment, &mut self.ctrl_plane);
131
132 let symbol_id = self.obj.add_symbol(Symbol {
133 name: name.as_bytes().to_vec(),
134 value: off,
135 size: body_len,
136 kind: SymbolKind::Text,
137 scope: SymbolScope::Compilation,
138 weak: false,
139 section: SymbolSection::Section(self.text_section),
140 flags: SymbolFlags::None,
141 });
142
143 if let Some(info) = compiled_func.unwind_info() {
144 self.unwind_info.push(off, body_len, info);
145 }
146
147 for r in compiled_func.relocations() {
148 let reloc_offset = off + u64::from(r.offset);
149
150 // This relocation is used to fill in which hostcall id is
151 // desired within the `call_indirect_host` opcode of Pulley
152 // itself. The relocation target is the start of the instruction
153 // and the goal is to insert the static signature number, `n`,
154 // into the instruction.
155 //
156 // At this time the instruction looks like:
157 //
158 // +------+------+------+------+
159 // | OP | OP_EXTENDED | N |
160 // +------+------+------+------+
161 //
162 // This 4-byte encoding has `OP` indicating this is an "extended
163 // opcode" where `OP_EXTENDED` is a 16-bit extended opcode.
164 // The `N` byte is the index of the signature being called and
165 // is what's b eing filled in.
166 //
167 // See the `test_call_indirect_host_width` in
168 // `pulley/tests/all.rs` for this guarantee as well.
169 if let FuncKey::PulleyHostCall(host_call) = r.reloc_target {
170 #[cfg(feature = "pulley")]
171 {
172 use pulley_interpreter::encode::Encode;
173 assert_eq!(pulley_interpreter::CallIndirectHost::WIDTH, 4);
174 }
175 let n = host_call.index();
176 let byte = u8::try_from(n).unwrap();
177 self.text.write(reloc_offset + 3, &[byte]);
178 continue;
179 }
180
181 let target = resolve_reloc_target(r.reloc_target);
182 if self
183 .text
184 .resolve_reloc(reloc_offset, r.reloc, r.addend, target)
185 {
186 continue;
187 }
188
189 panic!("failed to resolve relocation: {r:?} -> {target}");
190 }
191
192 (symbol_id, off..off + body_len)
193 }
194
195 /// Forces "veneers" to be used for inter-function calls in the text
196 /// section which means that in-bounds optimized addresses are never used.
197 ///
198 /// This is only useful for debugging cranelift itself and typically this
199 /// option is disabled.
200 pub fn force_veneers(&mut self) {
201 self.text.force_veneers();
202 }
203
204 /// Appends the specified amount of bytes of padding into the text section.
205 ///
206 /// This is only useful when fuzzing and/or debugging cranelift itself and
207 /// for production scenarios `padding` is 0 and this function does nothing.
208 pub fn append_padding(&mut self, padding: usize) {
209 if padding == 0 {
210 return;
211 }
212 self.text
213 .append(false, &vec![0; padding], 1, &mut self.ctrl_plane);
214 }
215
216 /// Indicates that the text section has been written completely and this
217 /// will finish appending it to the original object.
218 ///
219 /// Note that this will also write out the unwind information sections if
220 /// necessary.
221 pub fn finish(mut self) {
222 // Finish up the text section now that we're done adding functions.
223 let text = self.text.finish(&mut self.ctrl_plane);
224 self.obj
225 .section_mut(self.text_section)
226 .set_data(text, text_align(self.compiler));
227
228 // Append the unwind information for all our functions, if necessary.
229 self.unwind_info
230 .append_section(self.compiler, self.obj, self.text_section);
231 }
232}
233
234/// Builder used to create unwind information for a set of functions added to a
235/// text section.
236#[derive(Default)]
237struct UnwindInfoBuilder<'a> {
238 windows_xdata: Vec<u8>,
239 windows_pdata: Vec<RUNTIME_FUNCTION>,
240 systemv_unwind_info: Vec<(u64, &'a systemv::UnwindInfo)>,
241}
242
// Mirror of the Windows API's `RUNTIME_FUNCTION`. It is declared locally so
// that every field is guaranteed to be a `u32` and so the type exists on all
// platforms. All fields are offsets from a "base address", which we define as
// the base of where the text section is eventually loaded.
#[expect(non_camel_case_types, reason = "matching Windows style, not Rust")]
struct RUNTIME_FUNCTION {
    /// Offset of the start of the described function.
    begin: u32,
    /// Offset of the end of the described function.
    end: u32,
    /// Offset of the unwind information belonging to the function.
    unwind_address: u32,
}
254
255impl<'a> UnwindInfoBuilder<'a> {
256 /// Pushes the unwind information for a function into this builder.
257 ///
258 /// The function being described must be located at `function_offset` within
259 /// the text section itself, and the function's size is specified by
260 /// `function_len`.
261 ///
262 /// The `info` should come from Cranelift. and is handled here depending on
263 /// its flavor.
264 fn push(&mut self, function_offset: u64, function_len: u64, info: &'a UnwindInfo) {
265 match info {
266 // Windows unwind information is stored in two locations:
267 //
268 // * First is the actual unwinding information which is stored
269 // in the `.xdata` section. This is where `info`'s emitted
270 // information will go into.
271 // * Second are pointers to connect all this unwind information,
272 // stored in the `.pdata` section. The `.pdata` section is an
273 // array of `RUNTIME_FUNCTION` structures.
274 //
275 // Due to how these will be loaded at runtime the `.pdata` isn't
276 // actually assembled byte-wise here. Instead that's deferred to
277 // happen later during `write_windows_unwind_info` which will apply
278 // a further offset to `unwind_address`.
279 //
280 // FIXME: in theory we could "intern" the `unwind_info` value
281 // here within the `.xdata` section. Most of our unwind
282 // information for functions is probably pretty similar in which
283 // case the `.xdata` could be quite small and `.pdata` could
284 // have multiple functions point to the same unwinding
285 // information.
286 UnwindInfo::WindowsX64(info) => {
287 let unwind_size = info.emit_size();
288 let mut unwind_info = vec![0; unwind_size];
289 info.emit(&mut unwind_info);
290
291 // `.xdata` entries are always 4-byte aligned
292 while self.windows_xdata.len() % 4 != 0 {
293 self.windows_xdata.push(0x00);
294 }
295 let unwind_address = self.windows_xdata.len();
296 self.windows_xdata.extend_from_slice(&unwind_info);
297
298 // Record a `RUNTIME_FUNCTION` which this will point to.
299 self.windows_pdata.push(RUNTIME_FUNCTION {
300 begin: u32::try_from(function_offset).unwrap(),
301 end: u32::try_from(function_offset + function_len).unwrap(),
302 unwind_address: u32::try_from(unwind_address).unwrap(),
303 });
304 }
305
306 // See https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling
307 UnwindInfo::WindowsArm64(info) => {
308 let code_words = info.code_words();
309 let mut unwind_codes = vec![0; (code_words * 4) as usize];
310 info.emit(&mut unwind_codes);
311
312 // `.xdata` entries are always 4-byte aligned
313 while self.windows_xdata.len() % 4 != 0 {
314 self.windows_xdata.push(0x00);
315 }
316
317 // First word:
318 // 0-17: Function Length
319 // 18-19: Version (must be 0)
320 // 20: X bit (is exception data present?)
321 // 21: E bit (has single packed epilogue?)
322 // 22-26: Epilogue count
323 // 27-31: Code words count
324 let requires_extended_counts = code_words > (1 << 5);
325 let encoded_function_len = function_len / 4;
326 assert!(encoded_function_len < (1 << 18), "function too large");
327 let mut word1 = u32::try_from(encoded_function_len).unwrap();
328 if !requires_extended_counts {
329 word1 |= u32::from(code_words) << 27;
330 }
331 let unwind_address = self.windows_xdata.len();
332 self.windows_xdata.extend_from_slice(&word1.to_le_bytes());
333
334 if requires_extended_counts {
335 // Extended counts word:
336 // 0-15: Epilogue count
337 // 16-23: Code words count
338 let extended_counts_word = (code_words as u32) << 16;
339 self.windows_xdata
340 .extend_from_slice(&extended_counts_word.to_le_bytes());
341 }
342
343 // Skip epilogue information: Per comment on [`UnwindInst`], we
344 // do not emit information about epilogues.
345
346 // Emit the unwind codes.
347 self.windows_xdata.extend_from_slice(&unwind_codes);
348
349 // Record a `RUNTIME_FUNCTION` which this will point to.
350 // NOTE: `end` is not used, so leave it as 0.
351 self.windows_pdata.push(RUNTIME_FUNCTION {
352 begin: u32::try_from(function_offset).unwrap(),
353 end: 0,
354 unwind_address: u32::try_from(unwind_address).unwrap(),
355 });
356 }
357
358 // System-V is different enough that we just record the unwinding
359 // information to get processed at a later time.
360 UnwindInfo::SystemV(info) => {
361 self.systemv_unwind_info.push((function_offset, info));
362 }
363
364 _ => panic!("some unwind info isn't handled here"),
365 }
366 }
367
368 /// Appends the unwind information section, if any, to the `obj` specified.
369 ///
370 /// This function must be called immediately after the text section was
371 /// added to a builder. The unwind information section must trail the text
372 /// section immediately.
373 ///
374 /// The `text_section`'s section identifier is passed into this function.
375 fn append_section(
376 &self,
377 compiler: &dyn Compiler,
378 obj: &mut Object<'_>,
379 text_section: SectionId,
380 ) {
381 // This write will align the text section to a page boundary and then
382 // return the offset at that point. This gives us the full size of the
383 // text section at that point, after alignment.
384 let text_section_size = obj.append_section_data(text_section, &[], text_align(compiler));
385
386 if self.windows_xdata.len() > 0 {
387 assert!(self.systemv_unwind_info.len() == 0);
388 // The `.xdata` section must come first to be just-after the `.text`
389 // section for the reasons documented in `write_windows_unwind_info`
390 // below.
391 let segment = obj.segment_name(StandardSegment::Data).to_vec();
392 let xdata_id = obj.add_section(segment, b".xdata".to_vec(), SectionKind::ReadOnlyData);
393 let segment = obj.segment_name(StandardSegment::Data).to_vec();
394 let pdata_id = obj.add_section(segment, b".pdata".to_vec(), SectionKind::ReadOnlyData);
395 self.write_windows_unwind_info(obj, xdata_id, pdata_id, text_section_size);
396 }
397
398 if self.systemv_unwind_info.len() > 0 {
399 let segment = obj.segment_name(StandardSegment::Data).to_vec();
400 let section_id =
401 obj.add_section(segment, b".eh_frame".to_vec(), SectionKind::ReadOnlyData);
402 self.write_systemv_unwind_info(compiler, obj, section_id, text_section_size)
403 }
404 }
405
406 /// This function appends a nonstandard section to the object which is only
407 /// used during `CodeMemory::publish`.
408 ///
409 /// This custom section effectively stores a `[RUNTIME_FUNCTION; N]` into
410 /// the object file itself. This way registration of unwind info can simply
411 /// pass this slice to the OS itself and there's no need to recalculate
412 /// anything on the other end of loading a module from a precompiled object.
413 ///
414 /// Support for reading this is in `crates/jit/src/unwind/winx64.rs`.
415 fn write_windows_unwind_info(
416 &self,
417 obj: &mut Object<'_>,
418 xdata_id: SectionId,
419 pdata_id: SectionId,
420 text_section_size: u64,
421 ) {
422 // Append the `.xdata` section, or the actual unwinding information
423 // codes and such which were built as we found unwind information for
424 // functions.
425 obj.append_section_data(xdata_id, &self.windows_xdata, 4);
426
427 // Next append the `.pdata` section, or the array of `RUNTIME_FUNCTION`
428 // structures stored in the binary.
429 //
430 // This memory will be passed at runtime to `RtlAddFunctionTable` which
431 // takes a "base address" and the entries within `RUNTIME_FUNCTION` are
432 // all relative to this base address. The base address we pass is the
433 // address of the text section itself so all the pointers here must be
434 // text-section-relative. The `begin` and `end` fields for the function
435 // it describes are already text-section-relative, but the
436 // `unwind_address` field needs to be updated here since the value
437 // stored right now is `xdata`-section-relative. We know that the
438 // `xdata` section follows the `.text` section so the
439 // `text_section_size` is added in to calculate the final
440 // `.text`-section-relative address of the unwind information.
441 let xdata_rva = |address| {
442 let address = u64::from(address);
443 let address = address + text_section_size;
444 u32::try_from(address).unwrap()
445 };
446 let pdata = match obj.architecture() {
447 Architecture::X86_64 => {
448 let mut pdata = Vec::with_capacity(self.windows_pdata.len() * 3 * 4);
449 for info in self.windows_pdata.iter() {
450 pdata.extend_from_slice(&info.begin.to_le_bytes());
451 pdata.extend_from_slice(&info.end.to_le_bytes());
452 pdata.extend_from_slice(&xdata_rva(info.unwind_address).to_le_bytes());
453 }
454 pdata
455 }
456
457 Architecture::Aarch64 => {
458 // Windows Arm64 .pdata also supports packed unwind data, but
459 // we're not currently using that.
460 let mut pdata = Vec::with_capacity(self.windows_pdata.len() * 2 * 4);
461 for info in self.windows_pdata.iter() {
462 pdata.extend_from_slice(&info.begin.to_le_bytes());
463 pdata.extend_from_slice(&xdata_rva(info.unwind_address).to_le_bytes());
464 }
465 pdata
466 }
467
468 _ => unimplemented!("unsupported architecture for windows unwind info"),
469 };
470 obj.append_section_data(pdata_id, &pdata, 4);
471 }
472
473 /// This function appends a nonstandard section to the object which is only
474 /// used during `CodeMemory::publish`.
475 ///
476 /// This will generate a `.eh_frame` section, but not one that can be
477 /// naively loaded. The goal of this section is that we can create the
478 /// section once here and never again does it need to change. To describe
479 /// dynamically loaded functions though each individual FDE needs to talk
480 /// about the function's absolute address that it's referencing. Naturally
481 /// we don't actually know the function's absolute address when we're
482 /// creating an object here.
483 ///
484 /// To solve this problem the FDE address encoding mode is set to
485 /// `DW_EH_PE_pcrel`. This means that the actual effective address that the
486 /// FDE describes is a relative to the address of the FDE itself. By
487 /// leveraging this relative-ness we can assume that the relative distance
488 /// between the FDE and the function it describes is constant, which should
489 /// allow us to generate an FDE ahead-of-time here.
490 ///
491 /// For now this assumes that all the code of functions will start at a
492 /// page-aligned address when loaded into memory. The eh_frame encoded here
493 /// then assumes that the text section is itself page aligned to its size
494 /// and the eh_frame will follow just after the text section. This means
495 /// that the relative offsets we're using here is the FDE going backwards
496 /// into the text section itself.
497 ///
498 /// Note that the library we're using to create the FDEs, `gimli`, doesn't
499 /// actually encode addresses relative to the FDE itself. Instead the
500 /// addresses are encoded relative to the start of the `.eh_frame` section.
501 /// This makes it much easier for us where we provide the relative offset
502 /// from the start of `.eh_frame` to the function in the text section, which
503 /// given our layout basically means the offset of the function in the text
504 /// section from the end of the text section.
505 ///
506 /// A final note is that the reason we page-align the text section's size is
507 /// so the .eh_frame lives on a separate page from the text section itself.
508 /// This allows `.eh_frame` to have different virtual memory permissions,
509 /// such as being purely read-only instead of read/execute like the code
510 /// bits.
511 fn write_systemv_unwind_info(
512 &self,
513 compiler: &dyn Compiler,
514 obj: &mut Object<'_>,
515 section_id: SectionId,
516 text_section_size: u64,
517 ) {
518 let mut cie = match compiler.create_systemv_cie() {
519 Some(cie) => cie,
520 None => return,
521 };
522 let mut table = FrameTable::default();
523 cie.fde_address_encoding = gimli::constants::DW_EH_PE_pcrel;
524 let cie_id = table.add_cie(cie);
525
526 for (text_section_off, unwind_info) in self.systemv_unwind_info.iter() {
527 let backwards_off = text_section_size - text_section_off;
528 let actual_offset = -i64::try_from(backwards_off).unwrap();
529 // Note that gimli wants an unsigned 64-bit integer here, but
530 // unwinders just use this constant for a relative addition with the
531 // address of the FDE, which means that the sign doesn't actually
532 // matter.
533 let fde = unwind_info.to_fde(Address::Constant(actual_offset.cast_unsigned()));
534 table.add_fde(cie_id, fde);
535 }
536 let endian = match compiler.triple().endianness().unwrap() {
537 target_lexicon::Endianness::Little => RunTimeEndian::Little,
538 target_lexicon::Endianness::Big => RunTimeEndian::Big,
539 };
540 let mut eh_frame = EhFrame(MyVec(EndianVec::new(endian)));
541 table.write_eh_frame(&mut eh_frame).unwrap();
542
543 // Some unwinding implementations expect a terminating "empty" length so
544 // a 0 is written at the end of the table for those implementations.
545 let mut endian_vec = (eh_frame.0).0;
546 endian_vec.write_u32(0).unwrap();
547 obj.append_section_data(section_id, endian_vec.slice(), 1);
548
549 use gimli::constants;
550 use gimli::write::Error;
551
552 struct MyVec(EndianVec<RunTimeEndian>);
553
554 impl Writer for MyVec {
555 type Endian = RunTimeEndian;
556
557 fn endian(&self) -> RunTimeEndian {
558 self.0.endian()
559 }
560
561 fn len(&self) -> usize {
562 self.0.len()
563 }
564
565 fn write(&mut self, buf: &[u8]) -> Result<(), Error> {
566 self.0.write(buf)
567 }
568
569 fn write_at(&mut self, pos: usize, buf: &[u8]) -> Result<(), Error> {
570 self.0.write_at(pos, buf)
571 }
572
573 // FIXME(gimli-rs/gimli#576) this is the definition we want for
574 // `write_eh_pointer` but the default implementation, at the time
575 // of this writing, uses `offset - val` instead of `val - offset`.
576 // A PR has been merged to fix this but until that's published we
577 // can't use it.
578 fn write_eh_pointer(
579 &mut self,
580 address: Address,
581 eh_pe: constants::DwEhPe,
582 size: u8,
583 ) -> Result<(), Error> {
584 let val = match address {
585 Address::Constant(val) => val,
586 Address::Symbol { .. } => unreachable!(),
587 };
588 assert_eq!(eh_pe.application(), constants::DW_EH_PE_pcrel);
589 let offset = self.len() as u64;
590 let val = val.wrapping_sub(offset);
591 self.write_eh_pointer_data(val, eh_pe.format(), size)
592 }
593 }
594 }
595}