regalloc2/ion/moves.rs
1/*
2 * This file was initially derived from the files
3 * `js/src/jit/BacktrackingAllocator.h` and
4 * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was
5 * originally licensed under the Mozilla Public License 2.0. We
6 * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see
7 * https://github.com/bytecodealliance/regalloc2/issues/7).
8 *
9 * Since the initial port, the design has been substantially evolved
10 * and optimized.
11 */
12
13//! Move resolution.
14
15use alloc::vec;
16
17use super::{
18 Env, InsertMovePrio, InsertedMove, InsertedMoves, LiveRangeFlag, LiveRangeIndex,
19 RedundantMoveEliminator, VRegIndex,
20};
21use crate::ion::data_structures::{
22 u64_key, BlockparamIn, BlockparamOut, CodeRange, Edits, FixedRegFixupLevel, LiveRangeKey,
23 LiveRangeListEntry,
24};
25use crate::ion::reg_traversal::RegTraversalIter;
26use crate::moves::{MoveAndScratchResolver, ParallelMoves};
27use crate::{
28 Allocation, Block, Edit, Function, FxHashMap, Inst, InstPosition, OperandConstraint,
29 OperandKind, OperandPos, PReg, ProgPoint, RegClass, SpillSlot,
30};
31use alloc::format;
32use alloc::vec::Vec;
33use hashbrown::hash_map::Entry;
34use smallvec::{smallvec, SmallVec};
35
36impl<'a, F: Function> Env<'a, F> {
37 pub fn is_start_of_block(&self, pos: ProgPoint) -> bool {
38 let block = self.ctx.cfginfo.insn_block[pos.inst().index()];
39 pos == self.ctx.cfginfo.block_entry[block.index()]
40 }
41 pub fn is_end_of_block(&self, pos: ProgPoint) -> bool {
42 let block = self.ctx.cfginfo.insn_block[pos.inst().index()];
43 pos == self.ctx.cfginfo.block_exit[block.index()]
44 }
45
46 pub fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation {
47 let inst_allocs =
48 &self.ctx.output.allocs[self.ctx.output.inst_alloc_offsets[inst.index()] as usize..];
49 inst_allocs[slot]
50 }
51
52 pub fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) {
53 let inst_allocs = &mut self.ctx.output.allocs
54 [self.ctx.output.inst_alloc_offsets[inst.index()] as usize..];
55 inst_allocs[slot] = alloc;
56 }
57
58 pub fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation {
59 trace!("get_alloc_for_range: {:?}", range);
60 let bundle = self.ctx.ranges[range].bundle;
61 trace!(" -> bundle: {:?}", bundle);
62 let bundledata = &self.ctx.bundles[bundle];
63 trace!(" -> allocation {:?}", bundledata.allocation);
64 if bundledata.allocation != Allocation::none() {
65 bundledata.allocation
66 } else {
67 trace!(" -> spillset {:?}", bundledata.spillset);
68 trace!(
69 " -> spill slot {:?}",
70 self.ctx.spillsets[bundledata.spillset].slot
71 );
72 self.ctx.spillslots[self.ctx.spillsets[bundledata.spillset].slot.index()].alloc
73 }
74 }
75
76 pub fn apply_allocations_and_insert_moves(&mut self) -> InsertedMoves {
77 trace!("apply_allocations_and_insert_moves");
78 trace!("blockparam_ins: {:?}", self.blockparam_ins);
79 trace!("blockparam_outs: {:?}", self.blockparam_outs);
80
81 let mut inserted_moves = InsertedMoves::default();
82
83 // Now that all splits are done, we can pay the cost once to
84 // sort VReg range lists and update with the final ranges.
85 for vreg in &mut self.ctx.vregs {
86 for entry in &mut vreg.ranges {
87 entry.range = self.ctx.ranges[entry.index].range;
88 }
89 vreg.ranges.sort_unstable_by_key(|entry| entry.range.from);
90 }
91
92 /// Buffered information about the previous liverange that was processed.
93 struct PrevBuffer {
94 prev: Option<LiveRangeListEntry>,
95 prev_ins_idx: usize,
96 buffered: Option<LiveRangeListEntry>,
97 buffered_ins_idx: usize,
98 }
99
100 impl PrevBuffer {
101 fn new(prev_ins_idx: usize) -> Self {
102 Self {
103 prev: None,
104 prev_ins_idx,
105 buffered: None,
106 buffered_ins_idx: prev_ins_idx,
107 }
108 }
109
110 /// Returns the previous `LiveRangeListEntry` when it's present.
111 #[inline(always)]
112 fn is_valid(&self) -> Option<LiveRangeListEntry> {
113 self.prev
114 }
115
116 /// Fetch the current index into the `Env::blockparam_ins` vector.
117 #[inline(always)]
118 fn blockparam_ins_idx(&self) -> usize {
119 self.prev_ins_idx
120 }
121
122 /// Record this index as the next index to use when the previous
123 /// liverange buffer advances.
124 #[inline(always)]
125 fn update_blockparam_ins_idx(&mut self, idx: usize) {
126 self.buffered_ins_idx = idx;
127 }
128
129 /// As overlapping liveranges might start at the same program point, we buffer the
130 /// previous liverange used when determining where to take the last value from for
131 /// intra-block moves. The liveranges we process are buffered until we encounter one
132 /// that starts at a later program point, indicating that it's now safe to advance the
133 /// previous LR buffer. We accumulate the longest-lived liverange in the buffer as a
134 /// heuristic for finding the most stable source of a value.
135 ///
136 /// We also buffer the index into the `Env::blockparam_ins` vector, as we may see
137 /// multiple uses of a blockparam within a single instruction, and as such may need to
138 /// generate multiple blockparam move destinations by re-traversing that section of the
139 /// vector.
140 #[inline(always)]
141 fn advance(&mut self, current: LiveRangeListEntry) {
142 // Advance `prev` to the buffered entry, as long as the buffered entry
143 // does not start at the same program point as the current LR we're processing.
144 if self
145 .buffered
146 .map(|entry| entry.range.from < current.range.from)
147 .unwrap_or(false)
148 {
149 self.prev = self.buffered;
150 self.prev_ins_idx = self.buffered_ins_idx;
151 }
152
153 // Advance the buffered entry to the currently processed LR, as long as it ends
154 // later than the entry currently buffered.
155 if self
156 .buffered
157 .map(|entry| entry.range.to < current.range.to)
158 .unwrap_or(true)
159 {
160 self.buffered = Some(current);
161 }
162 }
163 }
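// Illustrative walk-through of the buffering heuristic above (hypothetical
// ranges, not taken from any real input): suppose the ranges for a vreg are
// processed in the order
//   A: [inst2-Before .. inst9-Before)
//   B: [inst5-Before .. inst6-Before)
//   C: [inst5-Before .. inst12-Before)
// Processing A buffers it; processing B promotes A to `prev` (A starts
// earlier than B) and keeps A buffered (A outlives B); processing C keeps
// `prev` = A but buffers C (C outlives A). A later range starting after
// inst5 therefore sees C -- the longest-lived entry starting at inst5 -- as
// its `prev`, i.e. as the source of any intra-block connecting move.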
164
165 // Determine the ProgPoint where moves on this (from, to)
166 // edge should go:
167 // - If there is more than one in-edge to `to`, then
168 // `from` must have only one out-edge; moves go at tail of
169 // `from` just before last Branch/Ret.
170 // - Otherwise, there must be at most one in-edge to `to`,
171 // and moves go at start of `to`.
172 #[inline(always)]
173 fn choose_move_location<'a, F: Function>(
174 env: &Env<'a, F>,
175 from: Block,
176 to: Block,
177 ) -> (ProgPoint, InsertMovePrio) {
178 let from_last_insn = env.func.block_insns(from).last();
179 let to_first_insn = env.func.block_insns(to).first();
180 let from_is_ret = env.func.is_ret(from_last_insn);
181 let to_is_entry = env.func.entry_block() == to;
182 let from_outs = env.func.block_succs(from).len() + if from_is_ret { 1 } else { 0 };
183 let to_ins = env.func.block_preds(to).len() + if to_is_entry { 1 } else { 0 };
184
185 if to_ins > 1 && from_outs <= 1 {
186 (
187 // N.B.: though semantically the edge moves happen
188 // after the branch, we must insert them before
189 // the branch because otherwise, of course, they
190 // would never execute. This is correct even in
191 // the presence of branches that read register
192 // inputs (e.g. conditional branches on some RISCs
193 // that branch on reg zero/not-zero, or any
194 // indirect branch), but for a very subtle reason:
195 // all cases of such branches will (or should)
196 // have multiple successors, and thus due to
197 // critical-edge splitting, their successors will
198 // have only the single predecessor, and we prefer
199 // to insert at the head of the successor in that
200 // case (rather than here). We make this a
201 // requirement, in fact: the user of this library
202 // shall not read registers in a branch
203 // instruction if there is only one successor per
204 // the given CFG information.
205 ProgPoint::before(from_last_insn),
206 InsertMovePrio::OutEdgeMoves,
207 )
208 } else if to_ins <= 1 {
209 (
210 ProgPoint::before(to_first_insn),
211 InsertMovePrio::InEdgeMoves,
212 )
213 } else {
214 panic!(
215 "Critical edge: can't insert moves between blocks {:?} and {:?}",
216 from, to
217 );
218 }
219 }
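// Illustrative example of the placement rule above (hypothetical CFG, not
// from any real input): if block1 and block2 both branch to block3
// (`to_ins` = 2) and block1 has block3 as its only successor
// (`from_outs` = 1), the edge moves for (block1 -> block3) are inserted at
// the end of block1, just before its branch. If instead block3 had block1
// as its single predecessor, the moves would go at the top of block3. An
// edge where both counts exceed 1 is a critical edge; the allocator
// requires the client to have split those, so that case panics.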
220
221 #[derive(PartialEq)]
222 struct InterBlockDest {
223 to: Block,
224 from: Block,
225 alloc: Allocation,
226 }
227
228 impl InterBlockDest {
229 fn key(&self) -> u64 {
230 u64_key(self.from.raw_u32(), self.to.raw_u32())
231 }
232 }
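// Note on the sort key (illustrative; assumes `u64_key` places its first
// argument in the more-significant half of the key): sorting
// `inter_block_dests` by `key()` groups destinations by `from` block and
// orders them by `to` block within each group, e.g. (block2 -> block5)
// sorts before (block2 -> block7), which sorts before (block3 -> block1).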
233
234 let mut inter_block_sources: FxHashMap<Block, Allocation> = FxHashMap::default();
235 let mut inter_block_dests = Vec::with_capacity(self.func.num_blocks());
236
237 #[derive(Hash, Eq, PartialEq)]
238 struct BlockparamSourceKey {
239 bits: u64,
240 }
241
242 impl BlockparamSourceKey {
243 fn new(from_block: Block, to_vreg: VRegIndex) -> Self {
244 BlockparamSourceKey {
245 bits: u64_key(from_block.raw_u32(), to_vreg.raw_u32()),
246 }
247 }
248 }
249
250 struct BlockparamDest {
251 from_block: Block,
252 to_block: Block,
253 to_vreg: VRegIndex,
254 alloc: Allocation,
255 }
256
257 impl BlockparamDest {
258 fn key(&self) -> u64 {
259 u64_key(self.to_block.raw_u32(), self.from_block.raw_u32())
260 }
261
262 fn source(&self) -> BlockparamSourceKey {
263 BlockparamSourceKey::new(self.from_block, self.to_vreg)
264 }
265 }
266
267 let mut block_param_sources =
268 FxHashMap::<BlockparamSourceKey, Allocation>::with_capacity_and_hasher(
269 3 * self.func.num_insts(),
270 Default::default(),
271 );
272 let mut block_param_dests = Vec::with_capacity(3 * self.func.num_insts());
273
274 let debug_labels = self.func.debug_value_labels();
275
276 let mut reuse_input_insts = Vec::with_capacity(self.func.num_insts() / 2);
277
278 let mut blockparam_in_idx = 0;
279 let mut blockparam_out_idx = 0;
280 for vreg in 0..self.vregs.len() {
281 let vreg = VRegIndex::new(vreg);
282 if !self.is_vreg_used(vreg) {
283 continue;
284 }
285
286 inter_block_sources.clear();
287
288 // For each range in each vreg, insert moves or
289 // half-moves. We also scan over `blockparam_ins` and
290 // `blockparam_outs`, which are sorted by (vreg, block),
291 // to fill in allocations.
292 let mut prev = PrevBuffer::new(blockparam_in_idx);
293 for range_idx in 0..self.vregs[vreg].ranges.len() {
294 let entry = self.vregs[vreg].ranges[range_idx];
295 let alloc = self.get_alloc_for_range(entry.index);
296 let range = entry.range;
297 trace!(
298 "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}",
299 vreg,
300 entry.index,
301 range,
302 alloc,
303 );
304 debug_assert!(alloc != Allocation::none());
305
306 if self.annotations_enabled {
307 self.annotate(
308 range.from,
309 format!(
310 " <<< start v{} in {} (range{}) (bundle{})",
311 vreg.index(),
312 alloc,
313 entry.index.index(),
314 self.ranges[entry.index].bundle.raw_u32(),
315 ),
316 );
317 self.annotate(
318 range.to,
319 format!(
320 " end v{} in {} (range{}) (bundle{}) >>>",
321 vreg.index(),
322 alloc,
323 entry.index.index(),
324 self.ranges[entry.index].bundle.raw_u32(),
325 ),
326 );
327 }
328
329 prev.advance(entry);
330
331 // Does this range follow immediately after a prior
332 // range in the same block? If so, insert a move (if
333 // the allocs differ). We do this directly rather than
334 // with half-moves because we eagerly know both sides
335 // already (and also, half-moves are specific to
336 // inter-block transfers).
337 //
338 // Note that we do *not* do this if there is also a
339 // def as the first use in the new range: it's
340 // possible that an old liverange covers the Before
341 // pos of an inst, a new liverange covers the After
342 // pos, and the def also happens at After. In this
343 // case we don't want to insert a move after the
344 // instruction copying the old liverange.
345 //
346 // Note also that we assert that the new range has to
347 // start at the Before-point of an instruction; we
348 // can't insert a move that logically happens just
349 // before After (i.e. in the middle of a single
350 // instruction).
351 if let Some(prev) = prev.is_valid() {
352 let prev_alloc = self.get_alloc_for_range(prev.index);
353 debug_assert!(prev_alloc != Allocation::none());
354
355 if prev.range.to >= range.from
356 && (prev.range.to > range.from || !self.is_start_of_block(range.from))
357 && !self.ranges[entry.index].has_flag(LiveRangeFlag::StartsAtDef)
358 {
359 trace!(
360 "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}",
361 prev.index.index(),
362 entry.index.index(),
363 prev_alloc,
364 alloc,
365 vreg.index()
366 );
367 debug_assert_eq!(range.from.pos(), InstPosition::Before);
368 inserted_moves.push(
369 range.from,
370 InsertMovePrio::Regular,
371 prev_alloc,
372 alloc,
373 self.vreg(vreg),
374 );
375 }
376 }
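// Concrete (hypothetical) instance of the rule above: if the previous LR
// for v7 ends at inst14-Before in p0, this LR begins at inst14-Before in
// stack3, the new LR does not start with a def, and inst14 is not at the
// top of a block, we emit `p0 -> stack3` at inst14-Before so the value
// follows the vreg across the split point.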
377
378 // Scan over blocks whose ends are covered by this
379 // range. For each, record a Source half-move for the
380 // vreg's value at the block's end; successors already
381 // covered by this range are guaranteed to have the same
382 // allocation and so need no move.
383 let mut block = self.cfginfo.insn_block[range.from.inst().index()];
384 while block.is_valid() && block.index() < self.func.num_blocks() {
385 if range.to < self.cfginfo.block_exit[block.index()].next() {
386 break;
387 }
388 trace!("examining block with end in range: block{}", block.index());
389
390 match inter_block_sources.entry(block) {
391 // If the entry is already present in the map, we'll try to prefer a
392 // register allocation.
393 Entry::Occupied(mut entry) => {
394 if !entry.get().is_reg() {
395 entry.insert(alloc);
396 }
397 }
398 Entry::Vacant(entry) => {
399 entry.insert(alloc);
400 }
401 }
402
403 // Scan forward in `blockparam_outs`, adding all
404 // half-moves for outgoing values to blockparams
405 // in succs.
406 trace!(
407 "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}",
408 vreg.index(),
409 block.index(),
410 blockparam_out_idx,
411 );
412 while blockparam_out_idx < self.blockparam_outs.len() {
413 let BlockparamOut {
414 from_vreg,
415 from_block,
416 to_block,
417 to_vreg,
418 } = self.blockparam_outs[blockparam_out_idx];
419 if (from_vreg, from_block) > (vreg, block) {
420 break;
421 }
422 if (from_vreg, from_block) == (vreg, block) {
423 trace!(
424 " -> found: from v{} block{} to v{} block{}",
425 from_vreg.index(),
426 from_block.index(),
427 to_vreg.index(),
428 to_block.index()
429 );
430
431 let key = BlockparamSourceKey::new(from_block, to_vreg);
432 match block_param_sources.entry(key) {
433 // As with inter-block moves, if the entry is already present we'll
434 // try to prefer a register allocation.
435 Entry::Occupied(mut entry) => {
436 if !entry.get().is_reg() {
437 entry.insert(alloc);
438 }
439 }
440 Entry::Vacant(entry) => {
441 entry.insert(alloc);
442 }
443 }
444
445 if self.annotations_enabled {
446 self.annotate(
447 self.cfginfo.block_exit[block.index()],
448 format!(
449 "blockparam-out: block{} to block{}: v{} to v{} in {}",
450 from_block.index(),
451 to_block.index(),
452 from_vreg.index(),
453 to_vreg.index(),
454 alloc
455 ),
456 );
457 }
458 }
459
460 blockparam_out_idx += 1;
461 }
462
463 block = block.next();
464 }
465
466 // Scan over blocks whose beginnings are covered by
467 // this range and for which the vreg is live at the
468 // start of the block. For each, for each predecessor,
469 // add a Dest half-move.
470 let mut block = self.cfginfo.insn_block[range.from.inst().index()];
471 if self.cfginfo.block_entry[block.index()] < range.from {
472 block = block.next();
473 }
474 while block.is_valid() && block.index() < self.func.num_blocks() {
475 if self.cfginfo.block_entry[block.index()] >= range.to {
476 break;
477 }
478
479 // Add half-moves for blockparam inputs.
480 trace!(
481 "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}",
482 vreg.index(),
483 block.index(),
484 prev.prev_ins_idx,
485 );
486 let mut idx = prev.blockparam_ins_idx();
487 while idx < self.blockparam_ins.len() {
488 let BlockparamIn {
489 from_block,
490 to_block,
491 to_vreg,
492 } = self.blockparam_ins[idx];
493 if (to_vreg, to_block) > (vreg, block) {
494 break;
495 }
496 if (to_vreg, to_block) == (vreg, block) {
497 block_param_dests.push(BlockparamDest {
498 from_block,
499 to_block,
500 to_vreg,
501 alloc,
502 });
503 trace!(
504 "match: blockparam_in: v{} in block{} from block{} into {}",
505 to_vreg.index(),
506 to_block.index(),
507 from_block.index(),
508 alloc,
509 );
510 #[cfg(debug_assertions)]
511 if self.annotations_enabled {
512 self.annotate(
513 self.cfginfo.block_entry[block.index()],
514 format!(
515 "blockparam-in: block{} to block{}:into v{} in {}",
516 from_block.index(),
517 to_block.index(),
518 to_vreg.index(),
519 alloc
520 ),
521 );
522 }
523 }
524 idx += 1;
525 }
526
527 prev.update_blockparam_ins_idx(idx);
528
529 if !self.is_live_in(block, vreg) {
530 block = block.next();
531 continue;
532 }
533
534 trace!(
535 "scanning preds at vreg {} block {} for ends outside the range",
536 vreg.index(),
537 block.index()
538 );
539
540 // Now find any preds whose ends are not in the
541 // same range, and insert appropriate moves.
542 for &pred in self.func.block_preds(block) {
543 trace!(
544 "pred block {} has exit {:?}",
545 pred.index(),
546 self.cfginfo.block_exit[pred.index()]
547 );
548 if range.contains_point(self.cfginfo.block_exit[pred.index()]) {
549 continue;
550 }
551
552 inter_block_dests.push(InterBlockDest {
553 from: pred,
554 to: block,
555 alloc,
556 })
557 }
558
559 block = block.next();
560 }
561
562 // Scan over def/uses and apply allocations.
563 for use_idx in 0..self.ranges[entry.index].uses.len() {
564 let usedata = self.ranges[entry.index].uses[use_idx];
565 trace!("applying to use: {:?}", usedata);
566 debug_assert!(range.contains_point(usedata.pos));
567 let inst = usedata.pos.inst();
568 let slot = usedata.slot;
569 let operand = usedata.operand;
570 self.set_alloc(inst, slot as usize, alloc);
571 if let OperandConstraint::Reuse(_) = operand.constraint() {
572 reuse_input_insts.push(inst);
573 }
574 }
575
576 // Scan debug-labels on this vreg that overlap with
577 // this range, producing a debug-info output record
578 // giving the allocation location for each label.
579 if !debug_labels.is_empty() {
580 // Do a binary search to find the start of any
581 // labels for this vreg. Recall that we require
582 // debug-label requests to be sorted by vreg as a
583 // precondition (which we verified above).
584 let start = debug_labels
585 .binary_search_by(|&(label_vreg, _label_from, _label_to, _label)| {
586 // Search for the point just before the first
587 // tuple that could be for `vreg` overlapping
588 // with `range`. Never return
589 // `Ordering::Equal`; `binary_search_by` in
590 // this case returns the index of the first
591 // entry that is greater as an `Err`.
592 if label_vreg.vreg() < vreg.index() {
593 core::cmp::Ordering::Less
594 } else {
595 core::cmp::Ordering::Greater
596 }
597 })
598 .unwrap_err();
599
600 for &(label_vreg, label_from, label_to, label) in &debug_labels[start..] {
601 let label_from = ProgPoint::before(label_from);
602 let label_to = ProgPoint::before(label_to);
603 let label_range = CodeRange {
604 from: label_from,
605 to: label_to,
606 };
607 if label_vreg.vreg() != vreg.index() {
608 break;
609 }
610 if !range.overlaps(&label_range) {
611 continue;
612 }
613
614 let from = core::cmp::max(label_from, range.from);
615 let to = core::cmp::min(label_to, range.to);
616
617 self.ctx
618 .output
619 .debug_locations
620 .push((label, from, to, alloc));
621 }
622 }
623 }
624
625 if !inter_block_dests.is_empty() {
626 self.output.stats.halfmoves_count += inter_block_dests.len() * 2;
627
628 inter_block_dests.sort_unstable_by_key(InterBlockDest::key);
629
630 let vreg = self.vreg(vreg);
631 trace!("processing inter-block moves for {}", vreg);
632 for dest in inter_block_dests.drain(..) {
633 let src = inter_block_sources[&dest.from];
634
635 trace!(
636 " -> moving from {} to {} between {:?} and {:?}",
637 src,
638 dest.alloc,
639 dest.from,
640 dest.to
641 );
642
643 let (pos, prio) = choose_move_location(self, dest.from, dest.to);
644 inserted_moves.push(pos, prio, src, dest.alloc, vreg);
645 }
646 }
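// Illustrative pairing (hypothetical allocations): if v7's value sits in p1
// at the end of block4 (recorded in `inter_block_sources`), and the range
// covering the start of block6 -- a successor of block4 -- was assigned
// stack2 and does not extend back over block4's exit, then the dest entry
// (block4 -> block6, stack2) pairs with that source and
// `choose_move_location` places a `p1 -> stack2` move on the (block4,
// block6) edge.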
647
648 blockparam_in_idx = prev.blockparam_ins_idx();
649 }
650
651 if !block_param_dests.is_empty() {
652 self.output.stats.halfmoves_count += block_param_sources.len();
653 self.output.stats.halfmoves_count += block_param_dests.len();
654
655 trace!("processing block-param moves");
656 for dest in block_param_dests {
657 let src = dest.source();
658 let src_alloc = block_param_sources.get(&src).unwrap();
659 let (pos, prio) = choose_move_location(self, dest.from_block, dest.to_block);
660 inserted_moves.push(pos, prio, *src_alloc, dest.alloc, self.vreg(dest.to_vreg));
661 }
662 }
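// Illustrative pairing for blockparams (hypothetical allocations): a branch
// in block2 passing v9 to block5's blockparam v12 records a source entry
// keyed by (block2, v12) holding v9's allocation at block2's exit (say p3);
// the matching dest entry records v12's allocation at block5's entry (say
// p6). Joining the two yields a `p3 -> p6` move placed on the
// (block2, block5) edge by `choose_move_location`.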
663
664 // Handle multi-fixed-reg constraints by copying.
665 for fixup in core::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) {
666 let from_alloc = self.get_alloc(fixup.pos.inst(), fixup.from_slot as usize);
667 let to_alloc = Allocation::reg(PReg::from_index(fixup.to_preg.index()));
668 trace!(
669 "multi-fixed-move constraint at {:?} from {} to {} for v{}",
670 fixup.pos,
671 from_alloc,
672 to_alloc,
673 fixup.vreg.index(),
674 );
675 let prio = match fixup.level {
676 FixedRegFixupLevel::Initial => InsertMovePrio::MultiFixedRegInitial,
677 FixedRegFixupLevel::Secondary => InsertMovePrio::MultiFixedRegSecondary,
678 };
679 inserted_moves.push(fixup.pos, prio, from_alloc, to_alloc, self.vreg(fixup.vreg));
680 self.set_alloc(
681 fixup.pos.inst(),
682 fixup.to_slot as usize,
683 Allocation::reg(PReg::from_index(fixup.to_preg.index())),
684 );
685 }
686
687 // Handle outputs that reuse inputs: copy beforehand, then set
688 // input's alloc to output's.
689 //
690 // Note that the output's allocation may not *actually* be
691 // valid until InstPosition::After, but the reused input may
692 // occur at InstPosition::Before. This may appear incorrect,
693 // but we make it work by ensuring that all *other* inputs are
694 // extended to InstPosition::After so that the def will not
695 // interfere. (The liveness computation code does this -- we
696 // do not require the user to do so.)
697 //
698 // One might ask: why not insist that input-reusing defs occur
699 // at InstPosition::Before? This would be correct, but would
700 // mean that the reused input and the reusing output
701 // interfere, *guaranteeing* that every such case would
702 // require a move. This is really bad on ISAs (like x86) where
703 // reused inputs are ubiquitous.
704 //
705 // Another approach might be to put the def at Before, and
706 // trim the reused input's liverange back to the previous
707 // instruction's After. This is kind of OK until (i) a block
708 // boundary occurs between the prior inst and this one, or
709 // (ii) any moves/spills/reloads occur between the two
710 // instructions. We really do need the input to be live at
711 // this inst's Before.
712 //
713 // In principle what we really need is a "BeforeBefore"
714 // program point, but we don't want to introduce that
715 // everywhere and pay the cost of twice as many ProgPoints
716 // throughout the allocator.
717 //
718 // Or we could introduce a separate move instruction -- this
719 // is the approach that regalloc.rs takes with "mod" operands
720 // -- but that is also costly.
721 //
722 // So we take this approach (invented by IonMonkey -- somewhat
723 // hard to discern, though see [0] for a comment that makes
724 // this slightly less unclear) to avoid interference between
725 // the actual reused input and reusing output, ensure
726 // interference (hence no incorrectness) between other inputs
727 // and the reusing output, and not require a separate explicit
728 // move instruction.
729 //
730 // [0] https://searchfox.org/mozilla-central/rev/3a798ef9252896fb389679f06dd3203169565af0/js/src/jit/shared/Lowering-shared-inl.h#108-110
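// Worked (hypothetical) example of the scheme described above: for
// `add r_out, r_in0, r_in1` where operand 0 is the output with constraint
// `Reuse(1)`, suppose the allocator gave the output p5 and operand 1
// currently lives in p3. The loop below emits `p3 -> p5` at
// `ProgPoint::before(inst)` with the `ReusedInput` priority and rewrites
// operand 1's recorded allocation to p5, so the instruction sees its reused
// input and its output in the same register without the allocator ever
// treating the two as interfering.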
731 for inst in reuse_input_insts {
732 let mut input_reused: SmallVec<[usize; 4]> = smallvec![];
733 for output_idx in 0..self.func.inst_operands(inst).len() {
734 let operand = self.func.inst_operands(inst)[output_idx];
735 if let OperandConstraint::Reuse(input_idx) = operand.constraint() {
736 debug_assert!(!input_reused.contains(&input_idx));
737 debug_assert_eq!(operand.pos(), OperandPos::Late);
738 input_reused.push(input_idx);
739 let input_alloc = self.get_alloc(inst, input_idx);
740 let output_alloc = self.get_alloc(inst, output_idx);
741 trace!(
742 "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}",
743 inst,
744 output_idx,
745 output_alloc,
746 input_idx,
747 input_alloc
748 );
749 if input_alloc != output_alloc {
750 #[cfg(debug_assertions)]
751 if self.annotations_enabled {
752 self.annotate(
753 ProgPoint::before(inst),
754 format!(" reuse-input-copy: {} -> {}", input_alloc, output_alloc),
755 );
756 }
757 let input_operand = self.func.inst_operands(inst)[input_idx];
758 inserted_moves.push(
759 ProgPoint::before(inst),
760 InsertMovePrio::ReusedInput,
761 input_alloc,
762 output_alloc,
763 input_operand.vreg(),
764 );
765 self.set_alloc(inst, input_idx, output_alloc);
766 }
767 }
768 }
769 }
770
771 // Sort the debug-locations vector; we provide this
772 // invariant to the client.
773 self.output.debug_locations.sort_unstable();
774
775 inserted_moves
776 }
777
778 pub fn resolve_inserted_moves(&mut self, mut inserted_moves: InsertedMoves) -> Edits {
779 // For each program point, gather all moves together. Then
780 // resolve (see cases below).
781 let mut i = 0;
782 inserted_moves
783 .moves
784 .sort_unstable_by_key(|m| m.pos_prio.key());
785
786 // Redundant-move elimination state tracker.
787 let mut redundant_moves = RedundantMoveEliminator::default();
788
789 fn redundant_move_process_side_effects<'a, F: Function>(
790 this: &Env<'a, F>,
791 redundant_moves: &mut RedundantMoveEliminator,
792 from: ProgPoint,
793 to: ProgPoint,
794 ) {
795 // If we cross a block boundary, clear and return.
796 if this.cfginfo.insn_block[from.inst().index()]
797 != this.cfginfo.insn_block[to.inst().index()]
798 {
799 redundant_moves.clear();
800 return;
801 }
802
803 let start_inst = if from.pos() == InstPosition::Before {
804 from.inst()
805 } else {
806 from.inst().next()
807 };
808 let end_inst = if to.pos() == InstPosition::Before {
809 to.inst()
810 } else {
811 to.inst().next()
812 };
813 for inst in start_inst.index()..end_inst.index() {
814 let inst = Inst::new(inst);
815 for (i, op) in this.func.inst_operands(inst).iter().enumerate() {
816 match op.kind() {
817 OperandKind::Def => {
818 let alloc = this.get_alloc(inst, i);
819 redundant_moves.clear_alloc(alloc);
820 }
821 _ => {}
822 }
823 }
824 for reg in this.func.inst_clobbers(inst) {
825 redundant_moves.clear_alloc(Allocation::reg(reg));
826 }
827 // The dedicated scratch registers may be clobbered by any
828 // instruction.
829 for reg in this.env.scratch_by_class {
830 if let Some(reg) = reg {
831 redundant_moves.clear_alloc(Allocation::reg(reg));
832 }
833 }
834 }
835 }
836
837 let mut last_pos = ProgPoint::before(Inst::new(0));
838 let mut edits = Edits::with_capacity(self.func.num_insts());
839
840 while i < inserted_moves.moves.len() {
841 let start = i;
842 let pos_prio = inserted_moves.moves[i].pos_prio;
843 while i < inserted_moves.moves.len() && inserted_moves.moves[i].pos_prio == pos_prio {
844 i += 1;
845 }
846 let moves = &inserted_moves.moves[start..i];
847
848 redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos_prio.pos);
849 last_pos = pos_prio.pos;
850
851 // Gather the moves for each RegClass separately. Moves
852 // between classes are impossible, so the per-class sets
853 // cannot interact and it is safe to resolve each with its
854 // own ParallelMoves instance. (We could enhance
855 // ParallelMoves to understand register classes, but this
856 // seems simpler.)
857 let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
858 let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
859 let mut vec_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
860
861 for m in moves {
862 match m.to_vreg.class() {
863 RegClass::Int => {
864 int_moves.push(m.clone());
865 }
866 RegClass::Float => {
867 float_moves.push(m.clone());
868 }
869 RegClass::Vector => {
870 vec_moves.push(m.clone());
871 }
872 }
873 }
874
875 for &(regclass, moves) in &[
876 (RegClass::Int, &int_moves),
877 (RegClass::Float, &float_moves),
878 (RegClass::Vector, &vec_moves),
879 ] {
880 // All moves in `moves` semantically happen in
881 // parallel. Let's resolve these to a sequence of moves
882 // that can be done one at a time.
883 let mut parallel_moves = ParallelMoves::new();
884 trace!(
885 "parallel moves at pos {:?} prio {:?}",
886 pos_prio.pos,
887 pos_prio.prio
888 );
889 for m in moves {
890 trace!(" {} -> {}", m.from_alloc, m.to_alloc);
891 parallel_moves.add(m.from_alloc, m.to_alloc, Some(m.to_vreg));
892 }
893
894 let resolved = parallel_moves.resolve();
895 let mut scratch_iter = RegTraversalIter::new(
896 self.env, regclass, None, None, 0,
897 None, // We assume there is no limit on the set of registers available for moves.
898 );
899 let mut dedicated_scratch = self.env.scratch_by_class[regclass as usize];
900 let key = LiveRangeKey::from_range(&CodeRange {
901 from: pos_prio.pos,
902 to: pos_prio.pos.next(),
903 });
904 let find_free_reg = || {
905 // Use the dedicated scratch register first if it is
906 // available.
907 if let Some(reg) = dedicated_scratch.take() {
908 return Some(Allocation::reg(reg));
909 }
910 while let Some(preg) = scratch_iter.next() {
911 if !self.pregs[preg.index()]
912 .allocations
913 .btree
914 .contains_key(&key)
915 {
916 let alloc = Allocation::reg(preg);
917 if moves
918 .iter()
919 .any(|m| m.from_alloc == alloc || m.to_alloc == alloc)
920 {
921 // Skip pregs used by moves in this
922 // parallel move set, even if not
923 // marked used at progpoint: edge move
924 // liveranges meet but don't overlap,
925 // so otherwise we may incorrectly
926 // overwrite a source reg.
927 continue;
928 }
929 return Some(alloc);
930 }
931 }
932 None
933 };
934 let mut stackslot_idx = 0;
935 let get_stackslot = || {
936 let idx = stackslot_idx;
937 stackslot_idx += 1;
938 // We can't borrow `self` as mutable, so we create
939 // these placeholders then allocate the actual
940 // slots if needed with `self.allocate_spillslot`
941 // below.
942 Allocation::stack(SpillSlot::new(SpillSlot::MAX - idx))
943 };
944 let is_stack_alloc = |alloc: Allocation| {
945 if let Some(preg) = alloc.as_reg() {
946 self.pregs[preg.index()].is_stack
947 } else {
948 alloc.is_stack()
949 }
950 };
951 let preferred_victim = self.preferred_victim_by_class[regclass as usize];
952
953 let scratch_resolver = MoveAndScratchResolver {
954 find_free_reg,
955 get_stackslot,
956 is_stack_alloc,
957 borrowed_scratch_reg: preferred_victim,
958 };
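// Hedged summary of what the resolver is given here (see
// `MoveAndScratchResolver` in `crate::moves` for the authoritative logic):
// `find_free_reg` prefers the class's dedicated scratch register, then any
// preg with no allocation at this program point that is not itself a source
// or destination of the current parallel-move set; `get_stackslot` hands
// out placeholder slots (remapped to real spillslots just below); and
// `borrowed_scratch_reg` names a victim register the resolver may
// save and restore when nothing else is free.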
959
960 let resolved = scratch_resolver.compute(resolved);
961
962 let mut rewrites = FxHashMap::default();
963 for i in 0..stackslot_idx {
964 if i >= self.extra_spillslots_by_class[regclass as usize].len() {
965 let slot =
966 self.allocate_spillslot(self.func.spillslot_size(regclass) as u32);
967 self.extra_spillslots_by_class[regclass as usize].push(slot);
968 }
969 rewrites.insert(
970 Allocation::stack(SpillSlot::new(SpillSlot::MAX - i)),
971 self.extra_spillslots_by_class[regclass as usize][i],
972 );
973 }
974
975 for (src, dst, to_vreg) in resolved {
976 let src = rewrites.get(&src).cloned().unwrap_or(src);
977 let dst = rewrites.get(&dst).cloned().unwrap_or(dst);
978 trace!(" resolved: {} -> {} ({:?})", src, dst, to_vreg);
979 let action = redundant_moves.process_move(src, dst, to_vreg);
980 if !action.elide {
981 edits.add(pos_prio, src, dst);
982 } else {
983 trace!(" -> redundant move elided");
984 }
985 }
986 }
987 }
988
989 // Ensure edits are in sorted ProgPoint order. N.B.: this must
990 // be a stable sort! We have to keep the order produced by the
991 // parallel-move resolver for all moves within a single sort
992 // key.
993 edits.sort();
994 self.output.stats.edits_count = edits.len();
995
996 // Add debug annotations.
997 if self.annotations_enabled {
998 for &(pos_prio, ref edit) in edits.iter() {
999 match edit {
1000 &Edit::Move { from, to } => {
1001 self.annotate(pos_prio.pos, format!("move {} -> {}", from, to));
1002 }
1003 }
1004 }
1005 }
1006
1007 edits
1008 }
1009}