regalloc2/ion/
moves.rs

1/*
2 * This file was initially derived from the files
3 * `js/src/jit/BacktrackingAllocator.h` and
4 * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was
5 * originally licensed under the Mozilla Public License 2.0. We
6 * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see
7 * https://github.com/bytecodealliance/regalloc2/issues/7).
8 *
9 * Since the initial port, the design has been substantially evolved
10 * and optimized.
11 */
12
13//! Move resolution.
14
15use alloc::vec;
16
17use super::{
18    Env, InsertMovePrio, InsertedMove, InsertedMoves, LiveRangeFlag, LiveRangeIndex,
19    RedundantMoveEliminator, VRegIndex,
20};
21use crate::ion::data_structures::{
22    u64_key, BlockparamIn, BlockparamOut, CodeRange, Edits, FixedRegFixupLevel, LiveRangeKey,
23    LiveRangeListEntry,
24};
25use crate::ion::reg_traversal::RegTraversalIter;
26use crate::moves::{MoveAndScratchResolver, ParallelMoves};
27use crate::{
28    Allocation, Block, Edit, Function, FxHashMap, Inst, InstPosition, OperandConstraint,
29    OperandKind, OperandPos, PReg, ProgPoint, RegClass, SpillSlot,
30};
31use alloc::format;
32use alloc::vec::Vec;
33use hashbrown::hash_map::Entry;
34use smallvec::{smallvec, SmallVec};
35
36impl<'a, F: Function> Env<'a, F> {
37    pub fn is_start_of_block(&self, pos: ProgPoint) -> bool {
38        let block = self.ctx.cfginfo.insn_block[pos.inst().index()];
39        pos == self.ctx.cfginfo.block_entry[block.index()]
40    }
41    pub fn is_end_of_block(&self, pos: ProgPoint) -> bool {
42        let block = self.ctx.cfginfo.insn_block[pos.inst().index()];
43        pos == self.ctx.cfginfo.block_exit[block.index()]
44    }
45
46    pub fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation {
47        let inst_allocs =
48            &self.ctx.output.allocs[self.ctx.output.inst_alloc_offsets[inst.index()] as usize..];
49        inst_allocs[slot]
50    }
51
52    pub fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) {
53        let inst_allocs = &mut self.ctx.output.allocs
54            [self.ctx.output.inst_alloc_offsets[inst.index()] as usize..];
55        inst_allocs[slot] = alloc;
56    }
57
58    pub fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation {
59        trace!("get_alloc_for_range: {:?}", range);
60        let bundle = self.ctx.ranges[range].bundle;
61        trace!(" -> bundle: {:?}", bundle);
62        let bundledata = &self.ctx.bundles[bundle];
63        trace!(" -> allocation {:?}", bundledata.allocation);
64        if bundledata.allocation != Allocation::none() {
65            bundledata.allocation
66        } else {
67            trace!(" -> spillset {:?}", bundledata.spillset);
68            trace!(
69                " -> spill slot {:?}",
70                self.ctx.spillsets[bundledata.spillset].slot
71            );
72            self.ctx.spillslots[self.ctx.spillsets[bundledata.spillset].slot.index()].alloc
73        }
74    }
75
76    pub fn apply_allocations_and_insert_moves(&mut self) -> InsertedMoves {
77        trace!("apply_allocations_and_insert_moves");
78        trace!("blockparam_ins: {:?}", self.blockparam_ins);
79        trace!("blockparam_outs: {:?}", self.blockparam_outs);
80
81        let mut inserted_moves = InsertedMoves::default();
82
83        // Now that all splits are done, we can pay the cost once to
84        // sort VReg range lists and update with the final ranges.
85        for vreg in &mut self.ctx.vregs {
86            for entry in &mut vreg.ranges {
87                entry.range = self.ctx.ranges[entry.index].range;
88            }
89            vreg.ranges.sort_unstable_by_key(|entry| entry.range.from);
90        }
91
92        /// Buffered information about the previous liverange that was processed.
93        struct PrevBuffer {
94            prev: Option<LiveRangeListEntry>,
95            prev_ins_idx: usize,
96            buffered: Option<LiveRangeListEntry>,
97            buffered_ins_idx: usize,
98        }
99
100        impl PrevBuffer {
101            fn new(prev_ins_idx: usize) -> Self {
102                Self {
103                    prev: None,
104                    prev_ins_idx,
105                    buffered: None,
106                    buffered_ins_idx: prev_ins_idx,
107                }
108            }
109
110            /// Returns the previous `LiveRangeListEntry` when it's present.
111            #[inline(always)]
112            fn is_valid(&self) -> Option<LiveRangeListEntry> {
113                self.prev
114            }
115
116            /// Fetch the current index into the `Env::blockparam_ins` vector.
117            #[inline(always)]
118            fn blockparam_ins_idx(&self) -> usize {
119                self.prev_ins_idx
120            }
121
122            /// Record this index as the next index to use when the previous
123            /// liverange buffer advances.
124            #[inline(always)]
125            fn update_blockparam_ins_idx(&mut self, idx: usize) {
126                self.buffered_ins_idx = idx;
127            }
128
129            /// As overlapping liveranges might start at the same program point, we buffer the
130            /// previous liverange used when determining where to take the last value from for
131            /// intra-block moves. The liveranges we process are buffered until we encounter one
132            /// that starts at a later program point, indicating that it's now safe to advance the
133            /// previous LR buffer. We accumulate the longest-lived liverange in the buffer as a
134            /// heuristic for finding the most stable source of a value.
135            ///
136            /// We also buffer the index into the `Env::blockparam_ins` vector, as we may see
137            /// multiple uses of a blockparam within a single instruction, and as such may need to
138            /// generate multiple blockparam move destinations by re-traversing that section of the
139            /// vector.
140            #[inline(always)]
141            fn advance(&mut self, current: LiveRangeListEntry) {
142                // Advance the `prev` pointer to the `next` pointer, as long as the `next` pointer
143                // does not start at the same time as the current LR we're processing.
144                if self
145                    .buffered
146                    .map(|entry| entry.range.from < current.range.from)
147                    .unwrap_or(false)
148                {
149                    self.prev = self.buffered;
150                    self.prev_ins_idx = self.buffered_ins_idx;
151                }
152
153                // Advance the `next` pointer to the currently processed LR, as long as it ends
154                // later than the current `next`.
155                if self
156                    .buffered
157                    .map(|entry| entry.range.to < current.range.to)
158                    .unwrap_or(true)
159                {
160                    self.buffered = Some(current);
161                }
162            }
163        }
164
165        // Determine the ProgPoint where moves on this (from, to)
166        // edge should go:
167        // - If there is more than one in-edge to `to`, then
168        //   `from` must have only one out-edge; moves go at tail of
169        //   `from` just before last Branch/Ret.
170        // - Otherwise, there must be at most one in-edge to `to`,
171        //   and moves go at start of `to`.
172        #[inline(always)]
173        fn choose_move_location<'a, F: Function>(
174            env: &Env<'a, F>,
175            from: Block,
176            to: Block,
177        ) -> (ProgPoint, InsertMovePrio) {
178            let from_last_insn = env.func.block_insns(from).last();
179            let to_first_insn = env.func.block_insns(to).first();
180            let from_is_ret = env.func.is_ret(from_last_insn);
181            let to_is_entry = env.func.entry_block() == to;
182            let from_outs = env.func.block_succs(from).len() + if from_is_ret { 1 } else { 0 };
183            let to_ins = env.func.block_preds(to).len() + if to_is_entry { 1 } else { 0 };
184
185            if to_ins > 1 && from_outs <= 1 {
186                (
187                    // N.B.: though semantically the edge moves happen
188                    // after the branch, we must insert them before
189                    // the branch because otherwise, of course, they
190                    // would never execute. This is correct even in
191                    // the presence of branches that read register
192                    // inputs (e.g. conditional branches on some RISCs
193                    // that branch on reg zero/not-zero, or any
194                    // indirect branch), but for a very subtle reason:
195                    // all cases of such branches will (or should)
196                    // have multiple successors, and thus due to
197                    // critical-edge splitting, their successors will
198                    // have only the single predecessor, and we prefer
199                    // to insert at the head of the successor in that
200                    // case (rather than here). We make this a
201                    // requirement, in fact: the user of this library
202                    // shall not read registers in a branch
203                    // instruction of there is only one successor per
204                    // the given CFG information.
205                    ProgPoint::before(from_last_insn),
206                    InsertMovePrio::OutEdgeMoves,
207                )
208            } else if to_ins <= 1 {
209                (
210                    ProgPoint::before(to_first_insn),
211                    InsertMovePrio::InEdgeMoves,
212                )
213            } else {
214                panic!(
215                    "Critical edge: can't insert moves between blocks {:?} and {:?}",
216                    from, to
217                );
218            }
219        }
220
221        #[derive(PartialEq)]
222        struct InterBlockDest {
223            to: Block,
224            from: Block,
225            alloc: Allocation,
226        }
227
228        impl InterBlockDest {
229            fn key(&self) -> u64 {
230                u64_key(self.from.raw_u32(), self.to.raw_u32())
231            }
232        }
233
234        let mut inter_block_sources: FxHashMap<Block, Allocation> = FxHashMap::default();
235        let mut inter_block_dests = Vec::with_capacity(self.func.num_blocks());
236
237        #[derive(Hash, Eq, PartialEq)]
238        struct BlockparamSourceKey {
239            bits: u64,
240        }
241
242        impl BlockparamSourceKey {
243            fn new(from_block: Block, to_vreg: VRegIndex) -> Self {
244                BlockparamSourceKey {
245                    bits: u64_key(from_block.raw_u32(), to_vreg.raw_u32()),
246                }
247            }
248        }
249
250        struct BlockparamDest {
251            from_block: Block,
252            to_block: Block,
253            to_vreg: VRegIndex,
254            alloc: Allocation,
255        }
256
257        impl BlockparamDest {
258            fn key(&self) -> u64 {
259                u64_key(self.to_block.raw_u32(), self.from_block.raw_u32())
260            }
261
262            fn source(&self) -> BlockparamSourceKey {
263                BlockparamSourceKey::new(self.from_block, self.to_vreg)
264            }
265        }
266
267        let mut block_param_sources =
268            FxHashMap::<BlockparamSourceKey, Allocation>::with_capacity_and_hasher(
269                3 * self.func.num_insts(),
270                Default::default(),
271            );
272        let mut block_param_dests = Vec::with_capacity(3 * self.func.num_insts());
273
274        let debug_labels = self.func.debug_value_labels();
275
276        let mut reuse_input_insts = Vec::with_capacity(self.func.num_insts() / 2);
277
278        let mut blockparam_in_idx = 0;
279        let mut blockparam_out_idx = 0;
280        for vreg in 0..self.vregs.len() {
281            let vreg = VRegIndex::new(vreg);
282            if !self.is_vreg_used(vreg) {
283                continue;
284            }
285
286            inter_block_sources.clear();
287
288            // For each range in each vreg, insert moves or
289            // half-moves.  We also scan over `blockparam_ins` and
290            // `blockparam_outs`, which are sorted by (block, vreg),
291            // to fill in allocations.
292            let mut prev = PrevBuffer::new(blockparam_in_idx);
293            for range_idx in 0..self.vregs[vreg].ranges.len() {
294                let entry = self.vregs[vreg].ranges[range_idx];
295                let alloc = self.get_alloc_for_range(entry.index);
296                let range = entry.range;
297                trace!(
298                    "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}",
299                    vreg,
300                    entry.index,
301                    range,
302                    alloc,
303                );
304                debug_assert!(alloc != Allocation::none());
305
306                if self.annotations_enabled {
307                    self.annotate(
308                        range.from,
309                        format!(
310                            " <<< start v{} in {} (range{}) (bundle{})",
311                            vreg.index(),
312                            alloc,
313                            entry.index.index(),
314                            self.ranges[entry.index].bundle.raw_u32(),
315                        ),
316                    );
317                    self.annotate(
318                        range.to,
319                        format!(
320                            "     end   v{} in {} (range{}) (bundle{}) >>>",
321                            vreg.index(),
322                            alloc,
323                            entry.index.index(),
324                            self.ranges[entry.index].bundle.raw_u32(),
325                        ),
326                    );
327                }
328
329                prev.advance(entry);
330
331                // Does this range follow immediately after a prior
332                // range in the same block? If so, insert a move (if
333                // the allocs differ). We do this directly rather than
334                // with half-moves because we eagerly know both sides
335                // already (and also, half-moves are specific to
336                // inter-block transfers).
337                //
338                // Note that we do *not* do this if there is also a
339                // def as the first use in the new range: it's
340                // possible that an old liverange covers the Before
341                // pos of an inst, a new liverange covers the After
342                // pos, and the def also happens at After. In this
343                // case we don't want to an insert a move after the
344                // instruction copying the old liverange.
345                //
346                // Note also that we assert that the new range has to
347                // start at the Before-point of an instruction; we
348                // can't insert a move that logically happens just
349                // before After (i.e. in the middle of a single
350                // instruction).
351                if let Some(prev) = prev.is_valid() {
352                    let prev_alloc = self.get_alloc_for_range(prev.index);
353                    debug_assert!(prev_alloc != Allocation::none());
354
355                    if prev.range.to >= range.from
356                        && (prev.range.to > range.from || !self.is_start_of_block(range.from))
357                        && !self.ranges[entry.index].has_flag(LiveRangeFlag::StartsAtDef)
358                    {
359                        trace!(
360                            "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}",
361                            prev.index.index(),
362                            entry.index.index(),
363                            prev_alloc,
364                            alloc,
365                            vreg.index()
366                        );
367                        debug_assert_eq!(range.from.pos(), InstPosition::Before);
368                        inserted_moves.push(
369                            range.from,
370                            InsertMovePrio::Regular,
371                            prev_alloc,
372                            alloc,
373                            self.vreg(vreg),
374                        );
375                    }
376                }
377
378                // Scan over blocks whose ends are covered by this
379                // range. For each, for each successor that is not
380                // already in this range (hence guaranteed to have the
381                // same allocation) and if the vreg is live, add a
382                // Source half-move.
383                let mut block = self.cfginfo.insn_block[range.from.inst().index()];
384                while block.is_valid() && block.index() < self.func.num_blocks() {
385                    if range.to < self.cfginfo.block_exit[block.index()].next() {
386                        break;
387                    }
388                    trace!("examining block with end in range: block{}", block.index());
389
390                    match inter_block_sources.entry(block) {
391                        // If the entry is already present in the map, we'll try to prefer a
392                        // register allocation.
393                        Entry::Occupied(mut entry) => {
394                            if !entry.get().is_reg() {
395                                entry.insert(alloc);
396                            }
397                        }
398                        Entry::Vacant(entry) => {
399                            entry.insert(alloc);
400                        }
401                    }
402
403                    // Scan forward in `blockparam_outs`, adding all
404                    // half-moves for outgoing values to blockparams
405                    // in succs.
406                    trace!(
407                        "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}",
408                        vreg.index(),
409                        block.index(),
410                        blockparam_out_idx,
411                    );
412                    while blockparam_out_idx < self.blockparam_outs.len() {
413                        let BlockparamOut {
414                            from_vreg,
415                            from_block,
416                            to_block,
417                            to_vreg,
418                        } = self.blockparam_outs[blockparam_out_idx];
419                        if (from_vreg, from_block) > (vreg, block) {
420                            break;
421                        }
422                        if (from_vreg, from_block) == (vreg, block) {
423                            trace!(
424                                " -> found: from v{} block{} to v{} block{}",
425                                from_vreg.index(),
426                                from_block.index(),
427                                to_vreg.index(),
428                                to_vreg.index()
429                            );
430
431                            let key = BlockparamSourceKey::new(from_block, to_vreg);
432                            match block_param_sources.entry(key) {
433                                // As with inter-block moves, if the entry is already present we'll
434                                // try to prefer a register allocation.
435                                Entry::Occupied(mut entry) => {
436                                    if !entry.get().is_reg() {
437                                        entry.insert(alloc);
438                                    }
439                                }
440                                Entry::Vacant(entry) => {
441                                    entry.insert(alloc);
442                                }
443                            }
444
445                            if self.annotations_enabled {
446                                self.annotate(
447                                    self.cfginfo.block_exit[block.index()],
448                                    format!(
449                                        "blockparam-out: block{} to block{}: v{} to v{} in {}",
450                                        from_block.index(),
451                                        to_block.index(),
452                                        from_vreg.index(),
453                                        to_vreg.index(),
454                                        alloc
455                                    ),
456                                );
457                            }
458                        }
459
460                        blockparam_out_idx += 1;
461                    }
462
463                    block = block.next();
464                }
465
466                // Scan over blocks whose beginnings are covered by
467                // this range and for which the vreg is live at the
468                // start of the block. For each, for each predecessor,
469                // add a Dest half-move.
470                let mut block = self.cfginfo.insn_block[range.from.inst().index()];
471                if self.cfginfo.block_entry[block.index()] < range.from {
472                    block = block.next();
473                }
474                while block.is_valid() && block.index() < self.func.num_blocks() {
475                    if self.cfginfo.block_entry[block.index()] >= range.to {
476                        break;
477                    }
478
479                    // Add half-moves for blockparam inputs.
480                    trace!(
481                        "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}",
482                        vreg.index(),
483                        block.index(),
484                        prev.prev_ins_idx,
485                    );
486                    let mut idx = prev.blockparam_ins_idx();
487                    while idx < self.blockparam_ins.len() {
488                        let BlockparamIn {
489                            from_block,
490                            to_block,
491                            to_vreg,
492                        } = self.blockparam_ins[idx];
493                        if (to_vreg, to_block) > (vreg, block) {
494                            break;
495                        }
496                        if (to_vreg, to_block) == (vreg, block) {
497                            block_param_dests.push(BlockparamDest {
498                                from_block,
499                                to_block,
500                                to_vreg,
501                                alloc,
502                            });
503                            trace!(
504                                "match: blockparam_in: v{} in block{} from block{} into {}",
505                                to_vreg.index(),
506                                to_block.index(),
507                                from_block.index(),
508                                alloc,
509                            );
510                            #[cfg(debug_assertions)]
511                            if self.annotations_enabled {
512                                self.annotate(
513                                    self.cfginfo.block_entry[block.index()],
514                                    format!(
515                                        "blockparam-in: block{} to block{}:into v{} in {}",
516                                        from_block.index(),
517                                        to_block.index(),
518                                        to_vreg.index(),
519                                        alloc
520                                    ),
521                                );
522                            }
523                        }
524                        idx += 1;
525                    }
526
527                    prev.update_blockparam_ins_idx(idx);
528
529                    if !self.is_live_in(block, vreg) {
530                        block = block.next();
531                        continue;
532                    }
533
534                    trace!(
535                        "scanning preds at vreg {} block {} for ends outside the range",
536                        vreg.index(),
537                        block.index()
538                    );
539
540                    // Now find any preds whose ends are not in the
541                    // same range, and insert appropriate moves.
542                    for &pred in self.func.block_preds(block) {
543                        trace!(
544                            "pred block {} has exit {:?}",
545                            pred.index(),
546                            self.cfginfo.block_exit[pred.index()]
547                        );
548                        if range.contains_point(self.cfginfo.block_exit[pred.index()]) {
549                            continue;
550                        }
551
552                        inter_block_dests.push(InterBlockDest {
553                            from: pred,
554                            to: block,
555                            alloc,
556                        })
557                    }
558
559                    block = block.next();
560                }
561
562                // Scan over def/uses and apply allocations.
563                for use_idx in 0..self.ranges[entry.index].uses.len() {
564                    let usedata = self.ranges[entry.index].uses[use_idx];
565                    trace!("applying to use: {:?}", usedata);
566                    debug_assert!(range.contains_point(usedata.pos));
567                    let inst = usedata.pos.inst();
568                    let slot = usedata.slot;
569                    let operand = usedata.operand;
570                    self.set_alloc(inst, slot as usize, alloc);
571                    if let OperandConstraint::Reuse(_) = operand.constraint() {
572                        reuse_input_insts.push(inst);
573                    }
574                }
575
576                // Scan debug-labels on this vreg that overlap with
577                // this range, producing a debug-info output record
578                // giving the allocation location for each label.
579                if !debug_labels.is_empty() {
580                    // Do a binary search to find the start of any
581                    // labels for this vreg. Recall that we require
582                    // debug-label requests to be sorted by vreg as a
583                    // precondition (which we verified above).
584                    let start = debug_labels
585                        .binary_search_by(|&(label_vreg, _label_from, _label_to, _label)| {
586                            // Search for the point just before the first
587                            // tuple that could be for `vreg` overlapping
588                            // with `range`. Never return
589                            // `Ordering::Equal`; `binary_search_by` in
590                            // this case returns the index of the first
591                            // entry that is greater as an `Err`.
592                            if label_vreg.vreg() < vreg.index() {
593                                core::cmp::Ordering::Less
594                            } else {
595                                core::cmp::Ordering::Greater
596                            }
597                        })
598                        .unwrap_err();
599
600                    for &(label_vreg, label_from, label_to, label) in &debug_labels[start..] {
601                        let label_from = ProgPoint::before(label_from);
602                        let label_to = ProgPoint::before(label_to);
603                        let label_range = CodeRange {
604                            from: label_from,
605                            to: label_to,
606                        };
607                        if label_vreg.vreg() != vreg.index() {
608                            break;
609                        }
610                        if !range.overlaps(&label_range) {
611                            continue;
612                        }
613
614                        let from = core::cmp::max(label_from, range.from);
615                        let to = core::cmp::min(label_to, range.to);
616
617                        self.ctx
618                            .output
619                            .debug_locations
620                            .push((label, from, to, alloc));
621                    }
622                }
623            }
624
625            if !inter_block_dests.is_empty() {
626                self.output.stats.halfmoves_count += inter_block_dests.len() * 2;
627
628                inter_block_dests.sort_unstable_by_key(InterBlockDest::key);
629
630                let vreg = self.vreg(vreg);
631                trace!("processing inter-block moves for {}", vreg);
632                for dest in inter_block_dests.drain(..) {
633                    let src = inter_block_sources[&dest.from];
634
635                    trace!(
636                        " -> moving from {} to {} between {:?} and {:?}",
637                        src,
638                        dest.alloc,
639                        dest.from,
640                        dest.to
641                    );
642
643                    let (pos, prio) = choose_move_location(self, dest.from, dest.to);
644                    inserted_moves.push(pos, prio, src, dest.alloc, vreg);
645                }
646            }
647
648            blockparam_in_idx = prev.blockparam_ins_idx();
649        }
650
651        if !block_param_dests.is_empty() {
652            self.output.stats.halfmoves_count += block_param_sources.len();
653            self.output.stats.halfmoves_count += block_param_dests.len();
654
655            trace!("processing block-param moves");
656            for dest in block_param_dests {
657                let src = dest.source();
658                let src_alloc = block_param_sources.get(&src).unwrap();
659                let (pos, prio) = choose_move_location(self, dest.from_block, dest.to_block);
660                inserted_moves.push(pos, prio, *src_alloc, dest.alloc, self.vreg(dest.to_vreg));
661            }
662        }
663
664        // Handle multi-fixed-reg constraints by copying.
665        for fixup in core::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) {
666            let from_alloc = self.get_alloc(fixup.pos.inst(), fixup.from_slot as usize);
667            let to_alloc = Allocation::reg(PReg::from_index(fixup.to_preg.index()));
668            trace!(
669                "multi-fixed-move constraint at {:?} from {} to {} for v{}",
670                fixup.pos,
671                from_alloc,
672                to_alloc,
673                fixup.vreg.index(),
674            );
675            let prio = match fixup.level {
676                FixedRegFixupLevel::Initial => InsertMovePrio::MultiFixedRegInitial,
677                FixedRegFixupLevel::Secondary => InsertMovePrio::MultiFixedRegSecondary,
678            };
679            inserted_moves.push(fixup.pos, prio, from_alloc, to_alloc, self.vreg(fixup.vreg));
680            self.set_alloc(
681                fixup.pos.inst(),
682                fixup.to_slot as usize,
683                Allocation::reg(PReg::from_index(fixup.to_preg.index())),
684            );
685        }
686
687        // Handle outputs that reuse inputs: copy beforehand, then set
688        // input's alloc to output's.
689        //
690        // Note that the output's allocation may not *actually* be
691        // valid until InstPosition::After, but the reused input may
692        // occur at InstPosition::Before. This may appear incorrect,
693        // but we make it work by ensuring that all *other* inputs are
694        // extended to InstPosition::After so that the def will not
695        // interfere. (The liveness computation code does this -- we
696        // do not require the user to do so.)
697        //
698        // One might ask: why not insist that input-reusing defs occur
699        // at InstPosition::Before? this would be correct, but would
700        // mean that the reused input and the reusing output
701        // interfere, *guaranteeing* that every such case would
702        // require a move. This is really bad on ISAs (like x86) where
703        // reused inputs are ubiquitous.
704        //
705        // Another approach might be to put the def at Before, and
706        // trim the reused input's liverange back to the previous
707        // instruction's After. This is kind of OK until (i) a block
708        // boundary occurs between the prior inst and this one, or
709        // (ii) any moves/spills/reloads occur between the two
710        // instructions. We really do need the input to be live at
711        // this inst's Before.
712        //
713        // In principle what we really need is a "BeforeBefore"
714        // program point, but we don't want to introduce that
715        // everywhere and pay the cost of twice as many ProgPoints
716        // throughout the allocator.
717        //
718        // Or we could introduce a separate move instruction -- this
719        // is the approach that regalloc.rs takes with "mod" operands
720        // -- but that is also costly.
721        //
722        // So we take this approach (invented by IonMonkey -- somewhat
723        // hard to discern, though see [0] for a comment that makes
724        // this slightly less unclear) to avoid interference between
725        // the actual reused input and reusing output, ensure
726        // interference (hence no incorrectness) between other inputs
727        // and the reusing output, and not require a separate explicit
728        // move instruction.
729        //
730        // [0] https://searchfox.org/mozilla-central/rev/3a798ef9252896fb389679f06dd3203169565af0/js/src/jit/shared/Lowering-shared-inl.h#108-110
731        for inst in reuse_input_insts {
732            let mut input_reused: SmallVec<[usize; 4]> = smallvec![];
733            for output_idx in 0..self.func.inst_operands(inst).len() {
734                let operand = self.func.inst_operands(inst)[output_idx];
735                if let OperandConstraint::Reuse(input_idx) = operand.constraint() {
736                    debug_assert!(!input_reused.contains(&input_idx));
737                    debug_assert_eq!(operand.pos(), OperandPos::Late);
738                    input_reused.push(input_idx);
739                    let input_alloc = self.get_alloc(inst, input_idx);
740                    let output_alloc = self.get_alloc(inst, output_idx);
741                    trace!(
742                        "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}",
743                        inst,
744                        output_idx,
745                        output_alloc,
746                        input_idx,
747                        input_alloc
748                    );
749                    if input_alloc != output_alloc {
750                        #[cfg(debug_assertions)]
751                        if self.annotations_enabled {
752                            self.annotate(
753                                ProgPoint::before(inst),
754                                format!(" reuse-input-copy: {} -> {}", input_alloc, output_alloc),
755                            );
756                        }
757                        let input_operand = self.func.inst_operands(inst)[input_idx];
758                        inserted_moves.push(
759                            ProgPoint::before(inst),
760                            InsertMovePrio::ReusedInput,
761                            input_alloc,
762                            output_alloc,
763                            input_operand.vreg(),
764                        );
765                        self.set_alloc(inst, input_idx, output_alloc);
766                    }
767                }
768            }
769        }
770
771        // Sort the debug-locations vector; we provide this
772        // invariant to the client.
773        self.output.debug_locations.sort_unstable();
774
775        inserted_moves
776    }
777
778    pub fn resolve_inserted_moves(&mut self, mut inserted_moves: InsertedMoves) -> Edits {
779        // For each program point, gather all moves together. Then
780        // resolve (see cases below).
781        let mut i = 0;
782        inserted_moves
783            .moves
784            .sort_unstable_by_key(|m| m.pos_prio.key());
785
786        // Redundant-move elimination state tracker.
787        let mut redundant_moves = RedundantMoveEliminator::default();
788
789        fn redundant_move_process_side_effects<'a, F: Function>(
790            this: &Env<'a, F>,
791            redundant_moves: &mut RedundantMoveEliminator,
792            from: ProgPoint,
793            to: ProgPoint,
794        ) {
795            // If we cross a block boundary, clear and return.
796            if this.cfginfo.insn_block[from.inst().index()]
797                != this.cfginfo.insn_block[to.inst().index()]
798            {
799                redundant_moves.clear();
800                return;
801            }
802
803            let start_inst = if from.pos() == InstPosition::Before {
804                from.inst()
805            } else {
806                from.inst().next()
807            };
808            let end_inst = if to.pos() == InstPosition::Before {
809                to.inst()
810            } else {
811                to.inst().next()
812            };
813            for inst in start_inst.index()..end_inst.index() {
814                let inst = Inst::new(inst);
815                for (i, op) in this.func.inst_operands(inst).iter().enumerate() {
816                    match op.kind() {
817                        OperandKind::Def => {
818                            let alloc = this.get_alloc(inst, i);
819                            redundant_moves.clear_alloc(alloc);
820                        }
821                        _ => {}
822                    }
823                }
824                for reg in this.func.inst_clobbers(inst) {
825                    redundant_moves.clear_alloc(Allocation::reg(reg));
826                }
827                // The dedicated scratch registers may be clobbered by any
828                // instruction.
829                for reg in this.env.scratch_by_class {
830                    if let Some(reg) = reg {
831                        redundant_moves.clear_alloc(Allocation::reg(reg));
832                    }
833                }
834            }
835        }
836
837        let mut last_pos = ProgPoint::before(Inst::new(0));
838        let mut edits = Edits::with_capacity(self.func.num_insts());
839
840        while i < inserted_moves.moves.len() {
841            let start = i;
842            let pos_prio = inserted_moves.moves[i].pos_prio;
843            while i < inserted_moves.moves.len() && inserted_moves.moves[i].pos_prio == pos_prio {
844                i += 1;
845            }
846            let moves = &inserted_moves.moves[start..i];
847
848            redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos_prio.pos);
849            last_pos = pos_prio.pos;
850
851            // Gather all the moves in each RegClass separately.
852            // These cannot interact, so it is safe to have separate
853            // ParallelMove instances. They need to be separate because
854            // moves between the classes are impossible. (We could
855            // enhance ParallelMoves to understand register classes, but
856            // this seems simpler.)
857            let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
858            let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
859            let mut vec_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
860
861            for m in moves {
862                match m.to_vreg.class() {
863                    RegClass::Int => {
864                        int_moves.push(m.clone());
865                    }
866                    RegClass::Float => {
867                        float_moves.push(m.clone());
868                    }
869                    RegClass::Vector => {
870                        vec_moves.push(m.clone());
871                    }
872                }
873            }
874
875            for &(regclass, moves) in &[
876                (RegClass::Int, &int_moves),
877                (RegClass::Float, &float_moves),
878                (RegClass::Vector, &vec_moves),
879            ] {
880                // All moves in `moves` semantically happen in
881                // parallel. Let's resolve these to a sequence of moves
882                // that can be done one at a time.
883                let mut parallel_moves = ParallelMoves::new();
884                trace!(
885                    "parallel moves at pos {:?} prio {:?}",
886                    pos_prio.pos,
887                    pos_prio.prio
888                );
889                for m in moves {
890                    trace!(" {} -> {}", m.from_alloc, m.to_alloc);
891                    parallel_moves.add(m.from_alloc, m.to_alloc, Some(m.to_vreg));
892                }
893
894                let resolved = parallel_moves.resolve();
895                let mut scratch_iter = RegTraversalIter::new(
896                    self.env, regclass, None, None, 0,
897                    None, // We assume there is no limit on the set of registers available for moves.
898                );
899                let mut dedicated_scratch = self.env.scratch_by_class[regclass as usize];
900                let key = LiveRangeKey::from_range(&CodeRange {
901                    from: pos_prio.pos,
902                    to: pos_prio.pos.next(),
903                });
904                let find_free_reg = || {
905                    // Use the dedicated scratch register first if it is
906                    // available.
907                    if let Some(reg) = dedicated_scratch.take() {
908                        return Some(Allocation::reg(reg));
909                    }
910                    while let Some(preg) = scratch_iter.next() {
911                        if !self.pregs[preg.index()]
912                            .allocations
913                            .btree
914                            .contains_key(&key)
915                        {
916                            let alloc = Allocation::reg(preg);
917                            if moves
918                                .iter()
919                                .any(|m| m.from_alloc == alloc || m.to_alloc == alloc)
920                            {
921                                // Skip pregs used by moves in this
922                                // parallel move set, even if not
923                                // marked used at progpoint: edge move
924                                // liveranges meet but don't overlap
925                                // so otherwise we may incorrectly
926                                // overwrite a source reg.
927                                continue;
928                            }
929                            return Some(alloc);
930                        }
931                    }
932                    None
933                };
934                let mut stackslot_idx = 0;
935                let get_stackslot = || {
936                    let idx = stackslot_idx;
937                    stackslot_idx += 1;
938                    // We can't borrow `self` as mutable, so we create
939                    // these placeholders then allocate the actual
940                    // slots if needed with `self.allocate_spillslot`
941                    // below.
942                    Allocation::stack(SpillSlot::new(SpillSlot::MAX - idx))
943                };
944                let is_stack_alloc = |alloc: Allocation| {
945                    if let Some(preg) = alloc.as_reg() {
946                        self.pregs[preg.index()].is_stack
947                    } else {
948                        alloc.is_stack()
949                    }
950                };
951                let preferred_victim = self.preferred_victim_by_class[regclass as usize];
952
953                let scratch_resolver = MoveAndScratchResolver {
954                    find_free_reg,
955                    get_stackslot,
956                    is_stack_alloc,
957                    borrowed_scratch_reg: preferred_victim,
958                };
959
960                let resolved = scratch_resolver.compute(resolved);
961
962                let mut rewrites = FxHashMap::default();
963                for i in 0..stackslot_idx {
964                    if i >= self.extra_spillslots_by_class[regclass as usize].len() {
965                        let slot =
966                            self.allocate_spillslot(self.func.spillslot_size(regclass) as u32);
967                        self.extra_spillslots_by_class[regclass as usize].push(slot);
968                    }
969                    rewrites.insert(
970                        Allocation::stack(SpillSlot::new(SpillSlot::MAX - i)),
971                        self.extra_spillslots_by_class[regclass as usize][i],
972                    );
973                }
974
975                for (src, dst, to_vreg) in resolved {
976                    let src = rewrites.get(&src).cloned().unwrap_or(src);
977                    let dst = rewrites.get(&dst).cloned().unwrap_or(dst);
978                    trace!("  resolved: {} -> {} ({:?})", src, dst, to_vreg);
979                    let action = redundant_moves.process_move(src, dst, to_vreg);
980                    if !action.elide {
981                        edits.add(pos_prio, src, dst);
982                    } else {
983                        trace!("    -> redundant move elided");
984                    }
985                }
986            }
987        }
988
989        // Ensure edits are in sorted ProgPoint order. N.B.: this must
990        // be a stable sort! We have to keep the order produced by the
991        // parallel-move resolver for all moves within a single sort
992        // key.
993        edits.sort();
994        self.output.stats.edits_count = edits.len();
995
996        // Add debug annotations.
997        if self.annotations_enabled {
998            for &(pos_prio, ref edit) in edits.iter() {
999                match edit {
1000                    &Edit::Move { from, to } => {
1001                        self.annotate(pos_prio.pos, format!("move {} -> {}", from, to));
1002                    }
1003                }
1004            }
1005        }
1006
1007        edits
1008    }
1009}
regalloc2/ion/moves.rs

regalloc2/ion/
moves.rs