cranelift_codegen/isa/x64/lower/isle.rs

//! ISLE integration glue code for x64 lowering.

// Pull in the ISLE generated code.
pub(crate) mod generated_code;
use crate::{ir::AtomicRmwOp, ir::types};
use generated_code::{AssemblerOutputs, Context, MInst, RegisterClass};

// Types that the generated ISLE code uses via `use super::*`.
use super::external::{CraneliftRegisters, PairedGpr, PairedXmm, isle_assembler_methods};
use super::{MergeableLoadSize, is_int_or_ref_ty, is_mergeable_load, lower_to_amode};
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::*;
use crate::ir::types::*;
use crate::ir::{
    BlockCall, Inst, InstructionData, LibCall, MemFlags, Opcode, TrapCode, Value, ValueList,
};
use crate::isa::x64::X64Backend;
use crate::isa::x64::inst::{ReturnCallInfo, args::*, regs};
use crate::isa::x64::lower::{InsnInput, emit_vm_call};
use crate::machinst::isle::*;
use crate::machinst::{
    ArgPair, CallArgList, CallInfo, CallRetList, InstOutput, MachInst, VCodeConstant,
    VCodeConstantData,
};
use alloc::vec::Vec;
use cranelift_assembler_x64 as asm;
use regalloc2::PReg;
use std::boxed::Box;

/// Type representing out-of-line data for calls. This type is optional because
/// the call instruction is also used by Winch to emit calls, but the
/// `Box<CallInfo>` field is not used there; it is only used by Cranelift. By
/// making it optional, we reduce the number of heap allocations in Winch.
type BoxCallInfo = Box<CallInfo<ExternalName>>;
type BoxCallIndInfo = Box<CallInfo<RegMem>>;
type BoxReturnCallInfo = Box<ReturnCallInfo<ExternalName>>;
type BoxReturnCallIndInfo = Box<ReturnCallInfo<Reg>>;
type VecArgPair = Vec<ArgPair>;
type BoxSyntheticAmode = Box<SyntheticAmode>;

/// When interacting with the external assembler (see `external.rs`), we
/// need to fix the types we'll use.
type AssemblerInst = asm::Inst<CraneliftRegisters>;

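/// A load that can be sunk into (merged with) the instruction that uses its
/// result, so the load becomes a memory operand of that use.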
pub struct SinkableLoad {
    inst: Inst,
    addr_input: InsnInput,
    offset: i32,
}

/// The main entry point for lowering with ISLE.
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower(&mut isle_ctx, inst)
}

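/// The entry point for lowering branch instructions with ISLE.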
pub(crate) fn lower_branch(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    branch: Inst,
    targets: &[MachLabel],
) -> Option<()> {
    // TODO: reuse the ISLE context across lowerings so we can reuse its
    // internal heap allocations.
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets)
}

impl Context for IsleContext<'_, '_, MInst, X64Backend> {
    isle_lower_prelude_methods!();
    isle_assembler_methods!();

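    // Generate the boxed out-of-line `CallInfo` for a call, accumulating the
    // outgoing argument and return-value stack space into the ABI's
    // outgoing-args size.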
    fn gen_call_info(
        &mut self,
        sig: Sig,
        dest: ExternalName,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
    ) -> BoxCallInfo {
        let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
        let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

        Box::new(
            self.lower_ctx
                .gen_call_info(sig, dest, uses, defs, try_call_info),
        )
    }

    fn gen_call_ind_info(
        &mut self,
        sig: Sig,
        dest: &RegMem,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
    ) -> BoxCallIndInfo {
        let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
        let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

        Box::new(
            self.lower_ctx
                .gen_call_info(sig, dest.clone(), uses, defs, try_call_info),
        )
    }

    fn gen_return_call_info(
        &mut self,
        sig: Sig,
        dest: ExternalName,
        uses: CallArgList,
    ) -> BoxReturnCallInfo {
        let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_tail_args_size(new_stack_arg_size);

        Box::new(ReturnCallInfo {
            dest,
            uses,
            tmp: self.lower_ctx.temp_writable_gpr(),
            new_stack_arg_size,
        })
    }

    fn gen_return_call_ind_info(
        &mut self,
        sig: Sig,
        dest: Reg,
        uses: CallArgList,
    ) -> BoxReturnCallIndInfo {
        let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_tail_args_size(new_stack_arg_size);

        Box::new(ReturnCallInfo {
            dest,
            uses,
            tmp: self.lower_ctx.temp_writable_gpr(),
            new_stack_arg_size,
        })
    }

    #[inline]
    fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
        if ty.bits() == 64 {
            OperandSize::Size64
        } else {
            OperandSize::Size32
        }
    }

    #[inline]
    fn raw_operand_size_of_type(&mut self, ty: Type) -> OperandSize {
        OperandSize::from_ty(ty)
    }

    fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm {
        if let Some(imm) = self.i64_from_iconst(val) {
            if let Ok(imm) = i32::try_from(imm) {
                return RegMemImm::Imm {
                    simm32: imm.cast_unsigned(),
                };
            }
        }

        self.put_in_reg_mem(val).into()
    }

    fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm {
        if let Some(imm) = self.i64_from_iconst(val) {
            if let Ok(imm) = i32::try_from(imm) {
                return XmmMemImm::unwrap_new(RegMemImm::Imm {
                    simm32: imm.cast_unsigned(),
                });
            }
        }

        let res = match self.put_in_xmm_mem(val).to_reg_mem() {
            RegMem::Reg { reg } => RegMemImm::Reg { reg },
            RegMem::Mem { addr } => RegMemImm::Mem { addr },
        };

        XmmMemImm::unwrap_new(res)
    }

    fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A load from the constant pool is better than a rematerialization into a register,
            // because it reduces register pressure.
            //
            // NOTE: this is where behavior differs from `put_in_reg_mem`, as we always force
            // constants to be 16 bytes when a constant will be used in place of an xmm register.
            let vcode_constant = self.emit_u128_le_const(c as u128);
            return XmmMem::unwrap_new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant)));
        }

        XmmMem::unwrap_new(self.put_in_reg_mem(val))
    }

    fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            // A load from the constant pool is better than a
            // rematerialization into a register, because it reduces
            // register pressure.
            let vcode_constant = self.emit_u64_le_const(c);
            return RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant));
        }

        if let Some(load) = self.sinkable_load(val) {
            return RegMem::Mem {
                addr: self.sink_load(&load),
            };
        }

        RegMem::reg(self.put_in_reg(val))
    }

    #[inline]
    fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn has_avx(&mut self) -> bool {
        self.backend.x64_flags.has_avx()
    }

    #[inline]
    fn use_avx2(&mut self) -> bool {
        self.backend.x64_flags.has_avx() && self.backend.x64_flags.has_avx2()
    }

    #[inline]
    fn has_avx512vl(&mut self) -> bool {
        self.backend.x64_flags.has_avx512vl()
    }

    #[inline]
    fn has_avx512dq(&mut self) -> bool {
        self.backend.x64_flags.has_avx512dq()
    }

    #[inline]
    fn has_avx512f(&mut self) -> bool {
        self.backend.x64_flags.has_avx512f()
    }

    #[inline]
    fn has_avx512bitalg(&mut self) -> bool {
        self.backend.x64_flags.has_avx512bitalg()
    }

    #[inline]
    fn has_avx512vbmi(&mut self) -> bool {
        self.backend.x64_flags.has_avx512vbmi()
    }

    #[inline]
    fn has_lzcnt(&mut self) -> bool {
        self.backend.x64_flags.has_lzcnt()
    }

    #[inline]
    fn has_bmi1(&mut self) -> bool {
        self.backend.x64_flags.has_bmi1()
    }

    #[inline]
    fn has_bmi2(&mut self) -> bool {
        self.backend.x64_flags.has_bmi2()
    }

    #[inline]
    fn use_popcnt(&mut self) -> bool {
        self.backend.x64_flags.has_popcnt() && self.backend.x64_flags.has_sse42()
    }

    #[inline]
    fn use_fma(&mut self) -> bool {
        self.backend.x64_flags.has_avx() && self.backend.x64_flags.has_fma()
    }

    #[inline]
    fn has_sse3(&mut self) -> bool {
        self.backend.x64_flags.has_sse3()
    }

    #[inline]
    fn has_ssse3(&mut self) -> bool {
        self.backend.x64_flags.has_ssse3()
    }

    #[inline]
    fn has_sse41(&mut self) -> bool {
        self.backend.x64_flags.has_sse41()
    }

    #[inline]
    fn use_sse42(&mut self) -> bool {
        self.backend.x64_flags.has_sse41() && self.backend.x64_flags.has_sse42()
    }

    #[inline]
    fn has_cmpxchg16b(&mut self) -> bool {
        self.backend.x64_flags.has_cmpxchg16b()
    }

    #[inline]
    fn shift_mask(&mut self, ty: Type) -> u8 {
        debug_assert!(ty.lane_bits().is_power_of_two());

        (ty.lane_bits() - 1) as u8
    }

    fn shift_amount_masked(&mut self, ty: Type, val: Imm64) -> u8 {
        (val.bits() as u8) & self.shift_mask(ty)
    }

    #[inline]
    fn simm32_from_value(&mut self, val: Value) -> Option<GprMemImm> {
        let imm = self.i64_from_iconst(val)?;
        Some(GprMemImm::unwrap_new(RegMemImm::Imm {
            simm32: i32::try_from(imm).ok()?.cast_unsigned(),
        }))
    }

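    // Match a value produced by a load of at least 32 bits that can be sunk
    // into its single use; see `sink_load` below.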
    fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

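    // Like `sinkable_load`, but only matches loads whose size exactly matches
    // the width expected at the use site.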
    fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

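    // Mark the matched load as sunk and lower its address into an addressing
    // mode that can be used directly as a memory operand.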
    fn sink_load(&mut self, load: &SinkableLoad) -> SyntheticAmode {
        self.lower_ctx.sink_inst(load.inst);
        let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
        SyntheticAmode::Real(addr)
    }

    #[inline]
    fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
        ExtMode::new(from_bits, to_bits).unwrap()
    }

    fn emit(&mut self, inst: &MInst) -> Unit {
        self.lower_ctx.emit(inst.clone());
    }

    #[inline]
    fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
        // Insert 32 bits from the replacement (source index 0, immediate bits
        // 7:6) into the vector at `lane` (shifted into immediate bits 5:4).
        0b00_00_00_00 | lane << 4
    }

    #[inline]
    fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
        RegMem::mem(addr.clone())
    }

    #[inline]
    fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
        amode.clone().into()
    }

    #[inline]
    fn synthetic_amode_slot(&mut self, offset: i32) -> SyntheticAmode {
        SyntheticAmode::SlotOffset { simm32: offset }
    }

    #[inline]
    fn const_to_synthetic_amode(&mut self, c: VCodeConstant) -> SyntheticAmode {
        SyntheticAmode::ConstantOffset(c)
    }

    #[inline]
    fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
        r.to_writable_reg()
    }

    #[inline]
    fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
        r.to_writable_reg()
    }

    fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        // When the shift amount is known, we can statically (i.e. at compile
        // time) determine the mask to use and only emit that.
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_ISHL_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        // When the shift amount is known, we can statically (i.e. at compile
        // time) determine the mask to use and only emit that.
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_USHR_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    #[inline]
    fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
        Writable::from_reg(Xmm::unwrap_new(r.to_reg()))
    }

    #[inline]
    fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
        r.to_reg()
    }

    #[inline]
    fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
        r.to_reg()
    }

    #[inline]
    fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_xmm_mem_imm(&mut self, r: &XmmMem) -> XmmMemImm {
        XmmMemImm::unwrap_new(r.clone().to_reg_mem().into())
    }

    #[inline]
    fn temp_writable_gpr(&mut self) -> WritableGpr {
        self.lower_ctx.temp_writable_gpr()
    }

    #[inline]
    fn temp_writable_xmm(&mut self) -> WritableXmm {
        self.lower_ctx.temp_writable_xmm()
    }

    #[inline]
    fn reg_to_reg_mem_imm(&mut self, reg: Reg) -> RegMemImm {
        RegMemImm::Reg { reg }
    }

    #[inline]
    fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
        XmmMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
        GprMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
        XmmMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
        xm.clone().into()
    }

    #[inline]
    fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
        gm.clone().into()
    }

    #[inline]
    fn xmm_new(&mut self, r: Reg) -> Xmm {
        Xmm::unwrap_new(r)
    }

    #[inline]
    fn gpr_new(&mut self, r: Reg) -> Gpr {
        Gpr::unwrap_new(r)
    }

    #[inline]
    fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
        GprMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
        GprMem::unwrap_new(RegMem::reg(r))
    }

    #[inline]
    fn gpr_to_gpr_mem(&mut self, gpr: Gpr) -> GprMem {
        GprMem::from(gpr)
    }

    #[inline]
    fn gpr_to_gpr_mem_imm(&mut self, gpr: Gpr) -> GprMemImm {
        GprMemImm::from(gpr)
    }

    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
        } else if ty.is_float() || (ty.is_vector() && ty.bits() <= 128) {
            Some(RegisterClass::Xmm)
        } else {
            None
        }
    }

    #[inline]
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 => Some(()),
            _ => None,
        }
    }

    #[inline]
    fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
        CC::from_intcc(*intcc)
    }

    #[inline]
    fn cc_invert(&mut self, cc: &CC) -> CC {
        cc.invert()
    }

    #[inline]
    fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
        match cc {
            CC::Z => Some(*cc),
            CC::NZ => Some(*cc),
            _ => None,
        }
    }

    #[inline]
    fn sum_extend_fits_in_32_bits(
        &mut self,
        extend_from_ty: Type,
        constant_value: Imm64,
        offset: Offset32,
    ) -> Option<u32> {
        let offset: i64 = offset.into();
        let constant_value: u64 = constant_value.bits() as u64;
        // If necessary, zero extend `constant_value` up to 64 bits.
        let shift = 64 - extend_from_ty.bits();
        let zero_extended_constant_value = (constant_value << shift) >> shift;
        // Sum up the two operands.
        let sum = offset.wrapping_add(zero_extended_constant_value as i64);
        // Check that the sum will fit in 32-bits.
        if sum == ((sum << 32) >> 32) {
            Some(sum as u32)
        } else {
            None
        }
    }

    #[inline]
    fn amode_offset(&mut self, addr: &SyntheticAmode, offset: i32) -> SyntheticAmode {
        addr.offset(offset)
    }

    #[inline]
    fn zero_offset(&mut self) -> Offset32 {
        Offset32::new(0)
    }

    #[inline]
    fn preg_rbp(&mut self) -> PReg {
        regs::rbp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_rsp(&mut self) -> PReg {
        regs::rsp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_pinned(&mut self) -> PReg {
        regs::pinned_reg().to_real_reg().unwrap().into()
    }

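    // Helpers for emitting libcalls with one, two, or three register
    // arguments; each returns the single result register.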
    fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    fn libcall_2(&mut self, libcall: &LibCall, a: Reg, b: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a), ValueRegs::one(b)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a), ValueRegs::one(b), ValueRegs::one(c)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    #[inline]
    fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
        let const_data = self.lower_ctx.get_constant_data(constant);
        if const_data.iter().all(|&b| b == 0 || b == 0xFF) {
            return Some(());
        }
        None
    }

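    // The shuffle mask helpers below build byte-shuffle mask constants in
    // which any out-of-range lane selector becomes 0x80; byte shuffles such as
    // `pshufb` treat a set high bit as "zero this output byte".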
    #[inline]
    fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| b.wrapping_sub(16))
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn perm_from_mask_with_zeros(
        &mut self,
        mask: &VecMask,
    ) -> Option<(VCodeConstant, VCodeConstant)> {
        if !mask.iter().any(|&b| b > 31) {
            return None;
        }

        let zeros = mask
            .iter()
            .map(|&b| if b > 31 { 0x00 } else { 0xff })
            .collect();

        Some((
            self.perm_from_mask(mask),
            self.lower_ctx
                .use_constant(VCodeConstantData::Generated(zeros)),
        ))
    }

    #[inline]
    fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask.iter().cloned().collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMem::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

    fn xmm_mem_imm_to_xmm_mem_aligned_imm(&mut self, arg: &XmmMemImm) -> XmmMemAlignedImm {
        match XmmMemAlignedImm::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMemImm::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

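    // If the 32-bit-lane shuffle immediate selects only from the left-hand
    // operand (all indices < 4), encode it as a `pshufd` immediate.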
    fn pshufd_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufd_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        // When selecting from the right-hand side, subtract 4 from each index,
        // bailing out if any index is less than 4. Afterwards the check is the
        // same as in `pshufd_lhs_imm` above.
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_imm(&mut self, imm: Immediate) -> Option<u8> {
        // The `shufps` instruction selects the first two elements from the
        // first vector and the second two elements from the second vector, so
        // offset the third/fourth selectors by 4 and then make sure every
        // selector is in range (less than 4).
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_rev_imm(&mut self, imm: Immediate) -> Option<u8> {
        // This is almost the same as `shufps_imm` except that the elements
        // which get 4 subtracted are reversed. This handles the case where the
        // `shufps` instruction can be emitted if the order of the operands is
        // swapped.
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Similar to `shufps` except this operates over 16-bit values so four
        // of them must be fixed and the other four must be in-range to encode
        // in the immediate.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(8)?;
        let f = f.checked_sub(8)?;
        let g = g.checked_sub(8)?;
        let h = h.checked_sub(8)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufhw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Similar to `pshuflw` except that the first four operands must be
        // fixed and the second four are offset by an extra 4 and tested to
        // make sure they're all in the range [4, 8).
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let e = e.checked_sub(4)?;
        let f = f.checked_sub(4)?;
        let g = g.checked_sub(4)?;
        let h = h.checked_sub(4)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn pshufhw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        // Note that everything here is offset by at least 8, and the upper
        // four lanes are offset by 12 to test that they're in the range
        // [12, 16).
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(12)?;
        let f = f.checked_sub(12)?;
        let g = g.checked_sub(12)?;
        let h = h.checked_sub(12)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn palignr_imm_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();

        if bytes.windows(2).all(|a| a[0] + 1 == a[1]) {
            Some(bytes[0])
        } else {
            None
        }
    }

    fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
        // First make sure that the shuffle immediate is selecting 16-bit lanes.
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;

        // Next build up an 8-bit mask from each of the bits of the selected
        // lanes above. This instruction can only be used when each lane
        // selector chooses from the corresponding lane in either of the two
        // operands, meaning the Nth lane selection must satisfy `lane % 8 ==
        // N`.
        //
        // This helper closure is used to calculate the value of the
        // corresponding bit.
        let bit = |x: u8, c: u8| {
            if x % 8 == c {
                if x < 8 { Some(0) } else { Some(1 << c) }
            } else {
                None
            }
        };
        Some(
            bit(a, 0)?
                | bit(b, 1)?
                | bit(c, 2)?
                | bit(d, 3)?
                | bit(e, 4)?
                | bit(f, 5)?
                | bit(g, 6)?
                | bit(h, 7)?,
        )
    }

    fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
        XmmMemImm::unwrap_new(RegMemImm::imm(imm))
    }

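    // Build a 128-bit constant that is all ones except for a zeroed byte at
    // `hole_idx`; used when inserting a single i8x16 lane.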
    fn insert_i8x16_lane_hole(&mut self, hole_idx: u8) -> VCodeConstant {
        let mask = -1i128 as u128;
        self.emit_u128_le_const(mask ^ (0xff << (hole_idx * 8)))
    }

    fn writable_invalid_gpr(&mut self) -> WritableGpr {
        let reg = Gpr::new(self.invalid_reg()).unwrap();
        WritableGpr::from_reg(reg)
    }

    fn box_synthetic_amode(&mut self, amode: &SyntheticAmode) -> BoxSyntheticAmode {
        Box::new(amode.clone())
    }

    ////////////////////////////////////////////////////////////////////////////
    ///// External assembler methods.
    ////////////////////////////////////////////////////////////////////////////

    fn is_imm8(&mut self, src: &GprMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_imm8_xmm(&mut self, src: &XmmMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_simm8(&mut self, src: &GprMemImm) -> Option<i8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i8::try_from(simm32.cast_signed()).ok()?),
            _ => None,
        }
    }

    fn is_imm16(&mut self, src: &GprMemImm) -> Option<u16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i16::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_simm16(&mut self, src: &GprMemImm) -> Option<i16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i16::try_from(simm32.cast_signed()).ok()?),
            _ => None,
        }
    }

    fn is_imm32(&mut self, src: &GprMemImm) -> Option<u32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32),
            _ => None,
        }
    }

    fn is_simm32(&mut self, src: &GprMemImm) -> Option<i32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32 as i32),
            _ => None,
        }
    }

    fn is_gpr(&mut self, src: &GprMemImm) -> Option<Gpr> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => Gpr::new(reg),
            _ => None,
        }
    }

    fn is_xmm(&mut self, src: &XmmMem) -> Option<Xmm> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { reg } => Xmm::new(reg),
            _ => None,
        }
    }

    fn is_gpr_mem(&mut self, src: &GprMemImm) -> Option<GprMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => GprMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => GprMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_xmm_mem(&mut self, src: &XmmMemImm) -> Option<XmmMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => XmmMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => XmmMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_mem(&mut self, src: &XmmMem) -> Option<SyntheticAmode> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { .. } => None,
            RegMem::Mem { addr } => Some(addr),
        }
    }

    // Custom constructors for `mulx` that only calculate the high half of the
    // result, which is done by using the same output operand for both
    // destination registers. This is in contrast to the assembler-generated
    // version of this instruction, which allocates two distinct temporary
    // output registers and calculates both the high and low halves of the
    // result.

    fn x64_mulxl_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
        let ret = self.temp_writable_gpr();
        let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
        let inst = asm::inst::mulxl_rvm::new(ret, ret, src1, src2);
        self.emit(&MInst::External { inst: inst.into() });
        ret.to_reg()
    }

    fn x64_mulxq_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
        let ret = self.temp_writable_gpr();
        let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
        let inst = asm::inst::mulxq_rvm::new(ret, ret, src1, src2);
        self.emit(&MInst::External { inst: inst.into() });
        ret.to_reg()
    }

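    // If `val` has exactly one bit set, return the index of that bit, suitable
    // for use as a `bt`-style bit-test immediate.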
    fn bt_imm(&mut self, val: u64) -> Option<u8> {
        if val.count_ones() == 1 {
            Some(u8::try_from(val.trailing_zeros()).unwrap())
        } else {
            None
        }
    }
}

impl IsleContext<'_, '_, MInst, X64Backend> {
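    /// Load 16 bytes from `addr` into a fresh XMM temporary using an
    /// unaligned load (`movdqu`).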
    fn load_xmm_unaligned(&mut self, addr: SyntheticAmode) -> Xmm {
        let tmp = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        self.lower_ctx.emit(MInst::External {
            inst: asm::inst::movdqu_a::new(
                Writable::from_reg(Xmm::unwrap_new(tmp.to_reg())),
                asm::XmmMem::Mem(addr.into()),
            )
            .into(),
        });
        Xmm::unwrap_new(tmp.to_reg())
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_to_assembler_read_write_gpr(&mut self, read: Gpr) -> asm::Gpr<PairedGpr> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Gpr::new(PairedGpr { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_to_assembler_fixed_read_write_gpr<const E: u8>(
        &mut self,
        read: Gpr,
    ) -> asm::Fixed<PairedGpr, E> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Fixed(PairedGpr { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_to_assembler_read_write_xmm(&mut self, read: Xmm) -> asm::Xmm<PairedXmm> {
        let write = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        let write = WritableXmm::from_writable_reg(write).unwrap();
        asm::Xmm::new(PairedXmm { read, write })
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_read_gpr_mem(&self, read: &GprMem) -> asm::GprMem<Gpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_read_xmm_mem_aligned(
        &self,
        read: &XmmMemAligned,
    ) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_read_xmm_mem(&self, read: &XmmMem) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_write_xmm_mem(
        &self,
        write: &XmmMem,
    ) -> asm::XmmMem<Writable<Xmm>, Gpr> {
        match write.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_xmm_mem_to_assembler_write_xmm_mem_aligned(
        &self,
        write: &XmmMemAligned,
    ) -> asm::XmmMem<Writable<Xmm>, Gpr> {
        match write.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_read_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<PairedGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(
                *self
                    .convert_gpr_to_assembler_read_write_gpr(Gpr::new(reg).unwrap())
                    .as_ref(),
            ),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_gpr_mem_to_assembler_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<WritableGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(WritableGpr::from_reg(Gpr::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    /// Helper used by code generated by the `cranelift-assembler-x64` crate.
    fn convert_amode_to_assembler_amode(&mut self, amode: &SyntheticAmode) -> asm::Amode<Gpr> {
        amode.clone().into()
    }
}

// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
// need to fix up the bits that migrate from one half of the lane to the
// other. Each 16-byte mask is indexed by the shift amount: e.g. if we shift
// right by 0 (no movement), we want to retain all the bits so we mask with
// `0xff`; if we shift right by 1, we want to retain all bits except the MSB so
// we mask with `0x7f`; etc.

#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
const I8X16_ISHL_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
    0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
    0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
    0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
    0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
];

#[rustfmt::skip] // Preserve 16 bytes (i.e. one mask) per row.
const I8X16_USHR_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];