pub(crate) mod generated_code;
use crate::{ir::AtomicRmwOp, ir::types};
use generated_code::{AssemblerOutputs, Context, MInst, RegisterClass};

use super::external::{CraneliftRegisters, PairedGpr, PairedXmm, isle_assembler_methods};
use super::{MergeableLoadSize, is_int_or_ref_ty, is_mergeable_load, lower_to_amode};
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::immediates::*;
use crate::ir::types::*;
use crate::ir::{
    BlockCall, Inst, InstructionData, LibCall, MemFlags, Opcode, TrapCode, Value, ValueList,
};
use crate::isa::x64::X64Backend;
use crate::isa::x64::inst::{ReturnCallInfo, args::*, regs};
use crate::isa::x64::lower::{InsnInput, emit_vm_call};
use crate::machinst::isle::*;
use crate::machinst::{
    ArgPair, CallArgList, CallInfo, CallRetList, InstOutput, MachInst, VCodeConstant,
    VCodeConstantData,
};
use alloc::vec::Vec;
use cranelift_assembler_x64 as asm;
use regalloc2::PReg;
use std::boxed::Box;

type BoxCallInfo = Box<CallInfo<ExternalName>>;
type BoxCallIndInfo = Box<CallInfo<RegMem>>;
type BoxReturnCallInfo = Box<ReturnCallInfo<ExternalName>>;
type BoxReturnCallIndInfo = Box<ReturnCallInfo<Reg>>;
type VecArgPair = Vec<ArgPair>;
type BoxSyntheticAmode = Box<SyntheticAmode>;

type AssemblerInst = asm::Inst<CraneliftRegisters>;

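/// A load instruction that can be sunk into (merged with) the instruction that
/// uses its value, becoming a memory operand of that instruction.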
pub struct SinkableLoad {
    inst: Inst,
    addr_input: InsnInput,
    offset: i32,
}

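/// The main entry point for lowering a single clif instruction with ISLE.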
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower(&mut isle_ctx, inst)
}

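/// The entry point for lowering a branch instruction with ISLE, given the
/// branch's resolved target labels.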
pub(crate) fn lower_branch(
    lower_ctx: &mut Lower<MInst>,
    backend: &X64Backend,
    branch: Inst,
    targets: &[MachLabel],
) -> Option<()> {
    let mut isle_ctx = IsleContext { lower_ctx, backend };
    generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets)
}

impl Context for IsleContext<'_, '_, MInst, X64Backend> {
    isle_lower_prelude_methods!();
    isle_assembler_methods!();

    fn gen_call_info(
        &mut self,
        sig: Sig,
        dest: ExternalName,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
    ) -> BoxCallInfo {
        let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
        let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

        Box::new(
            self.lower_ctx
                .gen_call_info(sig, dest, uses, defs, try_call_info),
        )
    }

    fn gen_call_ind_info(
        &mut self,
        sig: Sig,
        dest: &RegMem,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
    ) -> BoxCallIndInfo {
        let stack_ret_space = self.lower_ctx.sigs()[sig].sized_stack_ret_space();
        let stack_arg_space = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_outgoing_args_size(stack_ret_space + stack_arg_space);

        Box::new(
            self.lower_ctx
                .gen_call_info(sig, dest.clone(), uses, defs, try_call_info),
        )
    }

    fn gen_return_call_info(
        &mut self,
        sig: Sig,
        dest: ExternalName,
        uses: CallArgList,
    ) -> BoxReturnCallInfo {
        let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_tail_args_size(new_stack_arg_size);

        Box::new(ReturnCallInfo {
            dest,
            uses,
            tmp: self.lower_ctx.temp_writable_gpr(),
            new_stack_arg_size,
        })
    }

    fn gen_return_call_ind_info(
        &mut self,
        sig: Sig,
        dest: Reg,
        uses: CallArgList,
    ) -> BoxReturnCallIndInfo {
        let new_stack_arg_size = self.lower_ctx.sigs()[sig].sized_stack_arg_space();
        self.lower_ctx
            .abi_mut()
            .accumulate_tail_args_size(new_stack_arg_size);

        Box::new(ReturnCallInfo {
            dest,
            uses,
            tmp: self.lower_ctx.temp_writable_gpr(),
            new_stack_arg_size,
        })
    }

    #[inline]
    fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
        if ty.bits() == 64 {
            OperandSize::Size64
        } else {
            OperandSize::Size32
        }
    }

    #[inline]
    fn raw_operand_size_of_type(&mut self, ty: Type) -> OperandSize {
        OperandSize::from_ty(ty)
    }

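    /// Put `val` into the most permissive operand form available: prefer a
    /// 32-bit immediate when `val` is a small-enough constant, otherwise fall
    /// back to a register or memory operand.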
    fn put_in_reg_mem_imm(&mut self, val: Value) -> RegMemImm {
        if let Some(imm) = self.i64_from_iconst(val) {
            if let Ok(imm) = i32::try_from(imm) {
                return RegMemImm::Imm {
                    simm32: imm.cast_unsigned(),
                };
            }
        }

        self.put_in_reg_mem(val).into()
    }

    fn put_in_xmm_mem_imm(&mut self, val: Value) -> XmmMemImm {
        if let Some(imm) = self.i64_from_iconst(val) {
            if let Ok(imm) = i32::try_from(imm) {
                return XmmMemImm::unwrap_new(RegMemImm::Imm {
                    simm32: imm.cast_unsigned(),
                });
            }
        }

        let res = match self.put_in_xmm_mem(val).to_reg_mem() {
            RegMem::Reg { reg } => RegMemImm::Reg { reg },
            RegMem::Mem { addr } => RegMemImm::Mem { addr },
        };

        XmmMemImm::unwrap_new(res)
    }

    fn put_in_xmm_mem(&mut self, val: Value) -> XmmMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            let vcode_constant = self.emit_u128_le_const(c as u128);
            return XmmMem::unwrap_new(RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant)));
        }

        XmmMem::unwrap_new(self.put_in_reg_mem(val))
    }

    fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
        let inputs = self.lower_ctx.get_value_as_source_or_const(val);

        if let Some(c) = inputs.constant {
            let vcode_constant = self.emit_u64_le_const(c);
            return RegMem::mem(SyntheticAmode::ConstantOffset(vcode_constant));
        }

        if let Some(load) = self.sinkable_load(val) {
            return RegMem::Mem {
                addr: self.sink_load(&load),
            };
        }

        RegMem::reg(self.put_in_reg(val))
    }

    #[inline]
    fn encode_fcmp_imm(&mut self, imm: &FcmpImm) -> u8 {
        imm.encode()
    }

    #[inline]
    fn encode_round_imm(&mut self, imm: &RoundImm) -> u8 {
        imm.encode()
    }

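    // ISA-feature predicates: these expose the backend's x64 target flags to
    // ISLE rules so that lowering patterns can be guarded on CPU features.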
    #[inline]
    fn has_avx(&mut self) -> bool {
        self.backend.x64_flags.has_avx()
    }

    #[inline]
    fn use_avx2(&mut self) -> bool {
        self.backend.x64_flags.has_avx() && self.backend.x64_flags.has_avx2()
    }

    #[inline]
    fn has_avx512vl(&mut self) -> bool {
        self.backend.x64_flags.has_avx512vl()
    }

    #[inline]
    fn has_avx512dq(&mut self) -> bool {
        self.backend.x64_flags.has_avx512dq()
    }

    #[inline]
    fn has_avx512f(&mut self) -> bool {
        self.backend.x64_flags.has_avx512f()
    }

    #[inline]
    fn has_avx512bitalg(&mut self) -> bool {
        self.backend.x64_flags.has_avx512bitalg()
    }

    #[inline]
    fn has_avx512vbmi(&mut self) -> bool {
        self.backend.x64_flags.has_avx512vbmi()
    }

    #[inline]
    fn has_lzcnt(&mut self) -> bool {
        self.backend.x64_flags.has_lzcnt()
    }

    #[inline]
    fn has_bmi1(&mut self) -> bool {
        self.backend.x64_flags.has_bmi1()
    }

    #[inline]
    fn has_bmi2(&mut self) -> bool {
        self.backend.x64_flags.has_bmi2()
    }

    #[inline]
    fn use_popcnt(&mut self) -> bool {
        self.backend.x64_flags.has_popcnt() && self.backend.x64_flags.has_sse42()
    }

    #[inline]
    fn use_fma(&mut self) -> bool {
        self.backend.x64_flags.has_avx() && self.backend.x64_flags.has_fma()
    }

    #[inline]
    fn has_sse3(&mut self) -> bool {
        self.backend.x64_flags.has_sse3()
    }

    #[inline]
    fn has_ssse3(&mut self) -> bool {
        self.backend.x64_flags.has_ssse3()
    }

    #[inline]
    fn has_sse41(&mut self) -> bool {
        self.backend.x64_flags.has_sse41()
    }

    #[inline]
    fn use_sse42(&mut self) -> bool {
        self.backend.x64_flags.has_sse41() && self.backend.x64_flags.has_sse42()
    }

    #[inline]
    fn has_cmpxchg16b(&mut self) -> bool {
        self.backend.x64_flags.has_cmpxchg16b()
    }

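    /// Shift amounts wrap modulo the lane width in CLIF, so a shift by `n` is
    /// lowered as a shift by `n & (lane_bits - 1)`.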
    #[inline]
    fn shift_mask(&mut self, ty: Type) -> u8 {
        debug_assert!(ty.lane_bits().is_power_of_two());

        (ty.lane_bits() - 1) as u8
    }

    fn shift_amount_masked(&mut self, ty: Type, val: Imm64) -> u8 {
        (val.bits() as u8) & self.shift_mask(ty)
    }

    #[inline]
    fn simm32_from_value(&mut self, val: Value) -> Option<GprMemImm> {
        let imm = self.i64_from_iconst(val)?;
        Some(GprMemImm::unwrap_new(RegMemImm::Imm {
            simm32: i32::try_from(imm).ok()?.cast_unsigned(),
        }))
    }

    fn sinkable_load(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Min32)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sinkable_load_exact(&mut self, val: Value) -> Option<SinkableLoad> {
        if let Some(inst) = self.is_sinkable_inst(val) {
            if let Some((addr_input, offset)) =
                is_mergeable_load(self.lower_ctx, inst, MergeableLoadSize::Exact)
            {
                return Some(SinkableLoad {
                    inst,
                    addr_input,
                    offset,
                });
            }
        }
        None
    }

    fn sink_load(&mut self, load: &SinkableLoad) -> SyntheticAmode {
        self.lower_ctx.sink_inst(load.inst);
        let addr = lower_to_amode(self.lower_ctx, load.addr_input, load.offset);
        SyntheticAmode::Real(addr)
    }

    #[inline]
    fn ext_mode(&mut self, from_bits: u16, to_bits: u16) -> ExtMode {
        ExtMode::new(from_bits, to_bits).unwrap()
    }

    fn emit(&mut self, inst: &MInst) -> Unit {
        self.lower_ctx.emit(inst.clone());
    }

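    /// Build an `insertps` immediate that writes into destination lane `lane`:
    /// the destination lane index lives in bits 5:4, while the source-lane and
    /// zero-mask fields are left as zero.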
    #[inline]
    fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
        0b00_00_00_00 | lane << 4
    }

    #[inline]
    fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
        RegMem::mem(addr.clone())
    }

    #[inline]
    fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
        amode.clone().into()
    }

    #[inline]
    fn synthetic_amode_slot(&mut self, offset: i32) -> SyntheticAmode {
        SyntheticAmode::SlotOffset { simm32: offset }
    }

    #[inline]
    fn const_to_synthetic_amode(&mut self, c: VCodeConstant) -> SyntheticAmode {
        SyntheticAmode::ConstantOffset(c)
    }

    #[inline]
    fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
        r.to_writable_reg()
    }

    #[inline]
    fn writable_xmm_to_reg(&mut self, r: WritableXmm) -> WritableReg {
        r.to_writable_reg()
    }

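    // x86 has no per-byte SIMD shifts, so `ishl`/`ushr` on i8x16 are lowered
    // with a wider shift followed by an AND against one of the 16-byte masks
    // in `I8X16_ISHL_MASKS`/`I8X16_USHR_MASKS` below, indexed by shift amount.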
    fn ishl_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_ISHL_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ishl_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_ISHL_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    fn ushr_i8x16_mask_for_const(&mut self, amt: u32) -> SyntheticAmode {
        debug_assert!(amt < 8);
        let mask_offset = amt as usize * 16;
        let mask_constant = self.lower_ctx.use_constant(VCodeConstantData::WellKnown(
            &I8X16_USHR_MASKS[mask_offset..mask_offset + 16],
        ));
        SyntheticAmode::ConstantOffset(mask_constant)
    }

    fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode {
        let mask_table = self
            .lower_ctx
            .use_constant(VCodeConstantData::WellKnown(&I8X16_USHR_MASKS));
        SyntheticAmode::ConstantOffset(mask_table)
    }

    #[inline]
    fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
        Writable::from_reg(Xmm::unwrap_new(r.to_reg()))
    }

    #[inline]
    fn writable_xmm_to_xmm(&mut self, r: WritableXmm) -> Xmm {
        r.to_reg()
    }

    #[inline]
    fn writable_gpr_to_gpr(&mut self, r: WritableGpr) -> Gpr {
        r.to_reg()
    }

    #[inline]
    fn gpr_to_reg(&mut self, r: Gpr) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_reg(&mut self, r: Xmm) -> Reg {
        r.into()
    }

    #[inline]
    fn xmm_to_xmm_mem_imm(&mut self, r: Xmm) -> XmmMemImm {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_xmm_mem_imm(&mut self, r: &XmmMem) -> XmmMemImm {
        XmmMemImm::unwrap_new(r.clone().to_reg_mem().into())
    }

    #[inline]
    fn temp_writable_gpr(&mut self) -> WritableGpr {
        self.lower_ctx.temp_writable_gpr()
    }

    #[inline]
    fn temp_writable_xmm(&mut self) -> WritableXmm {
        self.lower_ctx.temp_writable_xmm()
    }

    #[inline]
    fn reg_to_reg_mem_imm(&mut self, reg: Reg) -> RegMemImm {
        RegMemImm::Reg { reg }
    }

    #[inline]
    fn reg_mem_to_xmm_mem(&mut self, rm: &RegMem) -> XmmMem {
        XmmMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn gpr_mem_imm_new(&mut self, rmi: &RegMemImm) -> GprMemImm {
        GprMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_mem_imm_new(&mut self, rmi: &RegMemImm) -> XmmMemImm {
        XmmMemImm::unwrap_new(rmi.clone())
    }

    #[inline]
    fn xmm_to_xmm_mem(&mut self, r: Xmm) -> XmmMem {
        r.into()
    }

    #[inline]
    fn xmm_mem_to_reg_mem(&mut self, xm: &XmmMem) -> RegMem {
        xm.clone().into()
    }

    #[inline]
    fn gpr_mem_to_reg_mem(&mut self, gm: &GprMem) -> RegMem {
        gm.clone().into()
    }

    #[inline]
    fn xmm_new(&mut self, r: Reg) -> Xmm {
        Xmm::unwrap_new(r)
    }

    #[inline]
    fn gpr_new(&mut self, r: Reg) -> Gpr {
        Gpr::unwrap_new(r)
    }

    #[inline]
    fn reg_mem_to_gpr_mem(&mut self, rm: &RegMem) -> GprMem {
        GprMem::unwrap_new(rm.clone())
    }

    #[inline]
    fn reg_to_gpr_mem(&mut self, r: Reg) -> GprMem {
        GprMem::unwrap_new(RegMem::reg(r))
    }

    #[inline]
    fn gpr_to_gpr_mem(&mut self, gpr: Gpr) -> GprMem {
        GprMem::from(gpr)
    }

    #[inline]
    fn gpr_to_gpr_mem_imm(&mut self, gpr: Gpr) -> GprMemImm {
        GprMemImm::from(gpr)
    }

    #[inline]
    fn type_register_class(&mut self, ty: Type) -> Option<RegisterClass> {
        if is_int_or_ref_ty(ty) || ty == I128 {
            Some(RegisterClass::Gpr {
                single_register: ty != I128,
            })
        } else if ty.is_float() || (ty.is_vector() && ty.bits() <= 128) {
            Some(RegisterClass::Xmm)
        } else {
            None
        }
    }

    #[inline]
    fn ty_int_bool_or_ref(&mut self, ty: Type) -> Option<()> {
        match ty {
            types::I8 | types::I16 | types::I32 | types::I64 => Some(()),
            _ => None,
        }
    }

    #[inline]
    fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
        CC::from_intcc(*intcc)
    }

    #[inline]
    fn cc_invert(&mut self, cc: &CC) -> CC {
        cc.invert()
    }

    #[inline]
    fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> {
        match cc {
            CC::Z => Some(*cc),
            CC::NZ => Some(*cc),
            _ => None,
        }
    }

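    /// Check whether zero-extending `constant_value` from `extend_from_ty` and
    /// adding `offset` produces a sum that still fits in a signed 32-bit
    /// displacement, returning the sum if so.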
    #[inline]
    fn sum_extend_fits_in_32_bits(
        &mut self,
        extend_from_ty: Type,
        constant_value: Imm64,
        offset: Offset32,
    ) -> Option<u32> {
        let offset: i64 = offset.into();
        let constant_value: u64 = constant_value.bits() as u64;
        let shift = 64 - extend_from_ty.bits();
        let zero_extended_constant_value = (constant_value << shift) >> shift;
        let sum = offset.wrapping_add(zero_extended_constant_value as i64);
        if sum == ((sum << 32) >> 32) {
            Some(sum as u32)
        } else {
            None
        }
    }

    #[inline]
    fn amode_offset(&mut self, addr: &SyntheticAmode, offset: i32) -> SyntheticAmode {
        addr.offset(offset)
    }

    #[inline]
    fn zero_offset(&mut self) -> Offset32 {
        Offset32::new(0)
    }

    #[inline]
    fn preg_rbp(&mut self) -> PReg {
        regs::rbp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_rsp(&mut self) -> PReg {
        regs::rsp().to_real_reg().unwrap().into()
    }

    #[inline]
    fn preg_pinned(&mut self) -> PReg {
        regs::pinned_reg().to_real_reg().unwrap().into()
    }

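    // Helpers for emitting calls to runtime library routines (libcalls) with
    // one, two, or three register arguments and a single register result.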
    fn libcall_1(&mut self, libcall: &LibCall, a: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    fn libcall_2(&mut self, libcall: &LibCall, a: Reg, b: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a), ValueRegs::one(b)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    fn libcall_3(&mut self, libcall: &LibCall, a: Reg, b: Reg, c: Reg) -> Reg {
        let outputs = emit_vm_call(
            self.lower_ctx,
            &self.backend.flags,
            &self.backend.triple,
            *libcall,
            &[ValueRegs::one(a), ValueRegs::one(b), ValueRegs::one(c)],
        )
        .expect("Failed to emit LibCall");

        debug_assert_eq!(outputs.len(), 1);

        outputs[0].only_reg().unwrap()
    }

    #[inline]
    fn vconst_all_ones_or_all_zeros(&mut self, constant: Constant) -> Option<()> {
        let const_data = self.lower_ctx.get_constant_data(constant);
        if const_data.iter().all(|&b| b == 0 || b == 0xFF) {
            return Some(());
        }
        None
    }

    #[inline]
    fn shuffle_0_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_0_15_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    #[inline]
    fn shuffle_16_31_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask
            .iter()
            .map(|&b| b.wrapping_sub(16))
            .map(|b| if b > 15 { 0b10000000 } else { b })
            .collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

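    /// For a shuffle mask that zeroes some lanes (indices greater than 31),
    /// return both the `pshufb`-style permutation constant and a companion
    /// constant of 0x00/0xff bytes to AND with so those lanes become zero.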
    #[inline]
    fn perm_from_mask_with_zeros(
        &mut self,
        mask: &VecMask,
    ) -> Option<(VCodeConstant, VCodeConstant)> {
        if !mask.iter().any(|&b| b > 31) {
            return None;
        }

        let zeros = mask
            .iter()
            .map(|&b| if b > 31 { 0x00 } else { 0xff })
            .collect();

        Some((
            self.perm_from_mask(mask),
            self.lower_ctx
                .use_constant(VCodeConstantData::Generated(zeros)),
        ))
    }

    #[inline]
    fn perm_from_mask(&mut self, mask: &VecMask) -> VCodeConstant {
        let mask = mask.iter().cloned().collect();
        self.lower_ctx
            .use_constant(VCodeConstantData::Generated(mask))
    }

    fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned {
        match XmmMemAligned::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMem::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

    fn xmm_mem_imm_to_xmm_mem_aligned_imm(&mut self, arg: &XmmMemImm) -> XmmMemAlignedImm {
        match XmmMemAlignedImm::new(arg.clone().into()) {
            Some(aligned) => aligned,
            None => match arg.clone().into() {
                RegMemImm::Mem { addr } => self.load_xmm_unaligned(addr).into(),
                _ => unreachable!(),
            },
        }
    }

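    // Recognizers for `shuffle` masks that can be encoded as a single
    // `pshufd`/`shufps`/`pshuflw`/`pshufhw` immediate; each returns `None`
    // when the mask does not match the lanes that instruction can address.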
    fn pshufd_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufd_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let c = c.checked_sub(4)?;
        let d = d.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn shufps_rev_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d) = self.shuffle32_from_imm(imm)?;
        let a = a.checked_sub(4)?;
        let b = b.checked_sub(4)?;
        if a < 4 && b < 4 && c < 4 && d < 4 {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshuflw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(8)?;
        let f = f.checked_sub(8)?;
        let g = g.checked_sub(8)?;
        let h = h.checked_sub(8)?;
        if a < 4 && b < 4 && c < 4 && d < 4 && [e, f, g, h] == [4, 5, 6, 7] {
            Some(a | (b << 2) | (c << 4) | (d << 6))
        } else {
            None
        }
    }

    fn pshufhw_lhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let e = e.checked_sub(4)?;
        let f = f.checked_sub(4)?;
        let g = g.checked_sub(4)?;
        let h = h.checked_sub(4)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn pshufhw_rhs_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;
        let a = a.checked_sub(8)?;
        let b = b.checked_sub(8)?;
        let c = c.checked_sub(8)?;
        let d = d.checked_sub(8)?;
        let e = e.checked_sub(12)?;
        let f = f.checked_sub(12)?;
        let g = g.checked_sub(12)?;
        let h = h.checked_sub(12)?;
        if e < 4 && f < 4 && g < 4 && h < 4 && [a, b, c, d] == [0, 1, 2, 3] {
            Some(e | (f << 2) | (g << 4) | (h << 6))
        } else {
            None
        }
    }

    fn palignr_imm_from_immediate(&mut self, imm: Immediate) -> Option<u8> {
        let bytes = self.lower_ctx.get_immediate_data(imm).as_slice();

        if bytes.windows(2).all(|a| a[0] + 1 == a[1]) {
            Some(bytes[0])
        } else {
            None
        }
    }

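    /// A `pblendw` immediate has one bit per 16-bit lane: bit `i` clear selects
    /// lane `i` of the first input (mask indices 0-7), bit `i` set selects lane
    /// `i` of the second input (mask indices 8-15). Returns `None` for masks
    /// that move lanes out of position.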
    fn pblendw_imm(&mut self, imm: Immediate) -> Option<u8> {
        let (a, b, c, d, e, f, g, h) = self.shuffle16_from_imm(imm)?;

        let bit = |x: u8, c: u8| {
            if x % 8 == c {
                if x < 8 { Some(0) } else { Some(1 << c) }
            } else {
                None
            }
        };
        Some(
            bit(a, 0)?
                | bit(b, 1)?
                | bit(c, 2)?
                | bit(d, 3)?
                | bit(e, 4)?
                | bit(f, 5)?
                | bit(g, 6)?
                | bit(h, 7)?,
        )
    }

    fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
        XmmMemImm::unwrap_new(RegMemImm::imm(imm))
    }

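    /// An all-ones 128-bit constant with a single zero byte at `hole_idx`,
    /// used to clear the destination lane before a byte is inserted.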
    fn insert_i8x16_lane_hole(&mut self, hole_idx: u8) -> VCodeConstant {
        let mask = -1i128 as u128;
        self.emit_u128_le_const(mask ^ (0xff << (hole_idx * 8)))
    }

    fn writable_invalid_gpr(&mut self) -> WritableGpr {
        let reg = Gpr::new(self.invalid_reg()).unwrap();
        WritableGpr::from_reg(reg)
    }

    fn box_synthetic_amode(&mut self, amode: &SyntheticAmode) -> BoxSyntheticAmode {
        Box::new(amode.clone())
    }

    fn is_imm8(&mut self, src: &GprMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_imm8_xmm(&mut self, src: &XmmMemImm) -> Option<u8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i8::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_simm8(&mut self, src: &GprMemImm) -> Option<i8> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i8::try_from(simm32.cast_signed()).ok()?),
            _ => None,
        }
    }

    fn is_imm16(&mut self, src: &GprMemImm) -> Option<u16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => {
                Some(i16::try_from(simm32.cast_signed()).ok()?.cast_unsigned())
            }
            _ => None,
        }
    }

    fn is_simm16(&mut self, src: &GprMemImm) -> Option<i16> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(i16::try_from(simm32.cast_signed()).ok()?),
            _ => None,
        }
    }

    fn is_imm32(&mut self, src: &GprMemImm) -> Option<u32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32),
            _ => None,
        }
    }

    fn is_simm32(&mut self, src: &GprMemImm) -> Option<i32> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Imm { simm32 } => Some(simm32 as i32),
            _ => None,
        }
    }

    fn is_gpr(&mut self, src: &GprMemImm) -> Option<Gpr> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => Gpr::new(reg),
            _ => None,
        }
    }

    fn is_xmm(&mut self, src: &XmmMem) -> Option<Xmm> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { reg } => Xmm::new(reg),
            _ => None,
        }
    }

    fn is_gpr_mem(&mut self, src: &GprMemImm) -> Option<GprMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => GprMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => GprMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_xmm_mem(&mut self, src: &XmmMemImm) -> Option<XmmMem> {
        match src.clone().to_reg_mem_imm() {
            RegMemImm::Reg { reg } => XmmMem::new(RegMem::Reg { reg }),
            RegMemImm::Mem { addr } => XmmMem::new(RegMem::Mem { addr }),
            _ => None,
        }
    }

    fn is_mem(&mut self, src: &XmmMem) -> Option<SyntheticAmode> {
        match src.clone().to_reg_mem() {
            RegMem::Reg { .. } => None,
            RegMem::Mem { addr } => Some(addr),
        }
    }

    fn x64_mulxl_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
        let ret = self.temp_writable_gpr();
        let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
        let inst = asm::inst::mulxl_rvm::new(ret, ret, src1, src2);
        self.emit(&MInst::External { inst: inst.into() });
        ret.to_reg()
    }

    fn x64_mulxq_rvm_hi(&mut self, src1: &GprMem, src2: Gpr) -> Gpr {
        let ret = self.temp_writable_gpr();
        let src1 = self.convert_gpr_mem_to_assembler_read_gpr_mem(src1);
        let inst = asm::inst::mulxq_rvm::new(ret, ret, src1, src2);
        self.emit(&MInst::External { inst: inst.into() });
        ret.to_reg()
    }

    fn bt_imm(&mut self, val: u64) -> Option<u8> {
        if val.count_ones() == 1 {
            Some(u8::try_from(val.trailing_zeros()).unwrap())
        } else {
            None
        }
    }
}

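// Supporting conversions used by the ISLE glue above; these are plain inherent
// methods rather than `Context` trait methods.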
impl IsleContext<'_, '_, MInst, X64Backend> {
    fn load_xmm_unaligned(&mut self, addr: SyntheticAmode) -> Xmm {
        let tmp = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        self.lower_ctx.emit(MInst::External {
            inst: asm::inst::movdqu_a::new(
                Writable::from_reg(Xmm::unwrap_new(tmp.to_reg())),
                asm::XmmMem::Mem(addr.into()),
            )
            .into(),
        });
        Xmm::unwrap_new(tmp.to_reg())
    }

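    // Conversions from Cranelift operand wrappers to the external assembler's
    // operand types. Read-modify-write operands are modeled as `Paired*`
    // values: the register being read plus a freshly allocated writable
    // register that the register allocator later ties back to the same
    // physical register.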
    fn convert_gpr_to_assembler_read_write_gpr(&mut self, read: Gpr) -> asm::Gpr<PairedGpr> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Gpr::new(PairedGpr { read, write })
    }

    fn convert_gpr_to_assembler_fixed_read_write_gpr<const E: u8>(
        &mut self,
        read: Gpr,
    ) -> asm::Fixed<PairedGpr, E> {
        let write = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
        let write = WritableGpr::from_writable_reg(write).unwrap();
        asm::Fixed(PairedGpr { read, write })
    }

    fn convert_xmm_to_assembler_read_write_xmm(&mut self, read: Xmm) -> asm::Xmm<PairedXmm> {
        let write = self.lower_ctx.alloc_tmp(types::F32X4).only_reg().unwrap();
        let write = WritableXmm::from_writable_reg(write).unwrap();
        asm::Xmm::new(PairedXmm { read, write })
    }

    fn convert_gpr_mem_to_assembler_read_gpr_mem(&self, read: &GprMem) -> asm::GprMem<Gpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(Gpr::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    fn convert_xmm_mem_to_assembler_read_xmm_mem_aligned(
        &self,
        read: &XmmMemAligned,
    ) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    fn convert_xmm_mem_to_assembler_read_xmm_mem(&self, read: &XmmMem) -> asm::XmmMem<Xmm, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Xmm::new(reg).unwrap()),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    fn convert_xmm_mem_to_assembler_write_xmm_mem(
        &self,
        write: &XmmMem,
    ) -> asm::XmmMem<Writable<Xmm>, Gpr> {
        match write.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    fn convert_xmm_mem_to_assembler_write_xmm_mem_aligned(
        &self,
        write: &XmmMemAligned,
    ) -> asm::XmmMem<Writable<Xmm>, Gpr> {
        match write.clone().into() {
            RegMem::Reg { reg } => asm::XmmMem::Xmm(Writable::from_reg(Xmm::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::XmmMem::Mem(addr.into()),
        }
    }

    fn convert_gpr_mem_to_assembler_read_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<PairedGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(
                *self
                    .convert_gpr_to_assembler_read_write_gpr(Gpr::new(reg).unwrap())
                    .as_ref(),
            ),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    fn convert_gpr_mem_to_assembler_write_gpr_mem(
        &mut self,
        read: &GprMem,
    ) -> asm::GprMem<WritableGpr, Gpr> {
        match read.clone().into() {
            RegMem::Reg { reg } => asm::GprMem::Gpr(WritableGpr::from_reg(Gpr::new(reg).unwrap())),
            RegMem::Mem { addr } => asm::GprMem::Mem(addr.into()),
        }
    }

    fn convert_amode_to_assembler_amode(&mut self, amode: &SyntheticAmode) -> asm::Amode<Gpr> {
        amode.clone().into()
    }
}

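// Masks for lowering `ishl.i8x16`: row `n` (16 bytes) clears the low `n` bits
// of every byte, removing the bits that a wider (16-bit) shift would otherwise
// carry across byte boundaries.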
#[rustfmt::skip]
const I8X16_ISHL_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
    0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
    0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
    0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
    0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
];

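// Masks for lowering `ushr.i8x16`: row `n` clears the high `n` bits of every
// byte, removing the bits that a wider shift would otherwise pull in from the
// neighboring byte.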
#[rustfmt::skip]
const I8X16_USHR_MASKS: [u8; 128] = [
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];