cranelift_codegen/opts/
div_const.rs

1//! Compute "magic numbers" for division-by-constants transformations.
2//!
3//! Math helpers for division by (non-power-of-2) constants. This is based
4//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
5//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
6//! makes little difference, but the signed-vs-unsigned aspect has a large
7//! effect. Therefore everything is presented in the order U32 U64 S32 S64
8//! so as to emphasise the similarity of the U32 and U64 cases and the S32
9//! and S64 cases.
10
11// Structures to hold the "magic numbers" computed.
12
13#[derive(PartialEq, Debug)]
14pub struct MU32 {
15    pub mul_by: u32,
16    pub do_add: bool,
17    pub shift_by: i32,
18}
19
20#[derive(PartialEq, Debug)]
21pub struct MU64 {
22    pub mul_by: u64,
23    pub do_add: bool,
24    pub shift_by: i32,
25}
26
27#[derive(PartialEq, Debug)]
28pub struct MS32 {
29    pub mul_by: i32,
30    pub shift_by: i32,
31}
32
33#[derive(PartialEq, Debug)]
34pub struct MS64 {
35    pub mul_by: i64,
36    pub shift_by: i32,
37}
38
39// The actual "magic number" generators follow.
40
41pub fn magic_u32(d: u32) -> MU32 {
42    debug_assert_ne!(d, 0);
43    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
44
45    let mut do_add: bool = false;
46    let mut p: i32 = 31;
47    let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d;
48    let mut q1: u32 = 0x80000000u32 / nc;
49    let mut r1: u32 = 0x80000000u32 - q1 * nc;
50    let mut q2: u32 = 0x7FFFFFFFu32 / d;
51    let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;
52    loop {
53        p = p + 1;
54        if r1 >= nc - r1 {
55            q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1);
56            r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc);
57        } else {
58            q1 = u32::wrapping_mul(2, q1);
59            r1 = 2 * r1;
60        }
61        if r2 + 1 >= d - r2 {
62            if q2 >= 0x7FFFFFFFu32 {
63                do_add = true;
64            }
65            q2 = 2 * q2 + 1;
66            r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d);
67        } else {
68            if q2 >= 0x80000000u32 {
69                do_add = true;
70            }
71            q2 = u32::wrapping_mul(2, q2);
72            r2 = 2 * r2 + 1;
73        }
74        let delta: u32 = d - 1 - r2;
75        if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) {
76            break;
77        }
78    }
79
80    MU32 {
81        mul_by: q2 + 1,
82        do_add,
83        shift_by: p - 32,
84    }
85}
86
87pub fn magic_u64(d: u64) -> MU64 {
88    debug_assert_ne!(d, 0);
89    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
90
91    let mut do_add: bool = false;
92    let mut p: i32 = 63;
93    let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d;
94    let mut q1: u64 = 0x8000000000000000u64 / nc;
95    let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;
96    let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;
97    let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;
98    loop {
99        p = p + 1;
100        if r1 >= nc - r1 {
101            q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1);
102            r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc);
103        } else {
104            q1 = u64::wrapping_mul(2, q1);
105            r1 = 2 * r1;
106        }
107        if r2 + 1 >= d - r2 {
108            if q2 >= 0x7FFFFFFFFFFFFFFFu64 {
109                do_add = true;
110            }
111            q2 = 2 * q2 + 1;
112            r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d);
113        } else {
114            if q2 >= 0x8000000000000000u64 {
115                do_add = true;
116            }
117            q2 = u64::wrapping_mul(2, q2);
118            r2 = 2 * r2 + 1;
119        }
120        let delta: u64 = d - 1 - r2;
121        if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) {
122            break;
123        }
124    }
125
126    MU64 {
127        mul_by: q2 + 1,
128        do_add,
129        shift_by: p - 64,
130    }
131}
132
133pub fn magic_s32(d: i32) -> MS32 {
134    debug_assert_ne!(d, -1);
135    debug_assert_ne!(d, 0);
136    debug_assert_ne!(d, 1);
137    let two31: u32 = 0x80000000u32;
138    let mut p: i32 = 31;
139    let ad: u32 = i32::wrapping_abs(d) as u32;
140    let t: u32 = two31 + ((d as u32) >> 31);
141    let anc: u32 = u32::wrapping_sub(t - 1, t % ad);
142    let mut q1: u32 = two31 / anc;
143    let mut r1: u32 = two31 - q1 * anc;
144    let mut q2: u32 = two31 / ad;
145    let mut r2: u32 = two31 - q2 * ad;
146    loop {
147        p = p + 1;
148        q1 = 2 * q1;
149        r1 = 2 * r1;
150        if r1 >= anc {
151            q1 = q1 + 1;
152            r1 = r1 - anc;
153        }
154        q2 = 2 * q2;
155        r2 = 2 * r2;
156        if r2 >= ad {
157            q2 = q2 + 1;
158            r2 = r2 - ad;
159        }
160        let delta: u32 = ad - r2;
161        if !(q1 < delta || (q1 == delta && r1 == 0)) {
162            break;
163        }
164    }
165
166    MS32 {
167        mul_by: (if d < 0 {
168            u32::wrapping_neg(q2 + 1)
169        } else {
170            q2 + 1
171        }) as i32,
172        shift_by: p - 32,
173    }
174}
175
176pub fn magic_s64(d: i64) -> MS64 {
177    debug_assert_ne!(d, -1);
178    debug_assert_ne!(d, 0);
179    debug_assert_ne!(d, 1);
180    let two63: u64 = 0x8000000000000000u64;
181    let mut p: i32 = 63;
182    let ad: u64 = i64::wrapping_abs(d) as u64;
183    let t: u64 = two63 + ((d as u64) >> 63);
184    let anc: u64 = u64::wrapping_sub(t - 1, t % ad);
185    let mut q1: u64 = two63 / anc;
186    let mut r1: u64 = two63 - q1 * anc;
187    let mut q2: u64 = two63 / ad;
188    let mut r2: u64 = two63 - q2 * ad;
189    loop {
190        p = p + 1;
191        q1 = 2 * q1;
192        r1 = 2 * r1;
193        if r1 >= anc {
194            q1 = q1 + 1;
195            r1 = r1 - anc;
196        }
197        q2 = 2 * q2;
198        r2 = 2 * r2;
199        if r2 >= ad {
200            q2 = q2 + 1;
201            r2 = r2 - ad;
202        }
203        let delta: u64 = ad - r2;
204        if !(q1 < delta || (q1 == delta && r1 == 0)) {
205            break;
206        }
207    }
208
209    MS64 {
210        mul_by: (if d < 0 {
211            u64::wrapping_neg(q2 + 1)
212        } else {
213            q2 + 1
214        }) as i64,
215        shift_by: p - 64,
216    }
217}
218
219#[cfg(test)]
220mod tests {
221    use super::{MS32, MS64, MU32, MU64};
222    use super::{magic_s32, magic_s64, magic_u32, magic_u64};
223    use proptest::strategy::Strategy;
224
225    fn make_mu32(mul_by: u32, do_add: bool, shift_by: i32) -> MU32 {
226        MU32 {
227            mul_by,
228            do_add,
229            shift_by,
230        }
231    }
232
233    fn make_mu64(mul_by: u64, do_add: bool, shift_by: i32) -> MU64 {
234        MU64 {
235            mul_by,
236            do_add,
237            shift_by,
238        }
239    }
240
241    fn make_ms32(mul_by: i32, shift_by: i32) -> MS32 {
242        MS32 { mul_by, shift_by }
243    }
244
245    fn make_ms64(mul_by: i64, shift_by: i32) -> MS64 {
246        MS64 { mul_by, shift_by }
247    }
248
249    #[test]
250    fn test_magic_u32() {
251        assert_eq!(magic_u32(2u32), make_mu32(0x80000000u32, false, 0));
252        assert_eq!(magic_u32(3u32), make_mu32(0xaaaaaaabu32, false, 1));
253        assert_eq!(magic_u32(4u32), make_mu32(0x40000000u32, false, 0));
254        assert_eq!(magic_u32(5u32), make_mu32(0xcccccccdu32, false, 2));
255        assert_eq!(magic_u32(6u32), make_mu32(0xaaaaaaabu32, false, 2));
256        assert_eq!(magic_u32(7u32), make_mu32(0x24924925u32, true, 3));
257        assert_eq!(magic_u32(9u32), make_mu32(0x38e38e39u32, false, 1));
258        assert_eq!(magic_u32(10u32), make_mu32(0xcccccccdu32, false, 3));
259        assert_eq!(magic_u32(11u32), make_mu32(0xba2e8ba3u32, false, 3));
260        assert_eq!(magic_u32(12u32), make_mu32(0xaaaaaaabu32, false, 3));
261        assert_eq!(magic_u32(25u32), make_mu32(0x51eb851fu32, false, 3));
262        assert_eq!(magic_u32(125u32), make_mu32(0x10624dd3u32, false, 3));
263        assert_eq!(magic_u32(625u32), make_mu32(0xd1b71759u32, false, 9));
264        assert_eq!(magic_u32(1337u32), make_mu32(0x88233b2bu32, true, 11));
265        assert_eq!(magic_u32(65535u32), make_mu32(0x80008001u32, false, 15));
266        assert_eq!(magic_u32(65536u32), make_mu32(0x00010000u32, false, 0));
267        assert_eq!(magic_u32(65537u32), make_mu32(0xffff0001u32, false, 16));
268        assert_eq!(magic_u32(31415927u32), make_mu32(0x445b4553u32, false, 23));
269        assert_eq!(
270            magic_u32(0xdeadbeefu32),
271            make_mu32(0x93275ab3u32, false, 31)
272        );
273        assert_eq!(
274            magic_u32(0xfffffffdu32),
275            make_mu32(0x40000001u32, false, 30)
276        );
277        assert_eq!(magic_u32(0xfffffffeu32), make_mu32(0x00000003u32, true, 32));
278        assert_eq!(
279            magic_u32(0xffffffffu32),
280            make_mu32(0x80000001u32, false, 31)
281        );
282    }
283
284    #[test]
285    fn test_magic_u64() {
286        assert_eq!(magic_u64(2u64), make_mu64(0x8000000000000000u64, false, 0));
287        assert_eq!(magic_u64(3u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 1));
288        assert_eq!(magic_u64(4u64), make_mu64(0x4000000000000000u64, false, 0));
289        assert_eq!(magic_u64(5u64), make_mu64(0xcccccccccccccccdu64, false, 2));
290        assert_eq!(magic_u64(6u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 2));
291        assert_eq!(magic_u64(7u64), make_mu64(0x2492492492492493u64, true, 3));
292        assert_eq!(magic_u64(9u64), make_mu64(0xe38e38e38e38e38fu64, false, 3));
293        assert_eq!(magic_u64(10u64), make_mu64(0xcccccccccccccccdu64, false, 3));
294        assert_eq!(magic_u64(11u64), make_mu64(0x2e8ba2e8ba2e8ba3u64, false, 1));
295        assert_eq!(magic_u64(12u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 3));
296        assert_eq!(magic_u64(25u64), make_mu64(0x47ae147ae147ae15u64, true, 5));
297        assert_eq!(magic_u64(125u64), make_mu64(0x0624dd2f1a9fbe77u64, true, 7));
298        assert_eq!(
299            magic_u64(625u64),
300            make_mu64(0x346dc5d63886594bu64, false, 7)
301        );
302        assert_eq!(
303            magic_u64(1337u64),
304            make_mu64(0xc4119d952866a139u64, false, 10)
305        );
306        assert_eq!(
307            magic_u64(31415927u64),
308            make_mu64(0x116d154b9c3d2f85u64, true, 25)
309        );
310        assert_eq!(
311            magic_u64(0x00000000deadbeefu64),
312            make_mu64(0x93275ab2dfc9094bu64, false, 31)
313        );
314        assert_eq!(
315            magic_u64(0x00000000fffffffdu64),
316            make_mu64(0x8000000180000005u64, false, 31)
317        );
318        assert_eq!(
319            magic_u64(0x00000000fffffffeu64),
320            make_mu64(0x0000000200000005u64, true, 32)
321        );
322        assert_eq!(
323            magic_u64(0x00000000ffffffffu64),
324            make_mu64(0x8000000080000001u64, false, 31)
325        );
326        assert_eq!(
327            magic_u64(0x0000000100000000u64),
328            make_mu64(0x0000000100000000u64, false, 0)
329        );
330        assert_eq!(
331            magic_u64(0x0000000100000001u64),
332            make_mu64(0xffffffff00000001u64, false, 32)
333        );
334        assert_eq!(
335            magic_u64(0x0ddc0ffeebadf00du64),
336            make_mu64(0x2788e9d394b77da1u64, true, 60)
337        );
338        assert_eq!(
339            magic_u64(0xfffffffffffffffdu64),
340            make_mu64(0x4000000000000001u64, false, 62)
341        );
342        assert_eq!(
343            magic_u64(0xfffffffffffffffeu64),
344            make_mu64(0x0000000000000003u64, true, 64)
345        );
346        assert_eq!(
347            magic_u64(0xffffffffffffffffu64),
348            make_mu64(0x8000000000000001u64, false, 63)
349        );
350    }
351
352    #[test]
353    fn test_magic_s32() {
354        assert_eq!(
355            magic_s32(-0x80000000i32),
356            make_ms32(0x7fffffffu32 as i32, 30)
357        );
358        assert_eq!(
359            magic_s32(-0x7FFFFFFFi32),
360            make_ms32(0xbfffffffu32 as i32, 29)
361        );
362        assert_eq!(
363            magic_s32(-0x7FFFFFFEi32),
364            make_ms32(0x7ffffffdu32 as i32, 30)
365        );
366        assert_eq!(magic_s32(-31415927i32), make_ms32(0xbba4baadu32 as i32, 23));
367        assert_eq!(magic_s32(-1337i32), make_ms32(0x9df73135u32 as i32, 9));
368        assert_eq!(magic_s32(-256i32), make_ms32(0x7fffffffu32 as i32, 7));
369        assert_eq!(magic_s32(-5i32), make_ms32(0x99999999u32 as i32, 1));
370        assert_eq!(magic_s32(-3i32), make_ms32(0x55555555u32 as i32, 1));
371        assert_eq!(magic_s32(-2i32), make_ms32(0x7fffffffu32 as i32, 0));
372        assert_eq!(magic_s32(2i32), make_ms32(0x80000001u32 as i32, 0));
373        assert_eq!(magic_s32(3i32), make_ms32(0x55555556u32 as i32, 0));
374        assert_eq!(magic_s32(4i32), make_ms32(0x80000001u32 as i32, 1));
375        assert_eq!(magic_s32(5i32), make_ms32(0x66666667u32 as i32, 1));
376        assert_eq!(magic_s32(6i32), make_ms32(0x2aaaaaabu32 as i32, 0));
377        assert_eq!(magic_s32(7i32), make_ms32(0x92492493u32 as i32, 2));
378        assert_eq!(magic_s32(9i32), make_ms32(0x38e38e39u32 as i32, 1));
379        assert_eq!(magic_s32(10i32), make_ms32(0x66666667u32 as i32, 2));
380        assert_eq!(magic_s32(11i32), make_ms32(0x2e8ba2e9u32 as i32, 1));
381        assert_eq!(magic_s32(12i32), make_ms32(0x2aaaaaabu32 as i32, 1));
382        assert_eq!(magic_s32(25i32), make_ms32(0x51eb851fu32 as i32, 3));
383        assert_eq!(magic_s32(125i32), make_ms32(0x10624dd3u32 as i32, 3));
384        assert_eq!(magic_s32(625i32), make_ms32(0x68db8badu32 as i32, 8));
385        assert_eq!(magic_s32(1337i32), make_ms32(0x6208cecbu32 as i32, 9));
386        assert_eq!(magic_s32(31415927i32), make_ms32(0x445b4553u32 as i32, 23));
387        assert_eq!(
388            magic_s32(0x7ffffffei32),
389            make_ms32(0x80000003u32 as i32, 30)
390        );
391        assert_eq!(
392            magic_s32(0x7fffffffi32),
393            make_ms32(0x40000001u32 as i32, 29)
394        );
395    }
396
397    #[test]
398    fn test_magic_s64() {
399        assert_eq!(
400            magic_s64(-0x8000000000000000i64),
401            make_ms64(0x7fffffffffffffffu64 as i64, 62)
402        );
403        assert_eq!(
404            magic_s64(-0x7FFFFFFFFFFFFFFFi64),
405            make_ms64(0xbfffffffffffffffu64 as i64, 61)
406        );
407        assert_eq!(
408            magic_s64(-0x7FFFFFFFFFFFFFFEi64),
409            make_ms64(0x7ffffffffffffffdu64 as i64, 62)
410        );
411        assert_eq!(
412            magic_s64(-0x0ddC0ffeeBadF00di64),
413            make_ms64(0x6c3b8b1635a4412fu64 as i64, 59)
414        );
415        assert_eq!(
416            magic_s64(-0x100000001i64),
417            make_ms64(0x800000007fffffffu64 as i64, 31)
418        );
419        assert_eq!(
420            magic_s64(-0x100000000i64),
421            make_ms64(0x7fffffffffffffffu64 as i64, 31)
422        );
423        assert_eq!(
424            magic_s64(-0xFFFFFFFFi64),
425            make_ms64(0x7fffffff7fffffffu64 as i64, 31)
426        );
427        assert_eq!(
428            magic_s64(-0xFFFFFFFEi64),
429            make_ms64(0x7ffffffefffffffdu64 as i64, 31)
430        );
431        assert_eq!(
432            magic_s64(-0xFFFFFFFDi64),
433            make_ms64(0x7ffffffe7ffffffbu64 as i64, 31)
434        );
435        assert_eq!(
436            magic_s64(-0xDeadBeefi64),
437            make_ms64(0x6cd8a54d2036f6b5u64 as i64, 31)
438        );
439        assert_eq!(
440            magic_s64(-31415927i64),
441            make_ms64(0x7749755a31e1683du64 as i64, 24)
442        );
443        assert_eq!(
444            magic_s64(-1337i64),
445            make_ms64(0x9df731356bccaf63u64 as i64, 9)
446        );
447        assert_eq!(
448            magic_s64(-256i64),
449            make_ms64(0x7fffffffffffffffu64 as i64, 7)
450        );
451        assert_eq!(magic_s64(-5i64), make_ms64(0x9999999999999999u64 as i64, 1));
452        assert_eq!(magic_s64(-3i64), make_ms64(0x5555555555555555u64 as i64, 1));
453        assert_eq!(magic_s64(-2i64), make_ms64(0x7fffffffffffffffu64 as i64, 0));
454        assert_eq!(magic_s64(2i64), make_ms64(0x8000000000000001u64 as i64, 0));
455        assert_eq!(magic_s64(3i64), make_ms64(0x5555555555555556u64 as i64, 0));
456        assert_eq!(magic_s64(4i64), make_ms64(0x8000000000000001u64 as i64, 1));
457        assert_eq!(magic_s64(5i64), make_ms64(0x6666666666666667u64 as i64, 1));
458        assert_eq!(magic_s64(6i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 0));
459        assert_eq!(magic_s64(7i64), make_ms64(0x4924924924924925u64 as i64, 1));
460        assert_eq!(magic_s64(9i64), make_ms64(0x1c71c71c71c71c72u64 as i64, 0));
461        assert_eq!(magic_s64(10i64), make_ms64(0x6666666666666667u64 as i64, 2));
462        assert_eq!(magic_s64(11i64), make_ms64(0x2e8ba2e8ba2e8ba3u64 as i64, 1));
463        assert_eq!(magic_s64(12i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 1));
464        assert_eq!(magic_s64(25i64), make_ms64(0xa3d70a3d70a3d70bu64 as i64, 4));
465        assert_eq!(
466            magic_s64(125i64),
467            make_ms64(0x20c49ba5e353f7cfu64 as i64, 4)
468        );
469        assert_eq!(
470            magic_s64(625i64),
471            make_ms64(0x346dc5d63886594bu64 as i64, 7)
472        );
473        assert_eq!(
474            magic_s64(1337i64),
475            make_ms64(0x6208ceca9433509du64 as i64, 9)
476        );
477        assert_eq!(
478            magic_s64(31415927i64),
479            make_ms64(0x88b68aa5ce1e97c3u64 as i64, 24)
480        );
481        assert_eq!(
482            magic_s64(0x00000000deadbeefi64),
483            make_ms64(0x93275ab2dfc9094bu64 as i64, 31)
484        );
485        assert_eq!(
486            magic_s64(0x00000000fffffffdi64),
487            make_ms64(0x8000000180000005u64 as i64, 31)
488        );
489        assert_eq!(
490            magic_s64(0x00000000fffffffei64),
491            make_ms64(0x8000000100000003u64 as i64, 31)
492        );
493        assert_eq!(
494            magic_s64(0x00000000ffffffffi64),
495            make_ms64(0x8000000080000001u64 as i64, 31)
496        );
497        assert_eq!(
498            magic_s64(0x0000000100000000i64),
499            make_ms64(0x8000000000000001u64 as i64, 31)
500        );
501        assert_eq!(
502            magic_s64(0x0000000100000001i64),
503            make_ms64(0x7fffffff80000001u64 as i64, 31)
504        );
505        assert_eq!(
506            magic_s64(0x0ddc0ffeebadf00di64),
507            make_ms64(0x93c474e9ca5bbed1u64 as i64, 59)
508        );
509        assert_eq!(
510            magic_s64(0x7ffffffffffffffdi64),
511            make_ms64(0x2000000000000001u64 as i64, 60)
512        );
513        assert_eq!(
514            magic_s64(0x7ffffffffffffffei64),
515            make_ms64(0x8000000000000003u64 as i64, 62)
516        );
517        assert_eq!(
518            magic_s64(0x7fffffffffffffffi64),
519            make_ms64(0x4000000000000001u64 as i64, 61)
520        );
521    }
522
523    #[test]
524    fn test_magic_generators_dont_panic() {
525        // The point of this is to check that the magic number generators
526        // don't panic with integer wraparounds, especially at boundary cases
527        // for their arguments. The actual results are thrown away, although
528        // we force `total` to be used, so that rustc can't optimise the
529        // entire computation away.
530
531        // Testing UP magic_u32
532        let mut total: u64 = 0;
533        for x in 2..(200 * 1000u32) {
534            let m = magic_u32(x);
535            total = total ^ (m.mul_by as u64);
536            total = total + (m.shift_by as u64);
537            total = total + (if m.do_add { 123 } else { 456 });
538        }
539        assert_eq!(total, 2481999609);
540
541        total = 0;
542        // Testing MIDPOINT magic_u32
543        for x in 0x8000_0000u32 - 10 * 1000u32..0x8000_0000u32 + 10 * 1000u32 {
544            let m = magic_u32(x);
545            total = total ^ (m.mul_by as u64);
546            total = total + (m.shift_by as u64);
547            total = total + (if m.do_add { 123 } else { 456 });
548        }
549        assert_eq!(total, 2399809723);
550
551        total = 0;
552        // Testing DOWN magic_u32
553        for x in 0..(200 * 1000u32) {
554            let m = magic_u32(0xFFFF_FFFFu32 - x);
555            total = total ^ (m.mul_by as u64);
556            total = total + (m.shift_by as u64);
557            total = total + (if m.do_add { 123 } else { 456 });
558        }
559        assert_eq!(total, 271138267);
560
561        // Testing UP magic_u64
562        total = 0;
563        for x in 2..(200 * 1000u64) {
564            let m = magic_u64(x);
565            total = total ^ m.mul_by;
566            total = total + (m.shift_by as u64);
567            total = total + (if m.do_add { 123 } else { 456 });
568        }
569        assert_eq!(total, 7430004086976261161);
570
571        total = 0;
572        // Testing MIDPOINT magic_u64
573        for x in 0x8000_0000_0000_0000u64 - 10 * 1000u64..0x8000_0000_0000_0000u64 + 10 * 1000u64 {
574            let m = magic_u64(x);
575            total = total ^ m.mul_by;
576            total = total + (m.shift_by as u64);
577            total = total + (if m.do_add { 123 } else { 456 });
578        }
579        assert_eq!(total, 10312117246769520603);
580
581        // Testing DOWN magic_u64
582        total = 0;
583        for x in 0..(200 * 1000u64) {
584            let m = magic_u64(0xFFFF_FFFF_FFFF_FFFFu64 - x);
585            total = total ^ m.mul_by;
586            total = total + (m.shift_by as u64);
587            total = total + (if m.do_add { 123 } else { 456 });
588        }
589        assert_eq!(total, 1126603594357269734);
590
591        // Testing UP magic_s32
592        total = 0;
593        for x in 0..(200 * 1000i32) {
594            let m = magic_s32(-0x8000_0000i32 + x);
595            total = total ^ (m.mul_by as u64);
596            total = total + (m.shift_by as u64);
597        }
598        assert_eq!(total, 18446744069953376812);
599
600        total = 0;
601        // Testing MIDPOINT magic_s32
602        for x in 0..(200 * 1000i32) {
603            let x2 = -100 * 1000i32 + x;
604            if x2 != -1 && x2 != 0 && x2 != 1 {
605                let m = magic_s32(x2);
606                total = total ^ (m.mul_by as u64);
607                total = total + (m.shift_by as u64);
608            }
609        }
610        assert_eq!(total, 351839350);
611
612        // Testing DOWN magic_s32
613        total = 0;
614        for x in 0..(200 * 1000i32) {
615            let m = magic_s32(0x7FFF_FFFFi32 - x);
616            total = total ^ (m.mul_by as u64);
617            total = total + (m.shift_by as u64);
618        }
619        assert_eq!(total, 18446744072916880714);
620
621        // Testing UP magic_s64
622        total = 0;
623        for x in 0..(200 * 1000i64) {
624            let m = magic_s64(-0x8000_0000_0000_0000i64 + x);
625            total = total ^ (m.mul_by as u64);
626            total = total + (m.shift_by as u64);
627        }
628        assert_eq!(total, 17929885647724831014);
629
630        total = 0;
631        // Testing MIDPOINT magic_s64
632        for x in 0..(200 * 1000i64) {
633            let x2 = -100 * 1000i64 + x;
634            if x2 != -1 && x2 != 0 && x2 != 1 {
635                let m = magic_s64(x2);
636                total = total ^ (m.mul_by as u64);
637                total = total + (m.shift_by as u64);
638            }
639        }
640        assert_eq!(total, 18106042338125661964);
641
642        // Testing DOWN magic_s64
643        total = 0;
644        for x in 0..(200 * 1000i64) {
645            let m = magic_s64(0x7FFF_FFFF_FFFF_FFFFi64 - x);
646            total = total ^ (m.mul_by as u64);
647            total = total + (m.shift_by as u64);
648        }
649        assert_eq!(total, 563301797155560970);
650    }
651
652    // These generate reference results for signed/unsigned 32/64 bit
653    // division, rounding towards zero.
654    fn div_u32(x: u32, y: u32) -> u32 {
655        return x / y;
656    }
657    fn div_s32(x: i32, y: i32) -> i32 {
658        return x / y;
659    }
660    fn div_u64(x: u64, y: u64) -> u64 {
661        return x / y;
662    }
663    fn div_s64(x: i64, y: i64) -> i64 {
664        return x / y;
665    }
666
667    // Returns the high half of a 32 bit unsigned widening multiply.
668    fn mulhw_u32(x: u32, y: u32) -> u32 {
669        let x64: u64 = x as u64;
670        let y64: u64 = y as u64;
671        let r64: u64 = x64 * y64;
672        (r64 >> 32) as u32
673    }
674
675    // Returns the high half of a 32 bit signed widening multiply.
676    fn mulhw_s32(x: i32, y: i32) -> i32 {
677        let x64: i64 = x as i64;
678        let y64: i64 = y as i64;
679        let r64: i64 = x64 * y64;
680        (r64 >> 32) as i32
681    }
682
683    // Returns the high half of a 64 bit unsigned widening multiply.
684    fn mulhw_u64(x: u64, y: u64) -> u64 {
685        let x128 = x as u128;
686        let y128 = y as u128;
687        let r128 = x128 * y128;
688        (r128 >> 64) as u64
689    }
690
691    // Returns the high half of a 64 bit signed widening multiply.
692    fn mulhw_s64(x: i64, y: i64) -> i64 {
693        let x128 = x as i128;
694        let y128 = y as i128;
695        let r128 = x128 * y128;
696        (r128 >> 64) as i64
697    }
698
699    /// Compute and check `q = n / d` using `magic`.
700    fn eval_magic_u32_div(n: u32, magic: &MU32) -> u32 {
701        let mut q = mulhw_u32(n, magic.mul_by);
702        if magic.do_add {
703            assert!(magic.shift_by >= 1 && magic.shift_by <= 32);
704            let mut t = n - q;
705            t >>= 1;
706            t = t + q;
707            q = t >> (magic.shift_by - 1);
708        } else {
709            assert!(magic.shift_by >= 0 && magic.shift_by <= 31);
710            q >>= magic.shift_by;
711        }
712        q
713    }
714
715    /// Compute and check `r = n % d` using `magic`.
716    fn eval_magic_u32_rem(n: u32, d: u32, magic: &MU32) -> u32 {
717        let mut q = mulhw_u32(n, magic.mul_by);
718        if magic.do_add {
719            assert!(magic.shift_by >= 1 && magic.shift_by <= 32);
720            let mut t = n - q;
721            t >>= 1;
722            t = t + q;
723            q = t >> (magic.shift_by - 1);
724        } else {
725            assert!(magic.shift_by >= 0 && magic.shift_by <= 31);
726            q >>= magic.shift_by;
727        }
728        let tt = q.wrapping_mul(d);
729        n.wrapping_sub(tt)
730    }
731
732    /// Compute and check `q = n / d` using `magic`.
733    fn eval_magic_u64_div(n: u64, magic: &MU64) -> u64 {
734        let mut q = mulhw_u64(n, magic.mul_by);
735        if magic.do_add {
736            assert!(magic.shift_by >= 1 && magic.shift_by <= 64);
737            let mut t = n - q;
738            t >>= 1;
739            t = t + q;
740            q = t >> (magic.shift_by - 1);
741        } else {
742            assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
743            q >>= magic.shift_by;
744        }
745        q
746    }
747
748    /// Compute and check `r = n % d` using `magic`.
749    fn eval_magic_u64_rem(n: u64, d: u64, magic: &MU64) -> u64 {
750        let mut q = mulhw_u64(n, magic.mul_by);
751        if magic.do_add {
752            assert!(magic.shift_by >= 1 && magic.shift_by <= 64);
753            let mut t = n - q;
754            t >>= 1;
755            t = t + q;
756            q = t >> (magic.shift_by - 1);
757        } else {
758            assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
759            q >>= magic.shift_by;
760        }
761        let tt = q.wrapping_mul(d);
762        n.wrapping_sub(tt)
763    }
764
765    /// Compute and check `q = n / d` using `magic`.
766    fn eval_magic_s32_div(n: i32, d: i32, magic: &MS32) -> i32 {
767        let mut q: i32 = mulhw_s32(n, magic.mul_by);
768        if d > 0 && magic.mul_by < 0 {
769            q = q + n;
770        } else if d < 0 && magic.mul_by > 0 {
771            q = q - n;
772        }
773        assert!(magic.shift_by >= 0 && magic.shift_by <= 31);
774        q = q >> magic.shift_by;
775        let mut t: u32 = q as u32;
776        t = t >> 31;
777        q = q + (t as i32);
778        q
779    }
780
781    /// Compute and check `r = n % d` using `magic`.
782    fn eval_magic_s32_rem(n: i32, d: i32, magic: &MS32) -> i32 {
783        let mut q: i32 = mulhw_s32(n, magic.mul_by);
784        if d > 0 && magic.mul_by < 0 {
785            q = q + n;
786        } else if d < 0 && magic.mul_by > 0 {
787            q = q - n;
788        }
789        assert!(magic.shift_by >= 0 && magic.shift_by <= 31);
790        q = q >> magic.shift_by;
791        let mut t: u32 = q as u32;
792        t = t >> 31;
793        q = q + (t as i32);
794        let tt = q.wrapping_mul(d);
795        n.wrapping_sub(tt)
796    }
797
798    /// Compute and check `q = n / d` using `magic`. */
799    fn eval_magic_s64_div(n: i64, d: i64, magic: &MS64) -> i64 {
800        let mut q: i64 = mulhw_s64(n, magic.mul_by);
801        if d > 0 && magic.mul_by < 0 {
802            q = q + n;
803        } else if d < 0 && magic.mul_by > 0 {
804            q = q - n;
805        }
806        assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
807        q = q >> magic.shift_by;
808        let mut t: u64 = q as u64;
809        t = t >> 63;
810        q = q + (t as i64);
811        q
812    }
813
814    /// Compute and check `r = n % d` using `magic`.
815    fn eval_magic_s64_rem(n: i64, d: i64, magic: &MS64) -> i64 {
816        let mut q: i64 = mulhw_s64(n, magic.mul_by);
817        if d > 0 && magic.mul_by < 0 {
818            q = q + n;
819        } else if d < 0 && magic.mul_by > 0 {
820            q = q - n;
821        }
822        assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
823        q = q >> magic.shift_by;
824        let mut t: u64 = q as u64;
825        t = t >> 63;
826        q = q + (t as i64);
827        let tt = q.wrapping_mul(d);
828        n.wrapping_sub(tt)
829    }
830
831    #[test]
832    fn test_magic_generators_give_correct_numbers() {
833        // For a variety of values for both `n` and `d`, compute the magic
834        // numbers for `d`, and in effect interpret them so as to compute
835        // `n / d`.  Check that that equals the value of `n / d` computed
836        // directly by the hardware.  This serves to check that the magic
837        // number generates work properly.  In total, 50,148,000 tests are
838        // done.
839
840        // Some constants
841        const MIN_U32: u32 = 0;
842        const MAX_U32: u32 = 0xFFFF_FFFFu32;
843        const MAX_U32_HALF: u32 = 0x8000_0000u32; // more or less
844
845        const MIN_S32: i32 = 0x8000_0000u32 as i32;
846        const MAX_S32: i32 = 0x7FFF_FFFFu32 as i32;
847
848        const MIN_U64: u64 = 0;
849        const MAX_U64: u64 = 0xFFFF_FFFF_FFFF_FFFFu64;
850        const MAX_U64_HALF: u64 = 0x8000_0000_0000_0000u64; // ditto
851
852        const MIN_S64: i64 = 0x8000_0000_0000_0000u64 as i64;
853        const MAX_S64: i64 = 0x7FFF_FFFF_FFFF_FFFFu64 as i64;
854
855        // Compute the magic numbers for `d` and then use them to compute and
856        // check `n / d` for around 1000 values of `n`, using unsigned 32-bit
857        // division.
858        fn test_magic_u32_inner(d: u32, n_tests_done: &mut i32) {
859            // Advance the numerator (the `n` in `n / d`) so as to test
860            // densely near the range ends (and, in the signed variants, near
861            // zero) but not so densely away from those regions.
862            fn advance_n_u32(x: u32) -> u32 {
863                if x < MIN_U32 + 110 {
864                    return x + 1;
865                }
866                if x < MIN_U32 + 1700 {
867                    return x + 23;
868                }
869                if x < MAX_U32 - 1700 {
870                    let xd: f64 = (x as f64) * 1.06415927;
871                    return if xd >= (MAX_U32 - 1700) as f64 {
872                        MAX_U32 - 1700
873                    } else {
874                        xd as u32
875                    };
876                }
877                if x < MAX_U32 - 110 {
878                    return x + 23;
879                }
880                u32::wrapping_add(x, 1)
881            }
882
883            let magic: MU32 = magic_u32(d);
884            let mut n: u32 = MIN_U32;
885            loop {
886                *n_tests_done += 1;
887                let q = eval_magic_u32_div(n, &magic);
888                assert_eq!(q, div_u32(n, d));
889
890                n = advance_n_u32(n);
891                if n == MIN_U32 {
892                    break;
893                }
894            }
895        }
896
897        // Compute the magic numbers for `d` and then use them to compute and
898        // check `n / d` for around 1000 values of `n`, using signed 32-bit
899        // division.
900        fn test_magic_s32_inner(d: i32, n_tests_done: &mut i32) {
901            // See comment on advance_n_u32 above.
902            fn advance_n_s32(x: i32) -> i32 {
903                if x >= 0 && x <= 29 {
904                    return x + 1;
905                }
906                if x < MIN_S32 + 110 {
907                    return x + 1;
908                }
909                if x < MIN_S32 + 1700 {
910                    return x + 23;
911                }
912                if x < MAX_S32 - 1700 {
913                    let mut xd: f64 = x as f64;
914                    xd = if xd < 0.0 {
915                        xd / 1.06415927
916                    } else {
917                        xd * 1.06415927
918                    };
919                    return if xd >= (MAX_S32 - 1700) as f64 {
920                        MAX_S32 - 1700
921                    } else {
922                        xd as i32
923                    };
924                }
925                if x < MAX_S32 - 110 {
926                    return x + 23;
927                }
928                if x == MAX_S32 {
929                    return MIN_S32;
930                }
931                x + 1
932            }
933
934            let magic: MS32 = magic_s32(d);
935            let mut n: i32 = MIN_S32;
936            loop {
937                *n_tests_done += 1;
938                let q = eval_magic_s32_div(n, d, &magic);
939                assert_eq!(q, div_s32(n, d));
940
941                n = advance_n_s32(n);
942                if n == MIN_S32 {
943                    break;
944                }
945            }
946        }
947
948        // Compute the magic numbers for `d` and then use them to compute and
949        // check `n / d` for around 1000 values of `n`, using unsigned 64-bit
950        // division.
951        fn test_magic_u64_inner(d: u64, n_tests_done: &mut i32) {
952            // See comment on advance_n_u32 above.
953            fn advance_n_u64(x: u64) -> u64 {
954                if x < MIN_U64 + 110 {
955                    return x + 1;
956                }
957                if x < MIN_U64 + 1700 {
958                    return x + 23;
959                }
960                if x < MAX_U64 - 1700 {
961                    let xd: f64 = (x as f64) * 1.06415927;
962                    return if xd >= (MAX_U64 - 1700) as f64 {
963                        MAX_U64 - 1700
964                    } else {
965                        xd as u64
966                    };
967                }
968                if x < MAX_U64 - 110 {
969                    return x + 23;
970                }
971                u64::wrapping_add(x, 1)
972            }
973
974            let magic: MU64 = magic_u64(d);
975            let mut n: u64 = MIN_U64;
976            loop {
977                *n_tests_done += 1;
978                let q = eval_magic_u64_div(n, &magic);
979                assert_eq!(q, div_u64(n, d));
980
981                n = advance_n_u64(n);
982                if n == MIN_U64 {
983                    break;
984                }
985            }
986        }
987
988        // Compute the magic numbers for `d` and then use them to compute and
989        // check `n / d` for around 1000 values of `n`, using signed 64-bit
990        // division.
991        fn test_magic_s64_inner(d: i64, n_tests_done: &mut i32) {
992            // See comment on advance_n_u32 above.
993            fn advance_n_s64(x: i64) -> i64 {
994                if x >= 0 && x <= 29 {
995                    return x + 1;
996                }
997                if x < MIN_S64 + 110 {
998                    return x + 1;
999                }
1000                if x < MIN_S64 + 1700 {
1001                    return x + 23;
1002                }
1003                if x < MAX_S64 - 1700 {
1004                    let mut xd: f64 = x as f64;
1005                    xd = if xd < 0.0 {
1006                        xd / 1.06415927
1007                    } else {
1008                        xd * 1.06415927
1009                    };
1010                    return if xd >= (MAX_S64 - 1700) as f64 {
1011                        MAX_S64 - 1700
1012                    } else {
1013                        xd as i64
1014                    };
1015                }
1016                if x < MAX_S64 - 110 {
1017                    return x + 23;
1018                }
1019                if x == MAX_S64 {
1020                    return MIN_S64;
1021                }
1022                x + 1
1023            }
1024
1025            let magic: MS64 = magic_s64(d);
1026            let mut n: i64 = MIN_S64;
1027            loop {
1028                *n_tests_done += 1;
1029                let q = eval_magic_s64_div(n, d, &magic);
1030                assert_eq!(q, div_s64(n, d));
1031
1032                n = advance_n_s64(n);
1033                if n == MIN_S64 {
1034                    break;
1035                }
1036            }
1037        }
1038
1039        // Using all the above support machinery, actually run the tests.
1040
1041        let mut n_tests_done: i32 = 0;
1042
1043        // u32 division tests
1044        {
1045            // 2 .. 3k
1046            let mut d: u32 = 2;
1047            for _ in 0..3 * 1000 {
1048                test_magic_u32_inner(d, &mut n_tests_done);
1049                d += 1;
1050            }
1051
1052            // across the midpoint: midpoint - 3k .. midpoint + 3k
1053            d = MAX_U32_HALF - 3 * 1000;
1054            for _ in 0..2 * 3 * 1000 {
1055                test_magic_u32_inner(d, &mut n_tests_done);
1056                d += 1;
1057            }
1058
1059            // MAX_U32 - 3k .. MAX_U32 (in reverse order)
1060            d = MAX_U32;
1061            for _ in 0..3 * 1000 {
1062                test_magic_u32_inner(d, &mut n_tests_done);
1063                d -= 1;
1064            }
1065        }
1066
1067        // s32 division tests
1068        {
1069            // MIN_S32 .. MIN_S32 + 3k
1070            let mut d: i32 = MIN_S32;
1071            for _ in 0..3 * 1000 {
1072                test_magic_s32_inner(d, &mut n_tests_done);
1073                d += 1;
1074            }
1075
1076            // -3k .. -2 (in reverse order)
1077            d = -2;
1078            for _ in 0..3 * 1000 {
1079                test_magic_s32_inner(d, &mut n_tests_done);
1080                d -= 1;
1081            }
1082
1083            // 2 .. 3k
1084            d = 2;
1085            for _ in 0..3 * 1000 {
1086                test_magic_s32_inner(d, &mut n_tests_done);
1087                d += 1;
1088            }
1089
1090            // MAX_S32 - 3k .. MAX_S32 (in reverse order)
1091            d = MAX_S32;
1092            for _ in 0..3 * 1000 {
1093                test_magic_s32_inner(d, &mut n_tests_done);
1094                d -= 1;
1095            }
1096        }
1097
1098        // u64 division tests
1099        {
1100            // 2 .. 3k
1101            let mut d: u64 = 2;
1102            for _ in 0..3 * 1000 {
1103                test_magic_u64_inner(d, &mut n_tests_done);
1104                d += 1;
1105            }
1106
1107            // across the midpoint: midpoint - 3k .. midpoint + 3k
1108            d = MAX_U64_HALF - 3 * 1000;
1109            for _ in 0..2 * 3 * 1000 {
1110                test_magic_u64_inner(d, &mut n_tests_done);
1111                d += 1;
1112            }
1113
1114            // mAX_U64 - 3000 .. mAX_U64 (in reverse order)
1115            d = MAX_U64;
1116            for _ in 0..3 * 1000 {
1117                test_magic_u64_inner(d, &mut n_tests_done);
1118                d -= 1;
1119            }
1120        }
1121
1122        // s64 division tests
1123        {
1124            // MIN_S64 .. MIN_S64 + 3k
1125            let mut d: i64 = MIN_S64;
1126            for _ in 0..3 * 1000 {
1127                test_magic_s64_inner(d, &mut n_tests_done);
1128                d += 1;
1129            }
1130
1131            // -3k .. -2 (in reverse order)
1132            d = -2;
1133            for _ in 0..3 * 1000 {
1134                test_magic_s64_inner(d, &mut n_tests_done);
1135                d -= 1;
1136            }
1137
1138            // 2 .. 3k
1139            d = 2;
1140            for _ in 0..3 * 1000 {
1141                test_magic_s64_inner(d, &mut n_tests_done);
1142                d += 1;
1143            }
1144
1145            // MAX_S64 - 3k .. MAX_S64 (in reverse order)
1146            d = MAX_S64;
1147            for _ in 0..3 * 1000 {
1148                test_magic_s64_inner(d, &mut n_tests_done);
1149                d -= 1;
1150            }
1151        }
1152        assert_eq!(n_tests_done, 50_148_000);
1153    }
1154
1155    proptest::proptest! {
1156        #[test]
1157        fn proptest_magic_u32(numerator in 0..u32::MAX, divisor in 1..u32::MAX) {
1158            let expected_div = numerator.wrapping_div(divisor);
1159            let expected_rem = numerator.wrapping_rem(divisor);
1160
1161            let magic = magic_u32(divisor);
1162            let actual_div = eval_magic_u32_div(numerator, &magic);
1163            let actual_rem = eval_magic_u32_rem(numerator, divisor, &magic);
1164
1165            assert_eq!(expected_div, actual_div);
1166            assert_eq!(expected_rem, actual_rem);
1167        }
1168
1169        #[test]
1170        fn proptest_magic_u64(numerator in 0..u64::MAX, divisor in 1..u64::MAX) {
1171            let expected_div = numerator.wrapping_div(divisor);
1172            let expected_rem = numerator.wrapping_rem(divisor);
1173
1174            let magic = magic_u64(divisor);
1175            let actual_div = eval_magic_u64_div(numerator, &magic);
1176            let actual_rem = eval_magic_u64_rem(numerator, divisor, &magic);
1177
1178            assert_eq!(expected_div, actual_div);
1179            assert_eq!(expected_rem, actual_rem);
1180        }
1181
1182        #[test]
1183        fn proptest_magic_s32(
1184            numerator in i32::MIN..i32::MAX,
1185            divisor in (i32::MIN..i32::MAX).prop_filter("avoid divide-by-zero", |d| *d != 0),
1186        ) {
1187            let expected_div = numerator.wrapping_div(divisor);
1188            let expected_rem = numerator.wrapping_rem(divisor);
1189
1190            let magic = magic_s32(divisor);
1191            let actual_div = eval_magic_s32_div(numerator, divisor, &magic);
1192            let actual_rem = eval_magic_s32_rem(numerator, divisor, &magic);
1193
1194            assert_eq!(expected_div, actual_div);
1195            assert_eq!(expected_rem, actual_rem);
1196        }
1197
1198        #[test]
1199        fn proptest_magic_s64(
1200            numerator in i64::MIN..i64::MAX,
1201            divisor in (i64::MIN..i64::MAX).prop_filter("avoid divide-by-zero", |d| *d != 0),
1202        ) {
1203            let expected_div = numerator.wrapping_div(divisor);
1204            let expected_rem = numerator.wrapping_rem(divisor);
1205
1206            let magic = magic_s64(divisor);
1207            let actual_div = eval_magic_s64_div(numerator, divisor, &magic);
1208            let actual_rem = eval_magic_s64_rem(numerator, divisor, &magic);
1209
1210            assert_eq!(expected_div, actual_div);
1211            assert_eq!(expected_rem, actual_rem);
1212        }
1213    }
1214}