libm/math/
cbrt.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
/* SPDX-License-Identifier: MIT */
/* origin: core-math/src/binary64/cbrt/cbrt.c
 * Copyright (c) 2021-2022 Alexei Sibidanov.
 * Ported to Rust in 2025 by Trevor Gross.
 */

use super::Float;
use super::support::{FpResult, Round, cold_path};

/// Compute the cube root of the argument.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn cbrt(x: f64) -> f64 {
    cbrt_round(x, Round::Nearest).val
}

pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
    const ESCALE: [f64; 3] = [
        1.0,
        hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */
        hf64!("0x1.965fea53d6e3dp+0"), /* 2^(2/3) */
    ];

    /* the polynomial c0+c1*x+c2*x^2+c3*x^3 approximates x^(1/3) on [1,2]
    with maximal error < 9.2e-5 (attained at x=2) */
    const C: [f64; 4] = [
        hf64!("0x1.1b0babccfef9cp-1"),
        hf64!("0x1.2c9a3e94d1da5p-1"),
        hf64!("-0x1.4dc30b1a1ddbap-3"),
        hf64!("0x1.7a8d3e4ec9b07p-6"),
    ];

    let u0: f64 = hf64!("0x1.5555555555555p-2");
    let u1: f64 = hf64!("0x1.c71c71c71c71cp-3");

    let rsc = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25];

    let off = [hf64!("0x1p-53"), 0.0, 0.0, 0.0];

    /* rm=0 for rounding to nearest, and other values for directed roundings */
    let hx: u64 = x.to_bits();
    let mut mant: u64 = hx & f64::SIG_MASK;
    let sign: u64 = hx >> 63;

    let mut e: u32 = (hx >> f64::SIG_BITS) as u32 & f64::EXP_SAT;

    if ((e + 1) & f64::EXP_SAT) < 2 {
        cold_path();

        let ix: u64 = hx & !f64::SIGN_MASK;

        /* 0, inf, nan: we return x + x instead of simply x,
        to that for x a signaling NaN, it correctly triggers
        the invalid exception. */
        if e == f64::EXP_SAT || ix == 0 {
            return FpResult::ok(x + x);
        }

        let nz = ix.leading_zeros() - 11; /* subnormal */
        mant <<= nz;
        mant &= f64::SIG_MASK;
        e = e.wrapping_sub(nz - 1);
    }

    e = e.wrapping_add(3072);
    let cvt1: u64 = mant | (0x3ffu64 << 52);
    let mut cvt5: u64 = cvt1;

    let et: u32 = e / 3;
    let it: u32 = e % 3;

    /* 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3 */
    cvt5 += u64::from(it) << f64::SIG_BITS;
    cvt5 |= sign << 63;
    let zz: f64 = f64::from_bits(cvt5);

    /* cbrt(x) = cbrt(zz)*2^(et-1365) where 1 <= zz < 8 */
    let mut isc: u64 = ESCALE[it as usize].to_bits(); // todo: index
    isc |= sign << 63;
    let cvt2: u64 = isc;
    let z: f64 = f64::from_bits(cvt1);

    /* cbrt(zz) = cbrt(z)*isc, where isc encodes 1, 2^(1/3) or 2^(2/3),
    and 1 <= z < 2 */
    let r: f64 = 1.0 / z;
    let rr: f64 = r * rsc[((it as usize) << 1) | sign as usize];
    let z2: f64 = z * z;
    let c0: f64 = C[0] + z * C[1];
    let c2: f64 = C[2] + z * C[3];
    let mut y: f64 = c0 + z2 * c2;
    let mut y2: f64 = y * y;

    /* y is an approximation of z^(1/3) */
    let mut h: f64 = y2 * (y * r) - 1.0;

    /* h determines the error between y and z^(1/3) */
    y -= (h * y) * (u0 - u1 * h);

    /* The correction y -= (h*y)*(u0 - u1*h) corresponds to a cubic variant
    of Newton's method, with the function f(y) = 1-z/y^3. */
    y *= f64::from_bits(cvt2);

    /* Now y is an approximation of zz^(1/3),
     * and rr an approximation of 1/zz. We now perform another iteration of
     * Newton-Raphson, this time with a linear approximation only. */
    y2 = y * y;
    let mut y2l: f64 = y.fma(y, -y2);

    /* y2 + y2l = y^2 exactly */
    let mut y3: f64 = y2 * y;
    let mut y3l: f64 = y.fma(y2, -y3) + y * y2l;

    /* y3 + y3l approximates y^3 with about 106 bits of accuracy */
    h = ((y3 - zz) + y3l) * rr;
    let mut dy: f64 = h * (y * u0);

    /* the approximation of zz^(1/3) is y - dy */
    let mut y1: f64 = y - dy;
    dy = (y - y1) - dy;

    /* the approximation of zz^(1/3) is now y1 + dy, where |dy| < 1/2 ulp(y)
     * (for rounding to nearest) */
    let mut ady: f64 = dy.abs();

    /* For directed roundings, ady0 is tiny when dy is tiny, or ady0 is near
     * from ulp(1);
     * for rounding to nearest, ady0 is tiny when dy is near from 1/2 ulp(1),
     * or from 3/2 ulp(1). */
    let mut ady0: f64 = (ady - off[round as usize]).abs();
    let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs();

    if ady0 < hf64!("0x1p-75") || ady1 < hf64!("0x1p-75") {
        cold_path();

        y2 = y1 * y1;
        y2l = y1.fma(y1, -y2);
        y3 = y2 * y1;
        y3l = y1.fma(y2, -y3) + y1 * y2l;
        h = ((y3 - zz) + y3l) * rr;
        dy = h * (y1 * u0);
        y = y1 - dy;
        dy = (y1 - y) - dy;
        y1 = y;
        ady = dy.abs();
        ady0 = (ady - off[round as usize]).abs();
        ady1 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs();

        if ady0 < hf64!("0x1p-98") || ady1 < hf64!("0x1p-98") {
            cold_path();
            let azz: f64 = zz.abs();

            // ~ 0x1.79d15d0e8d59b80000000000000ffc3dp+0
            if azz == hf64!("0x1.9b78223aa307cp+1") {
                y1 = hf64!("0x1.79d15d0e8d59cp+0").copysign(zz);
            }

            // ~ 0x1.de87aa837820e80000000000001c0f08p+0
            if azz == hf64!("0x1.a202bfc89ddffp+2") {
                y1 = hf64!("0x1.de87aa837820fp+0").copysign(zz);
            }

            if round != Round::Nearest {
                let wlist = [
                    (hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0
                    (hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0
                    (hf64!("0x1.d1ef81cbbbe71p+0"), hf64!("0x1.388fb44cdcf5ap+0")), // ~ 0x1.388fb44cdcf5a0000000000002202c55p+0
                    (hf64!("0x1.0a2014f62987cp+1"), hf64!("0x1.46bcbf47dc1e8p+0")), // ~ 0x1.46bcbf47dc1e8000000000000303aa2dp+0
                    (hf64!("0x1.fe18a044a5501p+1"), hf64!("0x1.95decfec9c904p+0")), // ~ 0x1.95decfec9c9040000000000000159e8ep+0
                    (hf64!("0x1.a6bb8c803147bp+2"), hf64!("0x1.e05335a6401dep+0")), // ~ 0x1.e05335a6401de00000000000027ca017p+0
                    (hf64!("0x1.ac8538a031cbdp+2"), hf64!("0x1.e281d87098de8p+0")), // ~ 0x1.e281d87098de80000000000000ee9314p+0
                ];

                for (a, b) in wlist {
                    if azz == a {
                        let tmp = if round as u64 + sign == 2 {
                            hf64!("0x1p-52")
                        } else {
                            0.0
                        };
                        y1 = (b + tmp).copysign(zz);
                    }
                }
            }
        }
    }

    let mut cvt3: u64 = y1.to_bits();
    cvt3 = cvt3.wrapping_add(((et.wrapping_sub(342).wrapping_sub(1023)) as u64) << 52);
    let m0: u64 = cvt3 << 30;
    let m1 = m0 >> 63;

    if (m0 ^ m1) <= (1u64 << 30) {
        cold_path();

        let mut cvt4: u64 = y1.to_bits();
        cvt4 = (cvt4 + (164 << 15)) & 0xffffffffffff0000u64;

        if ((f64::from_bits(cvt4) - y1) - dy).abs() < hf64!("0x1p-60") || (zz).abs() == 1.0 {
            cvt3 = (cvt3 + (1u64 << 15)) & 0xffffffffffff0000u64;
        }
    }

    FpResult::ok(f64::from_bits(cvt3))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn spot_checks() {
        if !cfg!(x86_no_sse) {
            // Exposes a rounding mode problem. Ignored on i586 because of inaccurate FMA.
            assert_biteq!(
                cbrt(f64::from_bits(0xf7f792b28f600000)),
                f64::from_bits(0xd29ce68655d962f3)
            );
        }
    }
}