// twox_hash/sixty_four.rs

1use crate::UnalignedBuffer;
2use core::{cmp, hash::Hasher};
3
4#[cfg(feature = "serialize")]
5use serde::{Deserialize, Serialize};
6
// The streaming state buffers input in 32-byte chunks (four u64 lanes).
const CHUNK_SIZE: usize = 32;

// The five prime constants defined by the XXH64 specification. Each is
// used at a fixed point in the mixing steps below.
pub const PRIME_1: u64 = 11_400_714_785_074_694_791;
pub const PRIME_2: u64 = 14_029_467_366_897_019_727;
pub const PRIME_3: u64 = 1_609_587_929_392_839_161;
pub const PRIME_4: u64 = 9_650_029_242_287_828_579;
pub const PRIME_5: u64 = 2_870_177_450_012_600_261;
14
// The four lane accumulators used by the bulk-processing loop. Each
// 32-byte chunk contributes one u64 to each lane; the lanes are only
// merged together in `XxCore::finish`.
#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, PartialEq)]
struct XxCore {
    v1: u64,
    v2: u64,
    v3: u64,
    v4: u64,
}
23
/// Calculates the 64-bit hash.
#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct XxHash64 {
    // Total number of bytes written so far; also mixed into the final hash.
    total_len: u64,
    // Kept so inputs shorter than one chunk can be finished without
    // ever touching the lane accumulators (see `finish`).
    seed: u64,
    // The four lane accumulators, updated per 32-byte chunk.
    core: XxCore,
    // Holds a partial trailing chunk between calls to `write`.
    // `serde(flatten)` inlines the buffer's fields into this struct's
    // serialized representation.
    #[cfg_attr(feature = "serialize", serde(flatten))]
    buffer: Buffer,
}
34
35impl XxCore {
36    fn with_seed(seed: u64) -> XxCore {
37        XxCore {
38            v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2),
39            v2: seed.wrapping_add(PRIME_2),
40            v3: seed,
41            v4: seed.wrapping_sub(PRIME_1),
42        }
43    }
44
45    #[inline(always)]
46    fn ingest_chunks<I>(&mut self, values: I)
47    where
48        I: IntoIterator<Item = [u64; 4]>,
49    {
50        #[inline(always)]
51        fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 {
52            value = value.wrapping_mul(PRIME_2);
53            current_value = current_value.wrapping_add(value);
54            current_value = current_value.rotate_left(31);
55            current_value.wrapping_mul(PRIME_1)
56        }
57
58        // By drawing these out, we can avoid going back and forth to
59        // memory. It only really helps for large files, when we need
60        // to iterate multiple times here.
61
62        let mut v1 = self.v1;
63        let mut v2 = self.v2;
64        let mut v3 = self.v3;
65        let mut v4 = self.v4;
66
67        for [n1, n2, n3, n4] in values {
68            v1 = ingest_one_number(v1, n1.to_le());
69            v2 = ingest_one_number(v2, n2.to_le());
70            v3 = ingest_one_number(v3, n3.to_le());
71            v4 = ingest_one_number(v4, n4.to_le());
72        }
73
74        self.v1 = v1;
75        self.v2 = v2;
76        self.v3 = v3;
77        self.v4 = v4;
78    }
79
80    #[inline(always)]
81    fn finish(&self) -> u64 {
82        // The original code pulls out local vars for v[1234]
83        // here. Performance tests did not show that to be effective
84        // here, presumably because this method is not called in a
85        // tight loop.
86
87        #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel
88        let mut hash;
89
90        hash = self.v1.rotate_left(1);
91        hash = hash.wrapping_add(self.v2.rotate_left(7));
92        hash = hash.wrapping_add(self.v3.rotate_left(12));
93        hash = hash.wrapping_add(self.v4.rotate_left(18));
94
95        #[inline(always)]
96        fn mix_one(mut hash: u64, mut value: u64) -> u64 {
97            value = value.wrapping_mul(PRIME_2);
98            value = value.rotate_left(31);
99            value = value.wrapping_mul(PRIME_1);
100            hash ^= value;
101            hash = hash.wrapping_mul(PRIME_1);
102            hash.wrapping_add(PRIME_4)
103        }
104
105        hash = mix_one(hash, self.v1);
106        hash = mix_one(hash, self.v2);
107        hash = mix_one(hash, self.v3);
108        hash = mix_one(hash, self.v4);
109
110        hash
111    }
112}
113
impl core::fmt::Debug for XxCore {
    // Fixed-width hex output so the four lane values line up when
    // printed while debugging intermediate hashing state.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
        write!(
            f,
            "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}",
            self.v1, self.v2, self.v3, self.v4
        )
    }
}
123
// Forces 8-byte alignment on the wrapped value so the buffered bytes
// can be reinterpreted as `u64`s without unaligned reads.
// `serde(transparent)` serializes it exactly like the inner value.
#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
#[derive(Debug, Copy, Clone, Default, PartialEq)]
#[repr(align(8))]
#[cfg_attr(feature = "serialize", serde(transparent))]
struct AlignToU64<T>(T);
129
// Accumulates input bytes until a full 32-byte chunk is available.
// The serde renames are part of the stable serialized format (see
// `test_serialization_stability`) and must not change.
#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
#[derive(Debug, Copy, Clone, Default, PartialEq)]
struct Buffer {
    // The chunk storage; serialized under the name "buffer".
    #[cfg_attr(feature = "serialize", serde(rename = "buffer"))]
    data: AlignToU64<[u8; CHUNK_SIZE]>,
    // Number of valid bytes at the front of `data`; serialized as
    // "buffer_usage".
    #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))]
    len: usize,
}
138
139impl Buffer {
140    fn data(&self) -> &[u8] {
141        &self.data.0[..self.len]
142    }
143
144    /// Consumes as much of the parameter as it can, returning the unused part.
145    fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
146        let to_use = cmp::min(self.available(), data.len());
147        let (data, remaining) = data.split_at(to_use);
148        self.data.0[self.len..][..to_use].copy_from_slice(data);
149        self.len += to_use;
150        remaining
151    }
152
153    fn set_data(&mut self, data: &[u8]) {
154        debug_assert!(self.is_empty());
155        debug_assert!(data.len() < CHUNK_SIZE);
156        self.data.0[..data.len()].copy_from_slice(data);
157        self.len = data.len();
158    }
159
160    fn available(&self) -> usize {
161        CHUNK_SIZE - self.len
162    }
163
164    fn is_empty(&self) -> bool {
165        self.len == 0
166    }
167
168    fn is_full(&self) -> bool {
169        self.len == CHUNK_SIZE
170    }
171}
172
impl XxHash64 {
    /// Constructs the hash with an initial seed
    pub fn with_seed(seed: u64) -> XxHash64 {
        XxHash64 {
            total_len: 0,
            seed,
            core: XxCore::with_seed(seed),
            buffer: Buffer::default(),
        }
    }

    /// Feeds `bytes` into the hash state. Complete 32-byte chunks are
    /// ingested immediately; any trailing partial chunk is buffered
    /// until the next `write` or `finish`.
    pub(crate) fn write(&mut self, bytes: &[u8]) {
        let remaining = self.maybe_consume_bytes(bytes);
        if !remaining.is_empty() {
            // Ingest every whole chunk, then stash the leftover
            // (< CHUNK_SIZE bytes) for later.
            let mut remaining = UnalignedBuffer::new(remaining);
            self.core.ingest_chunks(&mut remaining);
            self.buffer.set_data(remaining.remaining());
        }
        self.total_len += bytes.len() as u64;
    }

    // Consume bytes and try to make `self.buffer` empty.
    // If there are not enough bytes, `self.buffer` can be non-empty, and this
    // function returns an empty slice.
    fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
        if self.buffer.is_empty() {
            data
        } else {
            let data = self.buffer.consume(data);
            if self.buffer.is_full() {
                // The buffer now holds exactly one chunk: ingest it and
                // reset so subsequent input can bypass the buffer.
                let mut u64s = UnalignedBuffer::new(self.buffer.data());
                self.core.ingest_chunks(&mut u64s);
                debug_assert!(u64s.remaining().is_empty());
                self.buffer.len = 0;
            }
            data
        }
    }

    /// Produces the 64-bit hash of everything written so far. Takes
    /// `&self`, so it does not reset the state; more data may still be
    /// written afterwards.
    pub(crate) fn finish(&self) -> u64 {
        let mut hash = if self.total_len >= CHUNK_SIZE as u64 {
            // We have processed at least one full chunk
            self.core.finish()
        } else {
            // Short input: the lane accumulators were never used, so
            // start from the seeded constant instead.
            self.seed.wrapping_add(PRIME_5)
        };

        hash = hash.wrapping_add(self.total_len);

        // Mix the buffered tail (< 32 bytes) into the hash: first as
        // whole little-endian u64s, ...
        let mut buffered_u64s = UnalignedBuffer::<u64>::new(self.buffer.data());
        for buffered_u64 in &mut buffered_u64s {
            let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2);
            k1 = k1.rotate_left(31);
            k1 = k1.wrapping_mul(PRIME_1);
            hash ^= k1;
            hash = hash.rotate_left(27);
            hash = hash.wrapping_mul(PRIME_1);
            hash = hash.wrapping_add(PRIME_4);
        }

        // ... then at most one u32 (fewer than 8 bytes remain), ...
        let mut buffered_u32s = UnalignedBuffer::<u32>::new(buffered_u64s.remaining());
        for buffered_u32 in &mut buffered_u32s {
            let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1);
            hash ^= k1;
            hash = hash.rotate_left(23);
            hash = hash.wrapping_mul(PRIME_2);
            hash = hash.wrapping_add(PRIME_3);
        }

        // ... and finally the remaining individual bytes.
        let buffered_u8s = buffered_u32s.remaining();
        for &buffered_u8 in buffered_u8s {
            let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5);
            hash ^= k1;
            hash = hash.rotate_left(11);
            hash = hash.wrapping_mul(PRIME_1);
        }

        // The final intermixing ("avalanche"): spreads entropy across
        // all output bits.
        hash ^= hash >> 33;
        hash = hash.wrapping_mul(PRIME_2);
        hash ^= hash >> 29;
        hash = hash.wrapping_mul(PRIME_3);
        hash ^= hash >> 32;

        hash
    }

    /// The seed this hasher was created with.
    pub fn seed(&self) -> u64 {
        self.seed
    }

    /// The total number of bytes written so far.
    pub fn total_len(&self) -> u64 {
        self.total_len
    }
}
268
269impl Default for XxHash64 {
270    fn default() -> XxHash64 {
271        XxHash64::with_seed(0)
272    }
273}
274
275impl Hasher for XxHash64 {
276    fn finish(&self) -> u64 {
277        XxHash64::finish(self)
278    }
279
280    fn write(&mut self, bytes: &[u8]) {
281        XxHash64::write(self, bytes)
282    }
283}
284
// Re-export the std-only random-seed builder so downstream users can
// reach it from this module.
#[cfg(feature = "std")]
pub use crate::std_support::sixty_four::RandomXxHashBuilder64;
287
#[cfg(test)]
mod test {
    use super::{RandomXxHashBuilder64, XxHash64};
    use std::collections::HashMap;
    use std::hash::BuildHasherDefault;
    use std::prelude::v1::*;

    // Streaming one byte at a time must leave the core accumulators in
    // the same state as hashing the same bytes in a single call.
    #[test]
    fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
        let bytes: Vec<_> = (0..32).map(|_| 0).collect();

        let mut byte_by_byte = XxHash64::with_seed(0);
        for byte in bytes.chunks(1) {
            byte_by_byte.write(byte);
        }

        let mut one_chunk = XxHash64::with_seed(0);
        one_chunk.write(&bytes);

        assert_eq!(byte_by_byte.core, one_chunk.core);
    }

    // The expected values in the tests below come from the reference C
    // implementation of XXH64.
    #[test]
    fn hash_of_nothing_matches_c_implementation() {
        let mut hasher = XxHash64::with_seed(0);
        hasher.write(&[]);
        assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999);
    }

    #[test]
    fn hash_of_single_byte_matches_c_implementation() {
        let mut hasher = XxHash64::with_seed(0);
        hasher.write(&[42]);
        assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4);
    }

    #[test]
    fn hash_of_multiple_bytes_matches_c_implementation() {
        let mut hasher = XxHash64::with_seed(0);
        hasher.write(b"Hello, world!\0");
        assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f);
    }

    // 100 bytes spans multiple 32-byte chunks plus a buffered tail.
    #[test]
    fn hash_of_multiple_chunks_matches_c_implementation() {
        let bytes: Vec<_> = (0..100).collect();
        let mut hasher = XxHash64::with_seed(0);
        hasher.write(&bytes);
        assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597);
    }

    #[test]
    fn hash_with_different_seed_matches_c_implementation() {
        let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91);
        hasher.write(&[]);
        assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672);
    }

    #[test]
    fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
        let bytes: Vec<_> = (0..100).collect();
        let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91);
        hasher.write(&bytes);
        assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1);
    }

    #[test]
    fn can_be_used_in_a_hashmap_with_a_default_seed() {
        let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default();
        hash.insert(42, "the answer");
        assert_eq!(hash.get(&42), Some(&"the answer"));
    }

    #[test]
    fn can_be_used_in_a_hashmap_with_a_random_seed() {
        let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default();
        hash.insert(42, "the answer");
        assert_eq!(hash.get(&42), Some(&"the answer"));
    }

    #[cfg(feature = "serialize")]
    type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>;

    // Serializing and deserializing a hasher must round-trip its state
    // exactly.
    #[cfg(feature = "serialize")]
    #[test]
    fn test_serialization_cycle() -> TestResult {
        let mut hasher = XxHash64::with_seed(0);
        hasher.write(b"Hello, world!\0");
        hasher.finish();

        let serialized = serde_json::to_string(&hasher)?;
        let unserialized: XxHash64 = serde_json::from_str(&serialized)?;
        assert_eq!(hasher, unserialized);
        Ok(())
    }

    // Pins the on-the-wire serialized format: field names (including
    // the serde renames and flattened buffer) must stay stable.
    #[cfg(feature = "serialize")]
    #[test]
    fn test_serialization_stability() -> TestResult {
        let mut hasher = XxHash64::with_seed(0);
        hasher.write(b"Hello, world!\0");
        hasher.finish();

        let serialized = r#"{
            "total_len": 14,
            "seed": 0,
            "core": {
              "v1": 6983438078262162902,
              "v2": 14029467366897019727,
              "v3": 0,
              "v4": 7046029288634856825
            },
            "buffer": [
              72,  101, 108, 108, 111, 44, 32, 119,
              111, 114, 108, 100, 33,  0,  0,  0,
              0,   0,   0,   0,   0,   0,  0,  0,
              0,   0,   0,   0,   0,   0,  0,  0
            ],
            "buffer_usage": 14
        }"#;

        let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap();
        assert_eq!(hasher, unserialized);
        Ok(())
    }
}