Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/util/bit_chunk_iterator.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Types for iterating over bitmasks in 64-bit chunks
19
20
use crate::util::bit_util::ceil;
21
use std::fmt::Debug;
22
23
/// Iterates over an arbitrarily aligned byte buffer
24
///
25
/// Yields an iterator of aligned u64, along with the leading and trailing
26
/// u64 necessary to align the buffer to a 8-byte boundary
27
///
28
/// This is unlike [`BitChunkIterator`] which only exposes a trailing u64,
29
/// and consequently has to perform more work for each read
30
#[derive(Debug)]
31
pub struct UnalignedBitChunk<'a> {
32
    lead_padding: usize,
33
    trailing_padding: usize,
34
35
    prefix: Option<u64>,
36
    chunks: &'a [u64],
37
    suffix: Option<u64>,
38
}
39
40
impl<'a> UnalignedBitChunk<'a> {
41
    /// Create a from a byte array, and and an offset and length in bits
42
422
    pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
43
422
        if len == 0 {
44
12
            return Self {
45
12
                lead_padding: 0,
46
12
                trailing_padding: 0,
47
12
                prefix: None,
48
12
                chunks: &[],
49
12
                suffix: None,
50
12
            };
51
410
        }
52
53
410
        let byte_offset = offset / 8;
54
410
        let offset_padding = offset % 8;
55
56
410
        let bytes_len = (len + offset_padding).div_ceil(8);
57
410
        let buffer = &buffer[byte_offset..byte_offset + bytes_len];
58
59
410
        let prefix_mask = compute_prefix_mask(offset_padding);
60
61
        // If less than 8 bytes, read into prefix
62
410
        if buffer.len() <= 8 {
63
410
            let (suffix_mask, trailing_padding) = compute_suffix_mask(len, offset_padding);
64
410
            let prefix = read_u64(buffer) & suffix_mask & prefix_mask;
65
66
410
            return Self {
67
410
                lead_padding: offset_padding,
68
410
                trailing_padding,
69
410
                prefix: Some(prefix),
70
410
                chunks: &[],
71
410
                suffix: None,
72
410
            };
73
0
        }
74
75
        // If less than 16 bytes, read into prefix and suffix
76
0
        if buffer.len() <= 16 {
77
0
            let (suffix_mask, trailing_padding) = compute_suffix_mask(len, offset_padding);
78
0
            let prefix = read_u64(&buffer[..8]) & prefix_mask;
79
0
            let suffix = read_u64(&buffer[8..]) & suffix_mask;
80
81
0
            return Self {
82
0
                lead_padding: offset_padding,
83
0
                trailing_padding,
84
0
                prefix: Some(prefix),
85
0
                chunks: &[],
86
0
                suffix: Some(suffix),
87
0
            };
88
0
        }
89
90
        // Read into prefix and suffix as needed
91
0
        let (prefix, mut chunks, suffix) = unsafe { buffer.align_to::<u64>() };
92
0
        assert!(
93
0
            prefix.len() < 8 && suffix.len() < 8,
94
0
            "align_to did not return largest possible aligned slice"
95
        );
96
97
0
        let (alignment_padding, prefix) = match (offset_padding, prefix.is_empty()) {
98
0
            (0, true) => (0, None),
99
            (_, true) => {
100
0
                let prefix = chunks[0] & prefix_mask;
101
0
                chunks = &chunks[1..];
102
0
                (0, Some(prefix))
103
            }
104
            (_, false) => {
105
0
                let alignment_padding = (8 - prefix.len()) * 8;
106
107
0
                let prefix = (read_u64(prefix) & prefix_mask) << alignment_padding;
108
0
                (alignment_padding, Some(prefix))
109
            }
110
        };
111
112
0
        let lead_padding = offset_padding + alignment_padding;
113
0
        let (suffix_mask, trailing_padding) = compute_suffix_mask(len, lead_padding);
114
115
0
        let suffix = match (trailing_padding, suffix.is_empty()) {
116
0
            (0, _) => None,
117
            (_, true) => {
118
0
                let suffix = chunks[chunks.len() - 1] & suffix_mask;
119
0
                chunks = &chunks[..chunks.len() - 1];
120
0
                Some(suffix)
121
            }
122
0
            (_, false) => Some(read_u64(suffix) & suffix_mask),
123
        };
124
125
0
        Self {
126
0
            lead_padding,
127
0
            trailing_padding,
128
0
            prefix,
129
0
            chunks,
130
0
            suffix,
131
0
        }
132
422
    }
133
134
    /// Returns the number of leading padding bits
135
130
    pub fn lead_padding(&self) -> usize {
136
130
        self.lead_padding
137
130
    }
138
139
    /// Returns the number of trailing padding bits
140
0
    pub fn trailing_padding(&self) -> usize {
141
0
        self.trailing_padding
142
0
    }
143
144
    /// Returns the prefix, if any
145
0
    pub fn prefix(&self) -> Option<u64> {
146
0
        self.prefix
147
0
    }
148
149
    /// Returns the suffix, if any
150
0
    pub fn suffix(&self) -> Option<u64> {
151
0
        self.suffix
152
0
    }
153
154
    /// Returns reference to the chunks
155
0
    pub fn chunks(&self) -> &'a [u64] {
156
0
        self.chunks
157
0
    }
158
159
    /// Returns an iterator over the chunks
160
422
    pub fn iter(&self) -> UnalignedBitChunkIterator<'a> {
161
422
        self.prefix
162
422
            .into_iter()
163
422
            .chain(self.chunks.iter().cloned())
164
422
            .chain(self.suffix)
165
422
    }
166
167
    /// Counts the number of ones
168
292
    pub fn count_ones(&self) -> usize {
169
292
        self.iter().map(|x| x.count_ones() as usize).sum()
170
292
    }
171
}
172
173
/// Iterator over an [`UnalignedBitChunk`]
174
pub type UnalignedBitChunkIterator<'a> = std::iter::Chain<
175
    std::iter::Chain<std::option::IntoIter<u64>, std::iter::Cloned<std::slice::Iter<'a, u64>>>,
176
    std::option::IntoIter<u64>,
177
>;
178
179
/// Reads up to the first 8 bytes of `input` as a little-endian `u64`.
///
/// Inputs shorter than 8 bytes are zero-extended in the high-order bytes;
/// any bytes past the eighth are ignored.
#[inline]
fn read_u64(input: &[u8]) -> u64 {
    let len = input.len().min(8);
    let mut buf = [0_u8; 8];
    // `copy_from_slice` panics unless source and destination have the same
    // length, so the source has to be truncated as well as the destination
    buf[..len].copy_from_slice(&input[..len]);
    u64::from_le_bytes(buf)
}
186
187
/// Builds a mask that clears the lowest `lead_padding` bits of a `u64` and
/// keeps every bit above them.
#[inline]
fn compute_prefix_mask(lead_padding: usize) -> u64 {
    u64::MAX << lead_padding
}
191
192
/// Computes the mask for the final 64-bit word of a range of `len` bits that
/// starts `lead_padding` bits into its first word.
///
/// Returns `(mask, trailing_padding)`: `mask` keeps the bits of the final word
/// that belong to the range, and `trailing_padding` is the number of unused
/// high bits in that word (zero when the range ends on a word boundary).
#[inline]
fn compute_suffix_mask(len: usize, lead_padding: usize) -> (u64, usize) {
    match (len + lead_padding) % 64 {
        // The range ends exactly on a word boundary: keep the whole word
        0 => (u64::MAX, 0),
        used_bits => ((1_u64 << used_bits) - 1, 64 - used_bits),
    }
}
204
205
/// Iterates over an arbitrarily aligned byte buffer
///
/// Yields an iterator of u64, and a remainder. The first byte in the buffer
/// will be the least significant byte in output u64
#[derive(Debug)]
pub struct BitChunks<'a> {
    /// bytes of the input buffer, starting at the byte that contains the first bit
    buffer: &'a [u8],
    /// offset inside a byte, guaranteed to be between 0 and 7 (inclusive)
    bit_offset: usize,
    /// number of complete u64 chunks
    chunk_len: usize,
    /// number of remaining bits, guaranteed to be between 0 and 63 (inclusive)
    remainder_len: usize,
}

impl<'a> BitChunks<'a> {
    /// Create a new [`BitChunks`] from a byte array, and an offset and length in bits
    ///
    /// # Panics
    ///
    /// Panics if `offset + len` overflows `usize`, or if `buffer` is too short
    /// to contain the bit range `offset..offset + len`.
    pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
        let end_bit = offset
            .checked_add(len)
            .expect("offset + len overflows usize");
        // Both sides are byte counts. The chunk and remainder readers access
        // the buffer through raw pointers, so an under-sized buffer must be
        // rejected here rather than read out of bounds later.
        assert!(
            end_bit.div_ceil(8) <= buffer.len(),
            "offset + len out of bounds"
        );

        let byte_offset = offset / 8;
        let bit_offset = offset % 8;

        // number of complete u64 chunks
        let chunk_len = len / 64;
        // number of remaining bits
        let remainder_len = len % 64;

        BitChunks::<'a> {
            buffer: &buffer[byte_offset..],
            bit_offset,
            chunk_len,
            remainder_len,
        }
    }
}
242
243
/// Iterator over chunks of 64 bits represented as an u64
///
/// Created by [`BitChunks::iter`]; yields only the complete 64-bit chunks and
/// never the remainder bits.
#[derive(Debug)]
pub struct BitChunkIterator<'a> {
    /// bytes the chunks are read from
    buffer: &'a [u8],
    /// bit offset of the first chunk inside the first byte of `buffer`
    bit_offset: usize,
    /// total number of complete u64 chunks
    chunk_len: usize,
    /// index of the next chunk to yield
    index: usize,
}
251
252
impl<'a> BitChunks<'a> {
253
    /// Returns the number of remaining bits, guaranteed to be between 0 and 63 (inclusive)
254
    #[inline]
255
0
    pub const fn remainder_len(&self) -> usize {
256
0
        self.remainder_len
257
0
    }
258
259
    /// Returns the number of chunks
260
    #[inline]
261
    pub const fn chunk_len(&self) -> usize {
262
        self.chunk_len
263
    }
264
265
    /// Returns the bitmask of remaining bits
266
    #[inline]
267
312
    pub fn remainder_bits(&self) -> u64 {
268
312
        let bit_len = self.remainder_len;
269
312
        if bit_len == 0 {
270
28
            0
271
        } else {
272
284
            let bit_offset = self.bit_offset;
273
            // number of bytes to read
274
            // might be one more than sizeof(u64) if the offset is in the middle of a byte
275
284
            let byte_len = ceil(bit_len + bit_offset, 8);
276
            // pointer to remainder bytes after all complete chunks
277
284
            let base = unsafe {
278
284
                self.buffer
279
284
                    .as_ptr()
280
284
                    .add(self.chunk_len * std::mem::size_of::<u64>())
281
            };
282
283
284
            let mut bits = unsafe { std::ptr::read(base) } as u64 >> bit_offset;
284
284
            for 
i10
in 1..byte_len {
285
10
                let byte = unsafe { std::ptr::read(base.add(i)) };
286
10
                bits |= (byte as u64) << (i * 8 - bit_offset);
287
10
            }
288
289
284
            bits & ((1 << bit_len) - 1)
290
        }
291
312
    }
292
293
    /// Returns an iterator over chunks of 64 bits represented as an u64
294
    #[inline]
295
312
    pub const fn iter(&self) -> BitChunkIterator<'a> {
296
312
        BitChunkIterator::<'a> {
297
312
            buffer: self.buffer,
298
312
            bit_offset: self.bit_offset,
299
312
            chunk_len: self.chunk_len,
300
312
            index: 0,
301
312
        }
302
312
    }
303
304
    /// Returns an iterator over chunks of 64 bits, with the remaining bits zero padded to 64-bits
305
    #[inline]
306
312
    pub fn iter_padded(&self) -> impl Iterator<Item = u64> + 'a {
307
312
        self.iter().chain(std::iter::once(self.remainder_bits()))
308
312
    }
309
}
310
311
impl<'a> IntoIterator for BitChunks<'a> {
312
    type Item = u64;
313
    type IntoIter = BitChunkIterator<'a>;
314
315
0
    fn into_iter(self) -> Self::IntoIter {
316
0
        self.iter()
317
0
    }
318
}
319
320
impl Iterator for BitChunkIterator<'_> {
321
    type Item = u64;
322
323
    #[inline]
324
312
    fn next(&mut self) -> Option<u64> {
325
312
        let index = self.index;
326
312
        if index >= self.chunk_len {
327
312
            return None;
328
0
        }
329
330
        // cast to *const u64 should be fine since we are using read_unaligned below
331
        #[allow(clippy::cast_ptr_alignment)]
332
0
        let raw_data = self.buffer.as_ptr() as *const u64;
333
334
        // bit-packed buffers are stored starting with the least-significant byte first
335
        // so when reading as u64 on a big-endian machine, the bytes need to be swapped
336
0
        let current = unsafe { std::ptr::read_unaligned(raw_data.add(index)).to_le() };
337
338
0
        let bit_offset = self.bit_offset;
339
340
0
        let combined = if bit_offset == 0 {
341
0
            current
342
        } else {
343
            // the constructor ensures that bit_offset is in 0..8
344
            // that means we need to read at most one additional byte to fill in the high bits
345
0
            let next =
346
0
                unsafe { std::ptr::read_unaligned(raw_data.add(index + 1) as *const u8) as u64 };
347
348
0
            (current >> bit_offset) | (next << (64 - bit_offset))
349
        };
350
351
0
        self.index = index + 1;
352
353
0
        Some(combined)
354
312
    }
355
356
    #[inline]
357
0
    fn size_hint(&self) -> (usize, Option<usize>) {
358
0
        (
359
0
            self.chunk_len - self.index,
360
0
            Some(self.chunk_len - self.index),
361
0
        )
362
0
    }
363
}
364
365
impl ExactSizeIterator for BitChunkIterator<'_> {
366
    #[inline]
367
    fn len(&self) -> usize {
368
        self.chunk_len - self.index
369
    }
370
}
371
372
#[cfg(test)]
373
mod tests {
374
    use rand::distr::uniform::UniformSampler;
375
    use rand::distr::uniform::UniformUsize;
376
    use rand::prelude::*;
377
    use rand::rng;
378
379
    use crate::buffer::Buffer;
380
    use crate::util::bit_chunk_iterator::UnalignedBitChunk;
381
382
    #[test]
383
    fn test_iter_aligned() {
384
        let input: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7];
385
        let buffer: Buffer = Buffer::from(input);
386
387
        let bitchunks = buffer.bit_chunks(0, 64);
388
        let result = bitchunks.into_iter().collect::<Vec<_>>();
389
390
        assert_eq!(vec![0x0706050403020100], result);
391
    }
392
393
    #[test]
394
    fn test_iter_unaligned() {
395
        let input: &[u8] = &[
396
            0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000,
397
            0b01000000, 0b11111111,
398
        ];
399
        let buffer: Buffer = Buffer::from(input);
400
401
        let bitchunks = buffer.bit_chunks(4, 64);
402
403
        assert_eq!(0, bitchunks.remainder_len());
404
        assert_eq!(0, bitchunks.remainder_bits());
405
406
        let result = bitchunks.into_iter().collect::<Vec<_>>();
407
408
        assert_eq!(
409
            vec![0b1111010000000010000000010000000010000000010000000010000000010000],
410
            result
411
        );
412
    }
413
414
    #[test]
415
    fn test_iter_unaligned_remainder_1_byte() {
416
        let input: &[u8] = &[
417
            0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000,
418
            0b01000000, 0b11111111,
419
        ];
420
        let buffer: Buffer = Buffer::from(input);
421
422
        let bitchunks = buffer.bit_chunks(4, 66);
423
424
        assert_eq!(2, bitchunks.remainder_len());
425
        assert_eq!(0b00000011, bitchunks.remainder_bits());
426
427
        let result = bitchunks.into_iter().collect::<Vec<_>>();
428
429
        assert_eq!(
430
            vec![0b1111010000000010000000010000000010000000010000000010000000010000],
431
            result
432
        );
433
    }
434
435
    #[test]
436
    fn test_iter_unaligned_remainder_bits_across_bytes() {
437
        let input: &[u8] = &[0b00111111, 0b11111100];
438
        let buffer: Buffer = Buffer::from(input);
439
440
        // remainder contains bits from both bytes
441
        // result should be the highest 2 bits from first byte followed by lowest 5 bits of second byte
442
        let bitchunks = buffer.bit_chunks(6, 7);
443
444
        assert_eq!(7, bitchunks.remainder_len());
445
        assert_eq!(0b1110000, bitchunks.remainder_bits());
446
    }
447
448
    #[test]
449
    fn test_iter_unaligned_remainder_bits_large() {
450
        let input: &[u8] = &[
451
            0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111,
452
            0b00000000, 0b11111111,
453
        ];
454
        let buffer: Buffer = Buffer::from(input);
455
456
        let bitchunks = buffer.bit_chunks(2, 63);
457
458
        assert_eq!(63, bitchunks.remainder_len());
459
        assert_eq!(
460
            0b100_0000_0011_1111_1100_0000_0011_1111_1100_0000_0011_1111_1100_0000_0011_1111,
461
            bitchunks.remainder_bits()
462
        );
463
    }
464
465
    #[test]
466
    fn test_iter_remainder_out_of_bounds() {
467
        // allocating a full page should trigger a fault when reading out of bounds
468
        const ALLOC_SIZE: usize = 4 * 1024;
469
        let input = vec![0xFF_u8; ALLOC_SIZE];
470
471
        let buffer: Buffer = Buffer::from_vec(input);
472
473
        let bitchunks = buffer.bit_chunks(57, ALLOC_SIZE * 8 - 57);
474
475
        assert_eq!(u64::MAX, bitchunks.iter().last().unwrap());
476
        assert_eq!(0x7F, bitchunks.remainder_bits());
477
    }
478
479
    #[test]
480
    #[allow(clippy::assertions_on_constants)]
481
    fn test_unaligned_bit_chunk_iterator() {
482
        let buffer = Buffer::from(&[0xFF; 5]);
483
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 40);
484
485
        assert!(unaligned.chunks().is_empty()); // Less than 128 elements
486
        assert_eq!(unaligned.lead_padding(), 0);
487
        assert_eq!(unaligned.trailing_padding(), 24);
488
        // 40x 1 bit then 24x 0 bits
489
        assert_eq!(
490
            unaligned.prefix(),
491
            Some(0b0000000000000000000000001111111111111111111111111111111111111111)
492
        );
493
        assert_eq!(unaligned.suffix(), None);
494
495
        let buffer = buffer.slice(1);
496
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 32);
497
498
        assert!(unaligned.chunks().is_empty()); // Less than 128 elements
499
        assert_eq!(unaligned.lead_padding(), 0);
500
        assert_eq!(unaligned.trailing_padding(), 32);
501
        // 32x 1 bit then 32x 0 bits
502
        assert_eq!(
503
            unaligned.prefix(),
504
            Some(0b0000000000000000000000000000000011111111111111111111111111111111)
505
        );
506
        assert_eq!(unaligned.suffix(), None);
507
508
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 5, 27);
509
510
        assert!(unaligned.chunks().is_empty()); // Less than 128 elements
511
        assert_eq!(unaligned.lead_padding(), 5); // 5 % 8 == 5
512
        assert_eq!(unaligned.trailing_padding(), 32);
513
        // 5x 0 bit, 27x 1 bit then 32x 0 bits
514
        assert_eq!(
515
            unaligned.prefix(),
516
            Some(0b0000000000000000000000000000000011111111111111111111111111100000)
517
        );
518
        assert_eq!(unaligned.suffix(), None);
519
520
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 12, 20);
521
522
        assert!(unaligned.chunks().is_empty()); // Less than 128 elements
523
        assert_eq!(unaligned.lead_padding(), 4); // 12 % 8 == 4
524
        assert_eq!(unaligned.trailing_padding(), 40);
525
        // 4x 0 bit, 20x 1 bit then 40x 0 bits
526
        assert_eq!(
527
            unaligned.prefix(),
528
            Some(0b0000000000000000000000000000000000000000111111111111111111110000)
529
        );
530
        assert_eq!(unaligned.suffix(), None);
531
532
        let buffer = Buffer::from(&[0xFF; 14]);
533
534
        // Verify buffer alignment
535
        let (prefix, aligned, suffix) = unsafe { buffer.as_slice().align_to::<u64>() };
536
        assert_eq!(prefix.len(), 0);
537
        assert_eq!(aligned.len(), 1);
538
        assert_eq!(suffix.len(), 6);
539
540
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 112);
541
542
        assert!(unaligned.chunks().is_empty()); // Less than 128 elements
543
        assert_eq!(unaligned.lead_padding(), 0); // No offset and buffer aligned on 64-bit boundary
544
        assert_eq!(unaligned.trailing_padding(), 16);
545
        assert_eq!(unaligned.prefix(), Some(u64::MAX));
546
        assert_eq!(unaligned.suffix(), Some((1 << 48) - 1));
547
548
        let buffer = Buffer::from(&[0xFF; 16]);
549
550
        // Verify buffer alignment
551
        let (prefix, aligned, suffix) = unsafe { buffer.as_slice().align_to::<u64>() };
552
        assert_eq!(prefix.len(), 0);
553
        assert_eq!(aligned.len(), 2);
554
        assert_eq!(suffix.len(), 0);
555
556
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 128);
557
558
        assert_eq!(unaligned.prefix(), Some(u64::MAX));
559
        assert_eq!(unaligned.suffix(), Some(u64::MAX));
560
        assert!(unaligned.chunks().is_empty()); // Exactly 128 elements
561
562
        let buffer = Buffer::from(&[0xFF; 64]);
563
564
        // Verify buffer alignment
565
        let (prefix, aligned, suffix) = unsafe { buffer.as_slice().align_to::<u64>() };
566
        assert_eq!(prefix.len(), 0);
567
        assert_eq!(aligned.len(), 8);
568
        assert_eq!(suffix.len(), 0);
569
570
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 512);
571
572
        // Buffer is completely aligned and larger than 128 elements -> all in chunks array
573
        assert_eq!(unaligned.suffix(), None);
574
        assert_eq!(unaligned.prefix(), None);
575
        assert_eq!(unaligned.chunks(), [u64::MAX; 8].as_slice());
576
        assert_eq!(unaligned.lead_padding(), 0);
577
        assert_eq!(unaligned.trailing_padding(), 0);
578
579
        let buffer = buffer.slice(1); // Offset buffer 1 byte off 64-bit alignment
580
581
        // Verify buffer alignment
582
        let (prefix, aligned, suffix) = unsafe { buffer.as_slice().align_to::<u64>() };
583
        assert_eq!(prefix.len(), 7);
584
        assert_eq!(aligned.len(), 7);
585
        assert_eq!(suffix.len(), 0);
586
587
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 504);
588
589
        // Need a prefix with 1 byte of lead padding to bring the buffer into alignment
590
        assert_eq!(unaligned.prefix(), Some(u64::MAX - 0xFF));
591
        assert_eq!(unaligned.suffix(), None);
592
        assert_eq!(unaligned.chunks(), [u64::MAX; 7].as_slice());
593
        assert_eq!(unaligned.lead_padding(), 8);
594
        assert_eq!(unaligned.trailing_padding(), 0);
595
596
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 17, 300);
597
598
        // Out of 64-bit alignment by 8 bits from buffer, and 17 bits from provided offset
599
        //   => need 8 + 17 = 25 bits of lead padding + 39 bits in prefix
600
        //
601
        // This leaves 300 - 39 = 261 bits remaining
602
        //   => 4x 64-bit aligned 64-bit chunks + 5 remaining bits
603
        //   => trailing padding of 59 bits
604
        assert_eq!(unaligned.lead_padding(), 25);
605
        assert_eq!(unaligned.trailing_padding(), 59);
606
        assert_eq!(unaligned.prefix(), Some(u64::MAX - (1 << 25) + 1));
607
        assert_eq!(unaligned.suffix(), Some(0b11111));
608
        assert_eq!(unaligned.chunks(), [u64::MAX; 4].as_slice());
609
610
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 17, 0);
611
612
        assert_eq!(unaligned.prefix(), None);
613
        assert_eq!(unaligned.suffix(), None);
614
        assert!(unaligned.chunks().is_empty());
615
        assert_eq!(unaligned.lead_padding(), 0);
616
        assert_eq!(unaligned.trailing_padding(), 0);
617
618
        let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 17, 1);
619
620
        assert_eq!(unaligned.prefix(), Some(2));
621
        assert_eq!(unaligned.suffix(), None);
622
        assert!(unaligned.chunks().is_empty());
623
        assert_eq!(unaligned.lead_padding(), 1);
624
        assert_eq!(unaligned.trailing_padding(), 62);
625
    }
626
627
    #[test]
628
    #[cfg_attr(miri, ignore)]
629
    fn fuzz_unaligned_bit_chunk_iterator() {
630
        let mut rng = rng();
631
632
        let uusize = UniformUsize::new(usize::MIN, usize::MAX).unwrap();
633
        for _ in 0..100 {
634
            let mask_len = rng.random_range(0..1024);
635
            let bools: Vec<_> = std::iter::from_fn(|| Some(rng.random()))
636
                .take(mask_len)
637
                .collect();
638
639
            let buffer = Buffer::from_iter(bools.iter().cloned());
640
641
            let max_offset = 64.min(mask_len);
642
            let offset = uusize.sample(&mut rng).checked_rem(max_offset).unwrap_or(0);
643
644
            let max_truncate = 128.min(mask_len - offset);
645
            let truncate = uusize
646
                .sample(&mut rng)
647
                .checked_rem(max_truncate)
648
                .unwrap_or(0);
649
650
            let unaligned =
651
                UnalignedBitChunk::new(buffer.as_slice(), offset, mask_len - offset - truncate);
652
653
            let bool_slice = &bools[offset..mask_len - truncate];
654
655
            let count = unaligned.count_ones();
656
            let expected_count = bool_slice.iter().filter(|x| **x).count();
657
658
            assert_eq!(count, expected_count);
659
660
            let collected: Vec<u64> = unaligned.iter().collect();
661
662
            let get_bit = |idx: usize| -> bool {
663
                let padded_index = idx + unaligned.lead_padding();
664
                let byte_idx = padded_index / 64;
665
                let bit_idx = padded_index % 64;
666
                (collected[byte_idx] & (1 << bit_idx)) != 0
667
            };
668
669
            for (idx, b) in bool_slice.iter().enumerate() {
670
                assert_eq!(*b, get_bit(idx))
671
            }
672
        }
673
    }
674
}