/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/util/bit_chunk_iterator.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Types for iterating over bitmasks in 64-bit chunks |
19 | | |
20 | | use crate::util::bit_util::ceil; |
21 | | use std::fmt::Debug; |
22 | | |
23 | | /// Iterates over an arbitrarily aligned byte buffer |
24 | | /// |
25 | | /// Yields an iterator of aligned u64, along with the leading and trailing |
26 | | /// u64 necessary to align the buffer to an 8-byte boundary |
27 | | /// |
28 | | /// This is unlike [`BitChunkIterator`] which only exposes a trailing u64, |
29 | | /// and consequently has to perform more work for each read |
30 | | #[derive(Debug)] |
31 | | pub struct UnalignedBitChunk<'a> { |
32 | | lead_padding: usize, |
33 | | trailing_padding: usize, |
34 | | |
35 | | prefix: Option<u64>, |
36 | | chunks: &'a [u64], |
37 | | suffix: Option<u64>, |
38 | | } |
39 | | |
40 | | impl<'a> UnalignedBitChunk<'a> { |
41 | | /// Create a new [`UnalignedBitChunk`] from a byte array, and an offset and length in bits |
42 | 422 | pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self { |
43 | 422 | if len == 0 { |
44 | 12 | return Self { |
45 | 12 | lead_padding: 0, |
46 | 12 | trailing_padding: 0, |
47 | 12 | prefix: None, |
48 | 12 | chunks: &[], |
49 | 12 | suffix: None, |
50 | 12 | }; |
51 | 410 | } |
52 | | |
53 | 410 | let byte_offset = offset / 8; |
54 | 410 | let offset_padding = offset % 8; |
55 | | |
56 | 410 | let bytes_len = (len + offset_padding).div_ceil(8); |
57 | 410 | let buffer = &buffer[byte_offset..byte_offset + bytes_len]; |
58 | | |
59 | 410 | let prefix_mask = compute_prefix_mask(offset_padding); |
60 | | |
61 | | // If less than 8 bytes, read into prefix |
62 | 410 | if buffer.len() <= 8 { |
63 | 410 | let (suffix_mask, trailing_padding) = compute_suffix_mask(len, offset_padding); |
64 | 410 | let prefix = read_u64(buffer) & suffix_mask & prefix_mask; |
65 | | |
66 | 410 | return Self { |
67 | 410 | lead_padding: offset_padding, |
68 | 410 | trailing_padding, |
69 | 410 | prefix: Some(prefix), |
70 | 410 | chunks: &[], |
71 | 410 | suffix: None, |
72 | 410 | }; |
73 | 0 | } |
74 | | |
75 | | // If less than 16 bytes, read into prefix and suffix |
76 | 0 | if buffer.len() <= 16 { |
77 | 0 | let (suffix_mask, trailing_padding) = compute_suffix_mask(len, offset_padding); |
78 | 0 | let prefix = read_u64(&buffer[..8]) & prefix_mask; |
79 | 0 | let suffix = read_u64(&buffer[8..]) & suffix_mask; |
80 | | |
81 | 0 | return Self { |
82 | 0 | lead_padding: offset_padding, |
83 | 0 | trailing_padding, |
84 | 0 | prefix: Some(prefix), |
85 | 0 | chunks: &[], |
86 | 0 | suffix: Some(suffix), |
87 | 0 | }; |
88 | 0 | } |
89 | | |
90 | | // Read into prefix and suffix as needed |
91 | 0 | let (prefix, mut chunks, suffix) = unsafe { buffer.align_to::<u64>() }; |
92 | 0 | assert!( |
93 | 0 | prefix.len() < 8 && suffix.len() < 8, |
94 | 0 | "align_to did not return largest possible aligned slice" |
95 | | ); |
96 | | |
97 | 0 | let (alignment_padding, prefix) = match (offset_padding, prefix.is_empty()) { |
98 | 0 | (0, true) => (0, None), |
99 | | (_, true) => { |
100 | 0 | let prefix = chunks[0] & prefix_mask; |
101 | 0 | chunks = &chunks[1..]; |
102 | 0 | (0, Some(prefix)) |
103 | | } |
104 | | (_, false) => { |
105 | 0 | let alignment_padding = (8 - prefix.len()) * 8; |
106 | | |
107 | 0 | let prefix = (read_u64(prefix) & prefix_mask) << alignment_padding; |
108 | 0 | (alignment_padding, Some(prefix)) |
109 | | } |
110 | | }; |
111 | | |
112 | 0 | let lead_padding = offset_padding + alignment_padding; |
113 | 0 | let (suffix_mask, trailing_padding) = compute_suffix_mask(len, lead_padding); |
114 | | |
115 | 0 | let suffix = match (trailing_padding, suffix.is_empty()) { |
116 | 0 | (0, _) => None, |
117 | | (_, true) => { |
118 | 0 | let suffix = chunks[chunks.len() - 1] & suffix_mask; |
119 | 0 | chunks = &chunks[..chunks.len() - 1]; |
120 | 0 | Some(suffix) |
121 | | } |
122 | 0 | (_, false) => Some(read_u64(suffix) & suffix_mask), |
123 | | }; |
124 | | |
125 | 0 | Self { |
126 | 0 | lead_padding, |
127 | 0 | trailing_padding, |
128 | 0 | prefix, |
129 | 0 | chunks, |
130 | 0 | suffix, |
131 | 0 | } |
132 | 422 | } |
133 | | |
134 | | /// Returns the number of leading padding bits |
135 | 130 | pub fn lead_padding(&self) -> usize { |
136 | 130 | self.lead_padding |
137 | 130 | } |
138 | | |
139 | | /// Returns the number of trailing padding bits |
140 | 0 | pub fn trailing_padding(&self) -> usize { |
141 | 0 | self.trailing_padding |
142 | 0 | } |
143 | | |
144 | | /// Returns the prefix, if any |
145 | 0 | pub fn prefix(&self) -> Option<u64> { |
146 | 0 | self.prefix |
147 | 0 | } |
148 | | |
149 | | /// Returns the suffix, if any |
150 | 0 | pub fn suffix(&self) -> Option<u64> { |
151 | 0 | self.suffix |
152 | 0 | } |
153 | | |
154 | | /// Returns reference to the chunks |
155 | 0 | pub fn chunks(&self) -> &'a [u64] { |
156 | 0 | self.chunks |
157 | 0 | } |
158 | | |
159 | | /// Returns an iterator over the chunks |
160 | 422 | pub fn iter(&self) -> UnalignedBitChunkIterator<'a> { |
161 | 422 | self.prefix |
162 | 422 | .into_iter() |
163 | 422 | .chain(self.chunks.iter().cloned()) |
164 | 422 | .chain(self.suffix) |
165 | 422 | } |
166 | | |
167 | | /// Counts the number of ones |
168 | 292 | pub fn count_ones(&self) -> usize { |
169 | 292 | self.iter().map(|x| x.count_ones() as usize).sum() |
170 | 292 | } |
171 | | } |
172 | | |
173 | | /// Iterator over an [`UnalignedBitChunk`] |
174 | | pub type UnalignedBitChunkIterator<'a> = std::iter::Chain< |
175 | | std::iter::Chain<std::option::IntoIter<u64>, std::iter::Cloned<std::slice::Iter<'a, u64>>>, |
176 | | std::option::IntoIter<u64>, |
177 | | >; |
178 | | |
179 | | #[inline] |
180 | 410 | fn read_u64(input: &[u8]) -> u64 { |
181 | 410 | let len = input.len().min(8); |
182 | 410 | let mut buf = [0_u8; 8]; |
183 | 410 | buf[..len].copy_from_slice(input); |
184 | 410 | u64::from_le_bytes(buf) |
185 | 410 | } |
186 | | |
187 | | #[inline] |
188 | 410 | fn compute_prefix_mask(lead_padding: usize) -> u64 { |
189 | 410 | !((1 << lead_padding) - 1) |
190 | 410 | } |
191 | | |
192 | | #[inline] |
193 | 410 | fn compute_suffix_mask(len: usize, lead_padding: usize) -> (u64, usize) { |
194 | 410 | let trailing_bits = (len + lead_padding) % 64; |
195 | | |
196 | 410 | if trailing_bits == 0 { |
197 | 0 | return (u64::MAX, 0); |
198 | 410 | } |
199 | | |
200 | 410 | let trailing_padding = 64 - trailing_bits; |
201 | 410 | let suffix_mask = (1 << trailing_bits) - 1; |
202 | 410 | (suffix_mask, trailing_padding) |
203 | 410 | } |
204 | | |
205 | | /// Iterates over an arbitrarily aligned byte buffer |
206 | | /// |
207 | | /// Yields an iterator of u64, and a remainder. The first byte in the buffer |
208 | | /// will be the least significant byte in output u64 |
209 | | /// |
210 | | #[derive(Debug)] |
211 | | pub struct BitChunks<'a> { |
212 | | buffer: &'a [u8], |
213 | | /// offset inside a byte, guaranteed to be between 0 and 7 (inclusive) |
214 | | bit_offset: usize, |
215 | | /// number of complete u64 chunks |
216 | | chunk_len: usize, |
217 | | /// number of remaining bits, guaranteed to be between 0 and 63 (inclusive) |
218 | | remainder_len: usize, |
219 | | } |
220 | | |
221 | | impl<'a> BitChunks<'a> { |
222 | | /// Create a new [`BitChunks`] from a byte array, and an offset and length in bits |
223 | 312 | pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self { |
224 | 312 | assert!(ceil(offset + len, 8) <= buffer.len() * 8); |
225 | | |
226 | 312 | let byte_offset = offset / 8; |
227 | 312 | let bit_offset = offset % 8; |
228 | | |
229 | | // number of complete u64 chunks |
230 | 312 | let chunk_len = len / 64; |
231 | | // number of remaining bits |
232 | 312 | let remainder_len = len % 64; |
233 | | |
234 | 312 | BitChunks::<'a> { |
235 | 312 | buffer: &buffer[byte_offset..], |
236 | 312 | bit_offset, |
237 | 312 | chunk_len, |
238 | 312 | remainder_len, |
239 | 312 | } |
240 | 312 | } |
241 | | } |
242 | | |
243 | | /// Iterator over chunks of 64 bits represented as an u64 |
244 | | #[derive(Debug)] |
245 | | pub struct BitChunkIterator<'a> { |
246 | | buffer: &'a [u8], |
247 | | bit_offset: usize, |
248 | | chunk_len: usize, |
249 | | index: usize, |
250 | | } |
251 | | |
252 | | impl<'a> BitChunks<'a> { |
253 | | /// Returns the number of remaining bits, guaranteed to be between 0 and 63 (inclusive) |
254 | | #[inline] |
255 | 0 | pub const fn remainder_len(&self) -> usize { |
256 | 0 | self.remainder_len |
257 | 0 | } |
258 | | |
259 | | /// Returns the number of chunks |
260 | | #[inline] |
261 | | pub const fn chunk_len(&self) -> usize { |
262 | | self.chunk_len |
263 | | } |
264 | | |
265 | | /// Returns the bitmask of remaining bits |
266 | | #[inline] |
267 | 312 | pub fn remainder_bits(&self) -> u64 { |
268 | 312 | let bit_len = self.remainder_len; |
269 | 312 | if bit_len == 0 { |
270 | 28 | 0 |
271 | | } else { |
272 | 284 | let bit_offset = self.bit_offset; |
273 | | // number of bytes to read |
274 | | // might be one more than sizeof(u64) if the offset is in the middle of a byte |
275 | 284 | let byte_len = ceil(bit_len + bit_offset, 8); |
276 | | // pointer to remainder bytes after all complete chunks |
277 | 284 | let base = unsafe { |
278 | 284 | self.buffer |
279 | 284 | .as_ptr() |
280 | 284 | .add(self.chunk_len * std::mem::size_of::<u64>()) |
281 | | }; |
282 | | |
283 | 284 | let mut bits = unsafe { std::ptr::read(base) } as u64 >> bit_offset; |
284 | 284 | for i in 1..byte_len {
285 | 10 | let byte = unsafe { std::ptr::read(base.add(i)) }; |
286 | 10 | bits |= (byte as u64) << (i * 8 - bit_offset); |
287 | 10 | } |
288 | | |
289 | 284 | bits & ((1 << bit_len) - 1) |
290 | | } |
291 | 312 | } |
292 | | |
293 | | /// Returns an iterator over chunks of 64 bits represented as an u64 |
294 | | #[inline] |
295 | 312 | pub const fn iter(&self) -> BitChunkIterator<'a> { |
296 | 312 | BitChunkIterator::<'a> { |
297 | 312 | buffer: self.buffer, |
298 | 312 | bit_offset: self.bit_offset, |
299 | 312 | chunk_len: self.chunk_len, |
300 | 312 | index: 0, |
301 | 312 | } |
302 | 312 | } |
303 | | |
304 | | /// Returns an iterator over chunks of 64 bits, with the remaining bits zero padded to 64-bits |
305 | | #[inline] |
306 | 312 | pub fn iter_padded(&self) -> impl Iterator<Item = u64> + 'a { |
307 | 312 | self.iter().chain(std::iter::once(self.remainder_bits())) |
308 | 312 | } |
309 | | } |
310 | | |
311 | | impl<'a> IntoIterator for BitChunks<'a> { |
312 | | type Item = u64; |
313 | | type IntoIter = BitChunkIterator<'a>; |
314 | | |
315 | 0 | fn into_iter(self) -> Self::IntoIter { |
316 | 0 | self.iter() |
317 | 0 | } |
318 | | } |
319 | | |
320 | | impl Iterator for BitChunkIterator<'_> { |
321 | | type Item = u64; |
322 | | |
323 | | #[inline] |
324 | 312 | fn next(&mut self) -> Option<u64> { |
325 | 312 | let index = self.index; |
326 | 312 | if index >= self.chunk_len { |
327 | 312 | return None; |
328 | 0 | } |
329 | | |
330 | | // cast to *const u64 should be fine since we are using read_unaligned below |
331 | | #[allow(clippy::cast_ptr_alignment)] |
332 | 0 | let raw_data = self.buffer.as_ptr() as *const u64; |
333 | | |
334 | | // bit-packed buffers are stored starting with the least-significant byte first |
335 | | // so when reading as u64 on a big-endian machine, the bytes need to be swapped |
336 | 0 | let current = unsafe { std::ptr::read_unaligned(raw_data.add(index)).to_le() }; |
337 | | |
338 | 0 | let bit_offset = self.bit_offset; |
339 | | |
340 | 0 | let combined = if bit_offset == 0 { |
341 | 0 | current |
342 | | } else { |
343 | | // the constructor ensures that bit_offset is in 0..8 |
344 | | // that means we need to read at most one additional byte to fill in the high bits |
345 | 0 | let next = |
346 | 0 | unsafe { std::ptr::read_unaligned(raw_data.add(index + 1) as *const u8) as u64 }; |
347 | | |
348 | 0 | (current >> bit_offset) | (next << (64 - bit_offset)) |
349 | | }; |
350 | | |
351 | 0 | self.index = index + 1; |
352 | | |
353 | 0 | Some(combined) |
354 | 312 | } |
355 | | |
356 | | #[inline] |
357 | 0 | fn size_hint(&self) -> (usize, Option<usize>) { |
358 | 0 | ( |
359 | 0 | self.chunk_len - self.index, |
360 | 0 | Some(self.chunk_len - self.index), |
361 | 0 | ) |
362 | 0 | } |
363 | | } |
364 | | |
365 | | impl ExactSizeIterator for BitChunkIterator<'_> { |
366 | | #[inline] |
367 | | fn len(&self) -> usize { |
368 | | self.chunk_len - self.index |
369 | | } |
370 | | } |
371 | | |
372 | | #[cfg(test)] |
373 | | mod tests { |
374 | | use rand::distr::uniform::UniformSampler; |
375 | | use rand::distr::uniform::UniformUsize; |
376 | | use rand::prelude::*; |
377 | | use rand::rng; |
378 | | |
379 | | use crate::buffer::Buffer; |
380 | | use crate::util::bit_chunk_iterator::UnalignedBitChunk; |
381 | | |
382 | | #[test] |
383 | | fn test_iter_aligned() { |
384 | | let input: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7]; |
385 | | let buffer: Buffer = Buffer::from(input); |
386 | | |
387 | | let bitchunks = buffer.bit_chunks(0, 64); |
388 | | let result = bitchunks.into_iter().collect::<Vec<_>>(); |
389 | | |
390 | | assert_eq!(vec![0x0706050403020100], result); |
391 | | } |
392 | | |
393 | | #[test] |
394 | | fn test_iter_unaligned() { |
395 | | let input: &[u8] = &[ |
396 | | 0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000, |
397 | | 0b01000000, 0b11111111, |
398 | | ]; |
399 | | let buffer: Buffer = Buffer::from(input); |
400 | | |
401 | | let bitchunks = buffer.bit_chunks(4, 64); |
402 | | |
403 | | assert_eq!(0, bitchunks.remainder_len()); |
404 | | assert_eq!(0, bitchunks.remainder_bits()); |
405 | | |
406 | | let result = bitchunks.into_iter().collect::<Vec<_>>(); |
407 | | |
408 | | assert_eq!( |
409 | | vec![0b1111010000000010000000010000000010000000010000000010000000010000], |
410 | | result |
411 | | ); |
412 | | } |
413 | | |
414 | | #[test] |
415 | | fn test_iter_unaligned_remainder_1_byte() { |
416 | | let input: &[u8] = &[ |
417 | | 0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000, |
418 | | 0b01000000, 0b11111111, |
419 | | ]; |
420 | | let buffer: Buffer = Buffer::from(input); |
421 | | |
422 | | let bitchunks = buffer.bit_chunks(4, 66); |
423 | | |
424 | | assert_eq!(2, bitchunks.remainder_len()); |
425 | | assert_eq!(0b00000011, bitchunks.remainder_bits()); |
426 | | |
427 | | let result = bitchunks.into_iter().collect::<Vec<_>>(); |
428 | | |
429 | | assert_eq!( |
430 | | vec![0b1111010000000010000000010000000010000000010000000010000000010000], |
431 | | result |
432 | | ); |
433 | | } |
434 | | |
435 | | #[test] |
436 | | fn test_iter_unaligned_remainder_bits_across_bytes() { |
437 | | let input: &[u8] = &[0b00111111, 0b11111100]; |
438 | | let buffer: Buffer = Buffer::from(input); |
439 | | |
440 | | // remainder contains bits from both bytes |
441 | | // result should be the highest 2 bits from first byte followed by lowest 5 bits of second byte |
442 | | let bitchunks = buffer.bit_chunks(6, 7); |
443 | | |
444 | | assert_eq!(7, bitchunks.remainder_len()); |
445 | | assert_eq!(0b1110000, bitchunks.remainder_bits()); |
446 | | } |
447 | | |
448 | | #[test] |
449 | | fn test_iter_unaligned_remainder_bits_large() { |
450 | | let input: &[u8] = &[ |
451 | | 0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, |
452 | | 0b00000000, 0b11111111, |
453 | | ]; |
454 | | let buffer: Buffer = Buffer::from(input); |
455 | | |
456 | | let bitchunks = buffer.bit_chunks(2, 63); |
457 | | |
458 | | assert_eq!(63, bitchunks.remainder_len()); |
459 | | assert_eq!( |
460 | | 0b100_0000_0011_1111_1100_0000_0011_1111_1100_0000_0011_1111_1100_0000_0011_1111, |
461 | | bitchunks.remainder_bits() |
462 | | ); |
463 | | } |
464 | | |
465 | | #[test] |
466 | | fn test_iter_remainder_out_of_bounds() { |
467 | | // allocating a full page should trigger a fault when reading out of bounds |
468 | | const ALLOC_SIZE: usize = 4 * 1024; |
469 | | let input = vec![0xFF_u8; ALLOC_SIZE]; |
470 | | |
471 | | let buffer: Buffer = Buffer::from_vec(input); |
472 | | |
473 | | let bitchunks = buffer.bit_chunks(57, ALLOC_SIZE * 8 - 57); |
474 | | |
475 | | assert_eq!(u64::MAX, bitchunks.iter().last().unwrap()); |
476 | | assert_eq!(0x7F, bitchunks.remainder_bits()); |
477 | | } |
478 | | |
479 | | #[test] |
480 | | #[allow(clippy::assertions_on_constants)] |
481 | | fn test_unaligned_bit_chunk_iterator() { |
482 | | let buffer = Buffer::from(&[0xFF; 5]); |
483 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 40); |
484 | | |
485 | | assert!(unaligned.chunks().is_empty()); // Less than 128 elements |
486 | | assert_eq!(unaligned.lead_padding(), 0); |
487 | | assert_eq!(unaligned.trailing_padding(), 24); |
488 | | // 24x 1 bit then 40x 0 bits |
489 | | assert_eq!( |
490 | | unaligned.prefix(), |
491 | | Some(0b0000000000000000000000001111111111111111111111111111111111111111) |
492 | | ); |
493 | | assert_eq!(unaligned.suffix(), None); |
494 | | |
495 | | let buffer = buffer.slice(1); |
496 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 32); |
497 | | |
498 | | assert!(unaligned.chunks().is_empty()); // Less than 128 elements |
499 | | assert_eq!(unaligned.lead_padding(), 0); |
500 | | assert_eq!(unaligned.trailing_padding(), 32); |
501 | | // 32x 1 bit then 32x 0 bits |
502 | | assert_eq!( |
503 | | unaligned.prefix(), |
504 | | Some(0b0000000000000000000000000000000011111111111111111111111111111111) |
505 | | ); |
506 | | assert_eq!(unaligned.suffix(), None); |
507 | | |
508 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 5, 27); |
509 | | |
510 | | assert!(unaligned.chunks().is_empty()); // Less than 128 elements |
511 | | assert_eq!(unaligned.lead_padding(), 5); // 5 % 8 == 5 |
512 | | assert_eq!(unaligned.trailing_padding(), 32); |
513 | | // 5x 0 bit, 27x 1 bit then 32x 0 bits |
514 | | assert_eq!( |
515 | | unaligned.prefix(), |
516 | | Some(0b0000000000000000000000000000000011111111111111111111111111100000) |
517 | | ); |
518 | | assert_eq!(unaligned.suffix(), None); |
519 | | |
520 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 12, 20); |
521 | | |
522 | | assert!(unaligned.chunks().is_empty()); // Less than 128 elements |
523 | | assert_eq!(unaligned.lead_padding(), 4); // 12 % 8 == 4 |
524 | | assert_eq!(unaligned.trailing_padding(), 40); |
525 | | // 4x 0 bit, 20x 1 bit then 40x 0 bits |
526 | | assert_eq!( |
527 | | unaligned.prefix(), |
528 | | Some(0b0000000000000000000000000000000000000000111111111111111111110000) |
529 | | ); |
530 | | assert_eq!(unaligned.suffix(), None); |
531 | | |
532 | | let buffer = Buffer::from(&[0xFF; 14]); |
533 | | |
534 | | // Verify buffer alignment |
535 | | let (prefix, aligned, suffix) = unsafe { buffer.as_slice().align_to::<u64>() }; |
536 | | assert_eq!(prefix.len(), 0); |
537 | | assert_eq!(aligned.len(), 1); |
538 | | assert_eq!(suffix.len(), 6); |
539 | | |
540 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 112); |
541 | | |
542 | | assert!(unaligned.chunks().is_empty()); // Less than 128 elements |
543 | | assert_eq!(unaligned.lead_padding(), 0); // No offset and buffer aligned on 64-bit boundary |
544 | | assert_eq!(unaligned.trailing_padding(), 16); |
545 | | assert_eq!(unaligned.prefix(), Some(u64::MAX)); |
546 | | assert_eq!(unaligned.suffix(), Some((1 << 48) - 1)); |
547 | | |
548 | | let buffer = Buffer::from(&[0xFF; 16]); |
549 | | |
550 | | // Verify buffer alignment |
551 | | let (prefix, aligned, suffix) = unsafe { buffer.as_slice().align_to::<u64>() }; |
552 | | assert_eq!(prefix.len(), 0); |
553 | | assert_eq!(aligned.len(), 2); |
554 | | assert_eq!(suffix.len(), 0); |
555 | | |
556 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 128); |
557 | | |
558 | | assert_eq!(unaligned.prefix(), Some(u64::MAX)); |
559 | | assert_eq!(unaligned.suffix(), Some(u64::MAX)); |
560 | | assert!(unaligned.chunks().is_empty()); // Exactly 128 elements |
561 | | |
562 | | let buffer = Buffer::from(&[0xFF; 64]); |
563 | | |
564 | | // Verify buffer alignment |
565 | | let (prefix, aligned, suffix) = unsafe { buffer.as_slice().align_to::<u64>() }; |
566 | | assert_eq!(prefix.len(), 0); |
567 | | assert_eq!(aligned.len(), 8); |
568 | | assert_eq!(suffix.len(), 0); |
569 | | |
570 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 512); |
571 | | |
572 | | // Buffer is completely aligned and larger than 128 elements -> all in chunks array |
573 | | assert_eq!(unaligned.suffix(), None); |
574 | | assert_eq!(unaligned.prefix(), None); |
575 | | assert_eq!(unaligned.chunks(), [u64::MAX; 8].as_slice()); |
576 | | assert_eq!(unaligned.lead_padding(), 0); |
577 | | assert_eq!(unaligned.trailing_padding(), 0); |
578 | | |
579 | | let buffer = buffer.slice(1); // Offset buffer 1 byte off 64-bit alignment |
580 | | |
581 | | // Verify buffer alignment |
582 | | let (prefix, aligned, suffix) = unsafe { buffer.as_slice().align_to::<u64>() }; |
583 | | assert_eq!(prefix.len(), 7); |
584 | | assert_eq!(aligned.len(), 7); |
585 | | assert_eq!(suffix.len(), 0); |
586 | | |
587 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 0, 504); |
588 | | |
589 | | // Need a prefix with 1 byte of lead padding to bring the buffer into alignment |
590 | | assert_eq!(unaligned.prefix(), Some(u64::MAX - 0xFF)); |
591 | | assert_eq!(unaligned.suffix(), None); |
592 | | assert_eq!(unaligned.chunks(), [u64::MAX; 7].as_slice()); |
593 | | assert_eq!(unaligned.lead_padding(), 8); |
594 | | assert_eq!(unaligned.trailing_padding(), 0); |
595 | | |
596 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 17, 300); |
597 | | |
598 | | // Out of 64-bit alignment by 8 bits from buffer, and 17 bits from provided offset |
599 | | // => need 8 + 17 = 25 bits of lead padding + 39 bits in prefix |
600 | | // |
601 | | // This leaves 300 - 17 = 261 bits remaining |
602 | | // => 4x 64-bit aligned 64-bit chunks + 5 remaining bits |
603 | | // => trailing padding of 59 bits |
604 | | assert_eq!(unaligned.lead_padding(), 25); |
605 | | assert_eq!(unaligned.trailing_padding(), 59); |
606 | | assert_eq!(unaligned.prefix(), Some(u64::MAX - (1 << 25) + 1)); |
607 | | assert_eq!(unaligned.suffix(), Some(0b11111)); |
608 | | assert_eq!(unaligned.chunks(), [u64::MAX; 4].as_slice()); |
609 | | |
610 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 17, 0); |
611 | | |
612 | | assert_eq!(unaligned.prefix(), None); |
613 | | assert_eq!(unaligned.suffix(), None); |
614 | | assert!(unaligned.chunks().is_empty()); |
615 | | assert_eq!(unaligned.lead_padding(), 0); |
616 | | assert_eq!(unaligned.trailing_padding(), 0); |
617 | | |
618 | | let unaligned = UnalignedBitChunk::new(buffer.as_slice(), 17, 1); |
619 | | |
620 | | assert_eq!(unaligned.prefix(), Some(2)); |
621 | | assert_eq!(unaligned.suffix(), None); |
622 | | assert!(unaligned.chunks().is_empty()); |
623 | | assert_eq!(unaligned.lead_padding(), 1); |
624 | | assert_eq!(unaligned.trailing_padding(), 62); |
625 | | } |
626 | | |
627 | | #[test] |
628 | | #[cfg_attr(miri, ignore)] |
629 | | fn fuzz_unaligned_bit_chunk_iterator() { |
630 | | let mut rng = rng(); |
631 | | |
632 | | let uusize = UniformUsize::new(usize::MIN, usize::MAX).unwrap(); |
633 | | for _ in 0..100 { |
634 | | let mask_len = rng.random_range(0..1024); |
635 | | let bools: Vec<_> = std::iter::from_fn(|| Some(rng.random())) |
636 | | .take(mask_len) |
637 | | .collect(); |
638 | | |
639 | | let buffer = Buffer::from_iter(bools.iter().cloned()); |
640 | | |
641 | | let max_offset = 64.min(mask_len); |
642 | | let offset = uusize.sample(&mut rng).checked_rem(max_offset).unwrap_or(0); |
643 | | |
644 | | let max_truncate = 128.min(mask_len - offset); |
645 | | let truncate = uusize |
646 | | .sample(&mut rng) |
647 | | .checked_rem(max_truncate) |
648 | | .unwrap_or(0); |
649 | | |
650 | | let unaligned = |
651 | | UnalignedBitChunk::new(buffer.as_slice(), offset, mask_len - offset - truncate); |
652 | | |
653 | | let bool_slice = &bools[offset..mask_len - truncate]; |
654 | | |
655 | | let count = unaligned.count_ones(); |
656 | | let expected_count = bool_slice.iter().filter(|x| **x).count(); |
657 | | |
658 | | assert_eq!(count, expected_count); |
659 | | |
660 | | let collected: Vec<u64> = unaligned.iter().collect(); |
661 | | |
662 | | let get_bit = |idx: usize| -> bool { |
663 | | let padded_index = idx + unaligned.lead_padding(); |
664 | | let byte_idx = padded_index / 64; |
665 | | let bit_idx = padded_index % 64; |
666 | | (collected[byte_idx] & (1 << bit_idx)) != 0 |
667 | | }; |
668 | | |
669 | | for (idx, b) in bool_slice.iter().enumerate() { |
670 | | assert_eq!(*b, get_bit(idx)) |
671 | | } |
672 | | } |
673 | | } |
674 | | } |