/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/util/bit_util.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | //! Utils for working with bits |
19 | | |
20 | | /// Returns the nearest number that is `>=` than `num` and is a multiple of 64 |
21 | | #[inline] |
22 | 649 | pub fn round_upto_multiple_of_64(num: usize) -> usize { |
23 | 649 | num.checked_next_multiple_of(64) |
24 | 649 | .expect("failed to round upto multiple of 64") |
25 | 649 | } |
26 | | |
27 | | /// Returns the nearest multiple of `factor` that is `>=` than `num`. Here `factor` must |
28 | | /// be a power of 2. |
29 | 0 | pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize { |
30 | 0 | debug_assert!(factor > 0 && factor.is_power_of_two()); |
31 | 0 | num.checked_add(factor - 1) |
32 | 0 | .expect("failed to round to next highest power of 2") |
33 | 0 | & !(factor - 1) |
34 | 0 | } |
35 | | |
36 | | /// Returns whether bit at position `i` in `data` is set or not |
37 | | #[inline] |
38 | 0 | pub fn get_bit(data: &[u8], i: usize) -> bool { |
39 | 0 | data[i / 8] & (1 << (i % 8)) != 0 |
40 | 0 | } |
41 | | |
42 | | /// Returns whether bit at position `i` in `data` is set or not. |
43 | | /// |
44 | | /// # Safety |
45 | | /// |
46 | | /// Note this doesn't do any bound checking, for performance reason. The caller is |
47 | | /// responsible to guarantee that `i` is within bounds. |
48 | | #[inline] |
49 | 426 | pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool { |
50 | 426 | (*data.add(i / 8) & (1 << (i % 8))) != 0 |
51 | 426 | } |
52 | | |
53 | | /// Sets bit at position `i` for `data` to 1 |
54 | | #[inline] |
55 | 265 | pub fn set_bit(data: &mut [u8], i: usize) { |
56 | 265 | data[i / 8] |= 1 << (i % 8); |
57 | 265 | } |
58 | | |
59 | | /// Sets bit at position `i` for `data` |
60 | | /// |
61 | | /// # Safety |
62 | | /// |
63 | | /// Note this doesn't do any bound checking, for performance reason. The caller is |
64 | | /// responsible to guarantee that `i` is within bounds. |
65 | | #[inline] |
66 | 350 | pub unsafe fn set_bit_raw(data: *mut u8, i: usize) { |
67 | 350 | *data.add(i / 8) |= 1 << (i % 8); |
68 | 350 | } |
69 | | |
70 | | /// Sets bit at position `i` for `data` to 0 |
71 | | #[inline] |
72 | 0 | pub fn unset_bit(data: &mut [u8], i: usize) { |
73 | 0 | data[i / 8] &= !(1 << (i % 8)); |
74 | 0 | } |
75 | | |
76 | | /// Sets bit at position `i` for `data` to 0 |
77 | | /// |
78 | | /// # Safety |
79 | | /// |
80 | | /// Note this doesn't do any bound checking, for performance reason. The caller is |
81 | | /// responsible to guarantee that `i` is within bounds. |
82 | | #[inline] |
83 | | pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) { |
84 | | *data.add(i / 8) &= !(1 << (i % 8)); |
85 | | } |
86 | | |
87 | | /// Returns the ceil of `value`/`divisor` |
88 | | #[inline] |
89 | 1.90k | pub fn ceil(value: usize, divisor: usize) -> usize { |
90 | 1.90k | value.div_ceil(divisor) |
91 | 1.90k | } |
92 | | |
93 | | #[cfg(test)] |
94 | | mod tests { |
95 | | use std::collections::HashSet; |
96 | | |
97 | | use super::*; |
98 | | use rand::rngs::StdRng; |
99 | | use rand::{Rng, SeedableRng}; |
100 | | |
101 | | #[test] |
102 | | fn test_round_upto_multiple_of_64() { |
103 | | assert_eq!(0, round_upto_multiple_of_64(0)); |
104 | | assert_eq!(64, round_upto_multiple_of_64(1)); |
105 | | assert_eq!(64, round_upto_multiple_of_64(63)); |
106 | | assert_eq!(64, round_upto_multiple_of_64(64)); |
107 | | assert_eq!(128, round_upto_multiple_of_64(65)); |
108 | | assert_eq!(192, round_upto_multiple_of_64(129)); |
109 | | } |
110 | | |
111 | | #[test] |
112 | | #[should_panic(expected = "failed to round upto multiple of 64")] |
113 | | fn test_round_upto_multiple_of_64_panic() { |
114 | | let _ = round_upto_multiple_of_64(usize::MAX); |
115 | | } |
116 | | |
117 | | #[test] |
118 | | #[should_panic(expected = "failed to round to next highest power of 2")] |
119 | | fn test_round_upto_panic() { |
120 | | let _ = round_upto_power_of_2(usize::MAX, 2); |
121 | | } |
122 | | |
123 | | #[test] |
124 | | fn test_get_bit() { |
125 | | // 00001101 |
126 | | assert!(get_bit(&[0b00001101], 0)); |
127 | | assert!(!get_bit(&[0b00001101], 1)); |
128 | | assert!(get_bit(&[0b00001101], 2)); |
129 | | assert!(get_bit(&[0b00001101], 3)); |
130 | | |
131 | | // 01001001 01010010 |
132 | | assert!(get_bit(&[0b01001001, 0b01010010], 0)); |
133 | | assert!(!get_bit(&[0b01001001, 0b01010010], 1)); |
134 | | assert!(!get_bit(&[0b01001001, 0b01010010], 2)); |
135 | | assert!(get_bit(&[0b01001001, 0b01010010], 3)); |
136 | | assert!(!get_bit(&[0b01001001, 0b01010010], 4)); |
137 | | assert!(!get_bit(&[0b01001001, 0b01010010], 5)); |
138 | | assert!(get_bit(&[0b01001001, 0b01010010], 6)); |
139 | | assert!(!get_bit(&[0b01001001, 0b01010010], 7)); |
140 | | assert!(!get_bit(&[0b01001001, 0b01010010], 8)); |
141 | | assert!(get_bit(&[0b01001001, 0b01010010], 9)); |
142 | | assert!(!get_bit(&[0b01001001, 0b01010010], 10)); |
143 | | assert!(!get_bit(&[0b01001001, 0b01010010], 11)); |
144 | | assert!(get_bit(&[0b01001001, 0b01010010], 12)); |
145 | | assert!(!get_bit(&[0b01001001, 0b01010010], 13)); |
146 | | assert!(get_bit(&[0b01001001, 0b01010010], 14)); |
147 | | assert!(!get_bit(&[0b01001001, 0b01010010], 15)); |
148 | | } |
149 | | |
150 | | pub fn seedable_rng() -> StdRng { |
151 | | StdRng::seed_from_u64(42) |
152 | | } |
153 | | |
154 | | #[test] |
155 | | fn test_get_bit_raw() { |
156 | | const NUM_BYTE: usize = 10; |
157 | | let mut buf = [0; NUM_BYTE]; |
158 | | let mut expected = vec![]; |
159 | | let mut rng = seedable_rng(); |
160 | | for i in 0..8 * NUM_BYTE { |
161 | | let b = rng.random_bool(0.5); |
162 | | expected.push(b); |
163 | | if b { |
164 | | set_bit(&mut buf[..], i) |
165 | | } |
166 | | } |
167 | | |
168 | | let raw_ptr = buf.as_ptr(); |
169 | | for (i, b) in expected.iter().enumerate() { |
170 | | unsafe { |
171 | | assert_eq!(*b, get_bit_raw(raw_ptr, i)); |
172 | | } |
173 | | } |
174 | | } |
175 | | |
176 | | #[test] |
177 | | fn test_set_bit() { |
178 | | let mut b = [0b00000010]; |
179 | | set_bit(&mut b, 0); |
180 | | assert_eq!([0b00000011], b); |
181 | | set_bit(&mut b, 1); |
182 | | assert_eq!([0b00000011], b); |
183 | | set_bit(&mut b, 7); |
184 | | assert_eq!([0b10000011], b); |
185 | | } |
186 | | |
187 | | #[test] |
188 | | fn test_unset_bit() { |
189 | | let mut b = [0b11111101]; |
190 | | unset_bit(&mut b, 0); |
191 | | assert_eq!([0b11111100], b); |
192 | | unset_bit(&mut b, 1); |
193 | | assert_eq!([0b11111100], b); |
194 | | unset_bit(&mut b, 7); |
195 | | assert_eq!([0b01111100], b); |
196 | | } |
197 | | |
198 | | #[test] |
199 | | fn test_set_bit_raw() { |
200 | | const NUM_BYTE: usize = 10; |
201 | | let mut buf = vec![0; NUM_BYTE]; |
202 | | let mut expected = vec![]; |
203 | | let mut rng = seedable_rng(); |
204 | | for i in 0..8 * NUM_BYTE { |
205 | | let b = rng.random_bool(0.5); |
206 | | expected.push(b); |
207 | | if b { |
208 | | unsafe { |
209 | | set_bit_raw(buf.as_mut_ptr(), i); |
210 | | } |
211 | | } |
212 | | } |
213 | | |
214 | | let raw_ptr = buf.as_ptr(); |
215 | | for (i, b) in expected.iter().enumerate() { |
216 | | unsafe { |
217 | | assert_eq!(*b, get_bit_raw(raw_ptr, i)); |
218 | | } |
219 | | } |
220 | | } |
221 | | |
222 | | #[test] |
223 | | fn test_unset_bit_raw() { |
224 | | const NUM_BYTE: usize = 10; |
225 | | let mut buf = vec![255; NUM_BYTE]; |
226 | | let mut expected = vec![]; |
227 | | let mut rng = seedable_rng(); |
228 | | for i in 0..8 * NUM_BYTE { |
229 | | let b = rng.random_bool(0.5); |
230 | | expected.push(b); |
231 | | if !b { |
232 | | unsafe { |
233 | | unset_bit_raw(buf.as_mut_ptr(), i); |
234 | | } |
235 | | } |
236 | | } |
237 | | |
238 | | let raw_ptr = buf.as_ptr(); |
239 | | for (i, b) in expected.iter().enumerate() { |
240 | | unsafe { |
241 | | assert_eq!(*b, get_bit_raw(raw_ptr, i)); |
242 | | } |
243 | | } |
244 | | } |
245 | | |
246 | | #[test] |
247 | | fn test_get_set_bit_roundtrip() { |
248 | | const NUM_BYTES: usize = 10; |
249 | | const NUM_SETS: usize = 10; |
250 | | |
251 | | let mut buffer: [u8; NUM_BYTES * 8] = [0; NUM_BYTES * 8]; |
252 | | let mut v = HashSet::new(); |
253 | | let mut rng = seedable_rng(); |
254 | | for _ in 0..NUM_SETS { |
255 | | let offset = rng.random_range(0..8 * NUM_BYTES); |
256 | | v.insert(offset); |
257 | | set_bit(&mut buffer[..], offset); |
258 | | } |
259 | | for i in 0..NUM_BYTES * 8 { |
260 | | assert_eq!(v.contains(&i), get_bit(&buffer[..], i)); |
261 | | } |
262 | | } |
263 | | |
264 | | #[test] |
265 | | fn test_ceil() { |
266 | | assert_eq!(ceil(0, 1), 0); |
267 | | assert_eq!(ceil(1, 1), 1); |
268 | | assert_eq!(ceil(1, 2), 1); |
269 | | assert_eq!(ceil(1, 8), 1); |
270 | | assert_eq!(ceil(7, 8), 1); |
271 | | assert_eq!(ceil(8, 8), 1); |
272 | | assert_eq!(ceil(9, 8), 2); |
273 | | assert_eq!(ceil(9, 9), 1); |
274 | | assert_eq!(ceil(10000000000, 10), 1000000000); |
275 | | assert_eq!(ceil(10, 10000000000), 1); |
276 | | assert_eq!(ceil(10000000000, 1000000000), 10); |
277 | | } |
278 | | } |