Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-avro/src/reader/vlq.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
/// Decoder for zig-zag encoded variable length (VLQ) integers
///
/// The decoder is incremental: a value may be split across several input
/// buffers, and the partially decoded state is kept between calls to
/// [`VLQDecoder::long`].
///
/// See also:
/// <https://avro.apache.org/docs/1.11.1/specification/#primitive-types-1>
/// <https://protobuf.dev/programming-guides/encoding/#varints>
#[derive(Debug, Default)]
pub struct VLQDecoder {
    /// Scratch space for decoding VLQ integers: the payload bits collected so
    /// far for the value currently being decoded
    in_progress: u64,
    /// Bit offset at which the next 7-bit payload group is merged into
    /// `in_progress`
    shift: u32,
}

impl VLQDecoder {
    /// Decode a signed long from `buf`
    ///
    /// Consumes bytes from the front of `buf` (advancing the slice) until a
    /// byte without the continuation bit (`0x80`) is seen, then returns the
    /// zig-zag decoded value and resets the decoder for the next value.
    ///
    /// Returns `None` if `buf` is exhausted before the value is complete; the
    /// partial state is retained so decoding can resume with more input.
    ///
    /// Payload bits that do not fit in 64 bits (from an over-long, malformed
    /// encoding) are discarded rather than triggering a shift overflow.
    pub fn long(&mut self, buf: &mut &[u8]) -> Option<i64> {
        while let Some(byte) = buf.first().copied() {
            *buf = &buf[1..];
            // `checked_shl` yields `None` once the shift reaches 64 bits, which
            // only happens for encodings longer than any valid 64-bit value.
            if let Some(bits) = u64::from(byte & 0x7F).checked_shl(self.shift) {
                self.in_progress |= bits;
            }
            // Saturate so an endless run of continuation bytes cannot wrap the
            // shift back into the valid range.
            self.shift = self.shift.saturating_add(7);
            if byte & 0x80 == 0 {
                let val = self.in_progress;
                self.in_progress = 0;
                self.shift = 0;
                // Zig-zag decode: the low bit carries the sign
                return Some((val >> 1) as i64 ^ -((val & 1) as i64));
            }
        }
        None
    }
}
47
48
/// Read a varint from `buf` returning the decoded `u64` and the number of bytes read
49
#[inline]
50
9.40k
pub(crate) fn read_varint(buf: &[u8]) -> Option<(u64, usize)> {
51
9.40k
    let 
first9.40k
= *buf.first()
?1
;
52
9.40k
    if first < 0x80 {
53
7.02k
        return Some((first as u64, 1));
54
2.37k
    }
55
56
2.37k
    if let Some(
array1.81k
) = buf.get(..10) {
57
1.81k
        return read_varint_array(array.try_into().unwrap());
58
560
    }
59
60
560
    read_varint_slow(buf)
61
9.40k
}
62
63
/// Decodes a varint from a fixed ten-byte window, returning the value and the
/// number of bytes it occupied.
///
/// Each of the first nine bytes contributes seven payload bits. The tenth
/// byte may only be `0` or `1`; anything larger does not fit in a `u64` and
/// yields `None`.
///
/// Based on
/// - <https://github.com/tokio-rs/prost/blob/master/prost/src/encoding/varint.rs#L71>
/// - <https://github.com/google/protobuf/blob/3.3.x/src/google/protobuf/io/coded_stream.cc#L365-L406>
/// - <https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/encoding/protowire/wire.go#L358>
#[inline]
fn read_varint_array(buf: [u8; 10]) -> Option<(u64, usize)> {
    let (head, tail) = buf.split_at(9);
    let mut acc: u64 = 0;

    for (pos, &byte) in head.iter().enumerate() {
        let offset = 7 * pos;
        // Add the whole byte first; the continuation bit is removed below if set
        acc += u64::from(byte) << offset;
        if byte & 0x80 == 0 {
            return Some((acc, pos + 1));
        }
        // Strip the continuation bit that was added along with the payload
        acc -= 0x80_u64 << offset;
    }

    let last = u64::from(tail[0]);
    acc += last << 63;
    if last < 0x02 {
        Some((acc, 10))
    } else {
        None
    }
}
82
83
/// Byte-at-a-time varint decoder for inputs of any length.
///
/// Examines at most the first ten bytes of `buf` and returns the decoded value
/// together with the number of bytes consumed. Returns `None` if no
/// terminating byte (one without the `0x80` continuation bit) is found within
/// those bytes, or if the tenth byte carries bits that do not fit in a `u64`.
#[inline(never)]
#[cold]
fn read_varint_slow(buf: &[u8]) -> Option<(u64, usize)> {
    let mut value = 0_u64;
    for (count, &byte) in buf.iter().take(10).enumerate() {
        value |= u64::from(byte & 0x7F) << (count * 7);
        if byte <= 0x7F {
            // Check for u64::MAX overflow. See [`ConsumeVarint`][1] for details.
            // [1]: https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/encoding/protowire/wire.go#L358
            return (count != 9 || byte < 2).then_some((value, count + 1));
        }
    }

    None
}
99
100
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `n` as a varint into `dst`, returning the number of bytes written.
    ///
    /// `dst` must be long enough for the encoding; ten bytes covers any `u64`.
    fn encode_var(mut n: u64, dst: &mut [u8]) -> usize {
        let mut i = 0;

        while n >= 0x80 {
            dst[i] = 0x80 | (n as u8);
            i += 1;
            n >>= 7;
        }

        dst[i] = n as u8;
        i + 1
    }

    /// Round-trips `a` through `encode_var` and `read_varint`, once with the
    /// exact-length slice and once with the zero-padded ten-byte buffer.
    fn varint_test(a: u64) {
        let mut buf = [0_u8; 10];
        let len = encode_var(a, &mut buf);
        assert_eq!(read_varint(&buf[..len]).unwrap(), (a, len));
        assert_eq!(read_varint(&buf).unwrap(), (a, len));
    }

    /// Decodes `bytes` with a fresh `VLQDecoder` and checks both the value and
    /// that the whole input was consumed.
    fn long_test(bytes: &[u8], expected: i64) {
        let mut decoder = VLQDecoder::default();
        let mut input = bytes;
        assert_eq!(decoder.long(&mut input), Some(expected));
        assert!(input.is_empty());
    }

    #[test]
    fn test_varint() {
        varint_test(0);
        varint_test(4395932);
        varint_test(u64::MAX);

        for _ in 0..1000 {
            varint_test(rand::random());
        }
    }

    #[test]
    fn test_varint_every_length() {
        // Random u64 values almost always need nine or ten bytes, so cover the
        // boundary on both sides of every encoded length explicitly.
        for groups in 0..10 {
            let low = 1_u64 << (7 * groups);
            varint_test(low);
            varint_test(low - 1);
        }
    }

    #[test]
    fn test_varint_incomplete() {
        assert_eq!(read_varint(&[]), None);
        assert_eq!(read_varint(&[0x80]), None);
        assert_eq!(read_varint(&[0xFF; 9]), None);
        assert_eq!(read_varint_slow(&[0x80; 11]), None);
    }

    #[test]
    fn test_varint_overflow() {
        let mut buf = [0xFF_u8; 10];
        assert_eq!(read_varint(&buf), None);
        assert_eq!(read_varint_slow(&buf), None);

        // A tenth byte above 1 carries bits beyond the 64th
        buf[9] = 0x02;
        assert_eq!(read_varint(&buf), None);
        assert_eq!(read_varint_slow(&buf), None);

        buf[9] = 0x01;
        assert_eq!(read_varint(&buf), Some((u64::MAX, 10)));
        assert_eq!(read_varint_slow(&buf), Some((u64::MAX, 10)));
    }

    #[test]
    fn test_vlq_long() {
        long_test(&[0x00], 0);
        long_test(&[0x01], -1);
        long_test(&[0x02], 1);
        long_test(&[0x7F], -64);
        long_test(&[0xAC, 0x02], 150);

        let mut buf = [0_u8; 10];
        let len = encode_var(u64::MAX, &mut buf);
        long_test(&buf[..len], i64::MIN);
        let len = encode_var(u64::MAX - 1, &mut buf);
        long_test(&buf[..len], i64::MAX);
    }

    #[test]
    fn test_vlq_long_split_input() {
        let mut decoder = VLQDecoder::default();

        // The first buffer ends in the middle of a value
        let mut first: &[u8] = &[0xAC];
        assert_eq!(decoder.long(&mut first), None);
        assert!(first.is_empty());

        // The second buffer completes it and holds one more value
        let mut second: &[u8] = &[0x02, 0x04];
        assert_eq!(decoder.long(&mut second), Some(150));
        assert_eq!(second.len(), 1);
        assert_eq!(decoder.long(&mut second), Some(2));
        assert_eq!(decoder.long(&mut second), None);
    }
}