Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-avro/src/reader/block.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
//! Decoder for [`Block`]
19
20
use crate::reader::vlq::VLQDecoder;
21
use arrow_schema::ArrowError;
22
23
/// One data block of an Avro object container file
///
/// A block carries the number of serialized objects it holds, the raw bytes
/// of those objects, and the 16-byte sync marker that terminates the block.
///
/// <https://avro.apache.org/docs/1.11.1/specification/#object-container-files>
#[derive(Debug, Default)]
pub struct Block {
    /// How many objects are serialized in [`Self::data`]
    pub count: usize,
    /// Raw bytes of the serialized objects, exactly as read from the input
    pub data: Vec<u8>,
    /// The 16-byte sync marker read after the block's data
    pub sync: [u8; 16],
}
35
36
/// A decoder for [`Block`]
37
#[derive(Debug)]
38
pub struct BlockDecoder {
39
    state: BlockDecoderState,
40
    in_progress: Block,
41
    vlq_decoder: VLQDecoder,
42
    bytes_remaining: usize,
43
}
44
45
/// The section of a block that [`BlockDecoder`] expects to read next
#[derive(Debug)]
enum BlockDecoderState {
    /// Expecting the number of objects in the block
    Count,
    /// Expecting the size in bytes of the serialized objects
    Size,
    /// Reading the serialized objects
    Data,
    /// Reading the 16-byte sync marker
    Sync,
    /// A whole block has been read and is waiting to be flushed
    Finished,
}
53
54
impl Default for BlockDecoder {
55
73
    fn default() -> Self {
56
73
        Self {
57
73
            state: BlockDecoderState::Count,
58
73
            in_progress: Default::default(),
59
73
            vlq_decoder: Default::default(),
60
73
            bytes_remaining: 0,
61
73
        }
62
73
    }
63
}
64
65
impl BlockDecoder {
66
    /// Parse [`Block`] from `buf`, returning the number of bytes read
67
    ///
68
    /// This method can be called multiple times with consecutive chunks of data, allowing
69
    /// integration with chunked IO systems like [`BufRead::fill_buf`]
70
    ///
71
    /// All errors should be considered fatal, and decoding aborted
72
    ///
73
    /// Once an entire [`Block`] has been decoded this method will not read any further
74
    /// input bytes, until [`Self::flush`] is called. Afterwards [`Self::decode`]
75
    /// can then be used again to read the next block, if any
76
    ///
77
    /// [`BufRead::fill_buf`]: std::io::BufRead::fill_buf
78
73
    pub fn decode(&mut self, mut buf: &[u8]) -> Result<usize, ArrowError> {
79
73
        let max_read = buf.len();
80
365
        while !buf.is_empty() {
81
292
            match self.state {
82
                BlockDecoderState::Count => {
83
73
                    if let Some(c) = self.vlq_decoder.long(&mut buf) {
84
73
                        self.in_progress.count = c.try_into().map_err(|_| 
{0
85
0
                            ArrowError::ParseError(format!(
86
0
                                "Block count cannot be negative, got {c}"
87
0
                            ))
88
0
                        })?;
89
90
73
                        self.state = BlockDecoderState::Size;
91
0
                    }
92
                }
93
                BlockDecoderState::Size => {
94
73
                    if let Some(c) = self.vlq_decoder.long(&mut buf) {
95
73
                        self.bytes_remaining = c.try_into().map_err(|_| 
{0
96
0
                            ArrowError::ParseError(format!(
97
0
                                "Block size cannot be negative, got {c}"
98
0
                            ))
99
0
                        })?;
100
101
73
                        self.in_progress.data.reserve(self.bytes_remaining);
102
73
                        self.state = BlockDecoderState::Data;
103
0
                    }
104
                }
105
                BlockDecoderState::Data => {
106
73
                    let to_read = self.bytes_remaining.min(buf.len());
107
73
                    self.in_progress.data.extend_from_slice(&buf[..to_read]);
108
73
                    buf = &buf[to_read..];
109
73
                    self.bytes_remaining -= to_read;
110
73
                    if self.bytes_remaining == 0 {
111
73
                        self.bytes_remaining = 16;
112
73
                        self.state = BlockDecoderState::Sync;
113
73
                    
}0
114
                }
115
                BlockDecoderState::Sync => {
116
73
                    let to_decode = buf.len().min(self.bytes_remaining);
117
73
                    let write = &mut self.in_progress.sync[16 - to_decode..];
118
73
                    write[..to_decode].copy_from_slice(&buf[..to_decode]);
119
73
                    self.bytes_remaining -= to_decode;
120
73
                    buf = &buf[to_decode..];
121
73
                    if self.bytes_remaining == 0 {
122
73
                        self.state = BlockDecoderState::Finished;
123
73
                    
}0
124
                }
125
0
                BlockDecoderState::Finished => return Ok(max_read - buf.len()),
126
            }
127
        }
128
73
        Ok(max_read)
129
73
    }
130
131
    /// Flush this decoder returning the parsed [`Block`] if any
132
73
    pub fn flush(&mut self) -> Option<Block> {
133
73
        match self.state {
134
            BlockDecoderState::Finished => {
135
73
                self.state = BlockDecoderState::Count;
136
73
                Some(std::mem::take(&mut self.in_progress))
137
            }
138
0
            _ => None,
139
        }
140
73
    }
141
}