Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-avro/src/compression.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use arrow_schema::ArrowError;
19
use std::io;
20
use std::io::{Read, Write};
21
22
/// Metadata key (`avro.codec`) associated with the [`CompressionCodec`] of Avro data.
///
/// NOTE(review): this was previously documented as holding a "JSON encoded" codec; the
/// Avro specification describes the value as a plain codec name string — confirm
/// against the code that reads this key.
23
pub const CODEC_METADATA_KEY: &str = "avro.codec";
24
25
/// Compression codecs that can be applied to Avro data blocks.
///
/// Avro supports several compression formats for its data blocks; each
/// variant below names one of the formats available in this implementation.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum CompressionCodec {
    /// Deflate compression (RFC 1951)
    Deflate,
    /// Snappy compression
    Snappy,
    /// ZStandard compression
    ZStandard,
    /// Bzip2 compression
    Bzip2,
    /// Xz compression
    Xz,
}
42
43
impl CompressionCodec {
44
60
    pub(crate) fn decompress(&self, block: &[u8]) -> Result<Vec<u8>, ArrowError> {
45
60
        match self {
46
            #[cfg(feature = "deflate")]
47
            CompressionCodec::Deflate => {
48
0
                let mut decoder = flate2::read::DeflateDecoder::new(block);
49
0
                let mut out = Vec::new();
50
0
                decoder.read_to_end(&mut out)?;
51
0
                Ok(out)
52
            }
53
            #[cfg(not(feature = "deflate"))]
54
            CompressionCodec::Deflate => Err(ArrowError::ParseError(
55
                "Deflate codec requires deflate feature".to_string(),
56
            )),
57
            #[cfg(feature = "snappy")]
58
            CompressionCodec::Snappy => {
59
                // Each compressed block is followed by the 4-byte, big-endian CRC32
60
                // checksum of the uncompressed data in the block.
61
39
                let crc = &block[block.len() - 4..];
62
39
                let block = &block[..block.len() - 4];
63
64
39
                let mut decoder = snap::raw::Decoder::new();
65
39
                let decoded = decoder
66
39
                    .decompress_vec(block)
67
39
                    .map_err(|e| ArrowError::ExternalError(
Box::new(e)0
))
?0
;
68
69
39
                let checksum = crc::Crc::<u32>::new(&crc::CRC_32_ISO_HDLC).checksum(&decoded);
70
39
                if checksum != u32::from_be_bytes(crc.try_into().unwrap()) {
71
0
                    return Err(ArrowError::ParseError("Snappy CRC mismatch".to_string()));
72
39
                }
73
39
                Ok(decoded)
74
            }
75
            #[cfg(not(feature = "snappy"))]
76
            CompressionCodec::Snappy => Err(ArrowError::ParseError(
77
                "Snappy codec requires snappy feature".to_string(),
78
            )),
79
80
            #[cfg(feature = "zstd")]
81
            CompressionCodec::ZStandard => {
82
7
                let mut decoder = zstd::Decoder::new(block)
?0
;
83
7
                let mut out = Vec::new();
84
7
                decoder.read_to_end(&mut out)
?0
;
85
7
                Ok(out)
86
            }
87
            #[cfg(not(feature = "zstd"))]
88
            CompressionCodec::ZStandard => Err(ArrowError::ParseError(
89
                "ZStandard codec requires zstd feature".to_string(),
90
            )),
91
            #[cfg(feature = "bzip2")]
92
            CompressionCodec::Bzip2 => {
93
7
                let mut decoder = bzip2::read::BzDecoder::new(block);
94
7
                let mut out = Vec::new();
95
7
                decoder.read_to_end(&mut out)
?0
;
96
7
                Ok(out)
97
            }
98
            #[cfg(not(feature = "bzip2"))]
99
            CompressionCodec::Bzip2 => Err(ArrowError::ParseError(
100
                "Bzip2 codec requires bzip2 feature".to_string(),
101
            )),
102
            #[cfg(feature = "xz")]
103
            CompressionCodec::Xz => {
104
7
                let mut decoder = xz::read::XzDecoder::new(block);
105
7
                let mut out = Vec::new();
106
7
                decoder.read_to_end(&mut out)
?0
;
107
7
                Ok(out)
108
            }
109
            #[cfg(not(feature = "xz"))]
110
            CompressionCodec::Xz => Err(ArrowError::ParseError(
111
                "XZ codec requires xz feature".to_string(),
112
            )),
113
        }
114
60
    }
115
116
4
    pub(crate) fn compress(&self, data: &[u8]) -> Result<Vec<u8>, ArrowError> {
117
4
        match self {
118
            #[cfg(feature = "deflate")]
119
            CompressionCodec::Deflate => {
120
0
                let mut encoder =
121
0
                    flate2::write::DeflateEncoder::new(Vec::new(), flate2::Compression::default());
122
0
                encoder.write_all(data)?;
123
0
                let compressed = encoder.finish()?;
124
0
                Ok(compressed)
125
            }
126
            #[cfg(not(feature = "deflate"))]
127
            CompressionCodec::Deflate => Err(ArrowError::ParseError(
128
                "Deflate codec requires deflate feature".to_string(),
129
            )),
130
131
            #[cfg(feature = "snappy")]
132
            CompressionCodec::Snappy => {
133
1
                let mut encoder = snap::raw::Encoder::new();
134
                // Allocate and compress in one step for efficiency
135
1
                let mut compressed = encoder
136
1
                    .compress_vec(data)
137
1
                    .map_err(|e| ArrowError::ExternalError(
Box::new(e)0
))
?0
;
138
                // Compute CRC32 (ISO‑HDLC poly) of **uncompressed** data
139
1
                let crc_val = crc::Crc::<u32>::new(&crc::CRC_32_ISO_HDLC).checksum(data);
140
1
                compressed.extend_from_slice(&crc_val.to_be_bytes());
141
1
                Ok(compressed)
142
            }
143
            #[cfg(not(feature = "snappy"))]
144
            CompressionCodec::Snappy => Err(ArrowError::ParseError(
145
                "Snappy codec requires snappy feature".to_string(),
146
            )),
147
148
            #[cfg(feature = "zstd")]
149
            CompressionCodec::ZStandard => {
150
1
                let mut encoder = zstd::Encoder::new(Vec::new(), 0)
151
1
                    .map_err(|e| ArrowError::ExternalError(
Box::new(e)0
))
?0
;
152
1
                encoder.write_all(data)
?0
;
153
1
                let compressed = encoder
154
1
                    .finish()
155
1
                    .map_err(|e| ArrowError::ExternalError(
Box::new(e)0
))
?0
;
156
1
                Ok(compressed)
157
            }
158
            #[cfg(not(feature = "zstd"))]
159
            CompressionCodec::ZStandard => Err(ArrowError::ParseError(
160
                "ZStandard codec requires zstd feature".to_string(),
161
            )),
162
163
            #[cfg(feature = "bzip2")]
164
            CompressionCodec::Bzip2 => {
165
1
                let mut encoder =
166
1
                    bzip2::write::BzEncoder::new(Vec::new(), bzip2::Compression::default());
167
1
                encoder.write_all(data)
?0
;
168
1
                let compressed = encoder.finish()
?0
;
169
1
                Ok(compressed)
170
            }
171
            #[cfg(not(feature = "bzip2"))]
172
            CompressionCodec::Bzip2 => Err(ArrowError::ParseError(
173
                "Bzip2 codec requires bzip2 feature".to_string(),
174
            )),
175
            #[cfg(feature = "xz")]
176
            CompressionCodec::Xz => {
177
1
                let mut encoder = xz::write::XzEncoder::new(Vec::new(), 6);
178
1
                encoder.write_all(data)
?0
;
179
1
                let compressed = encoder.finish()
?0
;
180
1
                Ok(compressed)
181
            }
182
            #[cfg(not(feature = "xz"))]
183
            CompressionCodec::Xz => Err(ArrowError::ParseError(
184
                "XZ codec requires xz feature".to_string(),
185
            )),
186
        }
187
4
    }
188
}