Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-avro/src/reader/record.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::codec::{AvroDataType, Codec, Nullability, Promotion, ResolutionInfo};
19
use crate::reader::block::{Block, BlockDecoder};
20
use crate::reader::cursor::AvroCursor;
21
use crate::reader::header::Header;
22
use crate::schema::*;
23
use arrow_array::builder::{
24
    ArrayBuilder, Decimal128Builder, Decimal256Builder, IntervalMonthDayNanoBuilder,
25
    PrimitiveBuilder,
26
};
27
use arrow_array::types::*;
28
use arrow_array::*;
29
use arrow_buffer::*;
30
use arrow_schema::{
31
    ArrowError, DataType, Field as ArrowField, FieldRef, Fields, IntervalUnit,
32
    Schema as ArrowSchema, SchemaRef, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
33
};
34
use std::cmp::Ordering;
35
use std::collections::HashMap;
36
use std::io::Read;
37
use std::sync::Arc;
38
use uuid::Uuid;
39
40
/// Initial capacity used when allocating the value vectors, offset builders and
/// null-buffer builders created in `Decoder::try_new`.
const DEFAULT_CAPACITY: usize = 1024;
41
42
/// Builder that collects the options needed to construct a `RecordDecoder`.
#[derive(Debug)]
43
pub(crate) struct RecordDecoderBuilder<'a> {
44
    /// Avro data type the decoder is built from; `build` forwards it to
    /// `RecordDecoder::try_new_with_options`.
    data_type: &'a AvroDataType,
45
    /// `Utf8View` flag forwarded to the decoder; initialised to `false` by `new`.
    use_utf8view: bool,
46
}
47
48
impl<'a> RecordDecoderBuilder<'a> {
49
0
    pub(crate) fn new(data_type: &'a AvroDataType) -> Self {
50
0
        Self {
51
0
            data_type,
52
0
            use_utf8view: false,
53
0
        }
54
0
    }
55
56
0
    pub(crate) fn with_utf8_view(mut self, use_utf8view: bool) -> Self {
57
0
        self.use_utf8view = use_utf8view;
58
0
        self
59
0
    }
60
61
    /// Builds the `RecordDecoder`.
62
0
    pub(crate) fn build(self) -> Result<RecordDecoder, ArrowError> {
63
0
        RecordDecoder::try_new_with_options(self.data_type, self.use_utf8view)
64
0
    }
65
}
66
67
/// Decodes avro encoded data into [`RecordBatch`]
///
/// Values are accumulated by `decode` and turned into a batch by `flush`.
68
#[derive(Debug)]
69
pub(crate) struct RecordDecoder {
70
    /// Arrow schema built from the reader record's fields; returned by `schema()`
    /// and used when assembling the batch in `flush`.
    schema: SchemaRef,
71
    /// One decoder per reader field, in reader field order.
    fields: Vec<Decoder>,
72
    /// Flag supplied to `try_new_with_options`; stored as given.
    use_utf8view: bool,
73
    /// Present only when the record's data type carries `ResolutionInfo::Record`;
    /// `decode` then reads fields in writer order and projects them onto `fields`.
    resolved: Option<ResolvedRuntime>,
74
}
75
76
/// Runtime helpers for decoding a top-level record whose writer schema differs
/// from the reader schema. Both members are walked in lock-step (zipped) by
/// `decode_with_resolution`.
#[derive(Debug)]
77
struct ResolvedRuntime {
78
    /// writer field index -> reader field index (or None if writer-only)
79
    writer_to_reader: Arc<[Option<usize>]>,
80
    /// per-writer-field skipper (Some only when writer-only)
81
    skip_decoders: Vec<Option<Skipper>>,
82
}
83
84
impl RecordDecoder {
85
    /// Creates a new `RecordDecoderBuilder` for configuring a `RecordDecoder`.
86
0
    pub(crate) fn new(data_type: &'_ AvroDataType) -> Self {
87
0
        RecordDecoderBuilder::new(data_type).build().unwrap()
88
0
    }
89
90
    /// Create a new [`RecordDecoder`] from the provided [`AvroDataType`] with default options
91
0
    pub(crate) fn try_new(data_type: &AvroDataType) -> Result<Self, ArrowError> {
92
0
        RecordDecoderBuilder::new(data_type)
93
0
            .with_utf8_view(true)
94
0
            .build()
95
0
    }
96
97
    /// Creates a new [`RecordDecoder`] from the provided [`AvroDataType`] with additional options.
98
    ///
99
    /// This method allows you to customize how the Avro data is decoded into Arrow arrays.
100
    ///
101
    /// # Arguments
102
    /// * `data_type` - The Avro data type to decode.
103
    /// * `use_utf8view` - A flag indicating whether to use `Utf8View` for string types.
104
    /// * `strict_mode` - A flag to enable strict decoding, returning an error if the data
105
    ///   does not conform to the schema.
106
    ///
107
    /// # Errors
108
    /// This function will return an error if the provided `data_type` is not a `Record`.
109
90
    pub(crate) fn try_new_with_options(
110
90
        data_type: &AvroDataType,
111
90
        use_utf8view: bool,
112
90
    ) -> Result<Self, ArrowError> {
113
90
        match data_type.codec() {
114
90
            Codec::Struct(reader_fields) => {
115
                // Build Arrow schema fields and per-child decoders
116
90
                let mut arrow_fields = Vec::with_capacity(reader_fields.len());
117
90
                let mut encodings = Vec::with_capacity(reader_fields.len());
118
521
                for avro_field in 
reader_fields90
.
iter90
() {
119
521
                    arrow_fields.push(avro_field.field());
120
521
                    encodings.push(Decoder::try_new(avro_field.data_type())
?0
);
121
                }
122
                // If this record carries resolution metadata, prepare top-level runtime helpers
123
90
                let resolved = match data_type.resolution.as_ref() {
124
29
                    Some(ResolutionInfo::Record(rec)) => {
125
29
                        let skip_decoders = build_skip_decoders(&rec.skip_fields)
?0
;
126
29
                        Some(ResolvedRuntime {
127
29
                            writer_to_reader: rec.writer_to_reader.clone(),
128
29
                            skip_decoders,
129
29
                        })
130
                    }
131
61
                    _ => None,
132
                };
133
90
                Ok(Self {
134
90
                    schema: Arc::new(ArrowSchema::new(arrow_fields)),
135
90
                    fields: encodings,
136
90
                    use_utf8view,
137
90
                    resolved,
138
90
                })
139
            }
140
0
            other => Err(ArrowError::ParseError(format!(
141
0
                "Expected record got {other:?}"
142
0
            ))),
143
        }
144
90
    }
145
146
    /// Returns the decoder's `SchemaRef`
147
73
    pub(crate) fn schema(&self) -> &SchemaRef {
148
73
        &self.schema
149
73
    }
150
151
    /// Decode `count` records from `buf`
152
135
    pub(crate) fn decode(&mut self, buf: &[u8], count: usize) -> Result<usize, ArrowError> {
153
135
        let mut cursor = AvroCursor::new(buf);
154
135
        match self.resolved.as_mut() {
155
22
            Some(runtime) => {
156
                // Top-level resolved record: read writer fields in writer order,
157
                // project into reader fields, and skip writer-only fields
158
22
                for _ in 0..count {
159
141
                    decode_with_resolution(
160
141
                        &mut cursor,
161
141
                        &mut self.fields,
162
141
                        &runtime.writer_to_reader,
163
141
                        &mut runtime.skip_decoders,
164
0
                    )?;
165
                }
166
            }
167
            None => {
168
113
                for _ in 0..count {
169
2.78k
                    for 
field2.30k
in &mut self.fields {
170
2.30k
                        field.decode(&mut cursor)
?0
;
171
                    }
172
                }
173
            }
174
        }
175
135
        Ok(cursor.position())
176
135
    }
177
178
    /// Flush the decoded records into a [`RecordBatch`]
179
133
    pub(crate) fn flush(&mut self) -> Result<RecordBatch, ArrowError> {
180
133
        let arrays = self
181
133
            .fields
182
133
            .iter_mut()
183
679
            .
map133
(|x| x.flush(None))
184
133
            .collect::<Result<Vec<_>, _>>()
?0
;
185
133
        RecordBatch::try_new(self.schema.clone(), arrays)
186
133
    }
187
}
188
189
147
fn decode_with_resolution(
190
147
    buf: &mut AvroCursor<'_>,
191
147
    encodings: &mut [Decoder],
192
147
    writer_to_reader: &[Option<usize>],
193
147
    skippers: &mut [Option<Skipper>],
194
147
) -> Result<(), ArrowError> {
195
1.51k
    for (w_idx, (target, skipper_opt)) in 
writer_to_reader147
.
iter147
().
zip147
(
skippers147
).
enumerate147
() {
196
1.51k
        match (*target, skipper_opt.as_mut()) {
197
1.35k
            (Some(r_idx), _) => encodings[r_idx].decode(buf)
?0
,
198
158
            (None, Some(sk)) => sk.skip(buf)
?0
,
199
            (None, None) => {
200
0
                return Err(ArrowError::SchemaError(format!(
201
0
                    "No skipper available for writer-only field at index {w_idx}",
202
0
                )));
203
            }
204
        }
205
    }
206
147
    Ok(())
207
147
}
208
209
/// Per-field decoder state: each variant accumulates decoded values until they are
/// flushed into an Arrow array.
#[derive(Debug)]
210
enum Decoder {
211
    /// Count of values seen since the last flush.
    Null(usize),
212
    Boolean(BooleanBufferBuilder),
213
    Int32(Vec<i32>),
214
    Int64(Vec<i64>),
215
    Float32(Vec<f32>),
216
    Float64(Vec<f64>),
217
    Date32(Vec<i32>),
218
    TimeMillis(Vec<i32>),
219
    TimeMicros(Vec<i64>),
220
    /// The `bool` is the codec's `is_utc` flag; when set, the flushed array gets the "+00:00" timezone.
    TimestampMillis(bool, Vec<i64>),
221
    /// The `bool` is the codec's `is_utc` flag; when set, the flushed array gets the "+00:00" timezone.
    TimestampMicros(bool, Vec<i64>),
222
    /// Promotion: values are read as Avro `int` and stored as `i64`.
    Int32ToInt64(Vec<i64>),
223
    /// Promotion: values are read as Avro `int` and stored as `f32`.
    Int32ToFloat32(Vec<f32>),
224
    /// Promotion: values are read as Avro `int` and stored as `f64`.
    Int32ToFloat64(Vec<f64>),
225
    /// Promotion: values are read as Avro `long` and stored as `f32`.
    Int64ToFloat32(Vec<f32>),
226
    /// Promotion: values are read as Avro `long` and stored as `f64`.
    Int64ToFloat64(Vec<f64>),
227
    /// Promotion: values are read as Avro `float` and stored as `f64`.
    Float32ToFloat64(Vec<f64>),
228
    /// Promotion: length-prefixed bytes flushed as a `StringArray`.
    BytesToString(OffsetBufferBuilder<i32>, Vec<u8>),
229
    /// Promotion: length-prefixed bytes flushed as a `BinaryArray`.
    StringToBytes(OffsetBufferBuilder<i32>, Vec<u8>),
230
    Binary(OffsetBufferBuilder<i32>, Vec<u8>),
231
    /// String data encoded as UTF-8 bytes, mapped to Arrow's StringArray
232
    String(OffsetBufferBuilder<i32>, Vec<u8>),
233
    /// String data encoded as UTF-8 bytes, but mapped to Arrow's StringViewArray
234
    StringView(OffsetBufferBuilder<i32>, Vec<u8>),
235
    /// Item field, list offsets, and the decoder for the list items.
    Array(FieldRef, OffsetBufferBuilder<i32>, Box<Decoder>),
236
    /// Arrow fields of the record and one decoder per field, decoded in order.
    Record(Fields, Vec<Decoder>),
237
    Map(
238
        // "entries" struct field describing the key/value pair
        FieldRef,
239
        // offsets into the key bytes (one length pushed per key)
        OffsetBufferBuilder<i32>,
240
        // map offsets (one length pushed per map value: its number of entries)
        OffsetBufferBuilder<i32>,
241
        // concatenated key bytes
        Vec<u8>,
242
        // decoder for the map values
        Box<Decoder>,
243
    ),
244
    /// Fixed size in bytes and the concatenated fixed values.
    Fixed(i32, Vec<u8>),
245
    /// Decoded symbol indices and the enum's symbols.
    Enum(Vec<i32>, Arc<[String]>),
246
    Duration(IntervalMonthDayNanoBuilder),
247
    /// Concatenated 16-byte UUID values.
    Uuid(Vec<u8>),
248
    /// Precision, optional scale, optional fixed size in bytes, and the value builder.
    Decimal128(usize, Option<usize>, Option<usize>, Decimal128Builder),
249
    /// Precision, optional scale, optional fixed size in bytes, and the value builder.
    Decimal256(usize, Option<usize>, Option<usize>, Decimal256Builder),
250
    /// Position of the null branch in the union, the validity builder, and the inner decoder.
    Nullable(Nullability, NullBufferBuilder, Box<Decoder>),
251
    /// Resolved record that needs writer->reader projection and skipping writer-only fields
252
    RecordResolved {
253
        fields: Fields,
254
        encodings: Vec<Decoder>,
255
        writer_to_reader: Arc<[Option<usize>]>,
256
        skip_decoders: Vec<Option<Skipper>>,
257
    },
258
}
259
260
impl Decoder {
261
678
    fn try_new(data_type: &AvroDataType) -> Result<Self, ArrowError> {
262
        // Extract just the Promotion (if any) to simplify pattern matching
263
678
        let promotion = match data_type.resolution.as_ref() {
264
69
            Some(ResolutionInfo::Promotion(p)) => Some(p),
265
609
            _ => None,
266
        };
267
678
        let decoder = match (data_type.codec(), promotion) {
268
            (Codec::Int64, Some(Promotion::IntToLong)) => {
269
11
                Self::Int32ToInt64(Vec::with_capacity(DEFAULT_CAPACITY))
270
            }
271
            (Codec::Float32, Some(Promotion::IntToFloat)) => {
272
6
                Self::Int32ToFloat32(Vec::with_capacity(DEFAULT_CAPACITY))
273
            }
274
            (Codec::Float64, Some(Promotion::IntToDouble)) => {
275
11
                Self::Int32ToFloat64(Vec::with_capacity(DEFAULT_CAPACITY))
276
            }
277
            (Codec::Float32, Some(Promotion::LongToFloat)) => {
278
6
                Self::Int64ToFloat32(Vec::with_capacity(DEFAULT_CAPACITY))
279
            }
280
            (Codec::Float64, Some(Promotion::LongToDouble)) => {
281
6
                Self::Int64ToFloat64(Vec::with_capacity(DEFAULT_CAPACITY))
282
            }
283
            (Codec::Float64, Some(Promotion::FloatToDouble)) => {
284
6
                Self::Float32ToFloat64(Vec::with_capacity(DEFAULT_CAPACITY))
285
            }
286
            (Codec::Utf8, Some(Promotion::BytesToString))
287
22
            | (Codec::Utf8View, Some(Promotion::BytesToString)) => Self::BytesToString(
288
22
                OffsetBufferBuilder::new(DEFAULT_CAPACITY),
289
22
                Vec::with_capacity(DEFAULT_CAPACITY),
290
22
            ),
291
1
            (Codec::Binary, Some(Promotion::StringToBytes)) => Self::StringToBytes(
292
1
                OffsetBufferBuilder::new(DEFAULT_CAPACITY),
293
1
                Vec::with_capacity(DEFAULT_CAPACITY),
294
1
            ),
295
0
            (Codec::Null, _) => Self::Null(0),
296
42
            (Codec::Boolean, _) => Self::Boolean(BooleanBufferBuilder::new(DEFAULT_CAPACITY)),
297
179
            (Codec::Int32, _) => Self::Int32(Vec::with_capacity(DEFAULT_CAPACITY)),
298
46
            (Codec::Int64, _) => Self::Int64(Vec::with_capacity(DEFAULT_CAPACITY)),
299
36
            (Codec::Float32, _) => Self::Float32(Vec::with_capacity(DEFAULT_CAPACITY)),
300
49
            (Codec::Float64, _) => Self::Float64(Vec::with_capacity(DEFAULT_CAPACITY)),
301
58
            (Codec::Binary, _) => Self::Binary(
302
58
                OffsetBufferBuilder::new(DEFAULT_CAPACITY),
303
58
                Vec::with_capacity(DEFAULT_CAPACITY),
304
58
            ),
305
20
            (Codec::Utf8, _) => Self::String(
306
20
                OffsetBufferBuilder::new(DEFAULT_CAPACITY),
307
20
                Vec::with_capacity(DEFAULT_CAPACITY),
308
20
            ),
309
0
            (Codec::Utf8View, _) => Self::StringView(
310
0
                OffsetBufferBuilder::new(DEFAULT_CAPACITY),
311
0
                Vec::with_capacity(DEFAULT_CAPACITY),
312
0
            ),
313
0
            (Codec::Date32, _) => Self::Date32(Vec::with_capacity(DEFAULT_CAPACITY)),
314
0
            (Codec::TimeMillis, _) => Self::TimeMillis(Vec::with_capacity(DEFAULT_CAPACITY)),
315
0
            (Codec::TimeMicros, _) => Self::TimeMicros(Vec::with_capacity(DEFAULT_CAPACITY)),
316
0
            (Codec::TimestampMillis(is_utc), _) => {
317
0
                Self::TimestampMillis(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY))
318
            }
319
38
            (Codec::TimestampMicros(is_utc), _) => {
320
38
                Self::TimestampMicros(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY))
321
            }
322
8
            (Codec::Fixed(sz), _) => Self::Fixed(*sz, Vec::with_capacity(DEFAULT_CAPACITY)),
323
12
            (Codec::Decimal(precision, scale, size), _) => {
324
12
                let p = *precision;
325
12
                let s = *scale;
326
12
                let sz = *size;
327
12
                let prec = p as u8;
328
12
                let scl = s.unwrap_or(0) as i8;
329
12
                match (sz, p) {
330
11
                    (Some(
fixed_size10
), _) if fixed_size <= 1
610
=> {
331
10
                        let builder =
332
10
                            Decimal128Builder::new().with_precision_and_scale(prec, scl)
?0
;
333
10
                        Self::Decimal128(p, s, sz, builder)
334
                    }
335
1
                    (Some(fixed_size), _) if fixed_size <= 32 => {
336
1
                        let builder =
337
1
                            Decimal256Builder::new().with_precision_and_scale(prec, scl)
?0
;
338
1
                        Self::Decimal256(p, s, sz, builder)
339
                    }
340
0
                    (Some(fixed_size), _) => {
341
0
                        return Err(ArrowError::ParseError(format!(
342
0
                            "Unsupported decimal size: {fixed_size:?}"
343
0
                        )));
344
                    }
345
1
                    (None, p) if p <= DECIMAL128_MAX_PRECISION as usize => {
346
1
                        let builder =
347
1
                            Decimal128Builder::new().with_precision_and_scale(prec, scl)
?0
;
348
1
                        Self::Decimal128(p, s, sz, builder)
349
                    }
350
0
                    (None, p) if p <= DECIMAL256_MAX_PRECISION as usize => {
351
0
                        let builder =
352
0
                            Decimal256Builder::new().with_precision_and_scale(prec, scl)?;
353
0
                        Self::Decimal256(p, s, sz, builder)
354
                    }
355
                    (None, _) => {
356
0
                        return Err(ArrowError::ParseError(format!(
357
0
                            "Decimal precision {p} exceeds maximum supported"
358
0
                        )));
359
                    }
360
                }
361
            }
362
3
            (Codec::Interval, _) => Self::Duration(IntervalMonthDayNanoBuilder::new()),
363
54
            (Codec::List(item), _) => {
364
54
                let decoder = Self::try_new(item)
?0
;
365
54
                Self::Array(
366
54
                    Arc::new(item.field_with_name("item")),
367
54
                    OffsetBufferBuilder::new(DEFAULT_CAPACITY),
368
54
                    Box::new(decoder),
369
54
                )
370
            }
371
11
            (Codec::Enum(symbols), _) => {
372
11
                Self::Enum(Vec::with_capacity(DEFAULT_CAPACITY), symbols.clone())
373
            }
374
37
            (Codec::Struct(fields), _) => {
375
37
                let mut arrow_fields = Vec::with_capacity(fields.len());
376
37
                let mut encodings = Vec::with_capacity(fields.len());
377
61
                for avro_field in 
fields37
.
iter37
() {
378
61
                    let encoding = Self::try_new(avro_field.data_type())
?0
;
379
61
                    arrow_fields.push(avro_field.field());
380
61
                    encodings.push(encoding);
381
                }
382
37
                if let Some(ResolutionInfo::Record(
rec0
)) = data_type.resolution.as_ref() {
383
0
                    let skip_decoders = build_skip_decoders(&rec.skip_fields)?;
384
0
                    Self::RecordResolved {
385
0
                        fields: arrow_fields.into(),
386
0
                        encodings,
387
0
                        writer_to_reader: rec.writer_to_reader.clone(),
388
0
                        skip_decoders,
389
0
                    }
390
                } else {
391
37
                    Self::Record(arrow_fields.into(), encodings)
392
                }
393
            }
394
14
            (Codec::Map(child), _) => {
395
14
                let val_field = child.field_with_name("value").with_nullable(true);
396
14
                let map_field = Arc::new(ArrowField::new(
397
                    "entries",
398
14
                    DataType::Struct(Fields::from(vec![
399
14
                        ArrowField::new("key", DataType::Utf8, false),
400
14
                        val_field,
401
14
                    ])),
402
                    false,
403
                ));
404
14
                let val_dec = Self::try_new(child)
?0
;
405
14
                Self::Map(
406
14
                    map_field,
407
14
                    OffsetBufferBuilder::new(DEFAULT_CAPACITY),
408
14
                    OffsetBufferBuilder::new(DEFAULT_CAPACITY),
409
14
                    Vec::with_capacity(DEFAULT_CAPACITY),
410
14
                    Box::new(val_dec),
411
14
                )
412
            }
413
2
            (Codec::Uuid, _) => Self::Uuid(Vec::with_capacity(DEFAULT_CAPACITY)),
414
        };
415
678
        Ok(match data_type.nullability() {
416
584
            Some(nullability) => Self::Nullable(
417
584
                nullability,
418
584
                NullBufferBuilder::new(DEFAULT_CAPACITY),
419
584
                Box::new(decoder),
420
584
            ),
421
94
            None => decoder,
422
        })
423
678
    }
424
425
    /// Append a null record
426
226
    fn append_null(&mut self) {
427
226
        match self {
428
0
            Self::Null(count) => *count += 1,
429
2
            Self::Boolean(b) => b.append(false),
430
47
            Self::Int32(v) | Self::Date32(
v0
) | Self::TimeMillis(
v0
) => v.push(0),
431
7
            Self::Int64(v)
432
0
            | Self::Int32ToInt64(v)
433
0
            | Self::TimeMicros(v)
434
0
            | Self::TimestampMillis(_, v)
435
7
            | Self::TimestampMicros(_, 
v0
) => v.push(0),
436
2
            Self::Float32(v) | Self::Int32ToFloat32(
v0
) | Self::Int64ToFloat32(
v0
) => v.push(0.),
437
6
            Self::Float64(v)
438
0
            | Self::Int32ToFloat64(v)
439
0
            | Self::Int64ToFloat64(v)
440
6
            | Self::Float32ToFloat64(
v0
) => v.push(0.),
441
3
            Self::Binary(offsets, _)
442
20
            | Self::String(offsets, _)
443
0
            | Self::StringView(offsets, _)
444
0
            | Self::BytesToString(offsets, _)
445
23
            | Self::StringToBytes(
offsets0
, _) => {
446
23
                offsets.push_length(0);
447
23
            }
448
0
            Self::Uuid(v) => {
449
0
                v.extend([0; 16]);
450
0
            }
451
61
            Self::Array(_, offsets, e) => {
452
61
                offsets.push_length(0);
453
61
            }
454
36
            Self::Record(_, 
e26
) =>
e.iter_mut()26
.
for_each26
(|e| e.append_null()),
455
14
            Self::Map(_, _koff, moff, _, _) => {
456
14
                moff.push_length(0);
457
14
            }
458
2
            Self::Fixed(sz, accum) => {
459
2
                accum.extend(std::iter::repeat_n(0u8, *sz as usize));
460
2
            }
461
2
            Self::Decimal128(_, _, _, builder) => builder.append_value(0),
462
0
            Self::Decimal256(_, _, _, builder) => builder.append_value(i256::ZERO),
463
3
            Self::Enum(indices, _) => indices.push(0),
464
1
            Self::Duration(builder) => builder.append_null(),
465
30
            Self::Nullable(_, null_buffer, inner) => {
466
30
                null_buffer.append(false);
467
30
                inner.append_null();
468
30
            }
469
0
            Self::RecordResolved { encodings, .. } => {
470
0
                encodings.iter_mut().for_each(|e| e.append_null());
471
            }
472
        }
473
226
    }
474
475
    /// Decode a single record from `buf`
476
8.15k
    fn decode(&mut self, buf: &mut AvroCursor<'_>) -> Result<(), ArrowError> {
477
8.15k
        match self {
478
0
            Self::Null(x) => *x += 1,
479
295
            Self::Boolean(values) => values.append(buf.get_bool()
?0
),
480
1.17k
            Self::Int32(values) | Self::Date32(
values0
) | Self::TimeMillis(
values0
) => {
481
1.17k
                values.
push1.16k
(buf.get_int()
?1
)
482
            }
483
242
            Self::Int64(values)
484
0
            | Self::TimeMicros(values)
485
0
            | Self::TimestampMillis(_, values)
486
534
            | Self::TimestampMicros(_, 
values292
) => values.push(buf.get_long()
?0
),
487
250
            Self::Float32(values) => values.push(buf.get_float()
?0
),
488
309
            Self::Float64(values) => values.push(buf.get_double()
?0
),
489
44
            Self::Int32ToInt64(values) => values.push(buf.get_int()
?0
as i64),
490
43
            Self::Int32ToFloat32(values) => values.push(buf.get_int()
?0
as f32),
491
83
            Self::Int32ToFloat64(values) => values.push(buf.get_int()
?0
as f64),
492
43
            Self::Int64ToFloat32(values) => values.push(buf.get_long()
?0
as f32),
493
43
            Self::Int64ToFloat64(values) => values.push(buf.get_long()
?0
as f64),
494
43
            Self::Float32ToFloat64(values) => values.push(buf.get_float()
?0
as f64),
495
3
            Self::StringToBytes(offsets, values)
496
164
            | Self::BytesToString(offsets, values)
497
422
            | Self::Binary(offsets, values)
498
68
            | Self::String(offsets, values)
499
0
            | Self::StringView(offsets, values) => {
500
657
                let data = buf.get_bytes()
?0
;
501
657
                offsets.push_length(data.len());
502
657
                values.extend_from_slice(data);
503
            }
504
5
            Self::Uuid(values) => {
505
5
                let s_bytes = buf.get_bytes()
?0
;
506
5
                let s = std::str::from_utf8(s_bytes).map_err(|e| 
{0
507
0
                    ArrowError::ParseError(format!("UUID bytes are not valid UTF-8: {e}"))
508
0
                })?;
509
5
                let uuid = Uuid::try_parse(s)
510
5
                    .map_err(|e| ArrowError::ParseError(
format!0
(
"Failed to parse uuid: {e}"0
)))
?0
;
511
5
                values.extend_from_slice(uuid.as_bytes());
512
            }
513
160
            Self::Array(_, off, encoding) => {
514
289
                let 
total_items160
=
read_blocks160
(
buf160
, |cursor| encoding.decode(cursor))
?0
;
515
160
                off.push_length(total_items);
516
            }
517
115
            Self::Record(_, encodings) => {
518
316
                for 
encoding201
in encodings {
519
201
                    encoding.decode(buf)
?0
;
520
                }
521
            }
522
40
            Self::Map(_, koff, moff, kdata, valdec) => {
523
40
                let newly_added = read_blocks(buf, |cur| 
{37
524
37
                    let kb = cur.get_bytes()
?0
;
525
37
                    koff.push_length(kb.len());
526
37
                    kdata.extend_from_slice(kb);
527
37
                    valdec.decode(cur)
528
37
                })
?0
;
529
40
                moff.push_length(newly_added);
530
            }
531
12
            Self::Fixed(sz, accum) => {
532
12
                let fx = buf.get_fixed(*sz as usize)
?0
;
533
12
                accum.extend_from_slice(fx);
534
            }
535
198
            Self::Decimal128(_, _, size, builder) => {
536
198
                let raw = if let Some(
s196
) = size {
537
196
                    buf.get_fixed(*s)
?0
538
                } else {
539
2
                    buf.get_bytes()
?0
540
                };
541
198
                let ext = sign_extend_to::<16>(raw)
?0
;
542
198
                let val = i128::from_be_bytes(ext);
543
198
                builder.append_value(val);
544
            }
545
2
            Self::Decimal256(_, _, size, builder) => {
546
2
                let raw = if let Some(s) = size {
547
2
                    buf.get_fixed(*s)
?0
548
                } else {
549
0
                    buf.get_bytes()?
550
                };
551
2
                let ext = sign_extend_to::<32>(raw)
?0
;
552
2
                let val = i256::from_be_bytes(ext);
553
2
                builder.append_value(val);
554
            }
555
36
            Self::Enum(indices, _) => {
556
36
                indices.push(buf.get_int()
?0
);
557
            }
558
6
            Self::Duration(builder) => {
559
6
                let b = buf.get_fixed(12)
?0
;
560
6
                let months = u32::from_le_bytes(b[0..4].try_into().unwrap());
561
6
                let days = u32::from_le_bytes(b[4..8].try_into().unwrap());
562
6
                let millis = u32::from_le_bytes(b[8..12].try_into().unwrap());
563
6
                let nanos = (millis as i64) * 1_000_000;
564
6
                builder.append_value(IntervalMonthDayNano::new(months as i32, days as i32, nanos));
565
            }
566
4.06k
            Self::Nullable(order, nb, encoding) => {
567
4.06k
                let branch = buf.read_vlq()
?0
;
568
4.06k
                let is_not_null = match *order {
569
487
                    Nullability::NullFirst => branch != 0,
570
3.57k
                    Nullability::NullSecond => branch == 0,
571
                };
572
4.06k
                if is_not_null {
573
                    // It is important to decode before appending to null buffer in case of decode error
574
3.90k
                    encoding.decode(buf)
?1
;
575
3.90k
                    nb.append(true);
576
160
                } else {
577
160
                    encoding.append_null();
578
160
                    nb.append(false);
579
160
                }
580
            }
581
            Self::RecordResolved {
582
6
                encodings,
583
6
                writer_to_reader,
584
6
                skip_decoders,
585
                ..
586
            } => {
587
6
                decode_with_resolution(buf, encodings, writer_to_reader, skip_decoders)
?0
;
588
            }
589
        }
590
8.15k
        Ok(())
591
8.15k
    }
592
593
    /// Flush decoded records to an [`ArrayRef`]
594
1.68k
    fn flush(&mut self, nulls: Option<NullBuffer>) -> Result<ArrayRef, ArrowError> {
595
1.68k
        Ok(match self {
596
794
            Self::Nullable(_, n, e) => e.flush(n.finish())
?0
,
597
0
            Self::Null(size) => Arc::new(NullArray::new(std::mem::replace(size, 0))),
598
52
            Self::Boolean(b) => Arc::new(BooleanArray::new(b.finish(), nulls)),
599
242
            Self::Int32(values) => Arc::new(flush_primitive::<Int32Type>(values, nulls)),
600
0
            Self::Date32(values) => Arc::new(flush_primitive::<Date32Type>(values, nulls)),
601
53
            Self::Int64(values) => Arc::new(flush_primitive::<Int64Type>(values, nulls)),
602
0
            Self::TimeMillis(values) => {
603
0
                Arc::new(flush_primitive::<Time32MillisecondType>(values, nulls))
604
            }
605
0
            Self::TimeMicros(values) => {
606
0
                Arc::new(flush_primitive::<Time64MicrosecondType>(values, nulls))
607
            }
608
0
            Self::TimestampMillis(is_utc, values) => Arc::new(
609
0
                flush_primitive::<TimestampMillisecondType>(values, nulls)
610
0
                    .with_timezone_opt(is_utc.then(|| "+00:00")),
611
            ),
612
48
            Self::TimestampMicros(is_utc, values) => Arc::new(
613
48
                flush_primitive::<TimestampMicrosecondType>(values, nulls)
614
48
                    .with_timezone_opt(is_utc.then(|| "+00:00")),
615
            ),
616
46
            Self::Float32(values) => Arc::new(flush_primitive::<Float32Type>(values, nulls)),
617
61
            Self::Float64(values) => Arc::new(flush_primitive::<Float64Type>(values, nulls)),
618
6
            Self::Int32ToInt64(values) => Arc::new(flush_primitive::<Int64Type>(values, nulls)),
619
6
            Self::Int32ToFloat32(values) | Self::Int64ToFloat32(values) => {
620
12
                Arc::new(flush_primitive::<Float32Type>(values, nulls))
621
            }
622
11
            Self::Int32ToFloat64(values)
623
6
            | Self::Int64ToFloat64(values)
624
6
            | Self::Float32ToFloat64(values) => {
625
23
                Arc::new(flush_primitive::<Float64Type>(values, nulls))
626
            }
627
79
            Self::StringToBytes(
offsets1
,
values1
) | Self::Binary(offsets, values) => {
628
80
                let offsets = flush_offsets(offsets);
629
80
                let values = flush_values(values).into();
630
80
                Arc::new(BinaryArray::new(offsets, values, nulls))
631
            }
632
23
            Self::BytesToString(
offsets22
,
values22
) | Self::String(offsets, values) => {
633
45
                let offsets = flush_offsets(offsets);
634
45
                let values = flush_values(values).into();
635
45
                Arc::new(StringArray::new(offsets, values, nulls))
636
            }
637
0
            Self::StringView(offsets, values) => {
638
0
                let offsets = flush_offsets(offsets);
639
0
                let values = flush_values(values);
640
0
                let array = StringArray::new(offsets, values.into(), nulls.clone());
641
0
                let values: Vec<&str> = (0..array.len())
642
0
                    .map(|i| {
643
0
                        if array.is_valid(i) {
644
0
                            array.value(i)
645
                        } else {
646
0
                            ""
647
                        }
648
0
                    })
649
0
                    .collect();
650
0
                Arc::new(StringViewArray::from(values))
651
            }
652
71
            Self::Array(field, offsets, values) => {
653
71
                let values = values.flush(None)
?0
;
654
71
                let offsets = flush_offsets(offsets);
655
71
                Arc::new(ListArray::new(field.clone(), offsets, values, nulls))
656
            }
657
49
            Self::Record(fields, encodings) => {
658
49
                let arrays = encodings
659
49
                    .iter_mut()
660
82
                    .
map49
(|x| x.flush(None))
661
49
                    .collect::<Result<Vec<_>, _>>()
?0
;
662
49
                Arc::new(StructArray::new(fields.clone(), arrays, nulls))
663
            }
664
20
            Self::Map(map_field, k_off, m_off, kdata, valdec) => {
665
20
                let moff = flush_offsets(m_off);
666
20
                let koff = flush_offsets(k_off);
667
20
                let kd = flush_values(kdata).into();
668
20
                let val_arr = valdec.flush(None)
?0
;
669
20
                let key_arr = StringArray::new(koff, kd, None);
670
20
                if key_arr.len() != val_arr.len() {
671
0
                    return Err(ArrowError::InvalidArgumentError(format!(
672
0
                        "Map keys length ({}) != map values length ({})",
673
0
                        key_arr.len(),
674
0
                        val_arr.len()
675
0
                    )));
676
20
                }
677
20
                let final_len = moff.len() - 1;
678
20
                if let Some(
n7
) = &nulls {
679
7
                    if n.len() != final_len {
680
0
                        return Err(ArrowError::InvalidArgumentError(format!(
681
0
                            "Map array null buffer length {} != final map length {final_len}",
682
0
                            n.len()
683
0
                        )));
684
7
                    }
685
13
                }
686
20
                let entries_struct = StructArray::new(
687
20
                    Fields::from(vec![
688
20
                        Arc::new(ArrowField::new("key", DataType::Utf8, false)),
689
20
                        Arc::new(ArrowField::new("value", val_arr.data_type().clone(), true)),
690
                    ]),
691
20
                    vec![Arc::new(key_arr), val_arr],
692
20
                    None,
693
                );
694
20
                let map_arr = MapArray::new(map_field.clone(), moff, entries_struct, nulls, false);
695
20
                Arc::new(map_arr)
696
            }
697
11
            Self::Fixed(sz, accum) => {
698
11
                let b: Buffer = flush_values(accum).into();
699
11
                let arr = FixedSizeBinaryArray::try_new(*sz, b, nulls)
700
11
                    .map_err(|e| ArrowError::ParseError(
e0
.
to_string0
()))
?0
;
701
11
                Arc::new(arr)
702
            }
703
2
            Self::Uuid(values) => {
704
2
                let arr = FixedSizeBinaryArray::try_new(16, std::mem::take(values).into(), nulls)
705
2
                    .map_err(|e| ArrowError::ParseError(
e0
.
to_string0
()))
?0
;
706
2
                Arc::new(arr)
707
            }
708
47
            Self::Decimal128(precision, scale, _, builder) => {
709
47
                let (_, vals, _) = builder.finish().into_parts();
710
47
                let scl = scale.unwrap_or(0);
711
47
                let dec = Decimal128Array::new(vals, nulls)
712
47
                    .with_precision_and_scale(*precision as u8, scl as i8)
713
47
                    .map_err(|e| ArrowError::ParseError(
e0
.
to_string0
()))
?0
;
714
47
                Arc::new(dec)
715
            }
716
1
            Self::Decimal256(precision, scale, _, builder) => {
717
1
                let (_, vals, _) = builder.finish().into_parts();
718
1
                let scl = scale.unwrap_or(0);
719
1
                let dec = Decimal256Array::new(vals, nulls)
720
1
                    .with_precision_and_scale(*precision as u8, scl as i8)
721
1
                    .map_err(|e| ArrowError::ParseError(
e0
.
to_string0
()))
?0
;
722
1
                Arc::new(dec)
723
            }
724
14
            Self::Enum(indices, symbols) => {
725
14
                let keys = flush_primitive::<Int32Type>(indices, nulls);
726
14
                let values = Arc::new(StringArray::from(
727
47
                    
symbols.iter()14
.
map14
(|s| s.as_str()).
collect14
::<Vec<_>>(),
728
                ));
729
14
                Arc::new(DictionaryArray::try_new(keys, values)
?0
)
730
            }
731
3
            Self::Duration(builder) => {
732
3
                let (_, vals, _) = builder.finish().into_parts();
733
3
                let vals = IntervalMonthDayNanoArray::try_new(vals, nulls)
734
3
                    .map_err(|e| ArrowError::ParseError(
e0
.
to_string0
()))
?0
;
735
3
                Arc::new(vals)
736
            }
737
            Self::RecordResolved {
738
5
                fields, encodings, ..
739
            } => {
740
5
                let arrays = encodings
741
5
                    .iter_mut()
742
6
                    .
map5
(|x| x.flush(None))
743
5
                    .collect::<Result<Vec<_>, _>>()
?0
;
744
5
                Arc::new(StructArray::new(fields.clone(), arrays, nulls))
745
            }
746
        })
747
1.68k
    }
748
}
749
750
/// Selects what `process_blockwise` does with a block whose item count is negative,
/// i.e. a block whose count is followed by the payload size in bytes.
#[derive(Debug, Clone, Copy)]
enum NegativeBlockBehavior {
    /// Read the byte size, then still visit the items one at a time.
    ProcessItems,
    /// Read the byte size and jump over the whole payload in a single step.
    SkipBySize,
}
755
756
#[inline]
757
2
fn skip_blocks(
758
2
    buf: &mut AvroCursor<'_>,
759
2
    mut skip_item: impl FnMut(&mut AvroCursor<'_>) -> Result<(), ArrowError>,
760
2
    _skip_negative_block_by_size: bool,
761
2
) -> Result<usize, ArrowError> {
762
2
    process_blockwise(
763
2
        buf,
764
0
        move |c| skip_item(c),
765
2
        NegativeBlockBehavior::SkipBySize,
766
    )
767
2
}
768
769
#[inline]
770
200
fn read_blocks(
771
200
    buf: &mut AvroCursor,
772
200
    decode_entry: impl FnMut(&mut AvroCursor) -> Result<(), ArrowError>,
773
200
) -> Result<usize, ArrowError> {
774
200
    process_blockwise(buf, decode_entry, NegativeBlockBehavior::ProcessItems)
775
200
}
776
777
#[inline]
778
202
fn process_blockwise(
779
202
    buf: &mut AvroCursor<'_>,
780
202
    mut on_item: impl FnMut(&mut AvroCursor<'_>) -> Result<(), ArrowError>,
781
202
    negative_behavior: NegativeBlockBehavior,
782
202
) -> Result<usize, ArrowError> {
783
202
    let mut total = 0usize;
784
    loop {
785
363
        let block_count = buf.get_long()
?0
;
786
363
        match block_count.cmp(&0) {
787
202
            Ordering::Equal => break,
788
            Ordering::Less => {
789
3
                let count = (-block_count) as usize;
790
                // A negative count is followed by a long of the size in bytes
791
3
                let size_in_bytes = buf.get_long()
?0
as usize;
792
3
                match negative_behavior {
793
                    NegativeBlockBehavior::ProcessItems => {
794
                        // Process items one-by-one after reading size
795
1
                        for _ in 0..count {
796
3
                            on_item(buf)
?0
;
797
                        }
798
                    }
799
                    NegativeBlockBehavior::SkipBySize => {
800
                        // Skip the entire block payload at once
801
2
                        let _ = buf.get_fixed(size_in_bytes)
?0
;
802
                    }
803
                }
804
3
                total += count;
805
            }
806
            Ordering::Greater => {
807
158
                let count = block_count as usize;
808
158
                for _ in 0..count {
809
323
                    on_item(buf)
?0
;
810
                }
811
158
                total += count;
812
            }
813
        }
814
    }
815
202
    Ok(total)
816
202
}
817
818
#[inline]
819
661
fn flush_values<T>(values: &mut Vec<T>) -> Vec<T> {
820
661
    std::mem::replace(values, Vec::with_capacity(DEFAULT_CAPACITY))
821
661
}
822
823
#[inline]
824
236
fn flush_offsets(offsets: &mut OffsetBufferBuilder<i32>) -> OffsetBuffer<i32> {
825
236
    std::mem::replace(offsets, OffsetBufferBuilder::new(DEFAULT_CAPACITY)).finish()
826
236
}
827
828
#[inline]
829
505
fn flush_primitive<T: ArrowPrimitiveType>(
830
505
    values: &mut Vec<T::Native>,
831
505
    nulls: Option<NullBuffer>,
832
505
) -> PrimitiveArray<T> {
833
505
    PrimitiveArray::new(flush_values(values).into(), nulls)
834
505
}
835
836
/// Sign extends a byte slice to a fixed-size array of N bytes.
837
/// This is done by filling the leading bytes with 0x00 for positive numbers
838
/// or 0xFF for negative numbers.
839
#[inline]
840
200
fn sign_extend_to<const N: usize>(raw: &[u8]) -> Result<[u8; N], ArrowError> {
841
200
    if raw.len() > N {
842
0
        return Err(ArrowError::ParseError(format!(
843
0
            "Cannot extend a slice of length {} to {} bytes.",
844
0
            raw.len(),
845
0
            N
846
0
        )));
847
200
    }
848
200
    let mut arr = [0u8; N];
849
200
    let pad_len = N - raw.len();
850
    // Determine the byte to use for padding based on the sign bit of the raw data.
851
200
    let extension_byte = if raw.is_empty() || (raw[0] & 0x80 == 0) {
852
196
        0x00
853
    } else {
854
4
        0xFF
855
    };
856
200
    arr[..pad_len].fill(extension_byte);
857
200
    arr[pad_len..].copy_from_slice(raw);
858
200
    Ok(arr)
859
200
}
860
861
/// Lightweight skipping decoder for writer-only fields
862
#[derive(Debug)]
863
enum Skipper {
864
    Null,
865
    Boolean,
866
    Int32,
867
    Int64,
868
    Float32,
869
    Float64,
870
    Bytes,
871
    String,
872
    Date32,
873
    TimeMillis,
874
    TimeMicros,
875
    TimestampMillis,
876
    TimestampMicros,
877
    Fixed(usize),
878
    Decimal(Option<usize>),
879
    UuidString,
880
    Enum,
881
    DurationFixed12,
882
    List(Box<Skipper>),
883
    Map(Box<Skipper>),
884
    Struct(Vec<Skipper>),
885
    Nullable(Nullability, Box<Skipper>),
886
}
887
888
impl Skipper {
889
19
    fn from_avro(dt: &AvroDataType) -> Result<Self, ArrowError> {
890
19
        let mut base = match dt.codec() {
891
0
            Codec::Null => Self::Null,
892
2
            Codec::Boolean => Self::Boolean,
893
7
            Codec::Int32 | Codec::Date32 | Codec::TimeMillis => Self::Int32,
894
2
            Codec::Int64 => Self::Int64,
895
0
            Codec::TimeMicros => Self::TimeMicros,
896
0
            Codec::TimestampMillis(_) => Self::TimestampMillis,
897
1
            Codec::TimestampMicros(_) => Self::TimestampMicros,
898
2
            Codec::Float32 => Self::Float32,
899
1
            Codec::Float64 => Self::Float64,
900
4
            Codec::Binary => Self::Bytes,
901
0
            Codec::Utf8 | Codec::Utf8View => Self::String,
902
0
            Codec::Fixed(sz) => Self::Fixed(*sz as usize),
903
0
            Codec::Decimal(_, _, size) => Self::Decimal(*size),
904
0
            Codec::Uuid => Self::UuidString, // encoded as string
905
0
            Codec::Enum(_) => Self::Enum,
906
0
            Codec::List(item) => Self::List(Box::new(Skipper::from_avro(item)?)),
907
0
            Codec::Struct(fields) => Self::Struct(
908
0
                fields
909
0
                    .iter()
910
0
                    .map(|f| Skipper::from_avro(f.data_type()))
911
0
                    .collect::<Result<_, _>>()?,
912
            ),
913
0
            Codec::Map(values) => Self::Map(Box::new(Skipper::from_avro(values)?)),
914
0
            Codec::Interval => Self::DurationFixed12,
915
            _ => {
916
0
                return Err(ArrowError::NotYetImplemented(format!(
917
0
                    "Skipper not implemented for codec {:?}",
918
0
                    dt.codec()
919
0
                )));
920
            }
921
        };
922
19
        if let Some(n) = dt.nullability() {
923
19
            base = Self::Nullable(n, Box::new(base));
924
19
        
}0
925
19
        Ok(base)
926
19
    }
927
928
311
    fn skip(&mut self, buf: &mut AvroCursor<'_>) -> Result<(), ArrowError> {
929
311
        match self {
930
0
            Self::Null => Ok(()),
931
            Self::Boolean => {
932
16
                buf.get_bool()
?0
;
933
16
                Ok(())
934
            }
935
            Self::Int32 | Self::Date32 | Self::TimeMillis => {
936
58
                buf.get_int()
?0
;
937
58
                Ok(())
938
            }
939
            Self::Int64 | Self::TimeMicros | Self::TimestampMillis | Self::TimestampMicros => {
940
24
                buf.get_long()
?0
;
941
24
                Ok(())
942
            }
943
            Self::Float32 => {
944
16
                buf.get_float()
?0
;
945
16
                Ok(())
946
            }
947
            Self::Float64 => {
948
8
                buf.get_double()
?0
;
949
8
                Ok(())
950
            }
951
            Self::Bytes | Self::String | Self::UuidString => {
952
33
                buf.get_bytes()
?0
;
953
33
                Ok(())
954
            }
955
0
            Self::Fixed(sz) => {
956
0
                buf.get_fixed(*sz)?;
957
0
                Ok(())
958
            }
959
0
            Self::Decimal(size) => {
960
0
                if let Some(s) = size {
961
0
                    buf.get_fixed(*s)
962
                } else {
963
0
                    buf.get_bytes()
964
0
                }?;
965
0
                Ok(())
966
            }
967
            Self::Enum => {
968
0
                buf.get_int()?;
969
0
                Ok(())
970
            }
971
            Self::DurationFixed12 => {
972
0
                buf.get_fixed(12)?;
973
0
                Ok(())
974
            }
975
1
            Self::List(item) => {
976
1
                skip_blocks(buf, |c| 
item0
.
skip0
(
c0
), true)
?0
;
977
1
                Ok(())
978
            }
979
1
            Self::Map(value) => {
980
1
                skip_blocks(
981
1
                    buf,
982
0
                    |c| {
983
0
                        c.get_bytes()?; // key
984
0
                        value.skip(c)
985
0
                    },
986
                    true,
987
0
                )?;
988
1
                Ok(())
989
            }
990
0
            Self::Struct(fields) => {
991
0
                for f in fields.iter_mut() {
992
0
                    f.skip(buf)?
993
                }
994
0
                Ok(())
995
            }
996
154
            Self::Nullable(order, inner) => {
997
154
                let branch = buf.read_vlq()
?0
;
998
154
                let is_not_null = match *order {
999
2
                    Nullability::NullFirst => branch != 0,
1000
152
                    Nullability::NullSecond => branch == 0,
1001
                };
1002
154
                if is_not_null {
1003
153
                    inner.skip(buf)
?0
;
1004
1
                }
1005
154
                Ok(())
1006
            }
1007
        }
1008
311
    }
1009
}
1010
1011
#[inline]
1012
29
fn build_skip_decoders(
1013
29
    skip_fields: &[Option<AvroDataType>],
1014
29
) -> Result<Vec<Option<Skipper>>, ArrowError> {
1015
29
    skip_fields
1016
29
        .iter()
1017
199
        .
map29
(|opt| opt.as_ref().map(Skipper::from_avro).transpose())
1018
29
        .collect()
1019
29
}
1020
1021
#[cfg(test)]
1022
mod tests {
1023
    use super::*;
1024
    use crate::codec::AvroField;
1025
    use arrow_array::{
1026
        cast::AsArray, Array, Decimal128Array, DictionaryArray, FixedSizeBinaryArray,
1027
        IntervalMonthDayNanoArray, ListArray, MapArray, StringArray, StructArray,
1028
    };
1029
1030
47
    /// Encodes `value` as a zig-zag mapped, variable-length integer: seven bits
    /// per byte, least significant group first, with the high bit set on every
    /// byte except the last.
    fn encode_avro_int(value: i32) -> Vec<u8> {
        // Work on the unsigned bit pattern so `>>` is a logical shift. With a
        // signed value the arithmetic shift keeps the sign bit and the loop never
        // terminates for inputs whose zig-zag form has the top bit set.
        let mut v = ((value << 1) ^ (value >> 31)) as u32;
        let mut buf = Vec::new();
        while v >= 0x80 {
            buf.push((v & 0x7F) as u8 | 0x80);
            v >>= 7;
        }
        buf.push(v as u8);
        buf
    }
1040
1041
50
    /// Encodes `value` as a zig-zag mapped, variable-length integer: seven bits
    /// per byte, least significant group first, with the high bit set on every
    /// byte except the last.
    fn encode_avro_long(value: i64) -> Vec<u8> {
        // Work on the unsigned bit pattern so `>>` is a logical shift. With a
        // signed value the arithmetic shift keeps the sign bit and the loop never
        // terminates for inputs whose zig-zag form has the top bit set.
        let mut v = ((value << 1) ^ (value >> 63)) as u64;
        let mut buf = Vec::new();
        while v >= 0x80 {
            buf.push((v & 0x7F) as u8 | 0x80);
            v >>= 7;
        }
        buf.push(v as u8);
        buf
    }
1051
1052
15
    fn encode_avro_bytes(bytes: &[u8]) -> Vec<u8> {
1053
15
        let mut buf = encode_avro_long(bytes.len() as i64);
1054
15
        buf.extend_from_slice(bytes);
1055
15
        buf
1056
15
    }
1057
1058
21
    fn avro_from_codec(codec: Codec) -> AvroDataType {
1059
21
        AvroDataType::new(codec, Default::default(), None)
1060
21
    }
1061
1062
9
    fn decoder_for_promotion(
1063
9
        writer: PrimitiveType,
1064
9
        reader: PrimitiveType,
1065
9
        use_utf8view: bool,
1066
9
    ) -> Decoder {
1067
9
        let ws = Schema::TypeName(TypeName::Primitive(writer));
1068
9
        let rs = Schema::TypeName(TypeName::Primitive(reader));
1069
9
        let field =
1070
9
            AvroField::resolve_from_writer_and_reader(&ws, &rs, use_utf8view, false).unwrap();
1071
9
        Decoder::try_new(field.data_type()).unwrap()
1072
9
    }
1073
1074
    /// An `int` writer read as `long` decodes into an `Int64Array` with the same values.
    #[test]
    fn test_schema_resolution_promotion_int_to_long() {
        let mut decoder = decoder_for_promotion(PrimitiveType::Int, PrimitiveType::Long, false);
        assert!(matches!(decoder, Decoder::Int32ToInt64(_)));
        for value in [0, 1, -2, 123456] {
            let encoded = encode_avro_int(value);
            decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        }
        let flushed = decoder.flush(None).unwrap();
        let longs = flushed.as_any().downcast_ref::<Int64Array>().unwrap();
        let expected = [0_i64, 1, -2, 123456];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(longs.value(idx), *want);
        }
    }
1090
1091
    /// An `int` writer read as `float` decodes into a `Float32Array`.
    #[test]
    fn test_schema_resolution_promotion_int_to_float() {
        let mut decoder = decoder_for_promotion(PrimitiveType::Int, PrimitiveType::Float, false);
        assert!(matches!(decoder, Decoder::Int32ToFloat32(_)));
        for value in [0, 42, -7] {
            let encoded = encode_avro_int(value);
            decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        }
        let flushed = decoder.flush(None).unwrap();
        let floats = flushed.as_any().downcast_ref::<Float32Array>().unwrap();
        let expected = [0.0_f32, 42.0, -7.0];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(floats.value(idx), *want);
        }
    }
1106
1107
    /// An `int` writer read as `double` decodes into a `Float64Array`.
    #[test]
    fn test_schema_resolution_promotion_int_to_double() {
        let mut decoder = decoder_for_promotion(PrimitiveType::Int, PrimitiveType::Double, false);
        assert!(matches!(decoder, Decoder::Int32ToFloat64(_)));
        for value in [1, -1, 10_000] {
            let encoded = encode_avro_int(value);
            decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        }
        let flushed = decoder.flush(None).unwrap();
        let doubles = flushed.as_any().downcast_ref::<Float64Array>().unwrap();
        let expected = [1.0_f64, -1.0, 10_000.0];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(doubles.value(idx), *want);
        }
    }
1122
1123
    /// A `long` writer read as `float` decodes into a `Float32Array`.
    #[test]
    fn test_schema_resolution_promotion_long_to_float() {
        let mut decoder = decoder_for_promotion(PrimitiveType::Long, PrimitiveType::Float, false);
        assert!(matches!(decoder, Decoder::Int64ToFloat32(_)));
        for value in [0_i64, 1_000_000_i64, -123_i64] {
            let encoded = encode_avro_long(value);
            decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        }
        let flushed = decoder.flush(None).unwrap();
        let floats = flushed.as_any().downcast_ref::<Float32Array>().unwrap();
        let expected = [0.0_f32, 1_000_000.0, -123.0];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(floats.value(idx), *want);
        }
    }
1138
1139
    /// A `long` writer read as `double` decodes into a `Float64Array`.
    #[test]
    fn test_schema_resolution_promotion_long_to_double() {
        let mut decoder = decoder_for_promotion(PrimitiveType::Long, PrimitiveType::Double, false);
        assert!(matches!(decoder, Decoder::Int64ToFloat64(_)));
        for value in [2_i64, -2_i64, 9_223_372_i64] {
            let encoded = encode_avro_long(value);
            decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        }
        let flushed = decoder.flush(None).unwrap();
        let doubles = flushed.as_any().downcast_ref::<Float64Array>().unwrap();
        let expected = [2.0_f64, -2.0, 9_223_372.0];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(doubles.value(idx), *want);
        }
    }
1154
1155
    /// A `float` writer read as `double` decodes into a `Float64Array`.
    #[test]
    fn test_schema_resolution_promotion_float_to_double() {
        let mut decoder = decoder_for_promotion(PrimitiveType::Float, PrimitiveType::Double, false);
        assert!(matches!(decoder, Decoder::Float32ToFloat64(_)));
        for value in [0.5_f32, -3.25_f32, 1.0e6_f32] {
            // The test feeds the little-endian bytes of each float directly.
            let encoded = value.to_le_bytes();
            decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        }
        let flushed = decoder.flush(None).unwrap();
        let doubles = flushed.as_any().downcast_ref::<Float64Array>().unwrap();
        let expected = [0.5_f64, -3.25_f64, 1.0e6_f64];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(doubles.value(idx), *want);
        }
    }
1170
1171
    /// A `bytes` writer read as `string` decodes into a `StringArray`.
    #[test]
    fn test_schema_resolution_promotion_bytes_to_string_utf8() {
        let words = ["hello", "world", "héllo"];
        let mut decoder = decoder_for_promotion(PrimitiveType::Bytes, PrimitiveType::String, false);
        assert!(matches!(decoder, Decoder::BytesToString(_, _)));
        for word in words {
            let encoded = encode_avro_bytes(word.as_bytes());
            decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        }
        let flushed = decoder.flush(None).unwrap();
        let strings = flushed.as_any().downcast_ref::<StringArray>().unwrap();
        for (idx, word) in words.iter().enumerate() {
            assert_eq!(strings.value(idx), *word);
        }
    }
1186
1187
    /// With `use_utf8view` set, `bytes` read as `string` still yields the
    /// `BytesToString` decoder and a `StringArray`.
    #[test]
    fn test_schema_resolution_promotion_bytes_to_string_utf8view_enabled() {
        let mut decoder = decoder_for_promotion(PrimitiveType::Bytes, PrimitiveType::String, true);
        assert!(matches!(decoder, Decoder::BytesToString(_, _)));
        let encoded = encode_avro_bytes("abc".as_bytes());
        decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        let flushed = decoder.flush(None).unwrap();
        let strings = flushed.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(strings.value(0), "abc");
    }
1198
1199
    /// A `string` writer read as `bytes` decodes into a `BinaryArray`.
    #[test]
    fn test_schema_resolution_promotion_string_to_bytes() {
        let texts = ["", "abc", "data"];
        let mut decoder = decoder_for_promotion(PrimitiveType::String, PrimitiveType::Bytes, false);
        assert!(matches!(decoder, Decoder::StringToBytes(_, _)));
        for text in texts {
            let encoded = encode_avro_bytes(text.as_bytes());
            decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        }
        let flushed = decoder.flush(None).unwrap();
        let binary = flushed.as_any().downcast_ref::<BinaryArray>().unwrap();
        for (idx, text) in texts.iter().enumerate() {
            assert_eq!(binary.value(idx), text.as_bytes());
        }
    }
1214
1215
    /// Identical `int` writer and reader schemas resolve to the plain `Int32` decoder.
    #[test]
    fn test_schema_resolution_no_promotion_passthrough_int() {
        let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int));
        let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int));
        let resolved =
            AvroField::resolve_from_writer_and_reader(&writer_schema, &reader_schema, false, false)
                .unwrap();
        let mut decoder = Decoder::try_new(resolved.data_type()).unwrap();
        assert!(matches!(decoder, Decoder::Int32(_)));
        let inputs = [7, -9];
        for value in inputs {
            let encoded = encode_avro_int(value);
            decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        }
        let flushed = decoder.flush(None).unwrap();
        let ints = flushed.as_any().downcast_ref::<Int32Array>().unwrap();
        for (idx, want) in inputs.iter().enumerate() {
            assert_eq!(ints.value(idx), *want);
        }
    }
1232
1233
    /// Resolving an `int` writer against a `boolean` reader must fail.
    #[test]
    fn test_schema_resolution_illegal_promotion_int_to_boolean_errors() {
        let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int));
        let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Boolean));
        let outcome =
            AvroField::resolve_from_writer_and_reader(&writer_schema, &reader_schema, false, false);
        assert!(outcome.is_err(), "expected error for illegal promotion");
    }
1240
1241
    /// Decodes one map with a single string entry and checks its key and value.
    #[test]
    fn test_map_decoding_one_entry() {
        let map_type = avro_from_codec(Codec::Map(Arc::new(avro_from_codec(Codec::Utf8))));
        let mut decoder = Decoder::try_new(&map_type).unwrap();
        // A single map with one entry, {"hello": "world"}: block count 1, the key,
        // the value, then the terminating block count 0.
        let encoded = [
            encode_avro_long(1),
            encode_avro_bytes(b"hello"), // key
            encode_avro_bytes(b"world"), // value
            encode_avro_long(0),
        ]
        .concat();
        decoder.decode(&mut AvroCursor::new(&encoded)).unwrap();
        let flushed = decoder.flush(None).unwrap();
        let maps = flushed.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(maps.len(), 1); // one map
        assert_eq!(maps.value_length(0), 1);
        let first_map = maps.value(0);
        let entries = first_map.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(entries.len(), 1);
        let keys = entries.column_by_name("key").unwrap();
        let keys = keys.as_any().downcast_ref::<StringArray>().unwrap();
        let values = entries.column_by_name("value").unwrap();
        let values = values.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(keys.value(0), "hello");
        assert_eq!(values.value(0), "world");
    }
1276
1277
    /// A lone terminating block count decodes to one map with zero entries.
    #[test]
    fn test_map_decoding_empty() {
        let map_type = avro_from_codec(Codec::Map(Arc::new(avro_from_codec(Codec::Utf8))));
        let mut decoder = Decoder::try_new(&map_type).unwrap();
        let encoded = encode_avro_long(0);
        let mut cursor = AvroCursor::new(&encoded);
        decoder.decode(&mut cursor).unwrap();
        let flushed = decoder.flush(None).unwrap();
        let maps = flushed.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(maps.len(), 1);
        assert_eq!(maps.value_length(0), 0);
    }
1289
1290
    /// Decodes two 3-byte fixed values and checks cursor movement and array contents.
    #[test]
    fn test_fixed_decoding() {
        let fixed_type = avro_from_codec(Codec::Fixed(3));
        let mut decoder = Decoder::try_new(&fixed_type).expect("Failed to create decoder");

        let first = [1u8, 2, 3];
        let mut first_cursor = AvroCursor::new(&first);
        decoder
            .decode(&mut first_cursor)
            .expect("Failed to decode data1");
        assert_eq!(
            first_cursor.position(),
            3,
            "Cursor should advance by fixed size"
        );

        let second = [4u8, 5, 6];
        let mut second_cursor = AvroCursor::new(&second);
        decoder
            .decode(&mut second_cursor)
            .expect("Failed to decode data2");
        assert_eq!(
            second_cursor.position(),
            3,
            "Cursor should advance by fixed size"
        );

        let flushed = decoder.flush(None).expect("Failed to flush decoder");
        assert_eq!(flushed.len(), 2, "Array should contain two items");
        let fixed = flushed
            .as_any()
            .downcast_ref::<FixedSizeBinaryArray>()
            .expect("Failed to downcast to FixedSizeBinaryArray");
        assert_eq!(
            fixed.value_length(),
            3,
            "Fixed size of binary values should be 3"
        );
        assert_eq!(fixed.value(0), &[1, 2, 3], "First item mismatch");
        assert_eq!(fixed.value(1), &[4, 5, 6], "Second item mismatch");
    }
1329
1330
    /// Flushing a fixed decoder that saw no input gives an empty array that still
    /// reports the declared width.
    #[test]
    fn test_fixed_decoding_empty() {
        let fixed_type = avro_from_codec(Codec::Fixed(5));
        let mut decoder = Decoder::try_new(&fixed_type).expect("Failed to create decoder");

        let flushed = decoder
            .flush(None)
            .expect("Failed to flush decoder for empty input");
        assert_eq!(flushed.len(), 0, "Array should be empty");

        let fixed = flushed
            .as_any()
            .downcast_ref::<FixedSizeBinaryArray>()
            .expect("Failed to downcast to FixedSizeBinaryArray for empty array");
        assert_eq!(
            fixed.value_length(),
            5,
            "Fixed size of binary values should be 5 as per type"
        );
    }
1351
1352
    /// Decodes a textual UUID and checks it is stored as its 16 raw bytes.
    #[test]
    fn test_uuid_decoding() {
        let uuid_type = avro_from_codec(Codec::Uuid);
        let mut decoder = Decoder::try_new(&uuid_type).expect("Failed to create decoder");
        let text = "f81d4fae-7dec-11d0-a765-00a0c91e6bf6";
        let encoded = encode_avro_bytes(text.as_bytes());
        let mut cursor = AvroCursor::new(&encoded);
        decoder.decode(&mut cursor).expect("Failed to decode data");
        assert_eq!(
            cursor.position(),
            encoded.len(),
            "Cursor should advance by varint size + data size"
        );
        let flushed = decoder.flush(None).expect("Failed to flush decoder");
        let fixed = flushed
            .as_any()
            .downcast_ref::<FixedSizeBinaryArray>()
            .expect("Array should be a FixedSizeBinaryArray");
        assert_eq!(fixed.len(), 1);
        assert_eq!(fixed.value_length(), 16);
        let want = [
            0xf8, 0x1d, 0x4f, 0xae, 0x7d, 0xec, 0x11, 0xd0, 0xa7, 0x65, 0x00, 0xa0, 0xc9, 0x1e,
            0x6b, 0xf6,
        ];
        assert_eq!(fixed.value(0), &want);
    }
1378
1379
    /// Decodes a two-item list followed by an empty list and checks offsets and values.
    #[test]
    fn test_array_decoding() {
        let list_type = avro_from_codec(Codec::List(Arc::new(avro_from_codec(Codec::Int32))));
        let mut decoder = Decoder::try_new(&list_type).unwrap();
        // First row: one block of two ints, then the terminating block count.
        let first_row = [
            encode_avro_long(2),
            encode_avro_int(10),
            encode_avro_int(20),
            encode_avro_long(0),
        ]
        .concat();
        // Second row: just the terminating block count.
        let second_row = encode_avro_long(0);
        decoder.decode(&mut AvroCursor::new(&first_row)).unwrap();
        decoder.decode(&mut AvroCursor::new(&second_row)).unwrap();
        let flushed = decoder.flush(None).unwrap();
        let lists = flushed.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(lists.len(), 2);
        assert_eq!(lists.value_offsets(), &[0, 2, 2]);
        let items = lists.values().as_primitive::<Int32Type>();
        assert_eq!(items.len(), 2);
        assert_eq!(items.value(0), 10);
        assert_eq!(items.value(1), 20);
    }
1405
1406
    // Decodes an array whose only block is written with a negative item count.
    //
    // In the Avro binary encoding a negative block count is followed by the block's size in
    // bytes, and the absolute value of the count is the number of items. The size written
    // here is derived from the encoded items so the test input is a well-formed block.
    #[test]
    fn test_array_decoding_with_negative_block_count() {
        let item_dt = avro_from_codec(Codec::Int32);
        let list_dt = avro_from_codec(Codec::List(Arc::new(item_dt)));
        let mut decoder = Decoder::try_new(&list_dt).unwrap();

        // Encode the items first so their real byte length can be written into the header.
        let mut items = Vec::new();
        items.extend_from_slice(&encode_avro_int(1));
        items.extend_from_slice(&encode_avro_int(2));
        items.extend_from_slice(&encode_avro_int(3));

        let mut data = encode_avro_long(-3); // negative count: three items, size follows
        data.extend_from_slice(&encode_avro_long(items.len() as i64));
        data.extend_from_slice(&items);
        data.extend_from_slice(&encode_avro_long(0)); // end of array

        let mut cursor = AvroCursor::new(&data);
        decoder.decode(&mut cursor).unwrap();
        let array = decoder.flush(None).unwrap();
        let list_arr = array.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(list_arr.len(), 1);
        assert_eq!(list_arr.value_length(0), 3);
        let values = list_arr.values().as_primitive::<Int32Type>();
        assert_eq!(values.len(), 3);
        assert_eq!(values.value(0), 1);
        assert_eq!(values.value(1), 2);
        assert_eq!(values.value(2), 3);
    }
1429
1430
    // Decodes one row of a list-of-lists type containing the single inner list `[5, 6]`
    // and checks both nesting levels of the result.
    #[test]
    fn test_nested_array_decoding() {
        let int_list_type = avro_from_codec(Codec::List(Arc::new(avro_from_codec(Codec::Int32))));
        let nested_type = avro_from_codec(Codec::List(Arc::new(int_list_type)));
        let mut nested_decoder = Decoder::try_new(&nested_type).unwrap();

        let mut encoded = Vec::new();
        encoded.extend_from_slice(&encode_avro_long(1)); // outer list: block of one item
        encoded.extend_from_slice(&encode_avro_long(2)); // inner list: block of two items
        encoded.extend_from_slice(&encode_avro_int(5));
        encoded.extend_from_slice(&encode_avro_int(6));
        encoded.extend_from_slice(&encode_avro_long(0)); // end of inner list
        encoded.extend_from_slice(&encode_avro_long(0)); // end of outer list

        let mut reader = AvroCursor::new(&encoded);
        nested_decoder.decode(&mut reader).unwrap();
        let flushed = nested_decoder.flush(None).unwrap();

        let outer_lists = flushed.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(outer_lists.len(), 1);
        assert_eq!(outer_lists.value_length(0), 1);

        let inner_lists = outer_lists
            .values()
            .as_any()
            .downcast_ref::<ListArray>()
            .unwrap();
        assert_eq!(inner_lists.len(), 1);
        assert_eq!(inner_lists.value_length(0), 2);

        let leaf_values = inner_lists.values().as_any().downcast_ref::<Int32Array>();
        assert_eq!(leaf_values.unwrap().values(), &[5, 6]);
    }
1458
1459
    // Decodes a single row holding an empty list of strings and checks that the row is
    // present with length zero.
    #[test]
    fn test_array_decoding_empty_array() {
        let item_type = avro_from_codec(Codec::Utf8);
        let list_type = avro_from_codec(Codec::List(Arc::new(item_type)));
        let mut list_decoder = Decoder::try_new(&list_type).unwrap();

        // A lone zero block count is the entire encoding of an array without items.
        let encoded = encode_avro_long(0);
        let mut reader = AvroCursor::new(&encoded);
        list_decoder.decode(&mut reader).unwrap();

        let flushed = list_decoder.flush(None).unwrap();
        let lists = flushed.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(lists.len(), 1);
        assert_eq!(lists.value_length(0), 0);
    }
1471
1472
    // Decodes two 32-byte fixed decimals (precision 5, scale 2) and checks that they come
    // back as a `Decimal256Array` holding 123.45 and -1.23.
    #[test]
    fn test_decimal_decoding_fixed256() {
        let decimal_type = avro_from_codec(Codec::Decimal(5, Some(2), Some(32)));
        let mut decimal_decoder = Decoder::try_new(&decimal_type).unwrap();

        // 12345 (0x3039) as a 32-byte big-endian value: zero padding, then the two low bytes.
        let mut positive = [0x00u8; 32];
        positive[30] = 0x30;
        positive[31] = 0x39;
        // -123 in two's complement: 0xFF sign extension with 0x85 as the lowest byte.
        let mut negative = [0xFFu8; 32];
        negative[31] = 0x85;

        // Both values are laid out back to back and read through one cursor.
        let encoded = [positive, negative].concat();
        let mut reader = AvroCursor::new(&encoded);
        for _ in 0..2 {
            decimal_decoder.decode(&mut reader).unwrap();
        }

        let flushed = decimal_decoder.flush(None).unwrap();
        let decimals = flushed.as_any().downcast_ref::<Decimal256Array>().unwrap();
        assert_eq!(decimals.len(), 2);
        assert_eq!(decimals.value_as_string(0), "123.45");
        assert_eq!(decimals.value_as_string(1), "-1.23");
    }
1498
1499
    // Decodes two 16-byte fixed decimals (precision 5, scale 2) and checks that they come
    // back as a `Decimal128Array` holding 123.45 and -1.23.
    #[test]
    fn test_decimal_decoding_fixed128() {
        let decimal_type = avro_from_codec(Codec::Decimal(5, Some(2), Some(16)));
        let mut decimal_decoder = Decoder::try_new(&decimal_type).unwrap();

        // 12345 (0x3039) as a 16-byte big-endian value.
        let mut positive = [0x00u8; 16];
        positive[14..].copy_from_slice(&[0x30, 0x39]);
        // -123 in two's complement: 0xFF sign extension with 0x85 as the lowest byte.
        let mut negative = [0xFFu8; 16];
        negative[15] = 0x85;

        let mut encoded = Vec::new();
        for row in [positive, negative] {
            encoded.extend_from_slice(&row);
        }

        let mut reader = AvroCursor::new(&encoded);
        decimal_decoder.decode(&mut reader).unwrap();
        decimal_decoder.decode(&mut reader).unwrap();

        let flushed = decimal_decoder.flush(None).unwrap();
        let decimals = flushed.as_any().downcast_ref::<Decimal128Array>().unwrap();
        assert_eq!(decimals.len(), 2);
        assert_eq!(decimals.value_as_string(0), "123.45");
        assert_eq!(decimals.value_as_string(1), "-1.23");
    }
1523
1524
    // Decodes three rows of a nullable, bytes-backed decimal (precision 4, scale 1):
    // 123.4, null, -123.4. The nullable wrapper is built by hand with `NullSecond`, so
    // union branch 0 carries a value and branch 1 is null.
    #[test]
    fn test_decimal_decoding_bytes_with_nulls() {
        let decimal_type = avro_from_codec(Codec::Decimal(4, Some(1), None));
        let value_decoder = Decoder::try_new(&decimal_type).unwrap();
        let mut nullable_decoder = Decoder::Nullable(
            Nullability::NullSecond,
            NullBufferBuilder::new(DEFAULT_CAPACITY),
            Box::new(value_decoder),
        );

        let mut encoded = Vec::new();
        // Row 1: branch 0, then 0x04D2 = 1234.
        encoded.extend_from_slice(&encode_avro_int(0));
        encoded.extend_from_slice(&encode_avro_bytes(&[0x04, 0xD2]));
        // Row 2: branch 1, null, no payload.
        encoded.extend_from_slice(&encode_avro_int(1));
        // Row 3: branch 0, then 0xFB2E = -1234 in two's complement.
        encoded.extend_from_slice(&encode_avro_int(0));
        encoded.extend_from_slice(&encode_avro_bytes(&[0xFB, 0x2E]));

        let mut reader = AvroCursor::new(&encoded);
        for _row in 0..3 {
            nullable_decoder.decode(&mut reader).unwrap();
        }

        let flushed = nullable_decoder.flush(None).unwrap();
        let decimals = flushed.as_any().downcast_ref::<Decimal128Array>().unwrap();
        assert_eq!(decimals.len(), 3);
        assert!(decimals.is_valid(0));
        assert!(!decimals.is_valid(1));
        assert!(decimals.is_valid(2));
        assert_eq!(decimals.value_as_string(0), "123.4");
        assert_eq!(decimals.value_as_string(2), "-123.4");
    }
1552
1553
    // Decodes three rows of a nullable 16-byte fixed decimal (precision 6, scale 2):
    // 1234.56, null, -1234.56. The nullable wrapper is built by hand with `NullSecond`, so
    // union branch 0 carries a value and branch 1 is null.
    #[test]
    fn test_decimal_decoding_bytes_with_nulls_fixed_size() {
        let decimal_type = avro_from_codec(Codec::Decimal(6, Some(2), Some(16)));
        let value_decoder = Decoder::try_new(&decimal_type).unwrap();
        let mut nullable_decoder = Decoder::Nullable(
            Nullability::NullSecond,
            NullBufferBuilder::new(DEFAULT_CAPACITY),
            Box::new(value_decoder),
        );

        // 123456 (0x01E240) as a 16-byte big-endian value.
        let mut positive = [0x00u8; 16];
        positive[13..].copy_from_slice(&[0x01, 0xE2, 0x40]);
        // -123456 in two's complement: 0xFF sign extension, then 0xFE1DC0.
        let mut negative = [0xFFu8; 16];
        negative[13..].copy_from_slice(&[0xFE, 0x1D, 0xC0]);

        let mut encoded = Vec::new();
        encoded.extend_from_slice(&encode_avro_int(0)); // row 1: value branch
        encoded.extend_from_slice(&positive);
        encoded.extend_from_slice(&encode_avro_int(1)); // row 2: null branch
        encoded.extend_from_slice(&encode_avro_int(0)); // row 3: value branch
        encoded.extend_from_slice(&negative);

        let mut reader = AvroCursor::new(&encoded);
        for _row in 0..3 {
            nullable_decoder.decode(&mut reader).unwrap();
        }

        let flushed = nullable_decoder.flush(None).unwrap();
        let decimals = flushed.as_any().downcast_ref::<Decimal128Array>().unwrap();
        assert_eq!(decimals.len(), 3);
        assert!(decimals.is_valid(0));
        assert!(!decimals.is_valid(1));
        assert!(decimals.is_valid(2));
        assert_eq!(decimals.value_as_string(0), "1234.56");
        assert_eq!(decimals.value_as_string(2), "-1234.56");
    }
1589
1590
    // Decodes three enum rows (symbol indices 2, 0, 1) and checks that they become a
    // dictionary array whose values are the symbols and whose keys are the indices.
    #[test]
    fn test_enum_decoding() {
        let symbols: Arc<[String]> = ["A", "B", "C"].iter().map(|s| s.to_string()).collect();
        let enum_type = avro_from_codec(Codec::Enum(symbols.clone()));
        let mut enum_decoder = Decoder::try_new(&enum_type).unwrap();

        // Each row is just the index of the chosen symbol.
        let mut encoded = Vec::new();
        for symbol_index in [2, 0, 1] {
            encoded.extend_from_slice(&encode_avro_int(symbol_index));
        }

        let mut reader = AvroCursor::new(&encoded);
        for _row in 0..3 {
            enum_decoder.decode(&mut reader).unwrap();
        }

        let flushed = enum_decoder.flush(None).unwrap();
        let dictionary = flushed
            .as_any()
            .downcast_ref::<DictionaryArray<Int32Type>>()
            .unwrap();
        assert_eq!(dictionary.len(), 3);

        let dictionary_values = dictionary
            .values()
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap();
        assert_eq!(dictionary_values.value(0), "A");
        assert_eq!(dictionary_values.value(1), "B");
        assert_eq!(dictionary_values.value(2), "C");
        assert_eq!(dictionary.keys().values(), &[2, 0, 1]);
    }
1620
1621
    // Decodes three rows of a nullable enum declared with `NullFirst`: "Y", null, "X".
    // In the input below branch 0 is the null row and branch 1 is followed by a symbol index.
    #[test]
    fn test_enum_decoding_with_nulls() {
        let symbols: Arc<[String]> = ["X", "Y"].iter().map(|s| s.to_string()).collect();
        let nullable_enum_type = AvroDataType::new(
            Codec::Enum(symbols.clone()),
            Default::default(),
            Some(Nullability::NullFirst),
        );
        let mut enum_decoder = Decoder::try_new(&nullable_enum_type).unwrap();

        let mut encoded = Vec::new();
        // Row 1: branch 1, symbol index 1.
        encoded.extend_from_slice(&encode_avro_long(1));
        encoded.extend_from_slice(&encode_avro_int(1));
        // Row 2: branch 0, null.
        encoded.extend_from_slice(&encode_avro_long(0));
        // Row 3: branch 1, symbol index 0.
        encoded.extend_from_slice(&encode_avro_long(1));
        encoded.extend_from_slice(&encode_avro_int(0));

        let mut reader = AvroCursor::new(&encoded);
        for _row in 0..3 {
            enum_decoder.decode(&mut reader).unwrap();
        }

        let flushed = enum_decoder.flush(None).unwrap();
        let dictionary = flushed
            .as_any()
            .downcast_ref::<DictionaryArray<Int32Type>>()
            .unwrap();
        assert_eq!(dictionary.len(), 3);
        assert!(dictionary.is_valid(0));
        assert!(dictionary.is_null(1));
        assert!(dictionary.is_valid(2));

        let want_keys = Int32Array::from(vec![Some(1), None, Some(0)]);
        assert_eq!(dictionary.keys(), &want_keys);

        let dictionary_values = dictionary
            .values()
            .as_any()
            .downcast_ref::<StringArray>()
            .unwrap();
        assert_eq!(dictionary_values.value(0), "X");
        assert_eq!(dictionary_values.value(1), "Y");
    }
1657
1658
    // Decodes three rows of a nullable interval declared with `NullFirst`: a value, null,
    // and another value. Each value is written as three little-endian `u32`s (months, days,
    // milliseconds), and the expected output carries the milliseconds as nanoseconds.
    #[test]
    fn test_duration_decoding_with_nulls() {
        let nullable_interval_type = AvroDataType::new(
            Codec::Interval,
            Default::default(),
            Some(Nullability::NullFirst),
        );
        let mut interval_decoder = Decoder::try_new(&nullable_interval_type).unwrap();

        // Lays out one duration payload: months, days, millis, each as 4 little-endian bytes.
        let duration_bytes = |months: u32, days: u32, millis: u32| {
            [
                months.to_le_bytes(),
                days.to_le_bytes(),
                millis.to_le_bytes(),
            ]
            .concat()
        };

        let mut encoded = Vec::new();
        // First value: 1 month, 2 days, 3 millis
        encoded.extend_from_slice(&encode_avro_long(1)); // not null
        encoded.extend_from_slice(&duration_bytes(1, 2, 3));
        // Second value: null
        encoded.extend_from_slice(&encode_avro_long(0)); // null
        // Third value: 4 months, 5 days, 6 millis
        encoded.extend_from_slice(&encode_avro_long(1)); // not null
        encoded.extend_from_slice(&duration_bytes(4, 5, 6));

        let mut reader = AvroCursor::new(&encoded);
        for _row in 0..3 {
            interval_decoder.decode(&mut reader).unwrap();
        }

        let flushed = interval_decoder.flush(None).unwrap();
        let intervals = flushed
            .as_any()
            .downcast_ref::<IntervalMonthDayNanoArray>()
            .unwrap();
        assert_eq!(intervals.len(), 3);
        assert!(intervals.is_valid(0));
        assert!(intervals.is_null(1));
        assert!(intervals.is_valid(2));

        let want = IntervalMonthDayNanoArray::from(vec![
            Some(IntervalMonthDayNano {
                months: 1,
                days: 2,
                nanoseconds: 3_000_000,
            }),
            None,
            Some(IntervalMonthDayNano {
                months: 4,
                days: 5,
                nanoseconds: 6_000_000,
            }),
        ]);
        assert_eq!(intervals, &want);
    }
1711
1712
    // Flushing an interval decoder that was never fed a row must yield an empty array.
    #[test]
    fn test_duration_decoding_empty() {
        let interval_type = AvroDataType::new(Codec::Interval, Default::default(), None);
        let mut interval_decoder = Decoder::try_new(&interval_type).unwrap();
        let flushed = interval_decoder.flush(None).unwrap();
        assert_eq!(flushed.len(), 0);
    }
1720
1721
    // A row that fails to decode must not leave a stray entry behind: after null, a failed
    // row, and 42, the flushed array must contain exactly the null and the 42.
    #[test]
    fn test_nullable_decode_error_bitmap_corruption() {
        // Nullable Int32 with ['T','null'] encoding (NullSecond)
        let nullable_int_type = AvroDataType::new(
            Codec::Int32,
            Default::default(),
            Some(Nullability::NullSecond),
        );
        let mut int_decoder = Decoder::try_new(&nullable_int_type).unwrap();

        // Row 1: union branch 1 (null)
        let mut null_row = Vec::new();
        null_row.extend_from_slice(&encode_avro_int(1));

        // Row 2: union branch 0 (non-null) but missing the int payload -> decode error
        let mut truncated_row = Vec::new();
        truncated_row.extend_from_slice(&encode_avro_int(0)); // branch = 0 => non-null

        // Row 3: union branch 0 (non-null) with correct int payload -> should succeed
        let mut valid_row = Vec::new();
        valid_row.extend_from_slice(&encode_avro_int(0)); // branch
        valid_row.extend_from_slice(&encode_avro_int(42)); // actual value

        int_decoder.decode(&mut AvroCursor::new(&null_row)).unwrap();
        let truncated_result = int_decoder.decode(&mut AvroCursor::new(&truncated_row));
        assert!(truncated_result.is_err()); // decode error
        int_decoder
            .decode(&mut AvroCursor::new(&valid_row))
            .unwrap();

        let flushed = int_decoder.flush(None).unwrap();

        // Should contain 2 elements: row1 (null) and row3 (42)
        assert_eq!(flushed.len(), 2);
        let ints = flushed.as_any().downcast_ref::<Int32Array>().unwrap();
        assert!(ints.is_null(0)); // row1 is null
        assert_eq!(ints.value(1), 42); // row3 value is 42
    }
1756
1757
5
    fn make_record_resolved_decoder(
1758
5
        reader_fields: &[(&str, DataType, bool)],
1759
5
        writer_to_reader: Vec<Option<usize>>,
1760
5
        mut skip_decoders: Vec<Option<super::Skipper>>,
1761
5
    ) -> Decoder {
1762
5
        let mut field_refs: Vec<FieldRef> = Vec::with_capacity(reader_fields.len());
1763
5
        let mut encodings: Vec<Decoder> = Vec::with_capacity(reader_fields.len());
1764
11
        for (
name6
,
dt6
,
nullable6
) in reader_fields {
1765
6
            field_refs.push(Arc::new(ArrowField::new(*name, dt.clone(), *nullable)));
1766
6
            let enc = match dt {
1767
5
                DataType::Int32 => Decoder::Int32(Vec::new()),
1768
1
                DataType::Int64 => Decoder::Int64(Vec::new()),
1769
                DataType::Utf8 => {
1770
0
                    Decoder::String(OffsetBufferBuilder::new(DEFAULT_CAPACITY), Vec::new())
1771
                }
1772
0
                other => panic!("Unsupported test reader field type: {other:?}"),
1773
            };
1774
6
            encodings.push(enc);
1775
        }
1776
5
        let fields: Fields = field_refs.into();
1777
5
        Decoder::RecordResolved {
1778
5
            fields,
1779
5
            encodings,
1780
5
            writer_to_reader: Arc::from(writer_to_reader),
1781
5
            skip_decoders,
1782
5
        }
1783
5
    }
1784
1785
    // The writer row has two ints but the reader keeps only the first (`id`). The trailing
    // int is handed to an `Int32` skipper, so the cursor must still reach the end of input.
    #[test]
    fn test_skip_writer_trailing_field_int32() {
        let reader_fields = [("id", DataType::Int32, false)];
        let mut record_decoder = make_record_resolved_decoder(
            &reader_fields,
            vec![Some(0), None],
            vec![None, Some(Skipper::Int32)],
        );

        let mut encoded = Vec::new();
        encoded.extend_from_slice(&encode_avro_int(7)); // kept as `id`
        encoded.extend_from_slice(&encode_avro_int(999)); // skipped

        let mut reader = AvroCursor::new(&encoded);
        record_decoder.decode(&mut reader).unwrap();
        assert_eq!(reader.position(), encoded.len());

        let flushed = record_decoder.flush(None).unwrap();
        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(record.len(), 1);

        let id_column = record.column_by_name("id").unwrap();
        let ids = id_column.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ids.value(0), 7);
    }
1809
1810
    // The writer row is (int, string, long) while the reader keeps only `id` and `score`.
    // The string in the middle is handed to a `String` skipper.
    #[test]
    fn test_skip_writer_middle_field_string() {
        let reader_fields = [
            ("id", DataType::Int32, false),
            ("score", DataType::Int64, false),
        ];
        let mut record_decoder = make_record_resolved_decoder(
            &reader_fields,
            vec![Some(0), None, Some(1)],
            vec![None, Some(Skipper::String), None],
        );

        let mut encoded = Vec::new();
        encoded.extend_from_slice(&encode_avro_int(42)); // kept as `id`
        encoded.extend_from_slice(&encode_avro_bytes(b"abcdef")); // skipped
        encoded.extend_from_slice(&encode_avro_long(1000)); // kept as `score`

        let mut reader = AvroCursor::new(&encoded);
        record_decoder.decode(&mut reader).unwrap();
        assert_eq!(reader.position(), encoded.len());

        let flushed = record_decoder.flush(None).unwrap();
        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();

        let id_column = record.column_by_name("id").unwrap();
        let ids = id_column.as_any().downcast_ref::<Int32Array>().unwrap();
        let score_column = record.column_by_name("score").unwrap();
        let scores = score_column.as_any().downcast_ref::<Int64Array>().unwrap();
        assert_eq!(ids.value(0), 42);
        assert_eq!(scores.value(0), 1000);
    }
1844
1845
    // The first writer field is an int array written as a single block with a negative
    // count followed by the block's byte size; it is handed to a `List` skipper. The
    // reader keeps only the trailing `id`.
    #[test]
    fn test_skip_writer_array_with_negative_block_count_fast() {
        let reader_fields = [("id", DataType::Int32, false)];
        let mut record_decoder = make_record_resolved_decoder(
            &reader_fields,
            vec![None, Some(0)],
            vec![Some(Skipper::List(Box::new(Skipper::Int32))), None],
        );

        // Encode the array items up front so their byte length can go into the header.
        let mut items = Vec::new();
        for item in [1, 2, 3] {
            items.extend_from_slice(&encode_avro_int(item));
        }

        let mut encoded = Vec::new();
        encoded.extend_from_slice(&encode_avro_long(-3)); // three items, size follows
        encoded.extend_from_slice(&encode_avro_long(items.len() as i64));
        encoded.extend_from_slice(&items);
        encoded.extend_from_slice(&encode_avro_long(0)); // end of array
        encoded.extend_from_slice(&encode_avro_int(5)); // kept as `id`

        let mut reader = AvroCursor::new(&encoded);
        record_decoder.decode(&mut reader).unwrap();
        assert_eq!(reader.position(), encoded.len());

        let flushed = record_decoder.flush(None).unwrap();
        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();
        let id_column = record.column_by_name("id").unwrap();
        let ids = id_column.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ids.len(), 1);
        assert_eq!(ids.value(0), 5);
    }
1876
1877
    // The first writer field is a map with int values, written as a single block with a
    // negative count followed by the block's byte size; it is handed to a `Map` skipper.
    // The reader keeps only the trailing `id`.
    #[test]
    fn test_skip_writer_map_with_negative_block_count_fast() {
        let reader_fields = [("id", DataType::Int32, false)];
        let mut record_decoder = make_record_resolved_decoder(
            &reader_fields,
            vec![None, Some(0)],
            vec![Some(Skipper::Map(Box::new(Skipper::Int32))), None],
        );

        // Encode the key/value pairs up front so their byte length can go into the header.
        let mut pairs = Vec::new();
        for (key, value) in [(b"k1", 10), (b"k2", 20)] {
            pairs.extend_from_slice(&encode_avro_bytes(key));
            pairs.extend_from_slice(&encode_avro_int(value));
        }

        let mut encoded = Vec::new();
        encoded.extend_from_slice(&encode_avro_long(-2)); // two entries, size follows
        encoded.extend_from_slice(&encode_avro_long(pairs.len() as i64));
        encoded.extend_from_slice(&pairs);
        encoded.extend_from_slice(&encode_avro_long(0)); // end of map
        encoded.extend_from_slice(&encode_avro_int(123)); // kept as `id`

        let mut reader = AvroCursor::new(&encoded);
        record_decoder.decode(&mut reader).unwrap();
        assert_eq!(reader.position(), encoded.len());

        let flushed = record_decoder.flush(None).unwrap();
        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();
        let id_column = record.column_by_name("id").unwrap();
        let ids = id_column.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ids.len(), 1);
        assert_eq!(ids.value(0), 123);
    }
1909
1910
    // The first writer field is a nullable int handed to a `Nullable(NullFirst, Int32)`
    // skipper. Row 1 takes branch 0 with no payload; row 2 takes branch 1 followed by an
    // int. In both rows the reader's `id` must be read from the bytes that follow.
    #[test]
    fn test_skip_writer_nullable_field_union_nullfirst() {
        let skipped_field = Skipper::Nullable(Nullability::NullFirst, Box::new(Skipper::Int32));
        let reader_fields = [("id", DataType::Int32, false)];
        let mut record_decoder = make_record_resolved_decoder(
            &reader_fields,
            vec![None, Some(0)],
            vec![Some(skipped_field), None],
        );

        // Row 1: branch 0 (null), then id = 5.
        let mut null_row = Vec::new();
        null_row.extend_from_slice(&encode_avro_long(0));
        null_row.extend_from_slice(&encode_avro_int(5));
        // Row 2: branch 1 with the skipped value 123, then id = 7.
        let mut value_row = Vec::new();
        value_row.extend_from_slice(&encode_avro_long(1));
        value_row.extend_from_slice(&encode_avro_int(123));
        value_row.extend_from_slice(&encode_avro_int(7));

        let mut null_reader = AvroCursor::new(&null_row);
        let mut value_reader = AvroCursor::new(&value_row);
        record_decoder.decode(&mut null_reader).unwrap();
        record_decoder.decode(&mut value_reader).unwrap();
        assert_eq!(null_reader.position(), null_row.len());
        assert_eq!(value_reader.position(), value_row.len());

        let flushed = record_decoder.flush(None).unwrap();
        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();
        let id_column = record.column_by_name("id").unwrap();
        let ids = id_column.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ids.len(), 2);
        assert_eq!(ids.value(0), 5);
        assert_eq!(ids.value(1), 7);
    }
1948
}