/Users/andrewlamb/Software/arrow-rs/arrow-avro/src/codec.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::schema::{ |
19 | | Attributes, AvroSchema, ComplexType, PrimitiveType, Record, Schema, Type, TypeName, |
20 | | AVRO_ENUM_SYMBOLS_METADATA_KEY, |
21 | | }; |
22 | | use arrow_schema::{ |
23 | | ArrowError, DataType, Field, Fields, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, |
24 | | DECIMAL128_MAX_SCALE, |
25 | | }; |
26 | | use serde_json::Value; |
27 | | use std::borrow::Cow; |
28 | | use std::collections::HashMap; |
29 | | use std::sync::Arc; |
30 | | |
/// Avro types are not nullable, with nullability instead encoded as a union
/// where one of the variants is the null type.
///
/// To accommodate this we special case two-variant unions where one of the
/// variants is the null type, and use this to derive arrow's notion of nullability
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Nullability {
    /// The nulls are encoded as the first union variant, i.e. `["null", T]`
    NullFirst,
    /// The nulls are encoded as the second union variant, i.e. `[T, "null"]`
    NullSecond,
}
43 | | |
/// Contains information about how to resolve differences between a writer's and a reader's schema.
///
/// Attached to an [`AvroDataType`] when a reader schema is resolved against the
/// writer schema that produced the data.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum ResolutionInfo {
    /// Indicates that the writer's type should be promoted to the reader's type.
    Promotion(Promotion),
    /// Indicates that a default value should be used for a field. (Implemented in a Follow-up PR)
    DefaultValue(AvroLiteral),
    /// Provides mapping information for resolving enums. (Implemented in a Follow-up PR)
    EnumMapping(EnumMapping),
    /// Provides resolution information for record fields. (Implemented in a Follow-up PR)
    Record(ResolvedRecord),
}
56 | | |
/// Represents a literal Avro value.
///
/// This is used to represent default values in an Avro schema.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum AvroLiteral {
    /// Represents a null value.
    Null,
    /// Represents a boolean value.
    Boolean(bool),
    /// Represents an integer value.
    Int(i32),
    /// Represents a long value.
    Long(i64),
    /// Represents a float value.
    Float(f32),
    /// Represents a double value.
    Double(f64),
    /// Represents a bytes value.
    Bytes(Vec<u8>),
    /// Represents a string value.
    String(String),
    /// Represents an enum symbol.
    Enum(String),
    /// Represents an unsupported literal type.
    Unsupported,
}
83 | | |
/// Contains the necessary information to resolve a writer's record against a reader's record schema.
#[derive(Debug, Clone, PartialEq)]
pub struct ResolvedRecord {
    /// Maps a writer's field index to the corresponding reader's field index.
    /// `None` if the writer's field is not present in the reader's schema.
    pub(crate) writer_to_reader: Arc<[Option<usize>]>,
    /// A list of indices in the reader's schema for fields that have a default value.
    pub(crate) default_fields: Arc<[usize]>,
    /// For fields present in the writer's schema but not the reader's, this stores their data type.
    /// This is needed to correctly skip over these fields during deserialization.
    pub(crate) skip_fields: Arc<[Option<AvroDataType>]>,
}
96 | | |
/// Defines the type of promotion to be applied during schema resolution.
///
/// Schema resolution may require promoting a writer's data type to a reader's data type.
/// For example, an `int` can be promoted to a `long`, `float`, or `double`.
/// These variants mirror the promotions permitted by the Avro specification.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum Promotion {
    /// Promotes an `int` to a `long`.
    IntToLong,
    /// Promotes an `int` to a `float`.
    IntToFloat,
    /// Promotes an `int` to a `double`.
    IntToDouble,
    /// Promotes a `long` to a `float`.
    LongToFloat,
    /// Promotes a `long` to a `double`.
    LongToDouble,
    /// Promotes a `float` to a `double`.
    FloatToDouble,
    /// Promotes a `string` to `bytes`.
    StringToBytes,
    /// Promotes `bytes` to a `string`.
    BytesToString,
}
120 | | |
/// Holds the mapping information for resolving Avro enums.
///
/// When resolving schemas, the writer's enum symbols must be mapped to the reader's symbols.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EnumMapping {
    /// A mapping from the writer's symbol index to the reader's symbol index.
    pub(crate) mapping: Arc<[i32]>,
    /// The index to use for a writer's symbol that is not present in the reader's enum
    /// and a default value is specified in the reader's schema.
    pub(crate) default_index: i32,
}
132 | | |
/// Attaches the canonical extension type matching `codec` to `field`.
///
/// Currently only the Avro `uuid` logical type maps to an Arrow canonical
/// extension type; every other codec returns the field unchanged.
#[cfg(feature = "canonical_extension_types")]
fn with_extension_type(codec: &Codec, field: Field) -> Field {
    if let Codec::Uuid = codec {
        field.with_extension_type(arrow_schema::extension::Uuid)
    } else {
        field
    }
}
140 | | |
/// An Avro datatype mapped to the arrow data model
#[derive(Debug, Clone, PartialEq)]
pub struct AvroDataType {
    // How nulls are encoded, when this type was declared as a two-variant
    // union with "null"; `None` means the type is not nullable.
    nullability: Option<Nullability>,
    // Key/value metadata propagated onto the generated Arrow field.
    metadata: HashMap<String, String>,
    // The Avro encoding, which determines the Arrow data type mapping.
    codec: Codec,
    // Schema-resolution info populated when reconciling writer/reader schemas.
    pub(crate) resolution: Option<ResolutionInfo>,
}
149 | | |
impl AvroDataType {
    /// Create a new [`AvroDataType`] with the given parts.
    ///
    /// The returned type carries no schema-resolution information
    /// (`resolution` is `None`).
    pub fn new(
        codec: Codec,
        metadata: HashMap<String, String>,
        nullability: Option<Nullability>,
    ) -> Self {
        AvroDataType {
            codec,
            metadata,
            nullability,
            resolution: None,
        }
    }

    /// Create a new [`AvroDataType`] that also carries the schema-resolution
    /// information produced when reconciling a writer and a reader schema.
    #[inline]
    fn new_with_resolution(
        codec: Codec,
        metadata: HashMap<String, String>,
        nullability: Option<Nullability>,
        resolution: Option<ResolutionInfo>,
    ) -> Self {
        Self {
            codec,
            metadata,
            nullability,
            resolution,
        }
    }

    /// Returns an arrow [`Field`] with the given name
    pub fn field_with_name(&self, name: &str) -> Field {
        let nullable = self.nullability.is_some();
        let data_type = self.codec.data_type();
        let field = Field::new(name, data_type, nullable).with_metadata(self.metadata.clone());
        // With canonical extension types enabled, attach the matching extension
        // (e.g. Uuid) before returning; otherwise return the plain field.
        #[cfg(feature = "canonical_extension_types")]
        return with_extension_type(&self.codec, field);
        #[cfg(not(feature = "canonical_extension_types"))]
        field
    }

    /// Returns a reference to the codec used by this data type
    ///
    /// The codec determines how Avro data is encoded and mapped to Arrow data types.
    /// This is useful when we need to inspect or use the specific encoding of a field.
    pub fn codec(&self) -> &Codec {
        &self.codec
    }

    /// Returns the nullability status of this data type
    ///
    /// In Avro, nullability is represented through unions with null types.
    /// The returned value indicates how nulls are encoded in the Avro format:
    /// - `Some(Nullability::NullFirst)` - Nulls are encoded as the first union variant
    /// - `Some(Nullability::NullSecond)` - Nulls are encoded as the second union variant
    /// - `None` - The type is not nullable
    pub fn nullability(&self) -> Option<Nullability> {
        self.nullability
    }
}
210 | | |
/// A named [`AvroDataType`]
#[derive(Debug, Clone, PartialEq)]
pub struct AvroField {
    // The field name as declared in the Avro schema.
    name: String,
    // The field's data type.
    data_type: AvroDataType,
}
217 | | |
impl AvroField {
    /// Returns the arrow [`Field`]
    pub fn field(&self) -> Field {
        self.data_type.field_with_name(&self.name)
    }

    /// Returns the [`AvroDataType`]
    pub fn data_type(&self) -> &AvroDataType {
        &self.data_type
    }

    /// Returns a new [`AvroField`] with Utf8View support enabled
    ///
    /// This will convert a Utf8 codec to a Utf8View codec. This method is used to
    /// enable potential performance optimizations in string-heavy workloads by using
    /// Arrow's StringViewArray data structure.
    ///
    /// NOTE(review): only the top-level codec is converted; nested string
    /// types (e.g. inside records, lists or maps) remain `Utf8` — confirm
    /// this is intended before relying on it for nested data.
    pub fn with_utf8view(&self) -> Self {
        let mut field = self.clone();
        if let Codec::Utf8 = field.data_type.codec {
            field.data_type.codec = Codec::Utf8View;
        }
        field
    }

    /// Returns the name of this Avro field
    ///
    /// This is the field name as defined in the Avro schema.
    /// It's used to identify fields within a record structure.
    pub fn name(&self) -> &str {
        &self.name
    }

    /// Performs schema resolution between a writer and reader schema.
    ///
    /// This is the primary entry point for handling schema evolution. It produces an
    /// `AvroField` that contains all the necessary information to read data written
    /// with the `writer` schema as if it were written with the `reader` schema.
    pub(crate) fn resolve_from_writer_and_reader<'a>(
        writer_schema: &'a Schema<'a>,
        reader_schema: &'a Schema<'a>,
        use_utf8view: bool,
        strict_mode: bool,
    ) -> Result<Self, ArrowError> {
        // Name the resulting field after the reader's top-level record,
        // falling back to "root" for non-record reader schemas.
        let top_name = match reader_schema {
            Schema::Complex(ComplexType::Record(r)) => r.name.to_string(),
            _ => "root".to_string(),
        };
        let mut resolver = Maker::new(use_utf8view, strict_mode);
        let data_type = resolver.make_data_type(writer_schema, Some(reader_schema), None)?;
        Ok(Self {
            name: top_name,
            data_type,
        })
    }
}
276 | | |
277 | | impl<'a> TryFrom<&Schema<'a>> for AvroField { |
278 | | type Error = ArrowError; |
279 | | |
280 | 4 | fn try_from(schema: &Schema<'a>) -> Result<Self, Self::Error> { |
281 | 4 | match schema { |
282 | 4 | Schema::Complex(ComplexType::Record(r)) => { |
283 | 4 | let mut resolver = Maker::new(false, false); |
284 | 4 | let data_type3 = resolver.make_data_type(schema, None, None)?1 ; |
285 | 3 | Ok(AvroField { |
286 | 3 | data_type, |
287 | 3 | name: r.name.to_string(), |
288 | 3 | }) |
289 | | } |
290 | 0 | _ => Err(ArrowError::ParseError(format!( |
291 | 0 | "Expected record got {schema:?}" |
292 | 0 | ))), |
293 | | } |
294 | 4 | } |
295 | | } |
296 | | |
/// Builder for an [`AvroField`]
#[derive(Debug)]
pub struct AvroFieldBuilder<'a> {
    // The schema the data was written with (required).
    writer_schema: &'a Schema<'a>,
    // Optional reader schema; when set, `build` performs schema resolution.
    reader_schema: Option<&'a Schema<'a>>,
    // When true, Avro strings map to Utf8View instead of Utf8.
    use_utf8view: bool,
    // When true, unions of the form [T, "null"] are rejected.
    strict_mode: bool,
}
305 | | |
306 | | impl<'a> AvroFieldBuilder<'a> { |
307 | | /// Creates a new [`AvroFieldBuilder`] for a given writer schema. |
308 | 93 | pub fn new(writer_schema: &'a Schema<'a>) -> Self { |
309 | 93 | Self { |
310 | 93 | writer_schema, |
311 | 93 | reader_schema: None, |
312 | 93 | use_utf8view: false, |
313 | 93 | strict_mode: false, |
314 | 93 | } |
315 | 93 | } |
316 | | |
317 | | /// Sets the reader schema for schema resolution. |
318 | | /// |
319 | | /// If a reader schema is provided, the builder will produce a resolved `AvroField` |
320 | | /// that can handle differences between the writer's and reader's schemas. |
321 | | #[inline] |
322 | 30 | pub fn with_reader_schema(mut self, reader_schema: &'a Schema<'a>) -> Self { |
323 | 30 | self.reader_schema = Some(reader_schema); |
324 | 30 | self |
325 | 30 | } |
326 | | |
327 | | /// Enable or disable Utf8View support |
328 | 92 | pub fn with_utf8view(mut self, use_utf8view: bool) -> Self { |
329 | 92 | self.use_utf8view = use_utf8view; |
330 | 92 | self |
331 | 92 | } |
332 | | |
333 | | /// Enable or disable strict mode. |
334 | 92 | pub fn with_strict_mode(mut self, strict_mode: bool) -> Self { |
335 | 92 | self.strict_mode = strict_mode; |
336 | 92 | self |
337 | 92 | } |
338 | | |
339 | | /// Build an [`AvroField`] from the builder |
340 | 93 | pub fn build(self) -> Result<AvroField, ArrowError> { |
341 | 93 | match self.writer_schema { |
342 | 93 | Schema::Complex(ComplexType::Record(r)) => { |
343 | 93 | let mut resolver = Maker::new(self.use_utf8view, self.strict_mode); |
344 | 90 | let data_type = |
345 | 93 | resolver.make_data_type(self.writer_schema, self.reader_schema, None)?3 ; |
346 | 90 | Ok(AvroField { |
347 | 90 | name: r.name.to_string(), |
348 | 90 | data_type, |
349 | 90 | }) |
350 | | } |
351 | 0 | _ => Err(ArrowError::ParseError(format!( |
352 | 0 | "Expected a Record schema to build an AvroField, but got {:?}", |
353 | 0 | self.writer_schema |
354 | 0 | ))), |
355 | | } |
356 | 93 | } |
357 | | } |
358 | | |
/// An Avro encoding
///
/// <https://avro.apache.org/docs/1.11.1/specification/#encodings>
#[derive(Debug, Clone, PartialEq)]
pub enum Codec {
    /// Represents Avro null type, maps to Arrow's Null data type
    Null,
    /// Represents Avro boolean type, maps to Arrow's Boolean data type
    Boolean,
    /// Represents Avro int type, maps to Arrow's Int32 data type
    Int32,
    /// Represents Avro long type, maps to Arrow's Int64 data type
    Int64,
    /// Represents Avro float type, maps to Arrow's Float32 data type
    Float32,
    /// Represents Avro double type, maps to Arrow's Float64 data type
    Float64,
    /// Represents Avro bytes type, maps to Arrow's Binary data type
    Binary,
    /// String data represented as UTF-8 encoded bytes, corresponding to Arrow's StringArray
    Utf8,
    /// String data represented as UTF-8 encoded bytes with an optimized view representation,
    /// corresponding to Arrow's StringViewArray which provides better performance for string operations
    ///
    /// The Utf8View option can be enabled via `ReadOptions::use_utf8view`.
    Utf8View,
    /// Represents Avro date logical type, maps to Arrow's Date32 data type
    Date32,
    /// Represents Avro time-millis logical type, maps to Arrow's Time32(TimeUnit::Millisecond) data type
    TimeMillis,
    /// Represents Avro time-micros logical type, maps to Arrow's Time64(TimeUnit::Microsecond) data type
    TimeMicros,
    /// Represents Avro timestamp-millis or local-timestamp-millis logical type
    ///
    /// Maps to Arrow's Timestamp(TimeUnit::Millisecond) data type
    /// The boolean parameter indicates whether the timestamp has a UTC timezone (true) or is local time (false)
    TimestampMillis(bool),
    /// Represents Avro timestamp-micros or local-timestamp-micros logical type
    ///
    /// Maps to Arrow's Timestamp(TimeUnit::Microsecond) data type
    /// The boolean parameter indicates whether the timestamp has a UTC timezone (true) or is local time (false)
    TimestampMicros(bool),
    /// Represents Avro fixed type, maps to Arrow's FixedSizeBinary data type
    /// The i32 parameter indicates the fixed binary size
    Fixed(i32),
    /// Represents Avro decimal type, maps to Arrow's Decimal128 or Decimal256 data types
    ///
    /// The fields are `(precision, scale, fixed_size)`.
    /// - `precision` (`usize`): Total number of digits.
    /// - `scale` (`Option<usize>`): Number of fractional digits.
    /// - `fixed_size` (`Option<usize>`): Size in bytes if backed by a `fixed` type, otherwise `None`.
    Decimal(usize, Option<usize>, Option<usize>),
    /// Represents Avro Uuid type, a FixedSizeBinary with a length of 16.
    Uuid,
    /// Represents an Avro enum, maps to Arrow's Dictionary(Int32, Utf8) type.
    ///
    /// The enclosed value contains the enum's symbols.
    Enum(Arc<[String]>),
    /// Represents Avro array type, maps to Arrow's List data type
    List(Arc<AvroDataType>),
    /// Represents Avro record type, maps to Arrow's Struct data type
    Struct(Arc<[AvroField]>),
    /// Represents Avro map type, maps to Arrow's Map data type
    Map(Arc<AvroDataType>),
    /// Represents Avro duration logical type, maps to Arrow's Interval(IntervalUnit::MonthDayNano) data type
    Interval,
}
426 | | |
impl Codec {
    /// Maps this Avro codec to the corresponding Arrow [`DataType`].
    fn data_type(&self) -> DataType {
        match self {
            Self::Null => DataType::Null,
            Self::Boolean => DataType::Boolean,
            Self::Int32 => DataType::Int32,
            Self::Int64 => DataType::Int64,
            Self::Float32 => DataType::Float32,
            Self::Float64 => DataType::Float64,
            Self::Binary => DataType::Binary,
            Self::Utf8 => DataType::Utf8,
            Self::Utf8View => DataType::Utf8View,
            Self::Date32 => DataType::Date32,
            Self::TimeMillis => DataType::Time32(TimeUnit::Millisecond),
            Self::TimeMicros => DataType::Time64(TimeUnit::Microsecond),
            Self::TimestampMillis(is_utc) => {
                // UTC timestamps get an explicit "+00:00" timezone;
                // local timestamps carry no timezone.
                DataType::Timestamp(TimeUnit::Millisecond, is_utc.then(|| "+00:00".into()))
            }
            Self::TimestampMicros(is_utc) => {
                DataType::Timestamp(TimeUnit::Microsecond, is_utc.then(|| "+00:00".into()))
            }
            Self::Interval => DataType::Interval(IntervalUnit::MonthDayNano),
            Self::Fixed(size) => DataType::FixedSizeBinary(*size),
            Self::Decimal(precision, scale, size) => {
                let p = *precision as u8;
                let s = scale.unwrap_or(0) as i8;
                // Choose Decimal256 when an explicit fixed size exceeds 16
                // bytes, or (with no fixed size) when precision/scale exceed
                // the Decimal128 limits; otherwise Decimal128 suffices.
                let too_large_for_128 = match *size {
                    Some(sz) => sz > 16,
                    None => {
                        (p as usize) > DECIMAL128_MAX_PRECISION as usize
                            || (s as usize) > DECIMAL128_MAX_SCALE as usize
                    }
                };
                if too_large_for_128 {
                    DataType::Decimal256(p, s)
                } else {
                    DataType::Decimal128(p, s)
                }
            }
            Self::Uuid => DataType::FixedSizeBinary(16),
            Self::Enum(_) => {
                // Enums become dictionary-encoded strings keyed by symbol index.
                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
            }
            Self::List(f) => {
                DataType::List(Arc::new(f.field_with_name(Field::LIST_FIELD_DEFAULT_NAME)))
            }
            Self::Struct(f) => DataType::Struct(f.iter().map(|x| x.field()).collect()),
            Self::Map(value_type) => {
                let val_dt = value_type.codec.data_type();
                let val_field = Field::new("value", val_dt, value_type.nullability.is_some())
                    .with_metadata(value_type.metadata.clone());
                // Standard Arrow map layout: Map<entries: Struct<key, value>>
                // with non-nullable string keys.
                DataType::Map(
                    Arc::new(Field::new(
                        "entries",
                        DataType::Struct(Fields::from(vec![
                            Field::new("key", DataType::Utf8, false),
                            val_field,
                        ])),
                        false,
                    )),
                    false,
                )
            }
        }
    }
}
493 | | |
impl From<PrimitiveType> for Codec {
    /// Maps each Avro primitive type 1:1 onto its codec.
    fn from(value: PrimitiveType) -> Self {
        match value {
            PrimitiveType::Null => Self::Null,
            PrimitiveType::Boolean => Self::Boolean,
            PrimitiveType::Int => Self::Int32,
            PrimitiveType::Long => Self::Int64,
            PrimitiveType::Float => Self::Float32,
            PrimitiveType::Double => Self::Float64,
            PrimitiveType::Bytes => Self::Binary,
            PrimitiveType::String => Self::Utf8,
        }
    }
}
508 | | |
509 | 8 | fn parse_decimal_attributes( |
510 | 8 | attributes: &Attributes, |
511 | 8 | fallback_size: Option<usize>, |
512 | 8 | precision_required: bool, |
513 | 8 | ) -> Result<(usize, usize, Option<usize>), ArrowError> { |
514 | 8 | let precision = attributes |
515 | 8 | .additional |
516 | 8 | .get("precision") |
517 | 8 | .and_then(|v| v.as_u64()) |
518 | 8 | .or(if precision_required { None } else { Some(10)0 }) |
519 | 8 | .ok_or_else(|| ArrowError::ParseError("Decimal requires precision"0 .to_string0 ()))?0 |
520 | | as usize; |
521 | 8 | let scale = attributes |
522 | 8 | .additional |
523 | 8 | .get("scale") |
524 | 8 | .and_then(|v| v.as_u64()) |
525 | 8 | .unwrap_or(0) as usize; |
526 | 8 | let size = attributes |
527 | 8 | .additional |
528 | 8 | .get("size") |
529 | 8 | .and_then(|v| v0 .as_u640 ()) |
530 | 8 | .map(|s| s0 as usize) |
531 | 8 | .or(fallback_size); |
532 | 8 | Ok((precision, scale, size)) |
533 | 8 | } |
534 | | |
535 | | impl Codec { |
536 | | /// Converts a string codec to use Utf8View if requested |
537 | | /// |
538 | | /// The conversion only happens if both: |
539 | | /// 1. `use_utf8view` is true |
540 | | /// 2. The codec is currently `Utf8` |
541 | | /// |
542 | | /// # Example |
543 | | /// ``` |
544 | | /// # use arrow_avro::codec::Codec; |
545 | | /// let utf8_codec1 = Codec::Utf8; |
546 | | /// let utf8_codec2 = Codec::Utf8; |
547 | | /// |
548 | | /// // Convert to Utf8View |
549 | | /// let view_codec = utf8_codec1.with_utf8view(true); |
550 | | /// assert!(matches!(view_codec, Codec::Utf8View)); |
551 | | /// |
552 | | /// // Don't convert if use_utf8view is false |
553 | | /// let unchanged_codec = utf8_codec2.with_utf8view(false); |
554 | | /// assert!(matches!(unchanged_codec, Codec::Utf8)); |
555 | | /// ``` |
556 | 585 | pub fn with_utf8view(self, use_utf8view: bool) -> Self { |
557 | 585 | if use_utf8view && matches!0 (self3 , Self::Utf8) { |
558 | 3 | Self::Utf8View |
559 | | } else { |
560 | 582 | self |
561 | | } |
562 | 585 | } |
563 | | } |
564 | | |
/// Resolves Avro type names to [`AvroDataType`]
///
/// See <https://avro.apache.org/docs/1.11.1/specification/#names>
#[derive(Debug, Default)]
struct Resolver<'a> {
    // Registry of named types, keyed by (namespace, name). An empty string
    // namespace is used when no namespace was declared.
    map: HashMap<(&'a str, &'a str), AvroDataType>,
}
572 | | |
573 | | impl<'a> Resolver<'a> { |
574 | 155 | fn register(&mut self, name: &'a str, namespace: Option<&'a str>, schema: AvroDataType) { |
575 | 155 | self.map.insert((namespace.unwrap_or(""), name), schema); |
576 | 155 | } |
577 | | |
578 | 11 | fn resolve(&self, name: &str, namespace: Option<&'a str>) -> Result<AvroDataType, ArrowError> { |
579 | 11 | let (namespace, name) = name |
580 | 11 | .rsplit_once('.') |
581 | 11 | .unwrap_or_else(|| (namespace8 .unwrap_or(""), name8 )); |
582 | | |
583 | 11 | self.map |
584 | 11 | .get(&(namespace, name)) |
585 | 11 | .ok_or_else(|| ArrowError::ParseError(format!1 ("Failed to resolve {namespace}.{name}"1 ))) |
586 | 11 | .cloned() |
587 | 11 | } |
588 | | } |
589 | | |
/// Converts Avro [`Schema`]s into [`AvroDataType`]s.
///
/// Tracks named types through an internal [`Resolver`] and carries the
/// `use_utf8view` / `strict_mode` settings that influence parsing.
/// (The previous doc comment here duplicated [`Resolver`]'s.)
struct Maker<'a> {
    // Registry of named types ((namespace, name) -> data type) seen so far.
    resolver: Resolver<'a>,
    // When true, Avro strings map to Utf8View instead of Utf8.
    use_utf8view: bool,
    // When true, unions of the form [T, "null"] are rejected.
    strict_mode: bool,
}
598 | | |
599 | | impl<'a> Maker<'a> { |
600 | 132 | fn new(use_utf8view: bool, strict_mode: bool) -> Self { |
601 | 132 | Self { |
602 | 132 | resolver: Default::default(), |
603 | 132 | use_utf8view, |
604 | 132 | strict_mode, |
605 | 132 | } |
606 | 132 | } |
607 | 485 | fn make_data_type<'s>( |
608 | 485 | &mut self, |
609 | 485 | writer_schema: &'s Schema<'a>, |
610 | 485 | reader_schema: Option<&'s Schema<'a>>, |
611 | 485 | namespace: Option<&'a str>, |
612 | 485 | ) -> Result<AvroDataType, ArrowError> { |
613 | 485 | match reader_schema { |
614 | 404 | Some(reader_schema) => self.resolve_type(writer_schema, reader_schema, namespace), |
615 | 81 | None => self.parse_type(writer_schema, namespace), |
616 | | } |
617 | 485 | } |
618 | | |
    /// Parses an [`AvroDataType`] from the provided [`Schema`].
    ///
    /// `namespace` is an optional qualifier used as part of a type hierarchy;
    /// record/enum/fixed declarations are registered with the internal
    /// [`Resolver`] so later references by name can be resolved.
    ///
    /// When `self.use_utf8view` is true, string data is mapped to `Utf8View`
    /// (Arrow `StringViewArray`) instead of `Utf8` (`StringArray`).
    ///
    /// See [`Resolver`] for more information
    fn parse_type<'s>(
        &mut self,
        schema: &'s Schema<'a>,
        namespace: Option<&'a str>,
    ) -> Result<AvroDataType, ArrowError> {
        match schema {
            Schema::TypeName(TypeName::Primitive(p)) => Ok(AvroDataType::new(
                Codec::from(*p).with_utf8view(self.use_utf8view),
                Default::default(),
                None,
            )),
            // A bare name refers to a previously declared named type.
            Schema::TypeName(TypeName::Ref(name)) => self.resolver.resolve(name, namespace),
            Schema::Union(f) => {
                // Special case the common case of nullable primitives
                let null = f
                    .iter()
                    .position(|x| x == &Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)));
                match (f.len() == 2, null) {
                    (true, Some(0)) => {
                        // ["null", T]: nulls encoded as the first union variant.
                        let mut field = self.parse_type(&f[1], namespace)?;
                        field.nullability = Some(Nullability::NullFirst);
                        Ok(field)
                    }
                    (true, Some(1)) => {
                        // [T, "null"]: allowed only outside strict mode.
                        if self.strict_mode {
                            return Err(ArrowError::SchemaError(
                                "Found Avro union of the form ['T','null'], which is disallowed in strict_mode"
                                    .to_string(),
                            ));
                        }
                        let mut field = self.parse_type(&f[0], namespace)?;
                        field.nullability = Some(Nullability::NullSecond);
                        Ok(field)
                    }
                    // General unions are not yet supported.
                    _ => Err(ArrowError::NotYetImplemented(format!(
                        "Union of {f:?} not currently supported"
                    ))),
                }
            }
            Schema::Complex(c) => match c {
                ComplexType::Record(r) => {
                    // A record's own namespace (if any) becomes the default
                    // namespace for its children.
                    let namespace = r.namespace.or(namespace);
                    let fields = r
                        .fields
                        .iter()
                        .map(|field| {
                            Ok(AvroField {
                                name: field.name.to_string(),
                                data_type: self.parse_type(&field.r#type, namespace)?,
                            })
                        })
                        .collect::<Result<_, ArrowError>>()?;
                    let field = AvroDataType {
                        nullability: None,
                        codec: Codec::Struct(fields),
                        metadata: r.attributes.field_metadata(),
                        resolution: None,
                    };
                    // Register so later references by name resolve to this record.
                    self.resolver.register(r.name, namespace, field.clone());
                    Ok(field)
                }
                ComplexType::Array(a) => {
                    let mut field = self.parse_type(a.items.as_ref(), namespace)?;
                    Ok(AvroDataType {
                        nullability: None,
                        metadata: a.attributes.field_metadata(),
                        codec: Codec::List(Arc::new(field)),
                        resolution: None,
                    })
                }
                ComplexType::Fixed(f) => {
                    let size = f.size.try_into().map_err(|e| {
                        ArrowError::ParseError(format!("Overflow converting size to i32: {e}"))
                    })?;
                    let md = f.attributes.field_metadata();
                    // `fixed` may carry a decimal or duration logical type.
                    let field = match f.attributes.logical_type {
                        Some("decimal") => {
                            let (precision, scale, _) =
                                parse_decimal_attributes(&f.attributes, Some(size as usize), true)?;
                            AvroDataType {
                                nullability: None,
                                metadata: md,
                                codec: Codec::Decimal(precision, Some(scale), Some(size as usize)),
                                resolution: None,
                            }
                        }
                        Some("duration") => {
                            // An Avro duration is always 12 bytes.
                            if size != 12 {
                                return Err(ArrowError::ParseError(format!(
                                    "Invalid fixed size for Duration: {size}, must be 12"
                                )));
                            };
                            AvroDataType {
                                nullability: None,
                                metadata: md,
                                codec: Codec::Interval,
                                resolution: None,
                            }
                        }
                        _ => AvroDataType {
                            nullability: None,
                            metadata: md,
                            codec: Codec::Fixed(size),
                            resolution: None,
                        },
                    };
                    self.resolver.register(f.name, namespace, field.clone());
                    Ok(field)
                }
                ComplexType::Enum(e) => {
                    let namespace = e.namespace.or(namespace);
                    let symbols = e
                        .symbols
                        .iter()
                        .map(|s| s.to_string())
                        .collect::<Arc<[String]>>();

                    // Preserve the symbol list in field metadata so dictionary
                    // values can be reconstructed downstream.
                    let mut metadata = e.attributes.field_metadata();
                    let symbols_json = serde_json::to_string(&e.symbols).map_err(|e| {
                        ArrowError::ParseError(format!("Failed to serialize enum symbols: {e}"))
                    })?;
                    metadata.insert(AVRO_ENUM_SYMBOLS_METADATA_KEY.to_string(), symbols_json);
                    let field = AvroDataType {
                        nullability: None,
                        metadata,
                        codec: Codec::Enum(symbols),
                        resolution: None,
                    };
                    self.resolver.register(e.name, namespace, field.clone());
                    Ok(field)
                }
                ComplexType::Map(m) => {
                    let val = self.parse_type(&m.values, namespace)?;
                    Ok(AvroDataType {
                        nullability: None,
                        metadata: m.attributes.field_metadata(),
                        codec: Codec::Map(Arc::new(val)),
                        resolution: None,
                    })
                }
            },
            Schema::Type(t) => {
                let mut field = self.parse_type(&Schema::TypeName(t.r#type.clone()), namespace)?;
                // https://avro.apache.org/docs/1.11.1/specification/#logical-types
                // A recognized logical type refines the underlying primitive
                // codec; an unrecognized one is preserved verbatim in metadata.
                match (t.attributes.logical_type, &mut field.codec) {
                    (Some("decimal"), c @ Codec::Binary) => {
                        let (prec, sc, _) = parse_decimal_attributes(&t.attributes, None, false)?;
                        *c = Codec::Decimal(prec, Some(sc), None);
                    }
                    (Some("date"), c @ Codec::Int32) => *c = Codec::Date32,
                    (Some("time-millis"), c @ Codec::Int32) => *c = Codec::TimeMillis,
                    (Some("time-micros"), c @ Codec::Int64) => *c = Codec::TimeMicros,
                    (Some("timestamp-millis"), c @ Codec::Int64) => {
                        *c = Codec::TimestampMillis(true)
                    }
                    (Some("timestamp-micros"), c @ Codec::Int64) => {
                        *c = Codec::TimestampMicros(true)
                    }
                    (Some("local-timestamp-millis"), c @ Codec::Int64) => {
                        *c = Codec::TimestampMillis(false)
                    }
                    (Some("local-timestamp-micros"), c @ Codec::Int64) => {
                        *c = Codec::TimestampMicros(false)
                    }
                    (Some("uuid"), c @ Codec::Utf8) => *c = Codec::Uuid,
                    (Some(logical), _) => {
                        // Insert unrecognized logical type into metadata map
                        field.metadata.insert("logicalType".into(), logical.into());
                    }
                    (None, _) => {}
                }
                // Carry any extra attributes through as field metadata.
                if !t.attributes.additional.is_empty() {
                    for (k, v) in &t.attributes.additional {
                        field.metadata.insert(k.to_string(), v.to_string());
                    }
                }
                Ok(field)
            }
        }
    }
811 | | |
812 | 404 | fn resolve_type<'s>( |
813 | 404 | &mut self, |
814 | 404 | writer_schema: &'s Schema<'a>, |
815 | 404 | reader_schema: &'s Schema<'a>, |
816 | 404 | namespace: Option<&'a str>, |
817 | 404 | ) -> Result<AvroDataType, ArrowError> { |
818 | 404 | match (writer_schema, reader_schema) { |
819 | | ( |
820 | 187 | Schema::TypeName(TypeName::Primitive(writer_primitive)), |
821 | 187 | Schema::TypeName(TypeName::Primitive(reader_primitive)), |
822 | 187 | ) => self.resolve_primitives(*writer_primitive, *reader_primitive, reader_schema), |
823 | | ( |
824 | | Schema::Type(Type { |
825 | 16 | r#type: TypeName::Primitive(writer_primitive), |
826 | | .. |
827 | | }), |
828 | | Schema::Type(Type { |
829 | 16 | r#type: TypeName::Primitive(reader_primitive), |
830 | | .. |
831 | | }), |
832 | 16 | ) => self.resolve_primitives(*writer_primitive, *reader_primitive, reader_schema), |
833 | | ( |
834 | 0 | Schema::TypeName(TypeName::Primitive(writer_primitive)), |
835 | | Schema::Type(Type { |
836 | 0 | r#type: TypeName::Primitive(reader_primitive), |
837 | | .. |
838 | | }), |
839 | 0 | ) => self.resolve_primitives(*writer_primitive, *reader_primitive, reader_schema), |
840 | | ( |
841 | | Schema::Type(Type { |
842 | 0 | r#type: TypeName::Primitive(writer_primitive), |
843 | | .. |
844 | | }), |
845 | 0 | Schema::TypeName(TypeName::Primitive(reader_primitive)), |
846 | 0 | ) => self.resolve_primitives(*writer_primitive, *reader_primitive, reader_schema), |
847 | | ( |
848 | 30 | Schema::Complex(ComplexType::Record(writer_record)), |
849 | 30 | Schema::Complex(ComplexType::Record(reader_record)), |
850 | 30 | ) => self.resolve_records(writer_record, reader_record, namespace), |
851 | 171 | (Schema::Union(writer_variants), Schema::Union(reader_variants)) => { |
852 | 171 | self.resolve_nullable_union(writer_variants, reader_variants, namespace) |
853 | | } |
854 | 0 | _ => Err(ArrowError::NotYetImplemented( |
855 | 0 | "Other resolutions not yet implemented".to_string(), |
856 | 0 | )), |
857 | | } |
858 | 404 | } |
859 | | |
860 | 203 | fn resolve_primitives( |
861 | 203 | &mut self, |
862 | 203 | write_primitive: PrimitiveType, |
863 | 203 | read_primitive: PrimitiveType, |
864 | 203 | reader_schema: &Schema<'a>, |
865 | 203 | ) -> Result<AvroDataType, ArrowError> { |
866 | 203 | if write_primitive == read_primitive { |
867 | 122 | return self.parse_type(reader_schema, None); |
868 | 81 | } |
869 | 81 | let promotion78 = match (write_primitive, read_primitive) { |
870 | 12 | (PrimitiveType::Int, PrimitiveType::Long) => Promotion::IntToLong, |
871 | 7 | (PrimitiveType::Int, PrimitiveType::Float) => Promotion::IntToFloat, |
872 | 13 | (PrimitiveType::Int, PrimitiveType::Double) => Promotion::IntToDouble, |
873 | 7 | (PrimitiveType::Long, PrimitiveType::Float) => Promotion::LongToFloat, |
874 | 7 | (PrimitiveType::Long, PrimitiveType::Double) => Promotion::LongToDouble, |
875 | 7 | (PrimitiveType::Float, PrimitiveType::Double) => Promotion::FloatToDouble, |
876 | 2 | (PrimitiveType::String, PrimitiveType::Bytes) => Promotion::StringToBytes, |
877 | 23 | (PrimitiveType::Bytes, PrimitiveType::String) => Promotion::BytesToString, |
878 | | _ => { |
879 | 3 | return Err(ArrowError::ParseError(format!( |
880 | 3 | "Illegal promotion {write_primitive:?} to {read_primitive:?}" |
881 | 3 | ))) |
882 | | } |
883 | | }; |
884 | 78 | let mut datatype = self.parse_type(reader_schema, None)?0 ; |
885 | 78 | datatype.resolution = Some(ResolutionInfo::Promotion(promotion)); |
886 | 78 | Ok(datatype) |
887 | 203 | } |
888 | | |
889 | 171 | fn resolve_nullable_union( |
890 | 171 | &mut self, |
891 | 171 | writer_variants: &[Schema<'a>], |
892 | 171 | reader_variants: &[Schema<'a>], |
893 | 171 | namespace: Option<&'a str>, |
894 | 171 | ) -> Result<AvroDataType, ArrowError> { |
895 | | // Only support unions with exactly two branches, one of which is `null` on both sides |
896 | 171 | if writer_variants.len() != 2 || reader_variants.len() != 2 { |
897 | 0 | return Err(ArrowError::NotYetImplemented( |
898 | 0 | "Only 2-branch unions are supported for schema resolution".to_string(), |
899 | 0 | )); |
900 | 171 | } |
901 | 683 | let is_null171 = |s: &Schema<'a>| { |
902 | 341 | matches!( |
903 | 651 | s, |
904 | | Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)) |
905 | | ) |
906 | 683 | }; |
907 | 171 | let w_null_pos = writer_variants.iter().position(is_null); |
908 | 171 | let r_null_pos = reader_variants.iter().position(is_null); |
909 | 171 | match (w_null_pos, r_null_pos) { |
910 | 171 | (Some(wp), Some(rp)) => { |
911 | | // Extract a non-null branch on each side |
912 | 171 | let w_nonnull = &writer_variants[1 - wp]; |
913 | 171 | let r_nonnull = &reader_variants[1 - rp]; |
914 | | // Resolve the non-null branch |
915 | 171 | let mut dt170 = self.make_data_type(w_nonnull, Some(r_nonnull), namespace)?1 ; |
916 | | // Adopt reader union null ordering |
917 | 170 | dt.nullability = Some(match rp { |
918 | 0 | 0 => Nullability::NullFirst, |
919 | 170 | 1 => Nullability::NullSecond, |
920 | 0 | _ => unreachable!(), |
921 | | }); |
922 | 170 | Ok(dt) |
923 | | } |
924 | 0 | _ => Err(ArrowError::NotYetImplemented( |
925 | 0 | "Union resolution requires both writer and reader to be nullable unions" |
926 | 0 | .to_string(), |
927 | 0 | )), |
928 | | } |
929 | 171 | } |
930 | | |
931 | 30 | fn resolve_records( |
932 | 30 | &mut self, |
933 | 30 | writer_record: &Record<'a>, |
934 | 30 | reader_record: &Record<'a>, |
935 | 30 | namespace: Option<&'a str>, |
936 | 30 | ) -> Result<AvroDataType, ArrowError> { |
937 | | // Names must match or be aliased |
938 | 30 | let names_match = writer_record.name == reader_record.name |
939 | 0 | || reader_record.aliases.contains(&writer_record.name) |
940 | 0 | || writer_record.aliases.contains(&reader_record.name); |
941 | 30 | if !names_match { |
942 | 0 | return Err(ArrowError::ParseError(format!( |
943 | 0 | "Record name mismatch writer={}, reader={}", |
944 | 0 | writer_record.name, reader_record.name |
945 | 0 | ))); |
946 | 30 | } |
947 | 30 | let writer_ns = writer_record.namespace.or(namespace); |
948 | 30 | let reader_ns = reader_record.namespace.or(namespace); |
949 | | // Map writer field name -> index |
950 | 30 | let mut writer_index_map = |
951 | 30 | HashMap::<&str, usize>::with_capacity(writer_record.fields.len()); |
952 | 210 | for (idx, write_field) in writer_record.fields.iter()30 .enumerate30 () { |
953 | 210 | writer_index_map.insert(write_field.name, idx); |
954 | 210 | } |
955 | | // Prepare outputs |
956 | 30 | let mut reader_fields: Vec<AvroField> = Vec::with_capacity(reader_record.fields.len()); |
957 | 30 | let mut writer_to_reader: Vec<Option<usize>> = vec![None; writer_record.fields.len()]; |
958 | 30 | let mut skip_fields: Vec<Option<AvroDataType>> = vec![None; writer_record.fields.len()]; |
959 | | //let mut default_fields: Vec<usize> = Vec::new(); |
960 | | // Build reader fields and mapping |
961 | 182 | for (reader_idx, r_field) in reader_record.fields.iter()30 .enumerate30 () { |
962 | 182 | if let Some(&writer_idx) = writer_index_map.get(r_field.name) { |
963 | | // Field exists in writer: resolve types (including promotions and union-of-null) |
964 | 182 | let w_schema = &writer_record.fields[writer_idx].r#type; |
965 | 181 | let resolved_dt = |
966 | 182 | self.make_data_type(w_schema, Some(&r_field.r#type), reader_ns)?1 ; |
967 | 181 | reader_fields.push(AvroField { |
968 | 181 | name: r_field.name.to_string(), |
969 | 181 | data_type: resolved_dt, |
970 | 181 | }); |
971 | 181 | writer_to_reader[writer_idx] = Some(reader_idx); |
972 | | } else { |
973 | 0 | return Err(ArrowError::NotYetImplemented( |
974 | 0 | "New fields from reader with default values not yet implemented".to_string(), |
975 | 0 | )); |
976 | | } |
977 | | } |
978 | | // Any writer fields not mapped should be skipped |
979 | 199 | for (writer_idx, writer_field) in writer_record.fields.iter()29 .enumerate29 () { |
980 | 199 | if writer_to_reader[writer_idx].is_none() { |
981 | | // Parse writer field type to know how to skip data |
982 | 19 | let writer_dt = self.parse_type(&writer_field.r#type, writer_ns)?0 ; |
983 | 19 | skip_fields[writer_idx] = Some(writer_dt); |
984 | 180 | } |
985 | | } |
986 | | // Implement writer-only fields to skip in Follow-up PR here |
987 | | // Build resolved record AvroDataType |
988 | 29 | let resolved = AvroDataType::new_with_resolution( |
989 | 29 | Codec::Struct(Arc::from(reader_fields)), |
990 | 29 | reader_record.attributes.field_metadata(), |
991 | 29 | None, |
992 | 29 | Some(ResolutionInfo::Record(ResolvedRecord { |
993 | 29 | writer_to_reader: Arc::from(writer_to_reader), |
994 | 29 | default_fields: Arc::default(), |
995 | 29 | skip_fields: Arc::from(skip_fields), |
996 | 29 | })), |
997 | | ); |
998 | | // Register a resolved record by reader name+namespace for potential named type refs |
999 | 29 | self.resolver |
1000 | 29 | .register(reader_record.name, reader_ns, resolved.clone()); |
1001 | 29 | Ok(resolved) |
1002 | 30 | } |
1003 | | } |
1004 | | |
1005 | | #[cfg(test)] |
1006 | | mod tests { |
1007 | | use super::*; |
1008 | | use crate::schema::{Attributes, Fixed, PrimitiveType, Schema, Type, TypeName}; |
1009 | | use serde_json; |
1010 | | |
1011 | 8 | fn create_schema_with_logical_type( |
1012 | 8 | primitive_type: PrimitiveType, |
1013 | 8 | logical_type: &'static str, |
1014 | 8 | ) -> Schema<'static> { |
1015 | 8 | let attributes = Attributes { |
1016 | 8 | logical_type: Some(logical_type), |
1017 | 8 | additional: Default::default(), |
1018 | 8 | }; |
1019 | | |
1020 | 8 | Schema::Type(Type { |
1021 | 8 | r#type: TypeName::Primitive(primitive_type), |
1022 | 8 | attributes, |
1023 | 8 | }) |
1024 | 8 | } |
1025 | | |
1026 | 0 | fn create_fixed_schema(size: usize, logical_type: &'static str) -> Schema<'static> { |
1027 | 0 | let attributes = Attributes { |
1028 | 0 | logical_type: Some(logical_type), |
1029 | 0 | additional: Default::default(), |
1030 | 0 | }; |
1031 | | |
1032 | 0 | Schema::Complex(ComplexType::Fixed(Fixed { |
1033 | 0 | name: "fixed_type", |
1034 | 0 | namespace: None, |
1035 | 0 | aliases: Vec::new(), |
1036 | 0 | size, |
1037 | 0 | attributes, |
1038 | 0 | })) |
1039 | 0 | } |
1040 | | |
1041 | 7 | fn resolve_promotion(writer: PrimitiveType, reader: PrimitiveType) -> AvroDataType { |
1042 | 7 | let writer_schema = Schema::TypeName(TypeName::Primitive(writer)); |
1043 | 7 | let reader_schema = Schema::TypeName(TypeName::Primitive(reader)); |
1044 | 7 | let mut maker = Maker::new(false, false); |
1045 | 7 | maker |
1046 | 7 | .make_data_type(&writer_schema, Some(&reader_schema), None) |
1047 | 7 | .expect("promotion should resolve") |
1048 | 7 | } |
1049 | | |
1050 | | #[test] |
1051 | 1 | fn test_date_logical_type() { |
1052 | 1 | let schema = create_schema_with_logical_type(PrimitiveType::Int, "date"); |
1053 | | |
1054 | 1 | let mut maker = Maker::new(false, false); |
1055 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1056 | | |
1057 | 1 | assert!(matches!0 (result.codec, Codec::Date32)); |
1058 | 1 | } |
1059 | | |
1060 | | #[test] |
1061 | 1 | fn test_time_millis_logical_type() { |
1062 | 1 | let schema = create_schema_with_logical_type(PrimitiveType::Int, "time-millis"); |
1063 | | |
1064 | 1 | let mut maker = Maker::new(false, false); |
1065 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1066 | | |
1067 | 1 | assert!(matches!0 (result.codec, Codec::TimeMillis)); |
1068 | 1 | } |
1069 | | |
1070 | | #[test] |
1071 | 1 | fn test_time_micros_logical_type() { |
1072 | 1 | let schema = create_schema_with_logical_type(PrimitiveType::Long, "time-micros"); |
1073 | | |
1074 | 1 | let mut maker = Maker::new(false, false); |
1075 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1076 | | |
1077 | 1 | assert!(matches!0 (result.codec, Codec::TimeMicros)); |
1078 | 1 | } |
1079 | | |
1080 | | #[test] |
1081 | 1 | fn test_timestamp_millis_logical_type() { |
1082 | 1 | let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-millis"); |
1083 | | |
1084 | 1 | let mut maker = Maker::new(false, false); |
1085 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1086 | | |
1087 | 1 | assert!(matches!(result.codec, Codec::TimestampMillis(true))); |
1088 | 1 | } |
1089 | | |
1090 | | #[test] |
1091 | 1 | fn test_timestamp_micros_logical_type() { |
1092 | 1 | let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-micros"); |
1093 | | |
1094 | 1 | let mut maker = Maker::new(false, false); |
1095 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1096 | | |
1097 | 1 | assert!(matches!(result.codec, Codec::TimestampMicros(true))); |
1098 | 1 | } |
1099 | | |
1100 | | #[test] |
1101 | 1 | fn test_local_timestamp_millis_logical_type() { |
1102 | 1 | let schema = create_schema_with_logical_type(PrimitiveType::Long, "local-timestamp-millis"); |
1103 | | |
1104 | 1 | let mut maker = Maker::new(false, false); |
1105 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1106 | | |
1107 | 1 | assert!(matches!0 (result.codec, Codec::TimestampMillis(false))); |
1108 | 1 | } |
1109 | | |
1110 | | #[test] |
1111 | 1 | fn test_local_timestamp_micros_logical_type() { |
1112 | 1 | let schema = create_schema_with_logical_type(PrimitiveType::Long, "local-timestamp-micros"); |
1113 | | |
1114 | 1 | let mut maker = Maker::new(false, false); |
1115 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1116 | | |
1117 | 1 | assert!(matches!0 (result.codec, Codec::TimestampMicros(false))); |
1118 | 1 | } |
1119 | | |
1120 | | #[test] |
1121 | 1 | fn test_uuid_type() { |
1122 | 1 | let mut codec = Codec::Fixed(16); |
1123 | 1 | if let c @ Codec::Fixed(16) = &mut codec { |
1124 | 1 | *c = Codec::Uuid; |
1125 | 1 | }0 |
1126 | 1 | assert!(matches!0 (codec, Codec::Uuid)); |
1127 | 1 | } |
1128 | | |
1129 | | #[test] |
1130 | 1 | fn test_duration_logical_type() { |
1131 | 1 | let mut codec = Codec::Fixed(12); |
1132 | | |
1133 | 1 | if let c @ Codec::Fixed(12) = &mut codec { |
1134 | 1 | *c = Codec::Interval; |
1135 | 1 | }0 |
1136 | | |
1137 | 1 | assert!(matches!0 (codec, Codec::Interval)); |
1138 | 1 | } |
1139 | | |
1140 | | #[test] |
1141 | 1 | fn test_decimal_logical_type_not_implemented() { |
1142 | 1 | let mut codec = Codec::Fixed(16); |
1143 | | |
1144 | 1 | let process_decimal = || -> Result<(), ArrowError> { |
1145 | 1 | if let Codec::Fixed(_) = codec { |
1146 | 1 | return Err(ArrowError::NotYetImplemented( |
1147 | 1 | "Decimals are not currently supported".to_string(), |
1148 | 1 | )); |
1149 | 0 | } |
1150 | 0 | Ok(()) |
1151 | 1 | }; |
1152 | | |
1153 | 1 | let result = process_decimal(); |
1154 | | |
1155 | 1 | assert!(result.is_err()); |
1156 | 1 | if let Err(ArrowError::NotYetImplemented(msg)) = result { |
1157 | 1 | assert!(msg.contains("Decimals are not currently supported")); |
1158 | | } else { |
1159 | 0 | panic!("Expected NotYetImplemented error"); |
1160 | | } |
1161 | 1 | } |
1162 | | #[test] |
1163 | 1 | fn test_unknown_logical_type_added_to_metadata() { |
1164 | 1 | let schema = create_schema_with_logical_type(PrimitiveType::Int, "custom-type"); |
1165 | | |
1166 | 1 | let mut maker = Maker::new(false, false); |
1167 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1168 | | |
1169 | 1 | assert_eq!( |
1170 | 1 | result.metadata.get("logicalType"), |
1171 | 1 | Some(&"custom-type".to_string()) |
1172 | | ); |
1173 | 1 | } |
1174 | | |
1175 | | #[test] |
1176 | 1 | fn test_string_with_utf8view_enabled() { |
1177 | 1 | let schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); |
1178 | | |
1179 | 1 | let mut maker = Maker::new(true, false); |
1180 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1181 | | |
1182 | 1 | assert!(matches!0 (result.codec, Codec::Utf8View)); |
1183 | 1 | } |
1184 | | |
1185 | | #[test] |
1186 | 1 | fn test_string_without_utf8view_enabled() { |
1187 | 1 | let schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); |
1188 | | |
1189 | 1 | let mut maker = Maker::new(false, false); |
1190 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1191 | | |
1192 | 1 | assert!(matches!0 (result.codec, Codec::Utf8)); |
1193 | 1 | } |
1194 | | |
1195 | | #[test] |
1196 | 1 | fn test_record_with_string_and_utf8view_enabled() { |
1197 | 1 | let field_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); |
1198 | | |
1199 | 1 | let avro_field = crate::schema::Field { |
1200 | 1 | name: "string_field", |
1201 | 1 | r#type: field_schema, |
1202 | 1 | default: None, |
1203 | 1 | doc: None, |
1204 | 1 | }; |
1205 | | |
1206 | 1 | let record = Record { |
1207 | 1 | name: "test_record", |
1208 | 1 | namespace: None, |
1209 | 1 | aliases: vec![], |
1210 | 1 | doc: None, |
1211 | 1 | fields: vec![avro_field], |
1212 | 1 | attributes: Attributes::default(), |
1213 | 1 | }; |
1214 | | |
1215 | 1 | let schema = Schema::Complex(ComplexType::Record(record)); |
1216 | | |
1217 | 1 | let mut maker = Maker::new(true, false); |
1218 | 1 | let result = maker.make_data_type(&schema, None, None).unwrap(); |
1219 | | |
1220 | 1 | if let Codec::Struct(fields) = &result.codec { |
1221 | 1 | let first_field_codec = &fields[0].data_type().codec; |
1222 | 1 | assert!(matches!0 (first_field_codec, Codec::Utf8View)); |
1223 | | } else { |
1224 | 0 | panic!("Expected Struct codec"); |
1225 | | } |
1226 | 1 | } |
1227 | | |
1228 | | #[test] |
1229 | 1 | fn test_union_with_strict_mode() { |
1230 | 1 | let schema = Schema::Union(vec![ |
1231 | 1 | Schema::TypeName(TypeName::Primitive(PrimitiveType::String)), |
1232 | 1 | Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), |
1233 | 1 | ]); |
1234 | | |
1235 | 1 | let mut maker = Maker::new(false, true); |
1236 | 1 | let result = maker.make_data_type(&schema, None, None); |
1237 | | |
1238 | 1 | assert!(result.is_err()); |
1239 | 1 | match result { |
1240 | 1 | Err(ArrowError::SchemaError(msg)) => { |
1241 | 1 | assert!(msg.contains( |
1242 | 1 | "Found Avro union of the form ['T','null'], which is disallowed in strict_mode" |
1243 | 1 | )); |
1244 | | } |
1245 | 0 | _ => panic!("Expected SchemaError"), |
1246 | | } |
1247 | 1 | } |
1248 | | |
1249 | | #[test] |
1250 | 1 | fn test_resolve_int_to_float_promotion() { |
1251 | 1 | let result = resolve_promotion(PrimitiveType::Int, PrimitiveType::Float); |
1252 | 1 | assert!(matches!0 (result.codec, Codec::Float32)); |
1253 | 1 | assert_eq!( |
1254 | | result.resolution, |
1255 | | Some(ResolutionInfo::Promotion(Promotion::IntToFloat)) |
1256 | | ); |
1257 | 1 | } |
1258 | | |
1259 | | #[test] |
1260 | 1 | fn test_resolve_int_to_double_promotion() { |
1261 | 1 | let result = resolve_promotion(PrimitiveType::Int, PrimitiveType::Double); |
1262 | 1 | assert!(matches!0 (result.codec, Codec::Float64)); |
1263 | 1 | assert_eq!( |
1264 | | result.resolution, |
1265 | | Some(ResolutionInfo::Promotion(Promotion::IntToDouble)) |
1266 | | ); |
1267 | 1 | } |
1268 | | |
1269 | | #[test] |
1270 | 1 | fn test_resolve_long_to_float_promotion() { |
1271 | 1 | let result = resolve_promotion(PrimitiveType::Long, PrimitiveType::Float); |
1272 | 1 | assert!(matches!0 (result.codec, Codec::Float32)); |
1273 | 1 | assert_eq!( |
1274 | | result.resolution, |
1275 | | Some(ResolutionInfo::Promotion(Promotion::LongToFloat)) |
1276 | | ); |
1277 | 1 | } |
1278 | | |
1279 | | #[test] |
1280 | 1 | fn test_resolve_long_to_double_promotion() { |
1281 | 1 | let result = resolve_promotion(PrimitiveType::Long, PrimitiveType::Double); |
1282 | 1 | assert!(matches!0 (result.codec, Codec::Float64)); |
1283 | 1 | assert_eq!( |
1284 | | result.resolution, |
1285 | | Some(ResolutionInfo::Promotion(Promotion::LongToDouble)) |
1286 | | ); |
1287 | 1 | } |
1288 | | |
1289 | | #[test] |
1290 | 1 | fn test_resolve_float_to_double_promotion() { |
1291 | 1 | let result = resolve_promotion(PrimitiveType::Float, PrimitiveType::Double); |
1292 | 1 | assert!(matches!0 (result.codec, Codec::Float64)); |
1293 | 1 | assert_eq!( |
1294 | | result.resolution, |
1295 | | Some(ResolutionInfo::Promotion(Promotion::FloatToDouble)) |
1296 | | ); |
1297 | 1 | } |
1298 | | |
1299 | | #[test] |
1300 | 1 | fn test_resolve_string_to_bytes_promotion() { |
1301 | 1 | let result = resolve_promotion(PrimitiveType::String, PrimitiveType::Bytes); |
1302 | 1 | assert!(matches!0 (result.codec, Codec::Binary)); |
1303 | 1 | assert_eq!( |
1304 | | result.resolution, |
1305 | | Some(ResolutionInfo::Promotion(Promotion::StringToBytes)) |
1306 | | ); |
1307 | 1 | } |
1308 | | |
1309 | | #[test] |
1310 | 1 | fn test_resolve_bytes_to_string_promotion() { |
1311 | 1 | let result = resolve_promotion(PrimitiveType::Bytes, PrimitiveType::String); |
1312 | 1 | assert!(matches!0 (result.codec, Codec::Utf8)); |
1313 | 1 | assert_eq!( |
1314 | | result.resolution, |
1315 | | Some(ResolutionInfo::Promotion(Promotion::BytesToString)) |
1316 | | ); |
1317 | 1 | } |
1318 | | |
1319 | | #[test] |
1320 | 1 | fn test_resolve_illegal_promotion_double_to_float_errors() { |
1321 | 1 | let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Double)); |
1322 | 1 | let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Float)); |
1323 | 1 | let mut maker = Maker::new(false, false); |
1324 | 1 | let result = maker.make_data_type(&writer_schema, Some(&reader_schema), None); |
1325 | 1 | assert!(result.is_err()); |
1326 | 1 | match result { |
1327 | 1 | Err(ArrowError::ParseError(msg)) => { |
1328 | 1 | assert!(msg.contains("Illegal promotion")); |
1329 | | } |
1330 | 0 | _ => panic!("Expected ParseError for illegal promotion Double -> Float"), |
1331 | | } |
1332 | 1 | } |
1333 | | |
1334 | | #[test] |
1335 | 1 | fn test_promotion_within_nullable_union_keeps_reader_null_ordering() { |
1336 | 1 | let writer = Schema::Union(vec![ |
1337 | 1 | Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), |
1338 | 1 | Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)), |
1339 | 1 | ]); |
1340 | 1 | let reader = Schema::Union(vec![ |
1341 | 1 | Schema::TypeName(TypeName::Primitive(PrimitiveType::Double)), |
1342 | 1 | Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), |
1343 | 1 | ]); |
1344 | 1 | let mut maker = Maker::new(false, false); |
1345 | 1 | let result = maker.make_data_type(&writer, Some(&reader), None).unwrap(); |
1346 | 1 | assert!(matches!0 (result.codec, Codec::Float64)); |
1347 | 1 | assert_eq!( |
1348 | | result.resolution, |
1349 | | Some(ResolutionInfo::Promotion(Promotion::IntToDouble)) |
1350 | | ); |
1351 | 1 | assert_eq!(result.nullability, Some(Nullability::NullSecond)); |
1352 | 1 | } |
1353 | | |
1354 | | #[test] |
1355 | 1 | fn test_resolve_type_promotion() { |
1356 | 1 | let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)); |
1357 | 1 | let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)); |
1358 | 1 | let mut maker = Maker::new(false, false); |
1359 | 1 | let result = maker |
1360 | 1 | .make_data_type(&writer_schema, Some(&reader_schema), None) |
1361 | 1 | .unwrap(); |
1362 | 1 | assert!(matches!0 (result.codec, Codec::Int64)); |
1363 | 1 | assert_eq!( |
1364 | | result.resolution, |
1365 | | Some(ResolutionInfo::Promotion(Promotion::IntToLong)) |
1366 | | ); |
1367 | 1 | } |
1368 | | |
1369 | | #[test] |
1370 | 1 | fn test_nested_record_type_reuse_without_namespace() { |
1371 | 1 | let schema_str = r#" |
1372 | 1 | { |
1373 | 1 | "type": "record", |
1374 | 1 | "name": "Record", |
1375 | 1 | "fields": [ |
1376 | 1 | { |
1377 | 1 | "name": "nested", |
1378 | 1 | "type": { |
1379 | 1 | "type": "record", |
1380 | 1 | "name": "Nested", |
1381 | 1 | "fields": [ |
1382 | 1 | { "name": "nested_int", "type": "int" } |
1383 | 1 | ] |
1384 | 1 | } |
1385 | 1 | }, |
1386 | 1 | { "name": "nestedRecord", "type": "Nested" }, |
1387 | 1 | { "name": "nestedArray", "type": { "type": "array", "items": "Nested" } }, |
1388 | 1 | { "name": "nestedMap", "type": { "type": "map", "values": "Nested" } } |
1389 | 1 | ] |
1390 | 1 | } |
1391 | 1 | "#; |
1392 | | |
1393 | 1 | let schema: Schema = serde_json::from_str(schema_str).unwrap(); |
1394 | | |
1395 | 1 | let mut maker = Maker::new(false, false); |
1396 | 1 | let avro_data_type = maker.make_data_type(&schema, None, None).unwrap(); |
1397 | | |
1398 | 1 | if let Codec::Struct(fields) = avro_data_type.codec() { |
1399 | 1 | assert_eq!(fields.len(), 4); |
1400 | | |
1401 | | // nested |
1402 | 1 | assert_eq!(fields[0].name(), "nested"); |
1403 | 1 | let nested_data_type = fields[0].data_type(); |
1404 | 1 | if let Codec::Struct(nested_fields) = nested_data_type.codec() { |
1405 | 1 | assert_eq!(nested_fields.len(), 1); |
1406 | 1 | assert_eq!(nested_fields[0].name(), "nested_int"); |
1407 | 1 | assert!(matches!0 (nested_fields[0].data_type().codec(), Codec::Int32)); |
1408 | | } else { |
1409 | 0 | panic!( |
1410 | 0 | "'nested' field is not a struct but {:?}", |
1411 | 0 | nested_data_type.codec() |
1412 | | ); |
1413 | | } |
1414 | | |
1415 | | // nestedRecord |
1416 | 1 | assert_eq!(fields[1].name(), "nestedRecord"); |
1417 | 1 | let nested_record_data_type = fields[1].data_type(); |
1418 | 1 | assert_eq!( |
1419 | 1 | nested_record_data_type.codec().data_type(), |
1420 | 1 | nested_data_type.codec().data_type() |
1421 | | ); |
1422 | | |
1423 | | // nestedArray |
1424 | 1 | assert_eq!(fields[2].name(), "nestedArray"); |
1425 | 1 | if let Codec::List(item_type) = fields[2].data_type().codec() { |
1426 | 1 | assert_eq!( |
1427 | 1 | item_type.codec().data_type(), |
1428 | 1 | nested_data_type.codec().data_type() |
1429 | | ); |
1430 | | } else { |
1431 | 0 | panic!("'nestedArray' field is not a list"); |
1432 | | } |
1433 | | |
1434 | | // nestedMap |
1435 | 1 | assert_eq!(fields[3].name(), "nestedMap"); |
1436 | 1 | if let Codec::Map(value_type) = fields[3].data_type().codec() { |
1437 | 1 | assert_eq!( |
1438 | 1 | value_type.codec().data_type(), |
1439 | 1 | nested_data_type.codec().data_type() |
1440 | | ); |
1441 | | } else { |
1442 | 0 | panic!("'nestedMap' field is not a map"); |
1443 | | } |
1444 | | } else { |
1445 | 0 | panic!("Top-level schema is not a struct"); |
1446 | | } |
1447 | 1 | } |
1448 | | |
1449 | | #[test] |
1450 | 1 | fn test_nested_enum_type_reuse_with_namespace() { |
1451 | 1 | let schema_str = r#" |
1452 | 1 | { |
1453 | 1 | "type": "record", |
1454 | 1 | "name": "Record", |
1455 | 1 | "namespace": "record_ns", |
1456 | 1 | "fields": [ |
1457 | 1 | { |
1458 | 1 | "name": "status", |
1459 | 1 | "type": { |
1460 | 1 | "type": "enum", |
1461 | 1 | "name": "Status", |
1462 | 1 | "namespace": "enum_ns", |
1463 | 1 | "symbols": ["ACTIVE", "INACTIVE", "PENDING"] |
1464 | 1 | } |
1465 | 1 | }, |
1466 | 1 | { "name": "backupStatus", "type": "enum_ns.Status" }, |
1467 | 1 | { "name": "statusHistory", "type": { "type": "array", "items": "enum_ns.Status" } }, |
1468 | 1 | { "name": "statusMap", "type": { "type": "map", "values": "enum_ns.Status" } } |
1469 | 1 | ] |
1470 | 1 | } |
1471 | 1 | "#; |
1472 | | |
1473 | 1 | let schema: Schema = serde_json::from_str(schema_str).unwrap(); |
1474 | | |
1475 | 1 | let mut maker = Maker::new(false, false); |
1476 | 1 | let avro_data_type = maker.make_data_type(&schema, None, None).unwrap(); |
1477 | | |
1478 | 1 | if let Codec::Struct(fields) = avro_data_type.codec() { |
1479 | 1 | assert_eq!(fields.len(), 4); |
1480 | | |
1481 | | // status |
1482 | 1 | assert_eq!(fields[0].name(), "status"); |
1483 | 1 | let status_data_type = fields[0].data_type(); |
1484 | 1 | if let Codec::Enum(symbols) = status_data_type.codec() { |
1485 | 1 | assert_eq!(symbols.as_ref(), &["ACTIVE", "INACTIVE", "PENDING"]); |
1486 | | } else { |
1487 | 0 | panic!( |
1488 | 0 | "'status' field is not an enum but {:?}", |
1489 | 0 | status_data_type.codec() |
1490 | | ); |
1491 | | } |
1492 | | |
1493 | | // backupStatus |
1494 | 1 | assert_eq!(fields[1].name(), "backupStatus"); |
1495 | 1 | let backup_status_data_type = fields[1].data_type(); |
1496 | 1 | assert_eq!( |
1497 | 1 | backup_status_data_type.codec().data_type(), |
1498 | 1 | status_data_type.codec().data_type() |
1499 | | ); |
1500 | | |
1501 | | // statusHistory |
1502 | 1 | assert_eq!(fields[2].name(), "statusHistory"); |
1503 | 1 | if let Codec::List(item_type) = fields[2].data_type().codec() { |
1504 | 1 | assert_eq!( |
1505 | 1 | item_type.codec().data_type(), |
1506 | 1 | status_data_type.codec().data_type() |
1507 | | ); |
1508 | | } else { |
1509 | 0 | panic!("'statusHistory' field is not a list"); |
1510 | | } |
1511 | | |
1512 | | // statusMap |
1513 | 1 | assert_eq!(fields[3].name(), "statusMap"); |
1514 | 1 | if let Codec::Map(value_type) = fields[3].data_type().codec() { |
1515 | 1 | assert_eq!( |
1516 | 1 | value_type.codec().data_type(), |
1517 | 1 | status_data_type.codec().data_type() |
1518 | | ); |
1519 | | } else { |
1520 | 0 | panic!("'statusMap' field is not a map"); |
1521 | | } |
1522 | | } else { |
1523 | 0 | panic!("Top-level schema is not a struct"); |
1524 | | } |
1525 | 1 | } |
1526 | | } |