Coverage Report

Created: 2025-08-26 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-schema/src/schema.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::collections::HashMap;
19
use std::fmt;
20
use std::hash::Hash;
21
use std::sync::Arc;
22
23
use crate::error::ArrowError;
24
use crate::field::Field;
25
use crate::{DataType, FieldRef, Fields};
26
27
/// A builder to facilitate building a [`Schema`] iteratively from [`FieldRef`]s
///
/// Fields are kept in insertion order; `finish` converts them, together with
/// the metadata map, into the final [`Schema`].
28
#[derive(Debug, Default)]
29
pub struct SchemaBuilder {
30
    // Fields collected so far, in the order they will appear in the schema
    fields: Vec<FieldRef>,
31
    // Custom key-value metadata handed to the finished schema
    metadata: HashMap<String, String>,
32
}
33
34
impl SchemaBuilder {
35
    /// Creates a new empty [`SchemaBuilder`]
36
35
    pub fn new() -> Self {
37
35
        Self::default()
38
35
    }
39
40
    /// Creates a new empty [`SchemaBuilder`] with space for `capacity` fields
41
29
    pub fn with_capacity(capacity: usize) -> Self {
42
29
        Self {
43
29
            fields: Vec::with_capacity(capacity),
44
29
            metadata: Default::default(),
45
29
        }
46
29
    }
47
48
    /// Appends a [`FieldRef`] to this [`SchemaBuilder`] without checking for collision
49
256
    pub fn push(&mut self, field: impl Into<FieldRef>) {
50
256
        self.fields.push(field.into())
51
256
    }
52
53
    /// Removes and returns the [`FieldRef`] as index `idx`
54
    ///
55
    /// # Panics
56
    ///
57
    /// Panics if index out of bounds
58
0
    pub fn remove(&mut self, idx: usize) -> FieldRef {
59
0
        self.fields.remove(idx)
60
0
    }
61
62
    /// Returns an immutable reference to the [`FieldRef`] at index `idx`
63
    ///
64
    /// # Panics
65
    ///
66
    /// Panics if index out of bounds
67
0
    pub fn field(&mut self, idx: usize) -> &FieldRef {
68
0
        &mut self.fields[idx]
69
0
    }
70
71
    /// Returns a mutable reference to the [`FieldRef`] at index `idx`
72
    ///
73
    /// # Panics
74
    ///
75
    /// Panics if index out of bounds
76
0
    pub fn field_mut(&mut self, idx: usize) -> &mut FieldRef {
77
0
        &mut self.fields[idx]
78
0
    }
79
80
    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
81
0
    pub fn metadata(&mut self) -> &HashMap<String, String> {
82
0
        &self.metadata
83
0
    }
84
85
    /// Returns a mutable reference to the Map of custom metadata key-value pairs.
86
0
    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
87
0
        &mut self.metadata
88
0
    }
89
90
    /// Reverse the fileds
91
0
    pub fn reverse(&mut self) {
92
0
        self.fields.reverse();
93
0
    }
94
95
    /// Appends a [`FieldRef`] to this [`SchemaBuilder`] checking for collision
96
    ///
97
    /// If an existing field exists with the same name, calls [`Field::try_merge`]
98
0
    pub fn try_merge(&mut self, field: &FieldRef) -> Result<(), ArrowError> {
99
        // This could potentially be sped up with a HashMap or similar
100
0
        let existing = self.fields.iter_mut().find(|f| f.name() == field.name());
101
0
        match existing {
102
0
            Some(e) if Arc::ptr_eq(e, field) => {} // Nothing to do
103
0
            Some(e) => match Arc::get_mut(e) {
104
0
                Some(e) => e.try_merge(field.as_ref())?,
105
                None => {
106
0
                    let mut t = e.as_ref().clone();
107
0
                    t.try_merge(field)?;
108
0
                    *e = Arc::new(t)
109
                }
110
            },
111
0
            None => self.fields.push(field.clone()),
112
        }
113
0
        Ok(())
114
0
    }
115
116
    /// Consume this [`SchemaBuilder`] yielding the final [`Schema`]
117
64
    pub fn finish(self) -> Schema {
118
64
        Schema {
119
64
            fields: self.fields.into(),
120
64
            metadata: self.metadata,
121
64
        }
122
64
    }
123
}
124
125
impl From<&Fields> for SchemaBuilder {
126
0
    fn from(value: &Fields) -> Self {
127
0
        Self {
128
0
            fields: value.to_vec(),
129
0
            metadata: Default::default(),
130
0
        }
131
0
    }
132
}
133
134
impl From<Fields> for SchemaBuilder {
135
0
    fn from(value: Fields) -> Self {
136
0
        Self {
137
0
            fields: value.to_vec(),
138
0
            metadata: Default::default(),
139
0
        }
140
0
    }
141
}
142
143
impl From<&Schema> for SchemaBuilder {
    /// Clones `value` and delegates to the owned [`Schema`] conversion
    fn from(value: &Schema) -> Self {
        let owned: Schema = value.clone();
        owned.into()
    }
}
148
149
impl From<Schema> for SchemaBuilder {
150
0
    fn from(value: Schema) -> Self {
151
0
        Self {
152
0
            fields: value.fields.to_vec(),
153
0
            metadata: value.metadata,
154
0
        }
155
0
    }
156
}
157
158
impl Extend<FieldRef> for SchemaBuilder {
    /// Appends every [`FieldRef`] produced by `iter`, in order, without checking for collision
    fn extend<T: IntoIterator<Item = FieldRef>>(&mut self, iter: T) {
        // `Vec::extend` reserves from the iterator's size hint before appending
        self.fields.extend(iter);
    }
}
167
168
impl Extend<Field> for SchemaBuilder {
169
0
    fn extend<T: IntoIterator<Item = Field>>(&mut self, iter: T) {
170
0
        let iter = iter.into_iter();
171
0
        self.fields.reserve(iter.size_hint().0);
172
0
        for f in iter {
173
0
            self.push(f)
174
        }
175
0
    }
176
}
177
178
/// A reference-counted reference to a [`Schema`].
179
pub type SchemaRef = Arc<Schema>;
180
181
/// Describes the meta-data of an ordered sequence of fields, plus custom
/// key-value metadata attached to the sequence as a whole.
182
///
183
/// Note that this information is only part of the meta-data and not part of the physical
184
/// memory layout.
185
#[derive(Debug, Clone, PartialEq, Eq)]
186
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
187
pub struct Schema {
188
    /// The ordered sequence of fields that describe the schema.
189
    pub fields: Fields,
190
    /// A map of key-value pairs containing additional metadata.
191
    pub metadata: HashMap<String, String>,
192
}
193
194
impl Schema {
195
    /// Creates an empty `Schema`
196
0
    pub fn empty() -> Self {
197
0
        Self {
198
0
            fields: Default::default(),
199
0
            metadata: HashMap::new(),
200
0
        }
201
0
    }
202
203
    /// Creates a new [`Schema`] from a sequence of [`Field`] values.
204
    ///
205
    /// # Example
206
    ///
207
    /// ```
208
    /// # use arrow_schema::*;
209
    /// let field_a = Field::new("a", DataType::Int64, false);
210
    /// let field_b = Field::new("b", DataType::Boolean, false);
211
    ///
212
    /// let schema = Schema::new(vec![field_a, field_b]);
213
    /// ```
214
109
    pub fn new(fields: impl Into<Fields>) -> Self {
215
109
        Self::new_with_metadata(fields, HashMap::new())
216
109
    }
217
218
    /// Creates a new [`Schema`] from a sequence of [`Field`] values
219
    /// and adds additional metadata in form of key value pairs.
220
    ///
221
    /// # Example
222
    ///
223
    /// ```
224
    /// # use arrow_schema::*;
225
    /// # use std::collections::HashMap;
226
    ///
227
    /// let field_a = Field::new("a", DataType::Int64, false);
228
    /// let field_b = Field::new("b", DataType::Boolean, false);
229
    ///
230
    /// let mut metadata: HashMap<String, String> = HashMap::new();
231
    /// metadata.insert("row_count".to_string(), "100".to_string());
232
    ///
233
    /// let schema = Schema::new_with_metadata(vec![field_a, field_b], metadata);
234
    /// ```
235
    #[inline]
236
109
    pub fn new_with_metadata(fields: impl Into<Fields>, metadata: HashMap<String, String>) -> Self {
237
109
        Self {
238
109
            fields: fields.into(),
239
109
            metadata,
240
109
        }
241
109
    }
242
243
    /// Sets the metadata of this `Schema` to be `metadata` and returns self
244
0
    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
245
0
        self.metadata = metadata;
246
0
        self
247
0
    }
248
249
    /// Returns a new schema with only the specified columns in the new schema
250
    /// This carries metadata from the parent schema over as well
251
0
    pub fn project(&self, indices: &[usize]) -> Result<Schema, ArrowError> {
252
0
        let new_fields = indices
253
0
            .iter()
254
0
            .map(|i| {
255
0
                self.fields.get(*i).cloned().ok_or_else(|| {
256
0
                    ArrowError::SchemaError(format!(
257
0
                        "project index {} out of bounds, max field {}",
258
0
                        i,
259
0
                        self.fields().len()
260
0
                    ))
261
0
                })
262
0
            })
263
0
            .collect::<Result<Vec<_>, _>>()?;
264
0
        Ok(Self::new_with_metadata(new_fields, self.metadata.clone()))
265
0
    }
266
267
    /// Merge schema into self if it is compatible. Struct fields will be merged recursively.
268
    ///
269
    /// Example:
270
    ///
271
    /// ```
272
    /// # use arrow_schema::*;
273
    ///
274
    /// let merged = Schema::try_merge(vec![
275
    ///     Schema::new(vec![
276
    ///         Field::new("c1", DataType::Int64, false),
277
    ///         Field::new("c2", DataType::Utf8, false),
278
    ///     ]),
279
    ///     Schema::new(vec![
280
    ///         Field::new("c1", DataType::Int64, true),
281
    ///         Field::new("c2", DataType::Utf8, false),
282
    ///         Field::new("c3", DataType::Utf8, false),
283
    ///     ]),
284
    /// ]).unwrap();
285
    ///
286
    /// assert_eq!(
287
    ///     merged,
288
    ///     Schema::new(vec![
289
    ///         Field::new("c1", DataType::Int64, true),
290
    ///         Field::new("c2", DataType::Utf8, false),
291
    ///         Field::new("c3", DataType::Utf8, false),
292
    ///     ]),
293
    /// );
294
    /// ```
295
0
    pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self, ArrowError> {
296
0
        let mut out_meta = HashMap::new();
297
0
        let mut out_fields = SchemaBuilder::new();
298
0
        for schema in schemas {
299
0
            let Schema { metadata, fields } = schema;
300
301
            // merge metadata
302
0
            for (key, value) in metadata.into_iter() {
303
0
                if let Some(old_val) = out_meta.get(&key) {
304
0
                    if old_val != &value {
305
0
                        return Err(ArrowError::SchemaError(format!(
306
0
                            "Fail to merge schema due to conflicting metadata. \
307
0
                                         Key '{key}' has different values '{old_val}' and '{value}'"
308
0
                        )));
309
0
                    }
310
0
                }
311
0
                out_meta.insert(key, value);
312
            }
313
314
            // merge fields
315
0
            fields.iter().try_for_each(|x| out_fields.try_merge(x))?
316
        }
317
318
0
        Ok(out_fields.finish().with_metadata(out_meta))
319
0
    }
320
321
    /// Returns an immutable reference of the vector of `Field` instances.
322
    #[inline]
323
704
    pub const fn fields(&self) -> &Fields {
324
704
        &self.fields
325
704
    }
326
327
    /// Returns a vector with references to all fields (including nested fields)
328
    ///
329
    /// # Example
330
    ///
331
    /// ```
332
    /// use std::sync::Arc;
333
    /// use arrow_schema::{DataType, Field, Fields, Schema};
334
    ///
335
    /// let f1 = Arc::new(Field::new("a", DataType::Boolean, false));
336
    ///
337
    /// let f2_inner = Arc::new(Field::new("b_inner", DataType::Int8, false));
338
    /// let f2 = Arc::new(Field::new("b", DataType::List(f2_inner.clone()), false));
339
    ///
340
    /// let f3_inner1 = Arc::new(Field::new("c_inner1", DataType::Int8, false));
341
    /// let f3_inner2 = Arc::new(Field::new("c_inner2", DataType::Int8, false));
342
    /// let f3 = Arc::new(Field::new(
343
    ///     "c",
344
    ///     DataType::Struct(vec![f3_inner1.clone(), f3_inner2.clone()].into()),
345
    ///     false
346
    /// ));
347
    ///
348
    /// let mut schema = Schema::new(vec![
349
    ///   f1.clone(), f2.clone(), f3.clone()
350
    /// ]);
351
    /// assert_eq!(
352
    ///     schema.flattened_fields(),
353
    ///     vec![
354
    ///         f1.as_ref(),
355
    ///         f2.as_ref(),
356
    ///         f2_inner.as_ref(),
357
    ///         f3.as_ref(),
358
    ///         f3_inner1.as_ref(),
359
    ///         f3_inner2.as_ref()
360
    ///    ]
361
    /// );
362
    /// ```
363
    #[inline]
364
0
    pub fn flattened_fields(&self) -> Vec<&Field> {
365
0
        self.fields.iter().flat_map(|f| f.fields()).collect()
366
0
    }
367
368
    /// Returns an immutable reference of a specific [`Field`] instance selected using an
369
    /// offset within the internal `fields` vector.
370
    ///
371
    /// # Panics
372
    ///
373
    /// Panics if index out of bounds
374
1
    pub fn field(&self, i: usize) -> &Field {
375
1
        &self.fields[i]
376
1
    }
377
378
    /// Returns an immutable reference of a specific [`Field`] instance selected by name.
379
0
    pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowError> {
380
0
        Ok(&self.fields[self.index_of(name)?])
381
0
    }
382
383
    /// Returns a vector of immutable references to all [`Field`] instances selected by
384
    /// the dictionary ID they use.
385
    #[deprecated(
386
        since = "54.0.0",
387
        note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
388
    )]
389
0
    pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
390
        #[allow(deprecated)]
391
0
        self.fields
392
0
            .iter()
393
0
            .flat_map(|f| f.fields_with_dict_id(dict_id))
394
0
            .collect()
395
0
    }
396
397
    /// Find the index of the column with the given name.
398
0
    pub fn index_of(&self, name: &str) -> Result<usize, ArrowError> {
399
0
        let (idx, _) = self.fields().find(name).ok_or_else(|| {
400
0
            let valid_fields: Vec<_> = self.fields.iter().map(|f| f.name()).collect();
401
0
            ArrowError::SchemaError(format!(
402
0
                "Unable to get field named \"{name}\". Valid fields: {valid_fields:?}"
403
0
            ))
404
0
        })?;
405
0
        Ok(idx)
406
0
    }
407
408
    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
409
    #[inline]
410
0
    pub const fn metadata(&self) -> &HashMap<String, String> {
411
0
        &self.metadata
412
0
    }
413
414
    /// Normalize a [`Schema`] into a flat table.
415
    ///
416
    /// Nested [`Field`]s will generate names separated by `separator`, up to a depth of `max_level`
417
    /// (unlimited if `None`).
418
    ///
419
    /// e.g. given a [`Schema`]:
420
    ///
421
    /// ```text
422
    ///     "foo": StructArray<"bar": Utf8>
423
    /// ```
424
    ///
425
    /// A separator of `"."` would generate a batch with the schema:
426
    ///
427
    /// ```text
428
    ///     "foo.bar": Utf8
429
    /// ```
430
    ///
431
    /// Note that giving a depth of `Some(0)` to `max_level` is the same as passing in `None`;
432
    /// it will be treated as unlimited.
433
    ///
434
    /// # Example
435
    ///
436
    /// ```
437
    /// # use std::sync::Arc;
438
    /// # use arrow_schema::{DataType, Field, Fields, Schema};
439
    /// let schema = Schema::new(vec![
440
    ///     Field::new(
441
    ///         "a",
442
    ///         DataType::Struct(Fields::from(vec![
443
    ///             Arc::new(Field::new("animals", DataType::Utf8, true)),
444
    ///             Arc::new(Field::new("n_legs", DataType::Int64, true)),
445
    ///         ])),
446
    ///         false,
447
    ///     ),
448
    /// ])
449
    /// .normalize(".", None)
450
    /// .expect("valid normalization");
451
    /// let expected = Schema::new(vec![
452
    ///     Field::new("a.animals", DataType::Utf8, true),
453
    ///     Field::new("a.n_legs", DataType::Int64, true),
454
    /// ]);
455
    /// assert_eq!(schema, expected);
456
    /// ```
457
0
    pub fn normalize(&self, separator: &str, max_level: Option<usize>) -> Result<Self, ArrowError> {
458
0
        let max_level = match max_level.unwrap_or(usize::MAX) {
459
0
            0 => usize::MAX,
460
0
            val => val,
461
        };
462
0
        let mut stack: Vec<(usize, Vec<&str>, &FieldRef)> = self
463
0
            .fields()
464
0
            .iter()
465
0
            .rev()
466
0
            .map(|f| {
467
0
                let name_vec: Vec<&str> = vec![f.name()];
468
0
                (0, name_vec, f)
469
0
            })
470
0
            .collect();
471
0
        let mut fields: Vec<FieldRef> = Vec::new();
472
473
0
        while let Some((depth, name, field_ref)) = stack.pop() {
474
0
            match field_ref.data_type() {
475
0
                DataType::Struct(ff) if depth < max_level => {
476
                    // Need to zip these in reverse to maintain original order
477
0
                    for fff in ff.into_iter().rev() {
478
0
                        let mut name = name.clone();
479
0
                        name.push(separator);
480
0
                        name.push(fff.name());
481
0
                        stack.push((depth + 1, name, fff))
482
                    }
483
                }
484
0
                _ => {
485
0
                    let updated_field = Field::new(
486
0
                        name.concat(),
487
0
                        field_ref.data_type().clone(),
488
0
                        field_ref.is_nullable(),
489
0
                    );
490
0
                    fields.push(Arc::new(updated_field));
491
0
                }
492
            }
493
        }
494
0
        Ok(Schema::new(fields))
495
0
    }
496
497
    /// Look up a column by name and return a immutable reference to the column along with
498
    /// its index.
499
0
    pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> {
500
0
        let (idx, field) = self.fields.find(name)?;
501
0
        Some((idx, field.as_ref()))
502
0
    }
503
504
    /// Check to see if `self` is a superset of `other` schema.
505
    ///
506
    /// In particular returns true if `self.metadata` is a superset of `other.metadata`
507
    /// and [`Fields::contains`] for `self.fields` and `other.fields`
508
    ///
509
    /// In other words, any record that conforms to `other` should also conform to `self`.
510
0
    pub fn contains(&self, other: &Schema) -> bool {
511
        // make sure self.metadata is a superset of other.metadata
512
0
        self.fields.contains(&other.fields)
513
0
            && other
514
0
                .metadata
515
0
                .iter()
516
0
                .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default())
517
0
    }
518
}
519
520
impl fmt::Display for Schema {
521
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
522
0
        f.write_str(
523
0
            &self
524
0
                .fields
525
0
                .iter()
526
0
                .map(|c| c.to_string())
527
0
                .collect::<Vec<String>>()
528
0
                .join(", "),
529
        )
530
0
    }
531
}
532
533
// need to implement `Hash` manually because `HashMap` implement Eq but no `Hash`
534
#[allow(clippy::derived_hash_with_manual_eq)]
535
impl Hash for Schema {
536
0
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
537
0
        self.fields.hash(state);
538
539
        // ensure deterministic key order
540
0
        let mut keys: Vec<&String> = self.metadata.keys().collect();
541
0
        keys.sort();
542
0
        for k in keys {
543
0
            k.hash(state);
544
0
            self.metadata.get(k).expect("key valid").hash(state);
545
0
        }
546
0
    }
547
}
548
549
#[cfg(test)]
550
mod tests {
551
    use crate::datatype::DataType;
552
    use crate::{TimeUnit, UnionMode};
553
554
    use super::*;
555
556
    #[test]
    #[cfg(feature = "serde")]
    fn test_ser_de_metadata() {
        // Round trip through JSON with empty metadata
        let plain = Schema::new(vec![
            Field::new("name", DataType::Utf8, false),
            Field::new("address", DataType::Utf8, false),
            Field::new("priority", DataType::UInt8, false),
        ]);
        let encoded = serde_json::to_string(&plain).unwrap();
        let decoded: Schema = serde_json::from_str(&encoded).unwrap();
        assert_eq!(plain, decoded);

        // Round trip through JSON with non-empty metadata
        let mut metadata = HashMap::new();
        metadata.insert("key".to_owned(), "val".to_owned());
        let annotated = plain.with_metadata(metadata);
        let encoded = serde_json::to_string(&annotated).unwrap();
        let decoded: Schema = serde_json::from_str(&encoded).unwrap();
        assert_eq!(annotated, decoded);
    }
579
580
    #[test]
    fn test_projection() {
        let metadata = HashMap::from([("meta".to_string(), "data".to_string())]);
        let fields = vec![
            Field::new("name", DataType::Utf8, false),
            Field::new("address", DataType::Utf8, false),
            Field::new("priority", DataType::UInt8, false),
        ];
        let schema = Schema::new_with_metadata(fields, metadata);

        // Keep the first and last columns; metadata must be carried over
        let projected: Schema = schema.project(&[0, 2]).unwrap();

        let kept = projected.fields();
        assert_eq!(kept.len(), 2);
        assert_eq!(kept[0].name(), "name");
        assert_eq!(kept[1].name(), "priority");
        assert_eq!(projected.metadata.get("meta").unwrap(), "data")
    }
599
600
    #[test]
    fn test_oob_projection() {
        let metadata = HashMap::from([("meta".to_string(), "data".to_string())]);
        let fields = vec![
            Field::new("name", DataType::Utf8, false),
            Field::new("address", DataType::Utf8, false),
            Field::new("priority", DataType::UInt8, false),
        ];
        let schema = Schema::new_with_metadata(fields, metadata);

        // Index 3 is one past the last of the three fields
        match schema.project(&[0, 3]) {
            Ok(_) => panic!("assertion failed: projected.is_err()"),
            Err(e) => assert_eq!(
                e.to_string(),
                "Schema error: project index 3 out of bounds, max field 3".to_string()
            ),
        }
    }
622
623
    #[test]
    fn test_schema_contains() {
        // Both schemas share the same three fields and differ only in metadata
        let fields = || {
            vec![
                Field::new("name", DataType::Utf8, false),
                Field::new("address", DataType::Utf8, false),
                Field::new("priority", DataType::UInt8, false),
            ]
        };

        let smaller_meta = HashMap::from([("meta".to_string(), "data".to_string())]);
        let larger_meta = HashMap::from([
            ("meta".to_string(), "data".to_string()),
            ("meta2".to_string(), "data".to_string()),
        ]);

        let schema1 = Schema::new(fields()).with_metadata(smaller_meta);
        let schema2 = Schema::new(fields()).with_metadata(larger_meta);

        // reflexivity
        assert!(schema1.contains(&schema1));
        assert!(schema2.contains(&schema2));

        // only the schema with the larger metadata map contains the other
        assert!(!schema1.contains(&schema2));
        assert!(schema2.contains(&schema1));
    }
652
653
    #[test]
    fn schema_equality() {
        // Fields shared by the two schemas that are expected to compare equal
        let base_fields = || {
            vec![
                Field::new("c1", DataType::Utf8, false),
                Field::new("c2", DataType::Float64, true),
                Field::new("c3", DataType::LargeBinary, true),
            ]
        };
        let schema1 = Schema::new(base_fields());
        let schema2 = Schema::new(base_fields());

        assert_eq!(schema1, schema2);

        // Fewer fields and a different type for `c2`
        let schema3 = Schema::new(vec![
            Field::new("c1", DataType::Utf8, false),
            Field::new("c2", DataType::Float32, true),
        ]);
        // Field names differing only in case
        let schema4 = Schema::new(vec![
            Field::new("C1", DataType::Utf8, false),
            Field::new("C2", DataType::Float64, true),
        ]);

        assert_ne!(schema1, schema3);
        assert_ne!(schema1, schema4);
        assert_ne!(schema2, schema3);
        assert_ne!(schema2, schema4);
        assert_ne!(schema3, schema4);

        // Same fields as `schema1`, except the first one carries field-level metadata
        let field_meta = HashMap::from([("foo".to_string(), "bar".to_string())]);
        let f = Field::new("c1", DataType::Utf8, false).with_metadata(field_meta);
        let schema5 = Schema::new(vec![
            f,
            Field::new("c2", DataType::Float64, true),
            Field::new("c3", DataType::LargeBinary, true),
        ]);
        assert_ne!(schema1, schema5);
    }
696
697
    // Checks the `Display` output of a schema: the fields' own string forms joined by ", ".
    #[test]
698
    fn create_schema_string() {
699
        // `person_schema` is a fixture defined elsewhere in this test module
        let schema = person_schema();
700
        assert_eq!(schema.to_string(),
701
                   "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {\"k\": \"v\"} }, \
702
        Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
703
        Field { name: \"address\", data_type: Struct([\
704
            Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
705
            Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }\
706
        ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
707
        Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: {} }")
708
    }
709
710
    #[test]
    fn schema_field_accessors() {
        let schema = person_schema();
        let fields = schema.fields();

        // test schema accessors
        assert_eq!(fields.len(), 4);

        // test field accessors on a plain string field
        let first_name = &fields[0];
        assert_eq!(first_name.name(), "first_name");
        assert_eq!(first_name.data_type(), &DataType::Utf8);
        assert!(!first_name.is_nullable());
        #[allow(deprecated)]
        let dict_id = first_name.dict_id();
        assert_eq!(dict_id, None);
        assert_eq!(first_name.dict_is_ordered(), None);

        // its metadata holds exactly one entry, "k" -> "v"
        let metadata = first_name.metadata();
        assert!(!metadata.is_empty());
        assert_eq!(metadata.len(), 1);
        let value = metadata.get("k");
        assert!(value.is_some());
        assert_eq!(value.unwrap(), "v");

        // test field accessors on the dictionary-encoded field
        let interests = &fields[3];
        assert_eq!(interests.name(), "interests");
        let expected_type =
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
        assert_eq!(interests.data_type(), &expected_type);
        #[allow(deprecated)]
        let dict_id = interests.dict_id();
        assert_eq!(dict_id, Some(123));
        assert_eq!(interests.dict_is_ordered(), Some(true));
    }
746
747
    #[test]
    #[should_panic(
        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
    )]
    fn schema_index_of() {
        let schema = person_schema();

        // Known names resolve to their positions
        let first = schema.index_of("first_name").unwrap();
        assert_eq!(first, 0);
        let last = schema.index_of("last_name").unwrap();
        assert_eq!(last, 1);

        // An unknown name yields an error; unwrapping it triggers the expected panic
        schema.index_of("nickname").unwrap();
    }
757
758
    #[test]
    fn normalize_simple() {
        // Builds the nested input afresh for each normalization call
        let nested = || {
            let inner = Fields::from(vec![
                Arc::new(Field::new("animals", DataType::Utf8, true)),
                Arc::new(Field::new("n_legs", DataType::Int64, true)),
                Arc::new(Field::new("year", DataType::Int64, true)),
            ]);
            Schema::new(vec![
                Field::new("a", DataType::Struct(inner), false),
                Field::new("month", DataType::Int64, true),
            ])
        };

        let expected = Schema::new(vec![
            Field::new("a.animals", DataType::Utf8, true),
            Field::new("a.n_legs", DataType::Int64, true),
            Field::new("a.year", DataType::Int64, true),
            Field::new("month", DataType::Int64, true),
        ]);

        let with_zero = nested()
            .normalize(".", Some(0))
            .expect("valid normalization");
        assert_eq!(with_zero, expected);

        // Check that 0, None have the same result
        let with_none = nested()
            .normalize(".", None)
            .expect("valid normalization");
        assert_eq!(with_none, expected);
    }
802
803
    #[test]
    fn normalize_nested() {
        // Shorthand for a reference-counted field
        let shared = |name: &str, data_type: DataType, nullable: bool| {
            Arc::new(Field::new(name, data_type, nullable))
        };

        let a = shared("a", DataType::Utf8, true);
        let b = shared("b", DataType::Int64, false);
        let c = shared("c", DataType::Int64, true);

        let d = shared("d", DataType::Utf8, true);
        let e = shared("e", DataType::Int64, false);
        let f = shared("f", DataType::Int64, true);

        // Two structs of leaves, wrapped together in an outer struct named "!"
        let one_fields = Fields::from(vec![a.clone(), b.clone(), c.clone()]);
        let one = shared("1", DataType::Struct(one_fields), false);
        let two_fields = Fields::from(vec![d.clone(), e.clone(), f.clone()]);
        let two = shared("2", DataType::Struct(two_fields), true);
        let exclamation = shared("!", DataType::Struct(Fields::from(vec![one, two])), false);

        // Unlimited depth: every leaf is emitted under its full dotted path
        let normalize_all = Schema::new(vec![exclamation.clone()])
            .normalize(".", Some(0))
            .expect("valid normalization");

        let expected = Schema::new(vec![
            Field::new("!.1.a", DataType::Utf8, true),
            Field::new("!.1.b", DataType::Int64, false),
            Field::new("!.1.c", DataType::Int64, true),
            Field::new("!.2.d", DataType::Utf8, true),
            Field::new("!.2.e", DataType::Int64, false),
            Field::new("!.2.f", DataType::Int64, true),
        ]);

        assert_eq!(normalize_all, expected);

        // Depth one: only the outer struct is unwrapped, the inner structs stay intact
        let normalize_depth_one = Schema::new(vec![exclamation])
            .normalize(".", Some(1))
            .expect("valid normalization");

        let expected = Schema::new(vec![
            Field::new("!.1", DataType::Struct(Fields::from(vec![a, b, c])), false),
            Field::new("!.2", DataType::Struct(Fields::from(vec![d, e, f])), true),
        ]);

        assert_eq!(normalize_depth_one, expected);
    }
856
857
    #[test]
    fn normalize_list() {
        // Only Struct-typed fields should be unwrapped. A list-like column that merely
        // *contains* a struct must come out of normalization with its type untouched.
        let a = Arc::new(Field::new("a", DataType::Utf8, true));
        let b = Arc::new(Field::new("b", DataType::Int64, false));
        let c = Arc::new(Field::new("c", DataType::Int64, true));
        let d = Arc::new(Field::new("d", DataType::Utf8, true));
        let e = Arc::new(Field::new("e", DataType::Int64, false));
        let f = Arc::new(Field::new("f", DataType::Int64, true));

        // Struct sibling that is flattened in every case below.
        let one = Arc::new(Field::new(
            "1",
            DataType::Struct(Fields::from(vec![a, b, c])),
            true,
        ));

        // Struct type used as the element type of every list-like column.
        let element_struct = DataType::Struct(Fields::from(vec![d, e, f]));

        // Wraps `list_type` as the non-nullable column "2" next to `one` inside the root
        // struct "!", normalizes with no depth limit, and checks that "!.1" is flattened
        // into its three leaves while "!.2" keeps exactly `list_type`.
        let assert_list_untouched = |label: &str, list_type: DataType| {
            let two = Arc::new(Field::new("2", list_type.clone(), false));
            let exclamation = Arc::new(Field::new(
                "!",
                DataType::Struct(Fields::from(vec![one.clone(), two])),
                false,
            ));

            let normalize_all = Schema::new(vec![exclamation])
                .normalize(".", None)
                .expect("valid normalization");

            let expected = Schema::new(vec![
                Field::new("!.1.a", DataType::Utf8, true),
                Field::new("!.1.b", DataType::Int64, false),
                Field::new("!.1.c", DataType::Int64, true),
                Field::new("!.2", list_type, false),
            ]);

            assert_eq!(normalize_all, expected, "{label} should not be unwrapped");
            assert_eq!(
                normalize_all.fields().len(),
                4,
                "{label} should stay a single column"
            );
        };

        // List
        assert_list_untouched(
            "List",
            DataType::List(Arc::new(Field::new_list_field(
                element_struct.clone(),
                true,
            ))),
        );

        // FixedSizeList (whose element is itself a fixed-size list of structs)
        assert_list_untouched(
            "FixedSizeList",
            DataType::FixedSizeList(
                Arc::new(Field::new_fixed_size_list(
                    "3",
                    Arc::new(Field::new_list_field(element_struct.clone(), true)),
                    1,
                    true,
                )),
                1,
            ),
        );

        // FixedSizeList whose element is a LargeList of structs
        assert_list_untouched(
            "FixedSizeList of LargeList",
            DataType::FixedSizeList(
                Arc::new(Field::new_large_list(
                    "3",
                    Arc::new(Field::new_list_field(element_struct.clone(), true)),
                    true,
                )),
                1,
            ),
        );

        // LargeList as the column type itself. The previous "LargeList" case only nested
        // a large list inside a FixedSizeList, so this variant was never the column type.
        assert_list_untouched(
            "LargeList",
            DataType::LargeList(Arc::new(Field::new_list_field(element_struct, true))),
        );
    }
1016
1017
    #[test]
    fn normalize_deep_nested() {
        // Every struct in this schema sits behind a List, so nothing should be unwrapped.
        let one = Arc::new(Field::new(
            "1",
            DataType::Struct(Fields::from(vec![
                Arc::new(Field::new("a", DataType::Utf8, true)),
                Arc::new(Field::new("b", DataType::Int64, false)),
                Arc::new(Field::new("c", DataType::Int64, true)),
            ])),
            true,
        ));

        let two = Arc::new(Field::new(
            "2",
            DataType::List(Arc::new(Field::new_list_field(
                DataType::Struct(Fields::from(vec![
                    Arc::new(Field::new("d", DataType::Utf8, true)),
                    Arc::new(Field::new("e", DataType::Int64, false)),
                    Arc::new(Field::new("f", DataType::Int64, true)),
                ])),
                true,
            ))),
            false,
        ));

        // Build "l10" (innermost, holding `one` and `two`) outwards to "l1". Each level
        // is a non-nullable List whose nullable element is a Struct of the previous level.
        let top_level = (1..=10).rev().fold(vec![one, two], |children, level| {
            vec![Arc::new(Field::new(
                format!("l{level}"),
                DataType::List(Arc::new(Field::new_list_field(
                    DataType::Struct(Fields::from(children)),
                    true,
                ))),
                false,
            ))]
        });

        let normalized = Schema::new(top_level)
            .normalize(".", None)
            .expect("valid normalization");

        assert_eq!(normalized.fields().len(), 1);
    }
1131
1132
    #[test]
    fn normalize_dictionary() {
        // A dictionary whose value type is a struct; the struct is not the column's own
        // type, so the column is expected to pass through normalization unchanged.
        let dictionary_of_struct = DataType::Dictionary(
            Box::new(DataType::Int32),
            Box::new(DataType::Struct(Fields::from(vec![
                Arc::new(Field::new("a", DataType::Utf8, true)),
                Arc::new(Field::new("b", DataType::Int64, false)),
            ]))),
        );

        let column = Arc::new(Field::new("1", dictionary_of_struct.clone(), false));

        let normalized = Schema::new(vec![column])
            .normalize(".", None)
            .expect("valid normalization");

        let unchanged = Schema::new(vec![Field::new("1", dictionary_of_struct, false)]);

        assert_eq!(normalized, unchanged);
    }
1161
1162
    #[test]
    #[should_panic(
        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
    )]
    fn schema_field_with_name() {
        let people = person_schema();

        // Fields that exist are returned under the name that was asked for.
        for present in ["first_name", "last_name"] {
            let found = people.field_with_name(present).unwrap();
            assert_eq!(found.name(), present);
        }

        // Unwrapping the lookup of a missing field is the panic this test expects.
        people.field_with_name("nickname").unwrap();
    }
1178
1179
    #[test]
    fn schema_field_with_dict_id() {
        let people = person_schema();

        // Dictionary id 123 is the one given to "interests" in `person_schema`.
        #[allow(deprecated)]
        let matching_123 = people.fields_with_dict_id(123);
        let matching_names: Vec<_> = matching_123.iter().map(|field| field.name()).collect();
        assert_eq!(matching_names, vec!["interests"]);

        // No field in `person_schema` carries dictionary id 456.
        #[allow(deprecated)]
        let matching_456 = people.fields_with_dict_id(456);
        assert!(matching_456.is_empty());
    }
1195
1196
    fn person_schema() -> Schema {
1197
        let kv_array = [("k".to_string(), "v".to_string())];
1198
        let field_metadata: HashMap<String, String> = kv_array.iter().cloned().collect();
1199
        let first_name =
1200
            Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);
1201
1202
        Schema::new(vec![
1203
            first_name,
1204
            Field::new("last_name", DataType::Utf8, false),
1205
            Field::new(
1206
                "address",
1207
                DataType::Struct(Fields::from(vec![
1208
                    Field::new("street", DataType::Utf8, false),
1209
                    Field::new("zip", DataType::UInt16, false),
1210
                ])),
1211
                false,
1212
            ),
1213
            #[allow(deprecated)]
1214
            Field::new_dict(
1215
                "interests",
1216
                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1217
                true,
1218
                123,
1219
                true,
1220
            ),
1221
        ])
1222
    }
1223
1224
    #[test]
    fn test_try_merge_field_with_metadata() {
        // Builds an owned metadata map from borrowed key/value pairs.
        let metadata_of = |pairs: &[(&str, &str)]| -> HashMap<String, String> {
            pairs
                .iter()
                .map(|&(key, value)| (key.to_owned(), value.to_owned()))
                .collect()
        };
        // Every case below merges two non-nullable Utf8 fields named "first_name".
        let bare_field = || Field::new("first_name", DataType::Utf8, false);

        // 1. The same key with different values must be rejected.
        let lhs = bare_field().with_metadata(metadata_of(&[("foo", "bar")]));
        let rhs = bare_field().with_metadata(metadata_of(&[("foo", "baz")]));
        let conflicting = Schema::try_merge(vec![Schema::new(vec![lhs]), Schema::new(vec![rhs])]);
        assert!(conflicting.is_err());

        // 2. None + Some: the target takes over the other field's metadata.
        let mut target = bare_field();
        let other = bare_field().with_metadata(metadata_of(&[("missing", "value")]));
        assert!(target.try_merge(&other).is_ok());
        assert!(!target.metadata().is_empty());
        assert_eq!(target.metadata(), other.metadata());

        // 3. Some + Some with distinct keys: both entries end up on the target.
        let mut target = bare_field().with_metadata(metadata_of(&[("foo", "bar")]));
        let other = bare_field().with_metadata(metadata_of(&[("foo2", "bar2")]));
        assert!(target.try_merge(&other).is_ok());
        assert!(!target.metadata().is_empty());
        assert_eq!(
            target.metadata().clone(),
            metadata_of(&[("foo", "bar"), ("foo2", "bar2")])
        );

        // 4. Some + None: the target keeps what it already had.
        let mut target = bare_field().with_metadata(metadata_of(&[("foo", "bar")]));
        let other = bare_field();
        assert!(target.try_merge(&other).is_ok());
        assert!(!target.metadata().is_empty());
        assert_eq!(target.metadata().clone(), metadata_of(&[("foo", "bar")]));

        // 5. None + None: the target's metadata stays empty.
        let mut target = bare_field();
        let other = bare_field();
        assert!(target.try_merge(&other).is_ok());
        assert!(target.metadata().is_empty());
    }
1304
1305
    #[test]
    fn test_schema_merge() {
        // Schema-level metadata shared by the second input and the expected result.
        let foo_bar: HashMap<String, String> =
            HashMap::from([("foo".to_string(), "bar".to_string())]);

        let base = Schema::new(vec![
            Field::new("first_name", DataType::Utf8, false),
            Field::new("last_name", DataType::Utf8, false),
            Field::new(
                "address",
                DataType::Struct(Fields::from(vec![Field::new(
                    "zip",
                    DataType::UInt16,
                    false,
                )])),
                false,
            ),
        ]);
        let overlay = Schema::new_with_metadata(
            vec![
                // same column, now nullable
                Field::new("last_name", DataType::Utf8, true),
                Field::new(
                    "address",
                    DataType::Struct(Fields::from(vec![
                        // nested field that `base` does not have
                        Field::new("street", DataType::Utf8, false),
                        // nested field that becomes nullable
                        Field::new("zip", DataType::UInt16, true),
                    ])),
                    false,
                ),
                // column that `base` does not have
                Field::new("number", DataType::Utf8, true),
            ],
            foo_bar.clone(),
        );

        let merged = Schema::try_merge(vec![base, overlay]).unwrap();

        let merged_expected = Schema::new_with_metadata(
            vec![
                Field::new("first_name", DataType::Utf8, false),
                Field::new("last_name", DataType::Utf8, true),
                Field::new(
                    "address",
                    DataType::Struct(Fields::from(vec![
                        Field::new("zip", DataType::UInt16, true),
                        Field::new("street", DataType::Utf8, false),
                    ])),
                    false,
                ),
                Field::new("number", DataType::Utf8, true),
            ],
            foo_bar,
        );
        assert_eq!(merged, merged_expected);

        // Union columns with overlapping members merge into the combined member set.
        let union_first = Schema::new(vec![Field::new_union(
            "c1",
            vec![0, 1],
            vec![
                Field::new("c11", DataType::Utf8, true),
                Field::new("c12", DataType::Utf8, true),
            ],
            UnionMode::Dense,
        )]);
        let union_second = Schema::new(vec![Field::new_union(
            "c1",
            vec![1, 2],
            vec![
                Field::new("c12", DataType::Utf8, true),
                Field::new("c13", DataType::Time64(TimeUnit::Second), true),
            ],
            UnionMode::Dense,
        )]);
        let union_expected = Schema::new(vec![Field::new_union(
            "c1",
            vec![0, 1, 2],
            vec![
                Field::new("c11", DataType::Utf8, true),
                Field::new("c12", DataType::Utf8, true),
                Field::new("c13", DataType::Time64(TimeUnit::Second), true),
            ],
            UnionMode::Dense,
        )]);
        assert_eq!(
            Schema::try_merge(vec![union_first, union_second]).unwrap(),
            union_expected
        );

        // The same column name with incompatible data types is an error.
        let utf8_names = Schema::new(vec![
            Field::new("first_name", DataType::Utf8, false),
            Field::new("last_name", DataType::Utf8, false),
        ]);
        let int_last_name = Schema::new(vec![Field::new("last_name", DataType::Int64, false)]);
        assert!(Schema::try_merge(vec![utf8_names, int_last_name]).is_err());

        // Conflicting schema-level metadata is an error that names the key and both values.
        let says_bar = Schema::new_with_metadata(
            vec![Field::new("first_name", DataType::Utf8, false)],
            HashMap::from([("foo".to_string(), "bar".to_string())]),
        );
        let says_baz = Schema::new_with_metadata(
            vec![Field::new("last_name", DataType::Utf8, false)],
            HashMap::from([("foo".to_string(), "baz".to_string())]),
        );
        let res = Schema::try_merge(vec![says_bar, says_baz]).unwrap_err();

        let expected = "Fail to merge schema due to conflicting metadata. Key 'foo' has different values 'bar' and 'baz'";
        assert!(
            res.to_string().contains(expected),
            "Could not find expected string '{expected}' in '{res}'"
        );
    }
1435
1436
    #[test]
    fn test_schema_builder_change_field() {
        let untouched = Field::new("a", DataType::Int32, false);
        let replacement = Field::new("c", DataType::Int32, false);

        let mut builder = SchemaBuilder::new();
        builder.push(untouched.clone());
        builder.push(Field::new("b", DataType::Utf8, false));

        // Overwrite the field at index 1 through the mutable accessor.
        *builder.field_mut(1) = Arc::new(replacement.clone());

        assert_eq!(
            builder.fields,
            vec![Arc::new(untouched), Arc::new(replacement)]
        );
    }
1450
1451
    #[test]
    fn test_schema_builder_reverse() {
        let pushed_first = Field::new("a", DataType::Int32, false);
        let pushed_second = Field::new("b", DataType::Utf8, true);

        let mut builder = SchemaBuilder::new();
        builder.push(pushed_first.clone());
        builder.push(pushed_second.clone());
        builder.reverse();

        // After reversing, the field pushed last comes first.
        assert_eq!(
            builder.fields,
            vec![Arc::new(pushed_second), Arc::new(pushed_first)]
        );
    }
1465
1466
    #[test]
    fn test_schema_builder_metadata() {
        // Start from a schema that already carries one metadata entry.
        let seeded = Schema::new(vec![Field::new("test", DataType::Int8, true)])
            .with_metadata(HashMap::from([("key".to_string(), "value".to_string())]));

        // Converting to a builder and adding a second entry must keep the first one.
        let mut builder = SchemaBuilder::from(seeded);
        builder.metadata_mut().insert("k".into(), "v".into());
        let out = builder.finish();

        assert_eq!(out.metadata.len(), 2);
        assert_eq!(out.metadata.get("k").map(String::as_str), Some("v"));
        assert_eq!(out.metadata.get("key").map(String::as_str), Some("value"));
    }
1479
}